-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathurl-metadata-parser.ts
67 lines (61 loc) · 2.18 KB
/
url-metadata-parser.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import axios, {AxiosResponse} from 'axios';
import {from} from 'rxjs/internal/observable/from';
import {Observable} from 'rxjs/internal/Observable';
import {concatMap, map} from 'rxjs/operators';
import {of} from 'rxjs/internal/observable/of';
import {Metatag} from './metatag';
import {MetaEntity} from './meta.entity';
import * as iconvLte from 'iconv-lite';
/** Name of a character encoding, e.g. `'UTF-8'`. */
export type Charset = string;
/** Result of BOM sniffing: the detected charset, or `null` when no known BOM matched. */
export type IntermediateResult = Charset | null;
/** Messages for the errors raised by `UrlMetadataParser`. */
export enum Errors {
// Raised by `UrlMetadataParser.parse` when the decoded response body is empty.
ContentsDoesNotExists = 'Contents Does not exists.'
}
/**
 * Downloads a page and collects its `<meta ...>` tags into a `MetaEntity`.
 *
 * The response is fetched as raw bytes so the character encoding can be
 * sniffed from a leading byte-order mark (BOM) before the body is decoded.
 */
export class UrlMetadataParser {
  /**
   * Known byte-order marks paired with the charset each one announces.
   *
   * This is deliberately an ordered list of pairs and NOT a `Map` keyed by
   * charset: UTF-7 has several distinct BOMs, and a `Map` would keep only the
   * last entry for the repeated `'utf-7'` key, silently dropping the others.
   */
  private static readonly boms: ReadonlyArray<readonly [Charset, Buffer]> = [
    ['utf-1', Buffer.from([0xF7, 0x64, 0x4C])],
    ['utf-7', Buffer.from([0x2B, 0x2F, 0x76, 0x38])],
    ['utf-7', Buffer.from([0x2B, 0x2F, 0x76, 0x39])],
    ['utf-7', Buffer.from([0x2B, 0x2F, 0x76, 0x2B])],
    ['utf-7', Buffer.from([0x2B, 0x2F, 0x76, 0x3F])],
    ['utf-7', Buffer.from([0x2B, 0x2F, 0x76, 0x38, 0x2D])],
    ['utf-8', Buffer.from([0xEF, 0xBB, 0xBF])],
    ['utf-16be', Buffer.from([0xFE, 0xFF])],
    ['utf-16le', Buffer.from([0xFF, 0xFE])],
    ['utf-ebcdic', Buffer.from([0xDD, 0x73, 0x66, 0x73])],
    ['scsu', Buffer.from([0x0E, 0xFE, 0xFF])],
    ['bocu-1', Buffer.from([0xFB, 0xEE, 0x28])],
    ['gb-18030', Buffer.from([0x84, 0x31, 0x95, 0x33])],
  ];

  /**
   * Detects the charset announced by the byte-order mark at the start of `buf`.
   *
   * @param buf Raw bytes of the document.
   * @returns An observable emitting the upper-cased charset name of the first
   *          matching BOM, or `null` when `buf` starts with none of the known
   *          BOMs (including when `buf` is empty or shorter than every BOM).
   */
  public static getCharsetByBom(buf: Buffer): Observable<IntermediateResult> {
    const hit = UrlMetadataParser.boms.find(
      ([, bom]) => buf.length >= bom.length && buf.subarray(0, bom.length).equals(bom)
    );
    return of(hit ? hit[0].toUpperCase() : null);
  }

  /**
   * Fetches `url` and parses every `<meta ...>` tag found in the response body.
   *
   * The body is decoded with the charset detected from its BOM, falling back
   * to UTF-8 when no BOM is present.
   *
   * @param url Address of the page to fetch.
   * @returns An observable emitting a `MetaEntity` built from the page's meta
   *          tags; a page without any meta tag yields an entity built from an
   *          empty list.
   * @throws Error with message `Errors.ContentsDoesNotExists` (delivered
   *         through the observable) when the decoded body is empty.
   */
  public static parse(url: string): Observable<MetaEntity> {
    return from(axios.get(url, {
      responseType: 'arraybuffer',
    })).pipe(
      // Reference the class by name rather than `this` so the method keeps
      // working when it is passed around detached from the class.
      concatMap((res: AxiosResponse) => UrlMetadataParser.getCharsetByBom(res.data).pipe(
        map((charset: IntermediateResult) => {
          const body = iconvLte.decode(res.data, charset || 'UTF-8');
          if (body.length <= 0) {
            throw new Error(Errors.ContentsDoesNotExists);
          }
          // `String.prototype.match` returns null, not an empty array, when
          // nothing matches; guard so pages without meta tags do not crash.
          const rawTags = body.match(/<meta[^>]+>/g) || [];
          return rawTags.map(val => new Metatag(val));
        })
      )),
      map((tags: Metatag[]) => new MetaEntity(tags))
    );
  }
}