forked from sveltejs/svelte
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhtml.ts
113 lines (98 loc) · 2.06 KB
/
html.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import htmlEntities from './entities';
const windows1252 = [
8364,
129,
8218,
402,
8222,
8230,
8224,
8225,
710,
8240,
352,
8249,
338,
141,
381,
143,
144,
8216,
8217,
8220,
8221,
8226,
8211,
8212,
732,
8482,
353,
8250,
339,
157,
382,
376,
];
const entityPattern = new RegExp(
`&(#?(?:x[\\w\\d]+|\\d+|${Object.keys(htmlEntities).join('|')}));?`,
'g'
);
export function decodeCharacterReferences(html: string) {
return html.replace(entityPattern, (match, entity) => {
let code;
// Handle named entities
if (entity[0] !== '#') {
code = htmlEntities[entity];
} else if (entity[1] === 'x') {
code = parseInt(entity.substring(2), 16);
} else {
code = parseInt(entity.substring(1), 10);
}
if (!code) {
return match;
}
return String.fromCodePoint(validateCode(code));
});
}
const NUL = 0;
// some code points are verboten. If we were inserting HTML, the browser would replace the illegal
// code points with alternatives in some cases - since we're bypassing that mechanism, we need
// to replace them ourselves
//
// Source: http://en.wikipedia.org/wiki/Character_encodings_in_HTML#Illegal_characters
function validateCode(code: number) {
// line feed becomes generic whitespace
if (code === 10) {
return 32;
}
// ASCII range. (Why someone would use HTML entities for ASCII characters I don't know, but...)
if (code < 128) {
return code;
}
// code points 128-159 are dealt with leniently by browsers, but they're incorrect. We need
// to correct the mistake or we'll end up with missing € signs and so on
if (code <= 159) {
return windows1252[code - 128];
}
// basic multilingual plane
if (code < 55296) {
return code;
}
// UTF-16 surrogate halves
if (code <= 57343) {
return NUL;
}
// rest of the basic multilingual plane
if (code <= 65535) {
return code;
}
// supplementary multilingual plane 0x10000 - 0x1ffff
if (code >= 65536 && code <= 131071) {
return code;
}
// supplementary ideographic plane 0x20000 - 0x2ffff
if (code >= 131072 && code <= 196607) {
return code;
}
return NUL;
}