// percent-encoding.js
"use strict";
const { isASCIIHex } = require("./infra");
const { utf8Encode } = require("./encoding");
// https://url.spec.whatwg.org/#percent-encode
// Formats the number `c` as "%" followed by its uppercase hexadecimal digits, left-padded with a single "0" when
// there is only one digit (e.g. 10 becomes "%0A").
function percentEncode(c) {
  const digits = c.toString(16).toUpperCase();
  const padding = digits.length === 1 ? "0" : "";
  return `%${padding}${digits}`;
}
// https://url.spec.whatwg.org/#percent-decode
// Walks `input` (indexed by position, sized by `byteLength`) and copies it to a new byte sequence, replacing each
// 0x25 ("%") that is followed by two bytes accepted by isASCIIHex with the single byte those two hex digits spell.
// A "%" that is not followed by two such bytes (including one too close to the end of the input) is copied
// through unchanged. Returns the decoded bytes as a Buffer.
function percentDecodeBytes(input) {
  const PERCENT_SIGN = 0x25;
  const length = input.byteLength;
  // The decoded form can never be longer than the input, so this is a safe upper bound.
  const decoded = new Uint8Array(length);
  let written = 0;
  let position = 0;

  while (position < length) {
    const current = input[position];
    const high = input[position + 1];
    const low = input[position + 2];

    if (current === PERCENT_SIGN && isASCIIHex(high) && isASCIIHex(low)) {
      decoded[written] = parseInt(String.fromCodePoint(high, low), 16);
      // Skip the "%" and both hex digits.
      position += 3;
    } else {
      decoded[written] = current;
      position += 1;
    }
    written += 1;
  }

  // TODO: remove the Buffer.from in the next major version; it's only needed for back-compat, and sticking to standard
  // typed arrays is nicer and simpler.
  // See https://github.com/jsdom/data-urls/issues/17 for background.
  return Buffer.from(decoded.slice(0, written));
}
// https://url.spec.whatwg.org/#string-percent-decode
// Passes `input` through utf8Encode and percent-decodes the resulting bytes; the return value is whatever
// percentDecodeBytes produces.
function percentDecodeString(input) {
  return percentDecodeBytes(utf8Encode(input));
}
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
// True when `c` is at most 0x1F or strictly greater than 0x7E; false for everything in 0x20..0x7E.
function isC0ControlPercentEncode(c) {
  if (c <= 0x1F) {
    return true;
  }
  return c > 0x7E;
}
// https://url.spec.whatwg.org/#fragment-percent-encode-set
// Values in this set on top of everything matched by isC0ControlPercentEncode.
const extraFragmentPercentEncodeSet = new Set([
  0x20, // space
  0x22, // "
  0x3C, // <
  0x3E, // >
  0x60 // `
]);
// True when `c` is one of the extra fragment values or is matched by isC0ControlPercentEncode.
function isFragmentPercentEncode(c) {
  if (extraFragmentPercentEncodeSet.has(c)) {
    return true;
  }
  return isC0ControlPercentEncode(c);
}
// https://url.spec.whatwg.org/#query-percent-encode-set
// Values in this set on top of everything matched by isC0ControlPercentEncode.
const extraQueryPercentEncodeSet = new Set([
  0x20, // space
  0x22, // "
  0x23, // #
  0x3C, // <
  0x3E // >
]);
// True when `c` is one of the extra query values or is matched by isC0ControlPercentEncode.
function isQueryPercentEncode(c) {
  if (extraQueryPercentEncodeSet.has(c)) {
    return true;
  }
  return isC0ControlPercentEncode(c);
}
// https://url.spec.whatwg.org/#special-query-percent-encode-set
// True when `c` is 0x27 (') or is matched by isQueryPercentEncode.
function isSpecialQueryPercentEncode(c) {
  const APOSTROPHE = 0x27;
  if (c === APOSTROPHE) {
    return true;
  }
  return isQueryPercentEncode(c);
}
// https://url.spec.whatwg.org/#path-percent-encode-set
// Values in this set on top of everything matched by isQueryPercentEncode.
const extraPathPercentEncodeSet = new Set([
  0x3F, // ?
  0x60, // `
  0x7B, // {
  0x7D // }
]);
// True when `c` is one of the extra path values or is matched by isQueryPercentEncode.
function isPathPercentEncode(c) {
  if (extraPathPercentEncodeSet.has(c)) {
    return true;
  }
  return isQueryPercentEncode(c);
}
// https://url.spec.whatwg.org/#userinfo-percent-encode-set
// Values in this set on top of everything matched by isPathPercentEncode.
const extraUserinfoPercentEncodeSet = new Set([
  0x2F, // /
  0x3A, // :
  0x3B, // ;
  0x3D, // =
  0x40, // @
  0x5B, // [
  0x5C, // \
  0x5D, // ]
  0x5E, // ^
  0x7C // |
]);
// True when `c` is one of the extra userinfo values or is matched by isPathPercentEncode.
function isUserinfoPercentEncode(c) {
  if (extraUserinfoPercentEncodeSet.has(c)) {
    return true;
  }
  return isPathPercentEncode(c);
}
// https://url.spec.whatwg.org/#component-percent-encode-set
// Values in this set on top of everything matched by isUserinfoPercentEncode.
const extraComponentPercentEncodeSet = new Set([
  0x24, // $
  0x25, // %
  0x26, // &
  0x2B, // +
  0x2C // ,
]);
// True when `c` is one of the extra component values or is matched by isUserinfoPercentEncode.
function isComponentPercentEncode(c) {
  if (extraComponentPercentEncodeSet.has(c)) {
    return true;
  }
  return isUserinfoPercentEncode(c);
}
// https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
// Values in this set on top of everything matched by isComponentPercentEncode.
const extraURLEncodedPercentEncodeSet = new Set([
  0x21, // !
  0x27, // '
  0x28, // (
  0x29, // )
  0x7E // ~
]);
// True when `c` is one of the extra urlencoded values or is matched by isComponentPercentEncode.
function isURLEncodedPercentEncode(c) {
  if (extraURLEncodedPercentEncodeSet.has(c)) {
    return true;
  }
  return isComponentPercentEncode(c);
}
// https://url.spec.whatwg.org/#code-point-percent-encode-after-encoding
// https://url.spec.whatwg.org/#utf-8-percent-encode
// The encoding is assumed to always be utf-8, which lets us drop one of the spec's logic branches.
// TODO: support encoding.
// This "-Internal" variant takes the code point as a JS string. The external version used by other files takes it
// as a JS number, like the rest of the codebase.
//
// Each byte produced by utf8Encode(codePoint) is appended to the result either as-is (via String.fromCharCode) or
// percent-encoded, depending on whether `percentEncodePredicate` returns a truthy value for it.
function utf8PercentEncodeCodePointInternal(codePoint, percentEncodePredicate) {
  let encoded = "";
  for (const octet of utf8Encode(codePoint)) {
    // The predicate is applied to bytes rather than code points, which deviates slightly from the spec.
    encoded += percentEncodePredicate(octet) ? percentEncode(octet) : String.fromCharCode(octet);
  }
  return encoded;
}
// Numeric-code-point entry point: converts `codePoint` to a one-code-point string with String.fromCodePoint and
// delegates to utf8PercentEncodeCodePointInternal with the same predicate.
function utf8PercentEncodeCodePoint(codePoint, percentEncodePredicate) {
  const asString = String.fromCodePoint(codePoint);
  return utf8PercentEncodeCodePointInternal(asString, percentEncodePredicate);
}
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
// https://url.spec.whatwg.org/#string-utf-8-percent-encode
// Iterates `input` code point by code point and concatenates the encoding of each one, as produced by
// utf8PercentEncodeCodePointInternal with `percentEncodePredicate`. When `spaceAsPlus` is truthy, a " " code point
// is emitted as "+" and never reaches the predicate.
function utf8PercentEncodeString(input, percentEncodePredicate, spaceAsPlus = false) {
  const pieces = [];
  for (const character of input) {
    const useplus = spaceAsPlus && character === " ";
    pieces.push(useplus ? "+" : utf8PercentEncodeCodePointInternal(character, percentEncodePredicate));
  }
  return pieces.join("");
}
// Public API of this module. Of the functions defined above, percentEncode, isComponentPercentEncode and
// utf8PercentEncodeCodePointInternal are not exported.
module.exports = {
// Percent-encode set predicates.
isC0ControlPercentEncode,
isFragmentPercentEncode,
isQueryPercentEncode,
isSpecialQueryPercentEncode,
isPathPercentEncode,
isUserinfoPercentEncode,
isURLEncodedPercentEncode,
// Percent-decoding.
percentDecodeString,
percentDecodeBytes,
// UTF-8 percent-encoding.
utf8PercentEncodeString,
utf8PercentEncodeCodePoint
};