string-punycode-to-ascii.js
5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
'use strict';
// based on https://github.com/bestiejs/punycode.js/blob/master/punycode.js
var uncurryThis = require('../internals/function-uncurry-this');
var maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1
var base = 36;
var tMin = 1;
var tMax = 26;
var skew = 38;
var damp = 700;
var initialBias = 72;
var initialN = 128; // 0x80
var delimiter = '-'; // '\x2D'
var regexNonASCII = /[^\0-\u007E]/; // non-ASCII chars
var regexSeparators = /[.\u3002\uFF0E\uFF61]/g; // RFC 3490 separators
var OVERFLOW_ERROR = 'Overflow: input needs wider integers to process';
var baseMinusTMin = base - tMin;
var $RangeError = RangeError;
var exec = uncurryThis(regexSeparators.exec);
var floor = Math.floor;
var fromCharCode = String.fromCharCode;
var charCodeAt = uncurryThis(''.charCodeAt);
var join = uncurryThis([].join);
var push = uncurryThis([].push);
var replace = uncurryThis(''.replace);
var split = uncurryThis(''.split);
var toLowerCase = uncurryThis(''.toLowerCase);
/**
* Creates an array containing the numeric code points of each Unicode
* character in the string. While JavaScript uses UCS-2 internally,
* this function will convert a pair of surrogate halves (each of which
* UCS-2 exposes as separate characters) into a single code point,
* matching UTF-16.
*/
var ucs2decode = function (string) {
var output = [];
var counter = 0;
var length = string.length;
while (counter < length) {
var value = charCodeAt(string, counter++);
if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
// It's a high surrogate, and there is a next character.
var extra = charCodeAt(string, counter++);
if ((extra & 0xFC00) == 0xDC00) { // Low surrogate.
push(output, ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
} else {
// It's an unmatched surrogate; only append this code unit, in case the
// next code unit is the high surrogate of a surrogate pair.
push(output, value);
counter--;
}
} else {
push(output, value);
}
}
return output;
};
/**
* Converts a digit/integer into a basic code point.
*/
var digitToBasic = function (digit) {
// 0..25 map to ASCII a..z or A..Z
// 26..35 map to ASCII 0..9
return digit + 22 + 75 * (digit < 26);
};
/**
* Bias adaptation function as per section 3.4 of RFC 3492.
* https://tools.ietf.org/html/rfc3492#section-3.4
*/
var adapt = function (delta, numPoints, firstTime) {
var k = 0;
delta = firstTime ? floor(delta / damp) : delta >> 1;
delta += floor(delta / numPoints);
while (delta > baseMinusTMin * tMax >> 1) {
delta = floor(delta / baseMinusTMin);
k += base;
}
return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
};
/**
* Converts a string of Unicode symbols (e.g. a domain name label) to a
* Punycode string of ASCII-only symbols.
*/
var encode = function (input) {
var output = [];
// Convert the input in UCS-2 to an array of Unicode code points.
input = ucs2decode(input);
// Cache the length.
var inputLength = input.length;
// Initialize the state.
var n = initialN;
var delta = 0;
var bias = initialBias;
var i, currentValue;
// Handle the basic code points.
for (i = 0; i < input.length; i++) {
currentValue = input[i];
if (currentValue < 0x80) {
push(output, fromCharCode(currentValue));
}
}
var basicLength = output.length; // number of basic code points.
var handledCPCount = basicLength; // number of code points that have been handled;
// Finish the basic string with a delimiter unless it's empty.
if (basicLength) {
push(output, delimiter);
}
// Main encoding loop:
while (handledCPCount < inputLength) {
// All non-basic code points < n have been handled already. Find the next larger one:
var m = maxInt;
for (i = 0; i < input.length; i++) {
currentValue = input[i];
if (currentValue >= n && currentValue < m) {
m = currentValue;
}
}
// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, but guard against overflow.
var handledCPCountPlusOne = handledCPCount + 1;
if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
throw $RangeError(OVERFLOW_ERROR);
}
delta += (m - n) * handledCPCountPlusOne;
n = m;
for (i = 0; i < input.length; i++) {
currentValue = input[i];
if (currentValue < n && ++delta > maxInt) {
throw $RangeError(OVERFLOW_ERROR);
}
if (currentValue == n) {
// Represent delta as a generalized variable-length integer.
var q = delta;
var k = base;
while (true) {
var t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
if (q < t) break;
var qMinusT = q - t;
var baseMinusT = base - t;
push(output, fromCharCode(digitToBasic(t + qMinusT % baseMinusT)));
q = floor(qMinusT / baseMinusT);
k += base;
}
push(output, fromCharCode(digitToBasic(q)));
bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
delta = 0;
handledCPCount++;
}
}
delta++;
n++;
}
return join(output, '');
};
module.exports = function (input) {
var encoded = [];
var labels = split(replace(toLowerCase(input), regexSeparators, '\u002E'), '.');
var i, label;
for (i = 0; i < labels.length; i++) {
label = labels[i];
push(encoded, exec(regexNonASCII, label) ? 'xn--' + encode(label) : label);
}
return join(encoded, '.');
};