index.js
3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
'use strict';
var url = require('url');
var punycode = require('punycode');
var queryString = require('query-string');
var prependHttp = require('prepend-http');
var sortKeys = require('sort-keys');
var objectAssign = require('object-assign');
/**
 * Default port number per protocol. Keys carry the trailing colon because
 * the table is indexed with the `protocol` field of a parsed URL object;
 * a URL whose explicit port equals the entry here gets the port dropped.
 */
var DEFAULT_PORTS = {'http:': 80, 'https:': 443, 'ftp:': 21};
// Protocols that always contain a `//` bit. Every scheme is registered
// twice: once bare and once with its trailing colon, so a lookup succeeds
// with either spelling.
var slashedProtocol = {};
['', ':'].forEach(function (suffix) {
	['http', 'https', 'ftp', 'gopher', 'file'].forEach(function (scheme) {
		slashedProtocol[scheme + suffix] = true;
	});
});
/**
 * Check whether `name` is matched by at least one entry of `filters`.
 *
 * A RegExp entry matches when its pattern tests true against `name`; any
 * other entry matches only when it is strictly equal to `name`.
 *
 * @param {string} name - Value to test (a query parameter name or a path component).
 * @param {Array<RegExp|string>} filters - Patterns and/or exact values to match against.
 * @returns {boolean} `true` if any filter matches, otherwise `false` (also for an empty list).
 */
function testParameter(name, filters) {
	return filters.some(function (filter) {
		if (filter instanceof RegExp) {
			// A global or sticky regex remembers `lastIndex` between `.test()`
			// calls, so reusing the same filter for several names would start
			// matching mid-string and miss names it should match. Rewind first.
			if (filter.global || filter.sticky) {
				filter.lastIndex = 0;
			}
			return filter.test(name);
		}
		return filter === name;
	});
}
module.exports = function (str, opts) {
opts = objectAssign({
normalizeProtocol: true,
normalizeHttps: false,
stripFragment: true,
stripWWW: true,
removeQueryParameters: [/^utm_\w+/i],
removeTrailingSlash: true,
removeDirectoryIndex: false
}, opts);
if (typeof str !== 'string') {
throw new TypeError('Expected a string');
}
var hasRelativeProtocol = str.indexOf('//') === 0;
// prepend protocol
str = prependHttp(str.trim()).replace(/^\/\//, 'http://');
var urlObj = url.parse(str);
if (opts.normalizeHttps && urlObj.protocol === 'https:') {
urlObj.protocol = 'http:';
}
if (!urlObj.hostname && !urlObj.pathname) {
throw new Error('Invalid URL');
}
// prevent these from being used by `url.format`
delete urlObj.host;
delete urlObj.query;
// remove fragment
if (opts.stripFragment) {
delete urlObj.hash;
}
// remove default port
var port = DEFAULT_PORTS[urlObj.protocol];
if (Number(urlObj.port) === port) {
delete urlObj.port;
}
// remove duplicate slashes
if (urlObj.pathname) {
urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/');
}
// decode URI octets
if (urlObj.pathname) {
urlObj.pathname = decodeURI(urlObj.pathname);
}
// remove directory index
if (opts.removeDirectoryIndex === true) {
opts.removeDirectoryIndex = [/^index\.[a-z]+$/];
}
if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length) {
var pathComponents = urlObj.pathname.split('/');
var lastComponent = pathComponents[pathComponents.length - 1];
if (testParameter(lastComponent, opts.removeDirectoryIndex)) {
pathComponents = pathComponents.slice(0, pathComponents.length - 1);
urlObj.pathname = pathComponents.slice(1).join('/') + '/';
}
}
// resolve relative paths, but only for slashed protocols
if (slashedProtocol[urlObj.protocol]) {
var domain = urlObj.protocol + '//' + urlObj.hostname;
var relative = url.resolve(domain, urlObj.pathname);
urlObj.pathname = relative.replace(domain, '');
}
if (urlObj.hostname) {
// IDN to Unicode
urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase();
// remove trailing dot
urlObj.hostname = urlObj.hostname.replace(/\.$/, '');
// remove `www.`
if (opts.stripWWW) {
urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
}
}
// remove URL with empty query string
if (urlObj.search === '?') {
delete urlObj.search;
}
var queryParameters = queryString.parse(urlObj.search);
// remove query unwanted parameters
if (Array.isArray(opts.removeQueryParameters)) {
for (var key in queryParameters) {
if (testParameter(key, opts.removeQueryParameters)) {
delete queryParameters[key];
}
}
}
// sort query parameters
urlObj.search = queryString.stringify(sortKeys(queryParameters));
// decode query parameters
urlObj.search = decodeURIComponent(urlObj.search);
// take advantage of many of the Node `url` normalizations
str = url.format(urlObj);
// remove ending `/`
if (opts.removeTrailingSlash || urlObj.pathname === '/') {
str = str.replace(/\/$/, '');
}
// restore relative protocol, if applicable
if (hasRelativeProtocol && !opts.normalizeProtocol) {
str = str.replace(/^http:\/\//, '//');
}
return str;
};