// decoder_spec.js (3.11 KB)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
var should = require('should'),
needle = require('./../'),
Q = require('q'),
chardet = require('jschardet'),
helpers = require('./helpers');
/**
 * Specs for needle's `decode` option.
 *
 * The first two suites request pages from external hosts; the last one talks
 * to a local server created through `helpers.server`. Each test inspects the
 * response body with `chardet.detect` and checks whether a known non-ASCII
 * phrase is present in it.
 */
describe('character encoding', function() {

  // Assigned in the `before` hook of the EUC-JP suite and read by its tests.
  var url;

  // Allow up to five seconds per test; the first two suites go over the network.
  this.timeout(5000);

  describe('Given content-type: "text/html; charset=EUC-JP"', function() {

    before(function() {
      url = 'http://www.nina.jp/server/slackware/webapp/tomcat_charset.html';
    });

    describe('with decode = false', function() {

      it('does not decode', function(done) {
        needle.get(url, { decode: false }, function(err, resp) {
          // Report request failures to mocha instead of crashing on `resp`.
          if (err) return done(err);

          // NOTE(review): newer should.js releases expose `String` as a method
          // rather than a getter -- confirm against the installed version.
          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('windows-1252');
          // The Japanese phrase must not be found in the undecoded body.
          resp.body.indexOf('EUCを使う').should.eql(-1);
          done();
        });
      });

    });

    describe('with decode = true', function() {

      it('decodes', function(done) {
        needle.get(url, { decode: true }, function(err, resp) {
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('ascii');
          // After decoding, the Japanese phrase must be found in the body.
          resp.body.indexOf('EUCを使う').should.not.eql(-1);
          done();
        });
      });

    });

  });

  describe('Given content-type: "text/html but file is charset: gb2312', function() {

    it('encodes to UTF-8', function(done) {

      // Promise-returning wrapper around needle.get, bound to the Chinese site.
      var task = Q.nbind(needle.get, needle, 'http://www.chinesetop100.com/');

      // One request with decoding enabled, one with decoding disabled.
      var tasks = [
        Q.fcall(task, { decode: true }),
        Q.fcall(task, { decode: false })
      ];

      // The wrapper resolves with an array; its first entry is read as the
      // response object, from which only the body is kept.
      var results = tasks.map(function(pending) {
        return pending.then(function(obj) {
          return obj[0].body;
        });
      });

      // Run both requests concurrently. Assertions live in `then` so that a
      // failed request or a failed assertion rejects the chain and reaches
      // mocha through `done(err)`.
      Q.all(results)
        .then(function(bodies) {

          var charsets = [
            chardet.detect(bodies[0]).encoding,
            chardet.detect(bodies[1]).encoding
          ];

          // The first request asked for decoding...
          charsets[0].should.equal('ascii');
          bodies[0].indexOf('全球中文网站前二十强').should.not.equal(-1);

          // ...the second one did not.
          charsets[1].should.equal('windows-1252');
          bodies[1].indexOf('全球中文网站前二十强').should.equal(-1);
        })
        .done(function() { done(); }, done);
    });

  });

  describe('Given content-type: "text/html"', function() {

    var server,
        port = 54321,
        text = 'Magyarországi Fióktelepe';

    // Start a local server that answers with `text` and a charset-less
    // Content-Type header.
    before(function(done) {
      server = helpers.server({
        port: port,
        response: text,
        headers: { 'Content-Type': 'text/html' }
      }, done);
    });

    after(function(done) {
      server.close(done);
    });

    describe('with decode = false', function() {

      // NOTE(review): the title mentions decoding by default while the request
      // passes `decode: false` -- confirm which behaviour is intended.
      it('decodes by default to utf-8', function(done) {
        needle.get('http://localhost:' + port, { decode: false }, function(err, resp) {
          if (err) return done(err);

          resp.body.should.be.a.String;
          chardet.detect(resp.body).encoding.should.eql('ISO-8859-2');
          // The body must match the fixture string the server was given.
          resp.body.should.eql(text);
          done();
        });
      });

    });

  });

});