// l.js
'use strict';
// Lexer constructor. A fresh instance holds no text: `buf` is null and
// both `pos` and `buflen` are 0 until a buffer is attached.
var Lexer = exports.Lexer = function() {
  // Scanning state: offset of the next unread character, the text being
  // scanned, and the cached length of that text.
  this.pos = 0;
  this.buf = null;
  this.buflen = 0;

  // Single-character operators: maps the operator character to the name
  // reported for its token. '/' has no entry in this table.
  this.optable = {
    '+':  'PLUS',
    '-':  'MINUS',
    '*':  'MULTIPLY',
    '.':  'PERIOD',
    '\\': 'BACKSLASH',
    ':':  'COLON',
    '%':  'PERCENT',
    '|':  'PIPE',
    '!':  'EXCLAMATION',
    '?':  'QUESTION',
    '#':  'POUND',
    '&':  'AMPERSAND',
    ';':  'SEMI',
    ',':  'COMMA',
    '(':  'L_PAREN',
    ')':  'R_PAREN',
    '<':  'L_ANG',
    '>':  'R_ANG',
    '{':  'L_BRACE',
    '}':  'R_BRACE',
    '[':  'L_BRACKET',
    ']':  'R_BRACKET',
    '=':  'EQUALS'
  };
};
// Attach a new buffer to the lexer and rewind to its beginning. Progress
// made on any previous buffer is discarded, so the next token() call
// starts from offset 0 of `buf`.
//   buf: the text to tokenize; its `length` is cached in `buflen`.
Lexer.prototype.input = function(buf) {
  this.buf = buf;
  this.pos = 0;
  this.buflen = this.buf.length;
};
// Return the next token from the current buffer, or null once only
// whitespace (or nothing) remains. A token is an object with:
// - name: kind of token (an operator name from optable, 'DIVIDE', or
//   whatever the _process_* helper that produced it reports).
// - value: the exact text of the token.
// - pos: offset in the buffer where the token starts.
//
// Throws Error when the next character cannot start any known token.
Lexer.prototype.token = function() {
  this._skipnontokens();
  if (this.pos >= this.buflen) {
    return null;
  }

  // The character at `start` belongs to a real token; dispatch on it.
  var start = this.pos;
  var ch = this.buf.charAt(start);
  var tok;

  // '/' needs one character of lookahead: '//' opens a comment, while a
  // lone '/' is the DIVIDE operator.
  if (ch === '/') {
    if (this.buf.charAt(start + 1) === '/') {
      return this._process_comment();
    }
    tok = {name: 'DIVIDE', value: '/', pos: start};
    this.pos++;
    return tok;
  }

  // Every other operator is a single character listed in the table.
  var opname = this.optable[ch];
  if (opname !== undefined) {
    tok = {name: opname, value: ch, pos: start};
    this.pos++;
    return tok;
  }

  // Not an operator, so the first character decides the multi-character
  // token kind.
  if (Lexer._isalpha(ch)) {
    return this._process_identifier();
  }
  if (Lexer._isdigit(ch)) {
    return this._process_number();
  }
  if (ch === '"') {
    return this._process_quote();
  }
  throw Error('Token error at ' + this.pos);
};
// True when `c` is exactly a carriage return or a line feed.
Lexer._isnewline = function(c) {
  switch (c) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
};
// True when `c` falls in the range '0'..'9' (decimal digit characters).
Lexer._isdigit = function(c) {
  if (!(c >= '0')) {
    return false;
  }
  return c <= '9';
};
// True when `c` may start an identifier: an ASCII letter of either case,
// an underscore, or a dollar sign.
Lexer._isalpha = function(c) {
  if (c === '_' || c === '$') {
    return true;
  }
  var lower = c >= 'a' && c <= 'z';
  var upper = c >= 'A' && c <= 'Z';
  return lower || upper;
};
// True when `c` may continue an identifier: an ASCII letter of either
// case, a decimal digit, an underscore, or a dollar sign.
Lexer._isalphanum = function(c) {
  if (c === '_' || c === '$') {
    return true;
  }
  var lower = c >= 'a' && c <= 'z';
  var upper = c >= 'A' && c <= 'Z';
  var digit = c >= '0' && c <= '9';
  return lower || upper || digit;
};
// Consume a run of decimal digits beginning at this.pos and return it as
// a NUMBER token. The character at this.pos is taken as part of the
// number without being re-checked; scanning stops at the first non-digit
// or at the end of the buffer. this.pos is left just past the run.
Lexer.prototype._process_number = function() {
  var start = this.pos;
  var stop = start + 1;
  for (; stop < this.buflen; stop++) {
    if (!Lexer._isdigit(this.buf.charAt(stop))) {
      break;
    }
  }
  this.pos = stop;
  return {
    name: 'NUMBER',
    value: this.buf.substring(start, stop),
    pos: start
  };
};
// Consume a '//' line comment beginning at this.pos and return it as a
// COMMENT token. The token's value runs from the two slashes up to, but
// not including, the terminating '\r' or '\n' (or the end of the buffer).
//
// Expects this.pos to point at the first of the two slashes; they are
// skipped without being re-checked.
//
// Afterwards this.pos sits just past the newline that ended the comment,
// or exactly at the end of the buffer when the comment was the last thing
// in it, so it never runs past buflen.
Lexer.prototype._process_comment = function() {
  var start = this.pos;
  var endpos = start + 2;

  // Advance to the end of the line.
  while (endpos < this.buflen &&
         !Lexer._isnewline(this.buf.charAt(endpos))) {
    endpos++;
  }

  var tok = {
    name: 'COMMENT',
    value: this.buf.substring(start, endpos),
    pos: start
  };

  // Step over the newline only when one is actually there; a comment that
  // ends the buffer has nothing left to skip.
  this.pos = endpos < this.buflen ? endpos + 1 : endpos;
  return tok;
};
// Consume an identifier beginning at this.pos and return it as an
// IDENTIFIER token. The character at this.pos is taken as the first
// character without being re-checked; the identifier extends over every
// following character accepted by Lexer._isalphanum. this.pos is left
// just past the identifier.
Lexer.prototype._process_identifier = function() {
  var start = this.pos;
  var stop = start + 1;
  for (; stop < this.buflen; stop++) {
    if (!Lexer._isalphanum(this.buf.charAt(stop))) {
      break;
    }
  }
  this.pos = stop;
  return {
    name: 'IDENTIFIER',
    value: this.buf.substring(start, stop),
    pos: start
  };
};
// Consume a double-quoted string beginning at this.pos (which points at
// the opening quote) and return it as a QUOTE token whose value includes
// both quote characters. The string ends at the very next '"' in the
// buffer. this.pos is left just past the closing quote.
//
// Throws Error when no closing quote exists; this.pos is then unchanged.
Lexer.prototype._process_quote = function() {
  var start = this.pos;
  var closing = this.buf.indexOf('"', start + 1);
  if (closing === -1) {
    throw Error('Unterminated quote at ' + this.pos);
  }

  var afterClose = closing + 1;
  this.pos = afterClose;
  return {
    name: 'QUOTE',
    value: this.buf.substring(start, afterClose),
    pos: start
  };
};
// Advance this.pos past any run of spaces, tabs, carriage returns and
// line feeds, stopping at the first other character or at the end of the
// buffer.
Lexer.prototype._skipnontokens = function() {
  for (; this.pos < this.buflen; this.pos++) {
    var ch = this.buf.charAt(this.pos);
    if (ch !== ' ' && ch !== '\t' && ch !== '\r' && ch !== '\n') {
      return;
    }
  }
};