1 |
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
2 |
* |
3 |
* ***** BEGIN LICENSE BLOCK ***** |
4 |
* Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
5 |
* |
6 |
* The contents of this file are subject to the Mozilla Public License Version |
7 |
* 1.1 (the "License"); you may not use this file except in compliance with |
8 |
* the License. You may obtain a copy of the License at |
9 |
* http://www.mozilla.org/MPL/ |
10 |
* |
11 |
* Software distributed under the License is distributed on an "AS IS" basis, |
12 |
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
13 |
* for the specific language governing rights and limitations under the |
14 |
* License. |
15 |
* |
16 |
* The Original Code is Mozilla Communicator client code, released |
17 |
* March 31, 1998. |
18 |
* |
19 |
* The Initial Developer of the Original Code is |
20 |
* Netscape Communications Corporation. |
21 |
* Portions created by the Initial Developer are Copyright (C) 1998 |
22 |
* the Initial Developer. All Rights Reserved. |
23 |
* |
24 |
* Contributor(s): |
25 |
* |
26 |
* Alternatively, the contents of this file may be used under the terms of |
27 |
* either of the GNU General Public License Version 2 or later (the "GPL"), |
28 |
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
29 |
* in which case the provisions of the GPL or the LGPL are applicable instead |
30 |
* of those above. If you wish to allow use of your version of this file only |
31 |
* under the terms of either the GPL or the LGPL, and not to allow others to |
32 |
* use your version of this file under the terms of the MPL, indicate your |
33 |
* decision by deleting the provisions above and replace them with the notice |
34 |
* and other provisions required by the GPL or the LGPL. If you do not delete |
35 |
* the provisions above, a recipient may use your version of this file under |
36 |
* the terms of any one of the MPL, the GPL or the LGPL. |
37 |
* |
38 |
* ***** END LICENSE BLOCK ***** */ |
39 |
|
40 |
#ifndef jsscan_h___ |
41 |
#define jsscan_h___ |
42 |
/* |
43 |
* JS lexical scanner interface. |
44 |
*/ |
45 |
#include <stddef.h> |
46 |
#include <stdio.h> |
47 |
#include "jsversion.h" |
48 |
#include "jsopcode.h" |
49 |
#include "jsprvtd.h" |
50 |
#include "jspubtd.h" |
51 |
#include "jsvector.h" |
52 |
|
53 |
JS_BEGIN_EXTERN_C |
54 |
|
55 |
/*
 * Declare an extern string constant named js_<keyword>_str for each use of
 * JS_KEYWORD in jskeyword.tbl (presumably one per keyword-table entry).  Only
 * the |keyword| argument is used by this expansion; |type|, |op| and
 * |version| are ignored here.  The macro is undefined again right after the
 * table has been included.
 */
#define JS_KEYWORD(keyword, type, op, version) \
    extern const char js_##keyword##_str[];
#include "jskeyword.tbl"
#undef JS_KEYWORD
59 |
|
60 |
/*
 * Token (and parse-node) type codes.  Every enumerator up to TOK_UPVARS has an
 * explicit value; TOK_RESERVED and TOK_LIMIT simply follow on from it.
 */
typedef enum JSTokenType {
    TOK_ERROR        = -1,  /* error; well-known as the only code < EOF */
    TOK_EOF          =  0,  /* end of file */
    TOK_EOL          =  1,  /* end of line */
    TOK_SEMI         =  2,  /* semicolon */
    TOK_COMMA        =  3,  /* comma operator */
    TOK_ASSIGN       =  4,  /* assignment operators (= += -= etc.) */
    TOK_HOOK         =  5,  /* conditional, first part (?) */
    TOK_COLON        =  6,  /* conditional, second part (:) */
    TOK_OR           =  7,  /* logical or (||) */
    TOK_AND          =  8,  /* logical and (&&) */
    TOK_BITOR        =  9,  /* bitwise-or (|) */
    TOK_BITXOR       = 10,  /* bitwise-xor (^) */
    TOK_BITAND       = 11,  /* bitwise-and (&) */
    TOK_EQOP         = 12,  /* equality operators (== !=) */
    TOK_RELOP        = 13,  /* relational operators (< <= > >=) */
    TOK_SHOP         = 14,  /* shift operators (<< >> >>>) */
    TOK_PLUS         = 15,  /* plus */
    TOK_MINUS        = 16,  /* minus */
    TOK_STAR         = 17,  /* multiply/divide ops (* / %), see TOK_DIVOP */
    TOK_DIVOP        = 18,  /* multiply/divide ops (* / %), see TOK_STAR */
    TOK_UNARYOP      = 19,  /* unary prefix operator */
    TOK_INC          = 20,  /* increment (++) */
    TOK_DEC          = 21,  /* decrement (--) */
    TOK_DOT          = 22,  /* member operator (.) */
    TOK_LB           = 23,  /* left bracket */
    TOK_RB           = 24,  /* right bracket */
    TOK_LC           = 25,  /* left curly (brace) */
    TOK_RC           = 26,  /* right curly (brace) */
    TOK_LP           = 27,  /* left parenthesis */
    TOK_RP           = 28,  /* right parenthesis */
    TOK_NAME         = 29,  /* identifier */
    TOK_NUMBER       = 30,  /* numeric constant */
    TOK_STRING       = 31,  /* string constant */
    TOK_REGEXP       = 32,  /* RegExp constant */
    TOK_PRIMARY      = 33,  /* true, false, null, this, super */
    TOK_FUNCTION     = 34,  /* function keyword */
    TOK_IF           = 35,  /* if keyword */
    TOK_ELSE         = 36,  /* else keyword */
    TOK_SWITCH       = 37,  /* switch keyword */
    TOK_CASE         = 38,  /* case keyword */
    TOK_DEFAULT      = 39,  /* default keyword */
    TOK_WHILE        = 40,  /* while keyword */
    TOK_DO           = 41,  /* do keyword */
    TOK_FOR          = 42,  /* for keyword */
    TOK_BREAK        = 43,  /* break keyword */
    TOK_CONTINUE     = 44,  /* continue keyword */
    TOK_IN           = 45,  /* in keyword */
    TOK_VAR          = 46,  /* var keyword */
    TOK_WITH         = 47,  /* with keyword */
    TOK_RETURN       = 48,  /* return keyword */
    TOK_NEW          = 49,  /* new keyword */
    TOK_DELETE       = 50,  /* delete keyword */
    TOK_DEFSHARP     = 51,  /* #n= for object/array initializers */
    TOK_USESHARP     = 52,  /* #n# for object/array initializers */
    TOK_TRY          = 53,  /* try keyword */
    TOK_CATCH        = 54,  /* catch keyword */
    TOK_FINALLY      = 55,  /* finally keyword */
    TOK_THROW        = 56,  /* throw keyword */
    TOK_INSTANCEOF   = 57,  /* instanceof keyword */
    TOK_DEBUGGER     = 58,  /* debugger keyword */
    TOK_XMLSTAGO     = 59,  /* XML start tag open (<) */
    TOK_XMLETAGO     = 60,  /* XML end tag open (</) */
    TOK_XMLPTAGC     = 61,  /* XML point tag close (/>) */
    TOK_XMLTAGC      = 62,  /* XML start or end tag close (>) */
    TOK_XMLNAME      = 63,  /* XML start-tag non-final fragment */
    TOK_XMLATTR      = 64,  /* XML quoted attribute value */
    TOK_XMLSPACE     = 65,  /* XML whitespace */
    TOK_XMLTEXT      = 66,  /* XML text */
    TOK_XMLCOMMENT   = 67,  /* XML comment */
    TOK_XMLCDATA     = 68,  /* XML CDATA section */
    TOK_XMLPI        = 69,  /* XML processing instruction */
    TOK_AT           = 70,  /* XML attribute op (@) */
    TOK_DBLCOLON     = 71,  /* namespace qualified name op (::) */
    TOK_ANYNAME      = 72,  /* XML AnyName singleton (*) */
    TOK_DBLDOT       = 73,  /* XML descendant op (..) */
    TOK_FILTER       = 74,  /* XML filtering predicate op (.()) */
    TOK_XMLELEM      = 75,  /* XML element node type (no token) */
    TOK_XMLLIST      = 76,  /* XML list node type (no token) */
    TOK_YIELD        = 77,  /* yield from generator function */
    TOK_ARRAYCOMP    = 78,  /* array comprehension initialiser */
    TOK_ARRAYPUSH    = 79,  /* array push within comprehension */
    TOK_LEXICALSCOPE = 80,  /* block scope AST node label */
    TOK_LET          = 81,  /* let keyword */
    TOK_SEQ          = 82,  /* synthetic sequence of statements, not a
                               block */
    TOK_FORHEAD      = 83,  /* head of for(;;)-style loop */
    TOK_ARGSBODY     = 84,  /* formal args in list + body at end */
    TOK_UPVARS       = 85,  /* lexical dependencies as JSAtomList of
                               definitions paired with a parse tree full
                               of uses of those names */
    TOK_RESERVED,           /* reserved keywords */
    TOK_LIMIT               /* domain size */
} JSTokenType;
148 |
|
149 |
/*
 * True when tt lies in the contiguous enumerator range TOK_NAME..TOK_PRIMARY,
 * i.e. TOK_NAME, TOK_NUMBER, TOK_STRING, TOK_REGEXP or TOK_PRIMARY.
 *
 * Both bounds are checked with a single comparison: the offset from TOK_NAME
 * is converted to uintN, so (assuming uintN is an unsigned integer type; it is
 * declared elsewhere) a tt below TOK_NAME wraps around to a large value and
 * fails the test.  tt is evaluated exactly once.
 */
#define IS_PRIMARY_TOKEN(tt) \
    ((uintN)((tt) - TOK_NAME) <= (uintN)(TOK_PRIMARY - TOK_NAME))
151 |
|
152 |
/*
 * True when tt is one of TOK_AT, TOK_DBLCOLON or TOK_ANYNAME.  The argument
 * may be evaluated up to three times, so it must not have side effects.
 */
#define TOKEN_TYPE_IS_XML(tt) \
    ((tt) == TOK_AT || (tt) == TOK_DBLCOLON || (tt) == TOK_ANYNAME)
154 |
|
155 |
/*
 * True when tt is one of TOK_XMLCOMMENT, TOK_XMLCDATA, TOK_XMLPI, TOK_XMLELEM
 * or TOK_XMLLIST.  The argument may be evaluated up to five times, so it must
 * not have side effects.
 */
#define TREE_TYPE_IS_XML(tt) \
    ((tt) == TOK_XMLCOMMENT || (tt) == TOK_XMLCDATA || (tt) == TOK_XMLPI || \
     (tt) == TOK_XMLELEM || (tt) == TOK_XMLLIST)
158 |
|
159 |
/*
 * True when tt is a declaration keyword token: TOK_VAR, and also TOK_LET when
 * JS_HAS_BLOCK_SCOPE is enabled.  In the block-scope variant the argument may
 * be evaluated twice.
 */
#if JS_HAS_BLOCK_SCOPE
# define TOKEN_TYPE_IS_DECL(tt) ((tt) == TOK_VAR || (tt) == TOK_LET)
#else
# define TOKEN_TYPE_IS_DECL(tt) ((tt) == TOK_VAR)
#endif
164 |
|
165 |
struct JSTokenPtr { |
166 |
uint32 index; /* index of char in physical line */ |
167 |
uint32 lineno; /* physical line number */ |
168 |
|
169 |
bool operator <(const JSTokenPtr& bptr) { |
170 |
return lineno < bptr.lineno || |
171 |
(lineno == bptr.lineno && index < bptr.index); |
172 |
} |
173 |
|
174 |
bool operator <=(const JSTokenPtr& bptr) { |
175 |
return lineno < bptr.lineno || |
176 |
(lineno == bptr.lineno && index <= bptr.index); |
177 |
} |
178 |
|
179 |
bool operator >(const JSTokenPtr& bptr) { |
180 |
return !(*this <= bptr); |
181 |
} |
182 |
|
183 |
bool operator >=(const JSTokenPtr& bptr) { |
184 |
return !(*this < bptr); |
185 |
} |
186 |
}; |
187 |
|
188 |
struct JSTokenPos { |
189 |
JSTokenPtr begin; /* first character and line of token */ |
190 |
JSTokenPtr end; /* index 1 past last char, last line */ |
191 |
|
192 |
bool operator <(const JSTokenPos& bpos) { |
193 |
return begin < bpos.begin; |
194 |
} |
195 |
|
196 |
bool operator <=(const JSTokenPos& bpos) { |
197 |
return begin <= bpos.begin; |
198 |
} |
199 |
|
200 |
bool operator >(const JSTokenPos& bpos) { |
201 |
return !(*this <= bpos); |
202 |
} |
203 |
|
204 |
bool operator >=(const JSTokenPos& bpos) { |
205 |
return !(*this < bpos); |
206 |
} |
207 |
}; |
208 |
|
209 |
/*
 * One scanned token: its type, its position in the source, a pointer to its
 * start in the line buffer, and a union carrying the value that goes with the
 * token type (operator/atom, regexp flags, atom pair, or number).
 */
struct JSToken {
    JSTokenType         type;           /* char value or above enumerator */
    JSTokenPos          pos;            /* token position in file */
    jschar              *ptr;           /* beginning of token in line buffer */
    union {
        struct {                        /* name or string literal */
            JSOp        op;             /* operator, for minimal parser */
            JSAtom      *atom;          /* atom table entry */
        } s;
        uintN           reflags;        /* regexp flags, use tokenbuf to access
                                           regexp chars */
        /*
         * NOTE(review): atom2 is declared before atom, presumably so that
         * p.atom occupies the same slot as s.atom -- confirm before
         * reordering members.
         */
        struct {                        /* atom pair, for XML PIs */
            JSAtom      *atom2;         /* auxiliary atom table entry */
            JSAtom      *atom;          /* main atom table entry */
        } p;
        jsdouble        dval;           /* floating point number */
    } u;
};
227 |
|
228 |
/*
 * Shorthand member names for the value union inside JSToken, to be used after
 * an object or pointer access (tok.t_atom, tp->t_dval, ...).  Note that t_atom
 * names u.s.atom; no shorthand is defined here for u.p.atom.
 */
#define t_op            u.s.op
#define t_reflags       u.reflags
#define t_atom          u.s.atom
#define t_atom2         u.p.atom2
#define t_dval          u.dval
233 |
|
234 |
/*
 * A jschar buffer described by three pointers: its base, its limit, and a
 * cursor that moves between them.  JSTokenStream uses this type for both its
 * line buffer and its user input buffer.
 */
typedef struct JSTokenBuf {
    jschar              *base;          /* base of line or stream buffer */
    jschar              *limit;         /* limit for quick bounds check */
    jschar              *ptr;           /* next char to get, or slot to use */
} JSTokenBuf;
239 |
|
240 |
/*
 * Size limit of the logical line buffer.  Physical line length is unlimited.
 */
#define JS_LINE_LIMIT   256

/*
 * Number of slots in the token buffer: 1 current token + 2 lookahead tokens,
 * rounded up to a power of 2 so that a division/modulo by 3 can be avoided.
 * NTOKENS_MASK is the matching bit mask (presumably used to wrap buffer
 * indices -- see the implementation).
 */
#define NTOKENS         4
#define NTOKENS_MASK    (NTOKENS-1)
244 |
|
245 |
/*
 * Scanner state for one input source: a circular buffer of NTOKENS tokens, the
 * line and user input buffers, the unget buffer, the TSF_* flag word, and the
 * optional file, filename and source-listener hooks.
 */
struct JSTokenStream {
    JSToken             tokens[NTOKENS];/* circular token buffer */
    uintN               cursor;         /* index of last parsed token */
    uintN               lookahead;      /* count of lookahead tokens */
    uintN               lineno;         /* current line number */
    uintN               ungetpos;       /* next free char slot in ungetbuf */
    jschar              ungetbuf[6];    /* at most 6, for \uXXXX lookahead */
    uintN               flags;          /* flags -- see the TSF_* defines */
    uint32              linelen;        /* physical linebuf segment length */
    uint32              linepos;        /* linebuf offset in physical line */
    JSTokenBuf          linebuf;        /* line buffer for diagnostics */
    JSTokenBuf          userbuf;        /* user input buffer if !file */
    const char          *filename;      /* input filename or null */
    FILE                *file;          /* stdio stream if reading from file */
    JSSourceHandler     listener;       /* callback for source; eg debugger */
    void                *listenerData;  /* listener 'this' data */
    void                *listenerTSData;/* listener data for this TokenStream */
    jschar              *saveEOL;       /* save next end of line in userbuf, to
                                           optimize for very long lines */
    JSCharBuffer        tokenbuf;       /* current token string buffer */

    /*
     * To construct a JSTokenStream, first call the constructor, which is
     * infallible, then call |init|, which can fail. To destroy a JSTokenStream,
     * first call |close| then call the destructor. If |init| fails, do not call
     * |close|.
     *
     * This class uses JSContext.tempPool to allocate internal buffers. The
     * caller should JS_ARENA_MARK before calling |init| and JS_ARENA_RELEASE
     * after calling |close|.
     */
    JSTokenStream(JSContext *);

    /*
     * Create a new token stream, either from an input buffer or from a file.
     * Return false on file-open or memory-allocation failure.
     */
    bool init(JSContext *, const jschar *base, size_t length,
              FILE *fp, const char *filename, uintN lineno);

    /* Must be called before the destructor, but only if |init| succeeded. */
    void close(JSContext *);

    /* Intentionally empty: teardown happens in |close|, not here. */
    ~JSTokenStream() {}
};
288 |
|
289 |
/*
 * CURRENT_TOKEN(ts) is the JSToken in ts's token buffer at index ts->cursor;
 * it expands to an lvalue.  ts is evaluated twice.
 *
 * ON_CURRENT_LINE(ts,pos) is true when the end of the token position |pos| is
 * on the line that ts is currently at (pos.end.lineno == ts->lineno).
 */
#define CURRENT_TOKEN(ts)       ((ts)->tokens[(ts)->cursor])
#define ON_CURRENT_LINE(ts,pos) ((ts)->lineno == (pos).end.lineno)
291 |
|
292 |
/*
 * JSTokenStream flags: bit values for JSTokenStream.flags.  Bit 0x10 is not
 * assigned in this list.
 */
#define TSF_ERROR       0x01            /* fatal error while compiling */
#define TSF_EOF         0x02            /* hit end of file */
#define TSF_NEWLINES    0x04            /* tokenize newlines */
#define TSF_OPERAND     0x08            /* looking for operand, not operator */
#define TSF_NLFLAG      0x20            /* last linebuf ended with \n */
#define TSF_CRFLAG      0x40            /* linebuf would have ended with \r */
#define TSF_DIRTYLINE   0x80            /* non-whitespace since start of line */
#define TSF_OWNFILENAME 0x100           /* ts->filename is malloc'd */
#define TSF_XMLTAGMODE  0x200           /* scanning within an XML tag in E4X */
#define TSF_XMLTEXTMODE 0x400           /* scanning XMLText terminal from E4X */
#define TSF_XMLONLYMODE 0x800           /* don't scan {expr} within text/tag */

/* Flag indicating unexpected end of input, i.e. TOK_EOF not at top-level. */
#define TSF_UNEXPECTED_EOF 0x1000

/*
 * To handle the hard case of contiguous HTML comments, we want to clear the
 * TSF_DIRTYLINE flag at the end of each such comment.  But we'd rather not
 * scan for --> within every //-style comment unless we have to.  So we set
 * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
 * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
 * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
 *
 * This still works as before given a malformed comment hiding hack such as:
 *
 *    <script>
 *      <!-- comment hiding hack #1
 *      code goes here
 *      // --> oops, markup for script-unaware browsers goes here!
 *    </script>
 *
 * It does not cope with malformed comment hiding hacks where --> is hidden
 * by C-style comments, or on a dirty line.  Such cases are already broken.
 */
#define TSF_IN_HTML_COMMENT 0x2000

/* Ignore keywords and return TOK_NAME instead to the parser. */
#define TSF_KEYWORD_IS_NAME 0x4000

/* Parsing a destructuring object or array initialiser pattern. */
#define TSF_DESTRUCTURING   0x8000
334 |
|
335 |
/*
 * Unicode separators that are treated as line terminators, in addition to \n
 * and \r: U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR).
 */
#define LINE_SEPARATOR  0x2028
#define PARA_SEPARATOR  0x2029
338 |
|
339 |
/*
 * NOTE(review): no contract is documented for this entry point.  Presumably it
 * releases the resources held by ts, as JSTokenStream::close does -- confirm
 * against the implementation.
 */
extern void
js_CloseTokenStream(JSContext *cx, JSTokenStream *ts);

/*
 * Friend-exported line reader.  The parameters mirror fgets(3), but the return
 * type is int rather than char *.
 * NOTE(review): the meaning of the return value is not documented here --
 * confirm against the implementation.
 */
extern JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file);
344 |
|
345 |
/*
 * If the |length| characters starting at |chars| form a JavaScript keyword,
 * return the corresponding token type.  Otherwise return TOK_EOF.
 */
extern JSTokenType
js_CheckKeyword(const jschar *chars, size_t length);

/*
 * Friend-exported API entry point that calls the mapping function |mapfun| on
 * each reserved identifier in the scanner's keyword table, passing the
 * identifier as a C string.
 */
extern JS_FRIEND_API(void)
js_MapKeywords(void (*mapfun)(const char *));

/*
 * Check that str forms a valid JS identifier name.  This function does not
 * check whether str is a JS keyword.
 */
extern JSBool
js_IsIdentifier(JSString *str);
365 |
|
366 |
/*
 * Report a compile-time error by its number.  Return true for a warning, false
 * for an error.  When pn is not null, it is used to report the error's
 * location; otherwise ts is used, and in that case ts must not be null.
 *
 * The variadic arguments are the arguments to the error message.  By default
 * they have const char * type; set JSREPORT_UC in |flags| when they have
 * const jschar * type instead.
 */
JSBool
js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
                            uintN flags, uintN errorNumber, ...);

/*
 * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
 * message have const jschar* type, not const char*.
 */
#define JSREPORT_UC 0x100
380 |
|
381 |
/*
 * Look ahead one token and return its type.
 */
extern JSTokenType
js_PeekToken(JSContext *cx, JSTokenStream *ts);

/*
 * NOTE(review): undocumented in this header.  Presumably a variant of
 * js_PeekToken that only looks ahead within the current line -- confirm
 * against the implementation, including what is returned at a line break.
 */
extern JSTokenType
js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts);

/*
 * Get the next token from ts and return its type.
 */
extern JSTokenType
js_GetToken(JSContext *cx, JSTokenStream *ts);

/*
 * Push back the last scanned token onto ts.
 */
extern void
js_UngetToken(JSTokenStream *ts);

/*
 * Get the next token from ts if its type is tt.
 * NOTE(review): presumably returns true when the token matched and was
 * consumed, false otherwise -- confirm against the implementation.
 */
extern JSBool
js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt);
407 |
|
408 |
JS_END_EXTERN_C |
409 |
|
410 |
#endif /* jsscan_h___ */ |