/[jscoverage]/trunk/js/jsscan.cpp
ViewVC logotype

Contents of /trunk/js/jsscan.cpp

Parent Directory | Revision Log


Revision 332 - (show annotations)
Thu Oct 23 19:03:33 2008 UTC (11 years, 3 months ago) by siliconforks
File size: 61935 byte(s)
Add SpiderMonkey from Firefox 3.1b1.

The following directories and files were removed:
correct/, correct.js
liveconnect/
nanojit/
t/
v8/
vprof/
xpconnect/
all JavaScript files (Y.js, call.js, if.js, math-partial-sums.js, md5.js, perfect.js, trace-test.js, trace.js)


1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS lexical scanner.
43 */
44 #include "jsstddef.h"
45 #include <stdio.h> /* first to avoid trouble on some systems */
46 #include <errno.h>
47 #include <limits.h>
48 #include <math.h>
49 #ifdef HAVE_MEMORY_H
50 #include <memory.h>
51 #endif
52 #include <stdarg.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include "jstypes.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsutil.h" /* Added by JSIFY */
58 #include "jsdtoa.h"
59 #include "jsprf.h"
60 #include "jsapi.h"
61 #include "jsatom.h"
62 #include "jscntxt.h"
63 #include "jsversion.h"
64 #include "jsemit.h"
65 #include "jsexn.h"
66 #include "jsnum.h"
67 #include "jsopcode.h"
68 #include "jsparse.h"
69 #include "jsregexp.h"
70 #include "jsscan.h"
71 #include "jsscript.h"
72 #include "jsstaticcheck.h"
73
74 #if JS_HAS_XML_SUPPORT
75 #include "jsxml.h"
76 #endif
77
78 #define JS_KEYWORD(keyword, type, op, version) \
79 const char js_##keyword##_str[] = #keyword;
80 #include "jskeyword.tbl"
81 #undef JS_KEYWORD
82
83 struct keyword {
84 const char *chars; /* C string with keyword text */
85 JSTokenType tokentype; /* JSTokenType */
86 JSOp op; /* JSOp */
87 JSVersion version; /* JSVersion */
88 };
89
90 static const struct keyword keyword_defs[] = {
91 #define JS_KEYWORD(keyword, type, op, version) \
92 {js_##keyword##_str, type, op, version},
93 #include "jskeyword.tbl"
94 #undef JS_KEYWORD
95 };
96
97 #define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
98
99 static const struct keyword *
100 FindKeyword(const jschar *s, size_t length)
101 {
102 register size_t i;
103 const struct keyword *kw;
104 const char *chars;
105
106 JS_ASSERT(length != 0);
107
108 #define JSKW_LENGTH() length
109 #define JSKW_AT(column) s[column]
110 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
111 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
112 #define JSKW_NO_MATCH() goto no_match;
113 #include "jsautokw.h"
114 #undef JSKW_NO_MATCH
115 #undef JSKW_TEST_GUESS
116 #undef JSKW_GOT_MATCH
117 #undef JSKW_AT
118 #undef JSKW_LENGTH
119
120 got_match:
121 return &keyword_defs[i];
122
123 test_guess:
124 kw = &keyword_defs[i];
125 chars = kw->chars;
126 do {
127 if (*s++ != (unsigned char)(*chars++))
128 goto no_match;
129 } while (--length != 0);
130 return kw;
131
132 no_match:
133 return NULL;
134 }
135
136 JSTokenType
137 js_CheckKeyword(const jschar *str, size_t length)
138 {
139 const struct keyword *kw;
140
141 JS_ASSERT(length != 0);
142 kw = FindKeyword(str, length);
143 return kw ? kw->tokentype : TOK_EOF;
144 }
145
146 JS_FRIEND_API(void)
147 js_MapKeywords(void (*mapfun)(const char *))
148 {
149 size_t i;
150
151 for (i = 0; i != KEYWORD_COUNT; ++i)
152 mapfun(keyword_defs[i].chars);
153 }
154
155 JSBool
156 js_IsIdentifier(JSString *str)
157 {
158 size_t length;
159 jschar c, *chars, *end;
160
161 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
162 if (length == 0)
163 return JS_FALSE;
164 c = *chars;
165 if (!JS_ISIDSTART(c))
166 return JS_FALSE;
167 end = chars + length;
168 while (++chars != end) {
169 c = *chars;
170 if (!JS_ISIDENT(c))
171 return JS_FALSE;
172 }
173 return JS_TRUE;
174 }
175
176 #define TBMIN 64
177
178 static JSBool
179 GrowTokenBuf(JSStringBuffer *sb, size_t newlength)
180 {
181 JSContext *cx;
182 jschar *base;
183 ptrdiff_t offset, length;
184 size_t tbsize;
185 JSArenaPool *pool;
186
187 cx = (JSContext*) sb->data;
188 base = sb->base;
189 offset = PTRDIFF(sb->ptr, base, jschar);
190 pool = &cx->tempPool;
191 if (!base) {
192 tbsize = TBMIN * sizeof(jschar);
193 length = TBMIN - 1;
194 JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize);
195 } else {
196 length = PTRDIFF(sb->limit, base, jschar);
197 if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) {
198 base = NULL;
199 } else {
200 tbsize = (length + 1) * sizeof(jschar);
201 length += length + 1;
202 JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize);
203 }
204 }
205 if (!base) {
206 js_ReportOutOfScriptQuota(cx);
207 sb->base = STRING_BUFFER_ERROR_BASE;
208 return JS_FALSE;
209 }
210 sb->base = base;
211 sb->limit = base + length;
212 sb->ptr = base + offset;
213 return JS_TRUE;
214 }
215
216 JSBool
217 js_InitTokenStream(JSContext *cx, JSTokenStream *ts,
218 const jschar *base, size_t length,
219 FILE *fp, const char *filename, uintN lineno)
220 {
221 jschar *buf;
222 size_t nb;
223
224 JS_ASSERT_IF(fp, !base);
225 JS_ASSERT_IF(!base, length == 0);
226 nb = fp
227 ? 2 * JS_LINE_LIMIT * sizeof(jschar)
228 : JS_LINE_LIMIT * sizeof(jschar);
229 JS_ARENA_ALLOCATE_CAST(buf, jschar *, &cx->tempPool, nb);
230 if (!buf) {
231 js_ReportOutOfScriptQuota(cx);
232 return JS_FALSE;
233 }
234 memset(buf, 0, nb);
235 memset(ts, 0, sizeof(*ts));
236 ts->filename = filename;
237 ts->lineno = lineno;
238 ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = buf;
239 if (fp) {
240 ts->file = fp;
241 ts->userbuf.base = buf + JS_LINE_LIMIT;
242 ts->userbuf.ptr = ts->userbuf.limit = ts->userbuf.base + JS_LINE_LIMIT;
243 } else {
244 ts->userbuf.base = (jschar *)base;
245 ts->userbuf.limit = (jschar *)base + length;
246 ts->userbuf.ptr = (jschar *)base;
247 }
248 ts->tokenbuf.grow = GrowTokenBuf;
249 ts->tokenbuf.data = cx;
250 ts->listener = cx->debugHooks->sourceHandler;
251 ts->listenerData = cx->debugHooks->sourceHandlerData;
252 return JS_TRUE;
253 }
254
255 void
256 js_CloseTokenStream(JSContext *cx, JSTokenStream *ts)
257 {
258 if (ts->flags & TSF_OWNFILENAME)
259 JS_free(cx, (void *) ts->filename);
260 }
261
262 JS_FRIEND_API(int)
263 js_fgets(char *buf, int size, FILE *file)
264 {
265 int n, i, c;
266 JSBool crflag;
267
268 n = size - 1;
269 if (n < 0)
270 return -1;
271
272 crflag = JS_FALSE;
273 for (i = 0; i < n && (c = getc(file)) != EOF; i++) {
274 buf[i] = c;
275 if (c == '\n') { /* any \n ends a line */
276 i++; /* keep the \n; we know there is room for \0 */
277 break;
278 }
279 if (crflag) { /* \r not followed by \n ends line at the \r */
280 ungetc(c, file);
281 break; /* and overwrite c in buf with \0 */
282 }
283 crflag = (c == '\r');
284 }
285
286 buf[i] = '\0';
287 return i;
288 }
289
/*
 * Return the next character of ts, or EOF when the input is exhausted.
 *
 * Characters pushed back with UngetChar are returned first. Otherwise the
 * character is taken from ts->linebuf. When linebuf is used up it is
 * refilled with the next line segment of ts->userbuf (at most
 * JS_LINE_LIMIT - 1 jschars), and userbuf is itself refilled from ts->file
 * through js_fgets when a file is attached. The line terminator copied into
 * linebuf is rewritten as a single '\n'.
 *
 * Side effects visible here: ts->lineno is incremented whenever the
 * returned character is '\n'; TSF_EOF is set in ts->flags on end of input;
 * ts->linepos, ts->linelen, ts->saveEOL and the TSF_NLFLAG / TSF_CRFLAG
 * flags are updated on every refill; ts->listener, when set, is called on
 * every refill.
 */
290 static int32
291 GetChar(JSTokenStream *ts)
292 {
293 int32 c;
294 ptrdiff_t i, j, len, olen;
295 JSBool crflag;
296 char cbuf[JS_LINE_LIMIT];
297 jschar *ubuf, *nl;
298
299 if (ts->ungetpos != 0) {
300 c = ts->ungetbuf[--ts->ungetpos];
301 } else {
302 if (ts->linebuf.ptr == ts->linebuf.limit) {
303 len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
304 if (len <= 0) {
305 if (!ts->file) {
306 ts->flags |= TSF_EOF;
307 return EOF;
308 }
309
310 /* Fill ts->userbuf so that \r and \r\n convert to \n. */
311 crflag = (ts->flags & TSF_CRFLAG) != 0;
/*
 * When a \r is pending from the previous read (crflag), ask for one
 * character less so there is room to prepend a '\n' below.
 */
312 len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
313 if (len <= 0) {
314 ts->flags |= TSF_EOF;
315 return EOF;
316 }
317 olen = len;
318 ubuf = ts->userbuf.base;
319 i = 0;
320 if (crflag) {
321 ts->flags &= ~TSF_CRFLAG;
322 if (cbuf[0] != '\n') {
323 ubuf[i++] = '\n';
324 len++;
325 ts->linepos--;
326 }
327 }
/* Widen each byte read from the file to a jschar without sign extension. */
328 for (j = 0; i < len; i++, j++)
329 ubuf[i] = (jschar) (unsigned char) cbuf[j];
330 ts->userbuf.limit = ubuf + len;
331 ts->userbuf.ptr = ubuf;
332 }
/* Pass the unconsumed part of userbuf to the source listener, if any. */
333 if (ts->listener) {
334 ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
335 &ts->listenerTSData, ts->listenerData);
336 }
337
/*
 * A non-null saveEOL is the terminator position remembered when the
 * previous segment was cut at JS_LINE_LIMIT - 1; reuse it instead of
 * scanning again.
 */
338 nl = ts->saveEOL;
339 if (!nl) {
340 /*
341 * Any one of \n, \r, or \r\n ends a line (the longest
342 * match wins). Also allow the Unicode line and paragraph
343 * separators.
344 */
345 for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
346 /*
347 * Try to prevent value-testing on most characters by
348 * filtering out characters that aren't 000x or 202x.
349 */
350 if ((*nl & 0xDFD0) == 0) {
351 if (*nl == '\n')
352 break;
353 if (*nl == '\r') {
354 if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
355 nl++;
356 break;
357 }
358 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
359 break;
360 }
361 }
362 }
363
364 /*
365 * If there was a line terminator, copy thru it into linebuf.
366 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
367 */
368 if (nl < ts->userbuf.limit)
369 len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
370 if (len >= JS_LINE_LIMIT) {
371 len = JS_LINE_LIMIT - 1;
372 ts->saveEOL = nl;
373 } else {
374 ts->saveEOL = NULL;
375 }
376 js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
377 ts->userbuf.ptr += len;
378 olen = len;
379
380 /*
381 * Make sure linebuf contains \n for EOL (don't do this in
382 * userbuf because the user's string might be readonly).
383 */
384 if (nl < ts->userbuf.limit) {
385 if (*nl == '\r') {
386 if (ts->linebuf.base[len-1] == '\r') {
387 /*
388 * Does the line segment end in \r? We must check
389 * for a \n at the front of the next segment before
390 * storing a \n into linebuf. This case matters
391 * only when we're reading from a file.
392 */
393 if (nl + 1 == ts->userbuf.limit && ts->file) {
394 len--;
395 ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
396 if (len == 0) {
397 /*
398 * This can happen when a segment ends in
399 * \r\r. Start over. ptr == limit in this
400 * case, so we'll fall into buffer-filling
401 * code.
402 */
403 return GetChar(ts);
404 }
405 } else {
406 ts->linebuf.base[len-1] = '\n';
407 }
408 }
409 } else if (*nl == '\n') {
/*
 * NOTE(review): if len were 1 here, base[len-2] would read one jschar
 * before linebuf.base. Confirm len >= 2 on every path that reaches this
 * test with nl[-1] == '\r', in particular when nl came from ts->saveEOL.
 */
410 if (nl > ts->userbuf.base &&
411 nl[-1] == '\r' &&
412 ts->linebuf.base[len-2] == '\r') {
413 len--;
414 JS_ASSERT(ts->linebuf.base[len] == '\n');
415 ts->linebuf.base[len-1] = '\n';
416 }
417 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
418 ts->linebuf.base[len-1] = '\n';
419 }
420 }
421
422 /* Reset linebuf based on adjusted segment length. */
423 ts->linebuf.limit = ts->linebuf.base + len;
424 ts->linebuf.ptr = ts->linebuf.base;
425
426 /* Update position of linebuf within physical userbuf line. */
427 if (!(ts->flags & TSF_NLFLAG))
428 ts->linepos += ts->linelen;
429 else
430 ts->linepos = 0;
431 if (ts->linebuf.limit[-1] == '\n')
432 ts->flags |= TSF_NLFLAG;
433 else
434 ts->flags &= ~TSF_NLFLAG;
435
436 /* Update linelen from original segment length. */
437 ts->linelen = olen;
438 }
439 c = *ts->linebuf.ptr++;
440 }
441 if (c == '\n')
442 ts->lineno++;
443 return c;
444 }
445
446 static void
447 UngetChar(JSTokenStream *ts, int32 c)
448 {
449 if (c == EOF)
450 return;
451 JS_ASSERT(ts->ungetpos < JS_ARRAY_LENGTH(ts->ungetbuf));
452 if (c == '\n')
453 ts->lineno--;
454 ts->ungetbuf[ts->ungetpos++] = (jschar)c;
455 }
456
457 static int32
458 PeekChar(JSTokenStream *ts)
459 {
460 int32 c;
461
462 c = GetChar(ts);
463 UngetChar(ts, c);
464 return c;
465 }
466
467 /*
468 * Peek n chars ahead into ts. Return true if n chars were read, false if
469 * there weren't enough characters in the input stream. This function cannot
470 * be used to peek into or past a newline.
471 */
472 static JSBool
473 PeekChars(JSTokenStream *ts, intN n, jschar *cp)
474 {
475 intN i, j;
476 int32 c;
477
478 for (i = 0; i < n; i++) {
479 c = GetChar(ts);
480 if (c == EOF)
481 break;
482 if (c == '\n') {
483 UngetChar(ts, c);
484 break;
485 }
486 cp[i] = (jschar)c;
487 }
488 for (j = i - 1; j >= 0; j--)
489 UngetChar(ts, cp[j]);
490 return i == n;
491 }
492
493 static void
494 SkipChars(JSTokenStream *ts, intN n)
495 {
496 while (--n >= 0)
497 GetChar(ts);
498 }
499
500 static JSBool
501 MatchChar(JSTokenStream *ts, int32 expect)
502 {
503 int32 c;
504
505 c = GetChar(ts);
506 if (c == expect)
507 return JS_TRUE;
508 UngetChar(ts, c);
509 return JS_FALSE;
510 }
511
/*
 * Report the compile-time error or warning errorNumber for token stream ts.
 *
 * The variadic arguments are handed to js_ExpandErrorArguments to build the
 * message. The reported position is that of pn when pn is non-null, else
 * that of the current token. When the position is on the scanner's current
 * line, a copy of ts->linebuf is attached to the report as source context.
 *
 * The report goes to cx->errorReporter unless js_ErrorToException turned it
 * into a pending exception, the error is not a warning and
 * cx->interpLevel is non-zero, or the debug error hook vetoes it.
 *
 * Returns JS_TRUE without doing anything for a JSREPORT_STRICT report when
 * the strict option is off. Otherwise returns the warning flag filled in by
 * js_ExpandErrorArguments, or JS_FALSE when building the report failed.
 * Unless flags describe a warning, TSF_ERROR is set on ts before returning.
 */
512 JSBool
513 js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
514 uintN flags, uintN errorNumber, ...)
515 {
516 JSErrorReport report;
517 char *message;
518 size_t linelength;
519 jschar *linechars;
520 char *linebytes;
521 va_list ap;
522 JSBool warning, ok;
523 JSTokenPos *tp;
524 uintN index, i;
525 JSErrorReporter onError;
526
527 JS_ASSERT(ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
528
529 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
530 return JS_TRUE;
531
532 memset(&report, 0, sizeof report);
533 report.flags = flags;
534 report.errorNumber = errorNumber;
535 message = NULL;
536 linechars = NULL;
537 linebytes = NULL;
538
/* From here on every exit goes through the cleanup code at "out". */
539 MUST_FLOW_THROUGH("out");
540 va_start(ap, errorNumber);
541 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
542 errorNumber, &message, &report, &warning,
543 !(flags & JSREPORT_UC), ap);
544 va_end(ap);
545 if (!ok) {
546 warning = JS_FALSE;
547 goto out;
548 }
549
550 report.filename = ts->filename;
551
552 if (pn) {
553 report.lineno = pn->pn_pos.begin.lineno;
/*
 * pn starts on a different line than the one the scanner is on: report
 * with the node's line number only, without any line text.
 */
554 if (report.lineno != ts->lineno)
555 goto report;
556 tp = &pn->pn_pos;
557 } else {
558 /* Point to the current token, not the next one to get. */
559 tp = &ts->tokens[ts->cursor].pos;
560 }
561 report.lineno = ts->lineno;
/* Take a NUL-terminated private copy of the current line segment. */
562 linelength = PTRDIFF(ts->linebuf.limit, ts->linebuf.base, jschar);
563 linechars = (jschar *)JS_malloc(cx, (linelength + 1) * sizeof(jschar));
564 if (!linechars) {
565 warning = JS_FALSE;
566 goto out;
567 }
568 memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
569 linechars[linelength] = 0;
570 linebytes = js_DeflateString(cx, linechars, linelength);
571 if (!linebytes) {
572 warning = JS_FALSE;
573 goto out;
574 }
575 report.linebuf = linebytes;
576
577 /*
578 * FIXME: What should instead happen here is that we should
579 * find error-tokens in userbuf, if !ts->file. That will
580 * allow us to deliver a more helpful error message, which
581 * includes all or part of the bad string or bad token. The
582 * code here yields something that looks truncated.
583 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
584 */
585 index = 0;
586 if (tp->begin.lineno == tp->end.lineno) {
/*
 * The token begins before the part of the line held in linebuf, so no
 * token pointer is set; the report keeps linebuf only.
 */
587 if (tp->begin.index < ts->linepos)
588 goto report;
589
590 index = tp->begin.index - ts->linepos;
591 }
592
593 report.tokenptr = report.linebuf + index;
594 report.uclinebuf = linechars;
595 report.uctokenptr = report.uclinebuf + index;
596
597 /*
598 * If there's a runtime exception type associated with this error
599 * number, set that as the pending exception. For errors occurring at
600 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
601 *
602 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
603 * flag will be set in report.flags. Proper behavior for an error
604 * reporter is to ignore a report with this flag for all but top-level
605 * compilation errors. The exception will remain pending, and so long
606 * as the non-top-level "load", "eval", or "compile" native function
607 * returns false, the top-level reporter will eventually receive the
608 * uncaught exception report.
609 *
610 * XXX it'd probably be best if there was only one call to this
611 * function, but there seem to be two error reporter call points.
612 */
613 report:
614 onError = cx->errorReporter;
615
616 /*
617 * Try to raise an exception only if there isn't one already set --
618 * otherwise the exception will describe the last compile-time error,
619 * which is likely spurious.
620 */
621 if (!(ts->flags & TSF_ERROR)) {
622 if (js_ErrorToException(cx, message, &report))
623 onError = NULL;
624 }
625
626 /*
627 * Suppress any compile-time errors that don't occur at the top level.
628 * This may still fail, as interplevel may be zero in contexts where we
629 * don't really want to call the error reporter, as when js is called
630 * by other code which could catch the error.
631 */
632 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
633 onError = NULL;
634
635 if (onError) {
636 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
637
638 /*
639 * If debugErrorHook is present then we give it a chance to veto
640 * sending the error on to the regular error reporter.
641 */
642 if (hook && !hook(cx, message, &report,
643 cx->debugHooks->debugErrorHookData)) {
644 onError = NULL;
645 }
646 }
647 if (onError)
648 (*onError)(cx, message, &report);
649
650 out:
/* Common cleanup: release everything allocated for this report. */
651 if (linebytes)
652 JS_free(cx, linebytes);
653 if (linechars)
654 JS_free(cx, linechars);
655 if (message)
656 JS_free(cx, message);
657 if (report.ucmessage)
658 JS_free(cx, (void *)report.ucmessage);
659
660 if (report.messageArgs) {
/*
 * The individual argument strings are freed only for non-JSREPORT_UC
 * reports; presumably they are copies made by js_ExpandErrorArguments
 * in that case only -- verify against that function.
 */
661 if (!(flags & JSREPORT_UC)) {
662 i = 0;
663 while (report.messageArgs[i])
664 JS_free(cx, (void *)report.messageArgs[i++]);
665 }
666 JS_free(cx, (void *)report.messageArgs);
667 }
668
669 if (!JSREPORT_IS_WARNING(flags)) {
670 /* Set the error flag to suppress spurious reports. */
671 ts->flags |= TSF_ERROR;
672 }
673
674 return warning;
675 }
676
677 static JSBool
678 GrowStringBuffer(JSStringBuffer *sb, size_t newlength)
679 {
680 ptrdiff_t offset;
681 jschar *bp;
682
683 offset = PTRDIFF(sb->ptr, sb->base, jschar);
684 JS_ASSERT(offset >= 0);
685 newlength += offset + 1;
686 if ((size_t)offset < newlength && newlength < ~(size_t)0 / sizeof(jschar))
687 bp = (jschar *) realloc(sb->base, newlength * sizeof(jschar));
688 else
689 bp = NULL;
690 if (!bp) {
691 free(sb->base);
692 sb->base = STRING_BUFFER_ERROR_BASE;
693 return JS_FALSE;
694 }
695 sb->base = bp;
696 sb->ptr = bp + offset;
697 sb->limit = bp + newlength - 1;
698 return JS_TRUE;
699 }
700
701 static void
702 FreeStringBuffer(JSStringBuffer *sb)
703 {
704 JS_ASSERT(STRING_BUFFER_OK(sb));
705 if (sb->base)
706 free(sb->base);
707 }
708
709 void
710 js_InitStringBuffer(JSStringBuffer *sb)
711 {
712 sb->base = sb->limit = sb->ptr = NULL;
713 sb->data = NULL;
714 sb->grow = GrowStringBuffer;
715 sb->free = FreeStringBuffer;
716 }
717
718 void
719 js_FinishStringBuffer(JSStringBuffer *sb)
720 {
721 sb->free(sb);
722 }
723
/*
 * Evaluate to true when sb has room for n more elements before sb->limit,
 * calling sb's grow hook to make room when it does not; evaluate to false
 * when that hook fails.
 *
 * Every use of the sb argument is parenthesized so that an expression such
 * as &buffer can be passed. Note that sb and n are still evaluated more
 * than once, so arguments must be free of side effects.
 */
#define ENSURE_STRING_BUFFER(sb,n) \
    ((sb)->ptr + (n) <= (sb)->limit || (sb)->grow(sb, n))
726
727 static void
728 FastAppendChar(JSStringBuffer *sb, jschar c)
729 {
730 if (!STRING_BUFFER_OK(sb))
731 return;
732 if (!ENSURE_STRING_BUFFER(sb, 1))
733 return;
734 *sb->ptr++ = c;
735 }
736
737 void
738 js_AppendChar(JSStringBuffer *sb, jschar c)
739 {
740 jschar *bp;
741
742 if (!STRING_BUFFER_OK(sb))
743 return;
744 if (!ENSURE_STRING_BUFFER(sb, 1))
745 return;
746 bp = sb->ptr;
747 *bp++ = c;
748 *bp = 0;
749 sb->ptr = bp;
750 }
751
752 #if JS_HAS_XML_SUPPORT
753
754 void
755 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count)
756 {
757 jschar *bp;
758
759 if (!STRING_BUFFER_OK(sb) || count == 0)
760 return;
761 if (!ENSURE_STRING_BUFFER(sb, count))
762 return;
763 for (bp = sb->ptr; count; --count)
764 *bp++ = c;
765 *bp = 0;
766 sb->ptr = bp;
767 }
768
769 void
770 js_AppendCString(JSStringBuffer *sb, const char *asciiz)
771 {
772 size_t length;
773 jschar *bp;
774
775 if (!STRING_BUFFER_OK(sb) || *asciiz == '\0')
776 return;
777 length = strlen(asciiz);
778 if (!ENSURE_STRING_BUFFER(sb, length))
779 return;
780 for (bp = sb->ptr; length; --length)
781 *bp++ = (jschar) *asciiz++;
782 *bp = 0;
783 sb->ptr = bp;
784 }
785
786 void
787 js_AppendJSString(JSStringBuffer *sb, JSString *str)
788 {
789 size_t length;
790 jschar *bp;
791
792 if (!STRING_BUFFER_OK(sb))
793 return;
794 length = JSSTRING_LENGTH(str);
795 if (length == 0 || !ENSURE_STRING_BUFFER(sb, length))
796 return;
797 bp = sb->ptr;
798 js_strncpy(bp, JSSTRING_CHARS(str), length);
799 bp += length;
800 *bp = 0;
801 sb->ptr = bp;
802 }
803
804 static JSBool
805 GetXMLEntity(JSContext *cx, JSTokenStream *ts)
806 {
807 ptrdiff_t offset, length, i;
808 int32 c, d;
809 JSBool ispair;
810 jschar *bp, digit;
811 char *bytes;
812 JSErrNum msg;
813
814 /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
815 offset = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar);
816 FastAppendChar(&ts->tokenbuf, '&');
817 while ((c = GetChar(ts)) != ';') {
818 if (c == EOF || c == '\n') {
819 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
820 JSMSG_END_OF_XML_ENTITY);
821 return JS_FALSE;
822 }
823 FastAppendChar(&ts->tokenbuf, (jschar) c);
824 }
825
826 /* Let length be the number of jschars after the '&', including the ';'. */
827 length = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) - offset;
828 bp = ts->tokenbuf.base + offset;
829 c = d = 0;
830 ispair = JS_FALSE;
831 if (length > 2 && bp[1] == '#') {
832 /* Match a well-formed XML Character Reference. */
833 i = 2;
834 if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
835 if (length > 9) /* at most 6 hex digits allowed */
836 goto badncr;
837 while (++i < length) {
838 digit = bp[i];
839 if (!JS7_ISHEX(digit))
840 goto badncr;
841 c = (c << 4) + JS7_UNHEX(digit);
842 }
843 } else {
844 while (i < length) {
845 digit = bp[i++];
846 if (!JS7_ISDEC(digit))
847 goto badncr;
848 c = (c * 10) + JS7_UNDEC(digit);
849 if (c < 0)
850 goto badncr;
851 }
852 }
853
854 if (0x10000 <= c && c <= 0x10FFFF) {
855 /* Form a surrogate pair (c, d) -- c is the high surrogate. */
856 d = 0xDC00 + (c & 0x3FF);
857 c = 0xD7C0 + (c >> 10);
858 ispair = JS_TRUE;
859 } else {
860 /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
861 if (c != 0x9 && c != 0xA && c != 0xD &&
862 !(0x20 <= c && c <= 0xD7FF) &&
863 !(0xE000 <= c && c <= 0xFFFD)) {
864 goto badncr;
865 }
866 }
867 } else {
868 /* Try to match one of the five XML 1.0 predefined entities. */
869 switch (length) {
870 case 3:
871 if (bp[2] == 't') {
872 if (bp[1] == 'l')
873 c = '<';
874 else if (bp[1] == 'g')
875 c = '>';
876 }
877 break;
878 case 4:
879 if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
880 c = '&';
881 break;
882 case 5:
883 if (bp[3] == 'o') {
884 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
885 c = '\'';
886 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
887 c = '"';
888 }
889 break;
890 }
891 if (c == 0) {
892 msg = JSMSG_UNKNOWN_XML_ENTITY;
893 goto bad;
894 }
895 }
896
897 /* If we matched, retract ts->tokenbuf and store the entity's value. */
898 *bp++ = (jschar) c;
899 if (ispair)
900 *bp++ = (jschar) d;
901 *bp = 0;
902 ts->tokenbuf.ptr = bp;
903 return JS_TRUE;
904
905 badncr:
906 msg = JSMSG_BAD_XML_NCR;
907 bad:
908 /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
909 bytes = js_DeflateString(cx, bp + 1,
910 PTRDIFF(ts->tokenbuf.ptr, bp, jschar) - 1);
911 if (bytes) {
912 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
913 msg, bytes);
914 JS_free(cx, bytes);
915 }
916 return JS_FALSE;
917 }
918
919 #endif /* JS_HAS_XML_SUPPORT */
920
921 JSTokenType
922 js_PeekToken(JSContext *cx, JSTokenStream *ts)
923 {
924 JSTokenType tt;
925
926 if (ts->lookahead != 0) {
927 tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
928 } else {
929 tt = js_GetToken(cx, ts);
930 js_UngetToken(ts);
931 }
932 return tt;
933 }
934
935 JSTokenType
936 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
937 {
938 JSTokenType tt;
939
940 if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
941 return TOK_EOL;
942 ts->flags |= TSF_NEWLINES;
943 tt = js_PeekToken(cx, ts);
944 ts->flags &= ~TSF_NEWLINES;
945 return tt;
946 }
947
948 /*
949 * We have encountered a '\': check for a Unicode escape sequence after it,
950 * returning the character code value if we found a Unicode escape sequence.
951 * Otherwise, non-destructively return the original '\'.
952 */
953 static int32
954 GetUnicodeEscape(JSTokenStream *ts)
955 {
956 jschar cp[5];
957 int32 c;
958
959 if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
960 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
961 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
962 {
963 c = (((((JS7_UNHEX(cp[1]) << 4)
964 + JS7_UNHEX(cp[2])) << 4)
965 + JS7_UNHEX(cp[3])) << 4)
966 + JS7_UNHEX(cp[4]);
967 SkipChars(ts, 5);
968 return c;
969 }
970 return '\\';
971 }
972
973 static JSToken *
974 NewToken(JSTokenStream *ts, ptrdiff_t adjust)
975 {
976 JSToken *tp;
977
978 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
979 tp = &CURRENT_TOKEN(ts);
980 tp->ptr = ts->linebuf.ptr + adjust;
981 tp->pos.begin.index = ts->linepos +
982 PTRDIFF(tp->ptr, ts->linebuf.base, jschar) -
983 ts->ungetpos;
984 tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno;
985 return tp;
986 }
987
988 static JS_ALWAYS_INLINE JSBool
989 ScanAsSpace(jschar c)
990 {
991 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
992 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
993 return JS_TRUE;
994 return JS_FALSE;
995 }
996
997 JSTokenType
998 js_GetToken(JSContext *cx, JSTokenStream *ts)
999 {
1000 JSTokenType tt;
1001 int32 c, qc;
1002 JSToken *tp;
1003 JSAtom *atom;
1004 JSBool hadUnicodeEscape;
1005 const struct keyword *kw;
1006 #if JS_HAS_XML_SUPPORT
1007 JSBool inTarget;
1008 size_t targetLength;
1009 ptrdiff_t contentIndex;
1010 #endif
1011
1012 #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base)
1013 #define TOKENBUF_LENGTH() PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar)
1014 #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf)
1015 #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \
1016 ? js_AtomizeChars(cx, \
1017 TOKENBUF_BASE(), \
1018 TOKENBUF_LENGTH(), \
1019 0) \
1020 : NULL)
1021 #define ADD_TO_TOKENBUF(c) FastAppendChar(&ts->tokenbuf, (jschar) (c))
1022
1023 /* The following 4 macros should only be used when TOKENBUF_OK() is true. */
1024 #define TOKENBUF_BASE() (ts->tokenbuf.base)
1025 #define TOKENBUF_END() (ts->tokenbuf.ptr)
1026 #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i])
1027 #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
1028 #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
1029
1030 /* Check for a pushed-back token resulting from mismatching lookahead. */
1031 while (ts->lookahead != 0) {
1032 JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
1033 ts->lookahead--;
1034 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1035 tt = CURRENT_TOKEN(ts).type;
1036 if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
1037 return tt;
1038 }
1039
1040 /* If there was a fatal error, keep returning TOK_ERROR. */
1041 if (ts->flags & TSF_ERROR)
1042 return TOK_ERROR;
1043
1044 #if JS_HAS_XML_SUPPORT
1045 if (ts->flags & TSF_XMLTEXTMODE) {
1046 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
1047 tp = NewToken(ts, 0);
1048 INIT_TOKENBUF();
1049 qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
1050
1051 while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
1052 if (c == '&' && qc == '<') {
1053 if (!GetXMLEntity(cx, ts))
1054 goto error;
1055 tt = TOK_XMLTEXT;
1056 continue;
1057 }
1058
1059 if (!JS_ISXMLSPACE(c))
1060 tt = TOK_XMLTEXT;
1061 ADD_TO_TOKENBUF(c);
1062 }
1063 UngetChar(ts, c);
1064
1065 if (TOKENBUF_LENGTH() == 0) {
1066 atom = NULL;
1067 } else {
1068 atom = TOKENBUF_TO_ATOM();
1069 if (!atom)
1070 goto error;
1071 }
1072 tp->pos.end.lineno = (uint16)ts->lineno;
1073 tp->t_op = JSOP_STRING;
1074 tp->t_atom = atom;
1075 goto out;
1076 }
1077
1078 if (ts->flags & TSF_XMLTAGMODE) {
1079 tp = NewToken(ts, 0);
1080 c = GetChar(ts);
1081 if (JS_ISXMLSPACE(c)) {
1082 do {
1083 c = GetChar(ts);
1084 } while (JS_ISXMLSPACE(c));
1085 UngetChar(ts, c);
1086 tt = TOK_XMLSPACE;
1087 goto out;
1088 }
1089
1090 if (c == EOF) {
1091 tt = TOK_EOF;
1092 goto out;
1093 }
1094
1095 INIT_TOKENBUF();
1096 if (JS_ISXMLNSSTART(c)) {
1097 JSBool sawColon = JS_FALSE;
1098
1099 ADD_TO_TOKENBUF(c);
1100 while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
1101 if (c == ':') {
1102 int nextc;
1103
1104 if (sawColon ||
1105 (nextc = PeekChar(ts),
1106 ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
1107 !JS_ISXMLNAME(nextc))) {
1108 js_ReportCompileErrorNumber(cx, ts, NULL,
1109 JSREPORT_ERROR,
1110 JSMSG_BAD_XML_QNAME);
1111 goto error;
1112 }
1113 sawColon = JS_TRUE;
1114 }
1115
1116 ADD_TO_TOKENBUF(c);
1117 }
1118
1119 UngetChar(ts, c);
1120 atom = TOKENBUF_TO_ATOM();
1121 if (!atom)
1122 goto error;
1123 tp->t_op = JSOP_STRING;
1124 tp->t_atom = atom;
1125 tt = TOK_XMLNAME;
1126 goto out;
1127 }
1128
1129 switch (c) {
1130 case '{':
1131 if (ts->flags & TSF_XMLONLYMODE)
1132 goto bad_xml_char;
1133 tt = TOK_LC;
1134 goto out;
1135
1136 case '=':
1137 tt = TOK_ASSIGN;
1138 goto out;
1139
1140 case '"':
1141 case '\'':
1142 qc = c;
1143 while ((c = GetChar(ts)) != qc) {
1144 if (c == EOF) {
1145 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1146 JSMSG_UNTERMINATED_STRING);
1147 goto error;
1148 }
1149
1150 /*
1151 * XML attribute values are double-quoted when pretty-printed,
1152 * so escape " if it is expressed directly in a single-quoted
1153 * attribute value.
1154 */
1155 if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1156 JS_ASSERT(qc == '\'');
1157 js_AppendCString(&ts->tokenbuf, js_quot_entity_str);
1158 continue;
1159 }
1160
1161 if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1162 if (!GetXMLEntity(cx, ts))
1163 goto error;
1164 continue;
1165 }
1166
1167 ADD_TO_TOKENBUF(c);
1168 }
1169 atom = TOKENBUF_TO_ATOM();
1170 if (!atom)
1171 goto error;
1172 tp->pos.end.lineno = (uint16)ts->lineno;
1173 tp->t_op = JSOP_STRING;
1174 tp->t_atom = atom;
1175 tt = TOK_XMLATTR;
1176 goto out;
1177
1178 case '>':
1179 tt = TOK_XMLTAGC;
1180 goto out;
1181
1182 case '/':
1183 if (MatchChar(ts, '>')) {
1184 tt = TOK_XMLPTAGC;
1185 goto out;
1186 }
1187 /* FALL THROUGH */
1188
1189 bad_xml_char:
1190 default:
1191 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1192 JSMSG_BAD_XML_CHARACTER);
1193 goto error;
1194 }
1195 /* NOTREACHED */
1196 }
1197 #endif /* JS_HAS_XML_SUPPORT */
1198
1199 retry:
1200 do {
1201 c = GetChar(ts);
1202 if (c == '\n') {
1203 ts->flags &= ~TSF_DIRTYLINE;
1204 if (ts->flags & TSF_NEWLINES)
1205 break;
1206 }
1207 } while (ScanAsSpace((jschar)c));
1208
1209 tp = NewToken(ts, -1);
1210 if (c == EOF) {
1211 tt = TOK_EOF;
1212 goto out;
1213 }
1214
1215 hadUnicodeEscape = JS_FALSE;
1216 if (JS_ISIDSTART(c) ||
1217 (c == '\\' &&
1218 (qc = GetUnicodeEscape(ts),
1219 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1220 if (hadUnicodeEscape)
1221 c = qc;
1222 INIT_TOKENBUF();
1223 for (;;) {
1224 ADD_TO_TOKENBUF(c);
1225 c = GetChar(ts);
1226 if (c == '\\') {
1227 qc = GetUnicodeEscape(ts);
1228 if (!JS_ISIDENT(qc))
1229 break;
1230 c = qc;
1231 hadUnicodeEscape = JS_TRUE;
1232 } else {
1233 if (!JS_ISIDENT(c))
1234 break;
1235 }
1236 }
1237 UngetChar(ts, c);
1238
1239 /*
1240 * Check for keywords unless we saw Unicode escape or parser asks
1241 * to ignore keywords.
1242 */
1243 if (!hadUnicodeEscape &&
1244 !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1245 TOKENBUF_OK() &&
1246 (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) {
1247 if (kw->tokentype == TOK_RESERVED) {
1248 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1249 JSREPORT_WARNING |
1250 JSREPORT_STRICT,
1251 JSMSG_RESERVED_ID,
1252 kw->chars)) {
1253 goto error;
1254 }
1255 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1256 tt = kw->tokentype;
1257 tp->t_op = (JSOp) kw->op;
1258 goto out;
1259 }
1260 }
1261
1262 atom = TOKENBUF_TO_ATOM();
1263 if (!atom)
1264 goto error;
1265 tp->t_op = JSOP_NAME;
1266 tp->t_atom = atom;
1267 tt = TOK_NAME;
1268 goto out;
1269 }
1270
1271 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1272 jsint radix;
1273 const jschar *endptr;
1274 jsdouble dval;
1275
1276 radix = 10;
1277 INIT_TOKENBUF();
1278
1279 if (c == '0') {
1280 ADD_TO_TOKENBUF(c);
1281 c = GetChar(ts);
1282 if (JS_TOLOWER(c) == 'x') {
1283 ADD_TO_TOKENBUF(c);
1284 c = GetChar(ts);
1285 radix = 16;
1286 } else if (JS7_ISDEC(c)) {
1287 radix = 8;
1288 }
1289 }
1290
1291 while (JS7_ISHEX(c)) {
1292 if (radix < 16) {
1293 if (JS7_ISLET(c))
1294 break;
1295
1296 /*
1297 * We permit 08 and 09 as decimal numbers, which makes our
1298 * behaviour a superset of the ECMA numeric grammar. We might
1299 * not always be so permissive, so we warn about it.
1300 */
1301 if (radix == 8 && c >= '8') {
1302 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1303 JSREPORT_WARNING,
1304 JSMSG_BAD_OCTAL,
1305 c == '8' ? "08" : "09")) {
1306 goto error;
1307 }
1308 radix = 10;
1309 }
1310 }
1311 ADD_TO_TOKENBUF(c);
1312 c = GetChar(ts);
1313 }
1314
1315 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1316 if (c == '.') {
1317 do {
1318 ADD_TO_TOKENBUF(c);
1319 c = GetChar(ts);
1320 } while (JS7_ISDEC(c));
1321 }
1322 if (JS_TOLOWER(c) == 'e') {
1323 ADD_TO_TOKENBUF(c);
1324 c = GetChar(ts);
1325 if (c == '+' || c == '-') {
1326 ADD_TO_TOKENBUF(c);
1327 c = GetChar(ts);
1328 }
1329 if (!JS7_ISDEC(c)) {
1330 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1331 JSMSG_MISSING_EXPONENT);
1332 goto error;
1333 }
1334 do {
1335 ADD_TO_TOKENBUF(c);
1336 c = GetChar(ts);
1337 } while (JS7_ISDEC(c));
1338 }
1339 }
1340
1341 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1342 UngetChar(ts, c);
1343 ADD_TO_TOKENBUF(0);
1344
1345 if (!TOKENBUF_OK())
1346 goto error;
1347 if (radix == 10) {
1348 if (!js_strtod(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1349 &endptr, &dval)) {
1350 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1351 JSMSG_OUT_OF_MEMORY);
1352 goto error;
1353 }
1354 } else {
1355 if (!js_strtointeger(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1356 &endptr, radix, &dval)) {
1357 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1358 JSMSG_OUT_OF_MEMORY);
1359 goto error;
1360 }
1361 }
1362 tp->t_dval = dval;
1363 tt = TOK_NUMBER;
1364 goto out;
1365 }
1366
1367 if (c == '"' || c == '\'') {
1368 qc = c;
1369 INIT_TOKENBUF();
1370 while ((c = GetChar(ts)) != qc) {
1371 if (c == '\n' || c == EOF) {
1372 UngetChar(ts, c);
1373 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1374 JSMSG_UNTERMINATED_STRING);
1375 goto error;
1376 }
1377 if (c == '\\') {
1378 switch (c = GetChar(ts)) {
1379 case 'b': c = '\b'; break;
1380 case 'f': c = '\f'; break;
1381 case 'n': c = '\n'; break;
1382 case 'r': c = '\r'; break;
1383 case 't': c = '\t'; break;
1384 case 'v': c = '\v'; break;
1385
1386 default:
1387 if ('0' <= c && c < '8') {
1388 int32 val = JS7_UNDEC(c);
1389
1390 c = PeekChar(ts);
1391 if ('0' <= c && c < '8') {
1392 val = 8 * val + JS7_UNDEC(c);
1393 GetChar(ts);
1394 c = PeekChar(ts);
1395 if ('0' <= c && c < '8') {
1396 int32 save = val;
1397 val = 8 * val + JS7_UNDEC(c);
1398 if (val <= 0377)
1399 GetChar(ts);
1400 else
1401 val = save;
1402 }
1403 }
1404
1405 c = (jschar)val;
1406 } else if (c == 'u') {
1407 jschar cp[4];
1408 if (PeekChars(ts, 4, cp) &&
1409 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1410 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1411 c = (((((JS7_UNHEX(cp[0]) << 4)
1412 + JS7_UNHEX(cp[1])) << 4)
1413 + JS7_UNHEX(cp[2])) << 4)
1414 + JS7_UNHEX(cp[3]);
1415 SkipChars(ts, 4);
1416 }
1417 } else if (c == 'x') {
1418 jschar cp[2];
1419 if (PeekChars(ts, 2, cp) &&
1420 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1421 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1422 SkipChars(ts, 2);
1423 }
1424 } else if (c == '\n') {
1425 /* ECMA follows C by removing escaped newlines. */
1426 continue;
1427 }
1428 break;
1429 }
1430 }
1431 ADD_TO_TOKENBUF(c);
1432 }
1433 atom = TOKENBUF_TO_ATOM();
1434 if (!atom)
1435 goto error;
1436 tp->pos.end.lineno = (uint16)ts->lineno;
1437 tp->t_op = JSOP_STRING;
1438 tp->t_atom = atom;
1439 tt = TOK_STRING;
1440 goto out;
1441 }
1442
1443 switch (c) {
1444 case '\n': tt = TOK_EOL; goto eol_out;
1445 case ';': tt = TOK_SEMI; break;
1446 case '[': tt = TOK_LB; break;
1447 case ']': tt = TOK_RB; break;
1448 case '{': tt = TOK_LC; break;
1449 case '}': tt = TOK_RC; break;
1450 case '(': tt = TOK_LP; break;
1451 case ')': tt = TOK_RP; break;
1452 case ',': tt = TOK_COMMA; break;
1453 case '?': tt = TOK_HOOK; break;
1454
1455 case '.':
1456 #if JS_HAS_XML_SUPPORT
1457 if (MatchChar(ts, c))
1458 tt = TOK_DBLDOT;
1459 else
1460 #endif
1461 tt = TOK_DOT;
1462 break;
1463
1464 case ':':
1465 #if JS_HAS_XML_SUPPORT
1466 if (MatchChar(ts, c)) {
1467 tt = TOK_DBLCOLON;
1468 break;
1469 }
1470 #endif
1471 /*
1472 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1473 * object initializer, likewise for setter.
1474 */
1475 tp->t_op = JSOP_NOP;
1476 tt = TOK_COLON;
1477 break;
1478
1479 case '|':
1480 if (MatchChar(ts, c)) {
1481 tt = TOK_OR;
1482 } else if (MatchChar(ts, '=')) {
1483 tp->t_op = JSOP_BITOR;
1484 tt = TOK_ASSIGN;
1485 } else {
1486 tt = TOK_BITOR;
1487 }
1488 break;
1489
1490 case '^':
1491 if (MatchChar(ts, '=')) {
1492 tp->t_op = JSOP_BITXOR;
1493 tt = TOK_ASSIGN;
1494 } else {
1495 tt = TOK_BITXOR;
1496 }
1497 break;
1498
1499 case '&':
1500 if (MatchChar(ts, c)) {
1501 tt = TOK_AND;
1502 } else if (MatchChar(ts, '=')) {
1503 tp->t_op = JSOP_BITAND;
1504 tt = TOK_ASSIGN;
1505 } else {
1506 tt = TOK_BITAND;
1507 }
1508 break;
1509
1510 case '=':
1511 if (MatchChar(ts, c)) {
1512 tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1513 tt = TOK_EQOP;
1514 } else {
1515 tp->t_op = JSOP_NOP;
1516 tt = TOK_ASSIGN;
1517 }
1518 break;
1519
1520 case '!':
1521 if (MatchChar(ts, '=')) {
1522 tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1523 tt = TOK_EQOP;
1524 } else {
1525 tp->t_op = JSOP_NOT;
1526 tt = TOK_UNARYOP;
1527 }
1528 break;
1529
1530 #if JS_HAS_XML_SUPPORT
1531 case '@':
1532 tt = TOK_AT;
1533 break;
1534 #endif
1535
1536 case '<':
1537 #if JS_HAS_XML_SUPPORT
1538 /*
1539 * After much testing, it's clear that Postel's advice to protocol
1540 * designers ("be liberal in what you accept, and conservative in what
1541 * you send") invites a natural-law repercussion for JS as "protocol":
1542 *
1543 * "If you are liberal in what you accept, others will utterly fail to
1544 * be conservative in what they send."
1545 *
1546 * Which means you will get <!-- comments to end of line in the middle
1547 * of .js files, and after if conditions whose then statements are on
1548 * the next line, and other wonders. See at least the following bugs:
1549 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1550 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1551 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1552 *
1553 * So without JSOPTION_XML, we never scan an XML comment or CDATA
1554 * literal. We always scan <! as the start of an HTML comment hack
1555 * to end of line, used since Netscape 2 to hide script tag content
1556 * from script-unaware browsers.
1557 */
1558 if ((ts->flags & TSF_OPERAND) &&
1559 (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1560 /* Check for XML comment or CDATA section. */
1561 if (MatchChar(ts, '!')) {
1562 INIT_TOKENBUF();
1563
1564 /* Scan XML comment. */
1565 if (MatchChar(ts, '-')) {
1566 if (!MatchChar(ts, '-'))
1567 goto bad_xml_markup;
1568 while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1569 if (c == EOF)
1570 goto bad_xml_markup;
1571 ADD_TO_TOKENBUF(c);
1572 }
1573 tt = TOK_XMLCOMMENT;
1574 tp->t_op = JSOP_XMLCOMMENT;
1575 goto finish_xml_markup;
1576 }
1577
1578 /* Scan CDATA section. */
1579 if (MatchChar(ts, '[')) {
1580 jschar cp[6];
1581 if (PeekChars(ts, 6, cp) &&
1582 cp[0] == 'C' &&
1583 cp[1] == 'D' &&
1584 cp[2] == 'A' &&
1585 cp[3] == 'T' &&
1586 cp[4] == 'A' &&
1587 cp[5] == '[') {
1588 SkipChars(ts, 6);
1589 while ((c = GetChar(ts)) != ']' ||
1590 !PeekChars(ts, 2, cp) ||
1591 cp[0] != ']' ||
1592 cp[1] != '>') {
1593 if (c == EOF)
1594 goto bad_xml_markup;
1595 ADD_TO_TOKENBUF(c);
1596 }
1597 GetChar(ts); /* discard ] but not > */
1598 tt = TOK_XMLCDATA;
1599 tp->t_op = JSOP_XMLCDATA;
1600 goto finish_xml_markup;
1601 }
1602 goto bad_xml_markup;
1603 }
1604 }
1605
1606 /* Check for processing instruction. */
1607 if (MatchChar(ts, '?')) {
1608 inTarget = JS_TRUE;
1609 targetLength = 0;
1610 contentIndex = -1;
1611
1612 INIT_TOKENBUF();
1613 while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1614 if (c == EOF)
1615 goto bad_xml_markup;
1616 if (inTarget) {
1617 if (JS_ISXMLSPACE(c)) {
1618 if (TOKENBUF_LENGTH() == 0)
1619 goto bad_xml_markup;
1620 inTarget = JS_FALSE;
1621 } else {
1622 if (!((TOKENBUF_LENGTH() == 0)
1623 ? JS_ISXMLNSSTART(c)
1624 : JS_ISXMLNS(c))) {
1625 goto bad_xml_markup;
1626 }
1627 ++targetLength;
1628 }
1629 } else {
1630 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1631 contentIndex = TOKENBUF_LENGTH();
1632 }
1633 ADD_TO_TOKENBUF(c);
1634 }
1635 if (targetLength == 0)
1636 goto bad_xml_markup;
1637 if (!TOKENBUF_OK())
1638 goto error;
1639 if (contentIndex < 0) {
1640 atom = cx->runtime->atomState.emptyAtom;
1641 } else {
1642 atom = js_AtomizeChars(cx,
1643 &TOKENBUF_CHAR(contentIndex),
1644 TOKENBUF_LENGTH() - contentIndex,
1645 0);
1646 if (!atom)
1647 goto error;
1648 }
1649 TRIM_TOKENBUF(targetLength);
1650 tp->t_atom2 = atom;
1651 tt = TOK_XMLPI;
1652
1653 finish_xml_markup:
1654 if (!MatchChar(ts, '>'))
1655 goto bad_xml_markup;
1656 atom = TOKENBUF_TO_ATOM();
1657 if (!atom)
1658 goto error;
1659 tp->t_atom = atom;
1660 tp->pos.end.lineno = (uint16)ts->lineno;
1661 goto out;
1662 }
1663
1664 /* An XML start-of-tag character. */
1665 tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1666 goto out;
1667
1668 bad_xml_markup:
1669 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1670 JSMSG_BAD_XML_MARKUP);
1671 goto error;
1672 }
1673 #endif /* JS_HAS_XML_SUPPORT */
1674
1675 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1676 if (MatchChar(ts, '!')) {
1677 if (MatchChar(ts, '-')) {
1678 if (MatchChar(ts, '-')) {
1679 ts->flags |= TSF_IN_HTML_COMMENT;
1680 goto skipline;
1681 }
1682 UngetChar(ts, '-');
1683 }
1684 UngetChar(ts, '!');
1685 }
1686 if (MatchChar(ts, c)) {
1687 tp->t_op = JSOP_LSH;
1688 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1689 } else {
1690 tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1691 tt = TOK_RELOP;
1692 }
1693 break;
1694
1695 case '>':
1696 if (MatchChar(ts, c)) {
1697 tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1698 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1699 } else {
1700 tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1701 tt = TOK_RELOP;
1702 }
1703 break;
1704
1705 case '*':
1706 tp->t_op = JSOP_MUL;
1707 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1708 break;
1709
1710 case '/':
1711 if (MatchChar(ts, '/')) {
1712 /*
1713 * Hack for source filters such as the Mozilla XUL preprocessor:
1714 * "//@line 123\n" sets the number of the *next* line after the
1715 * comment to 123.
1716 */
1717 if (JS_HAS_ATLINE_OPTION(cx)) {
1718 jschar cp[5];
1719 uintN i, line, temp;
1720 char filename[1024];
1721
1722 if (PeekChars(ts, 5, cp) &&
1723 cp[0] == '@' &&
1724 cp[1] == 'l' &&
1725 cp[2] == 'i' &&
1726 cp[3] == 'n' &&
1727 cp[4] == 'e') {
1728 SkipChars(ts, 5);
1729 while ((c = GetChar(ts)) != '\n' && ScanAsSpace((jschar)c))
1730 continue;
1731 if (JS7_ISDEC(c)) {
1732 line = JS7_UNDEC(c);
1733 while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1734 temp = 10 * line + JS7_UNDEC(c);
1735 if (temp < line) {
1736 /* Ignore overlarge line numbers. */
1737 goto skipline;
1738 }
1739 line = temp;
1740 }
1741 while (c != '\n' && ScanAsSpace((jschar)c))
1742 c = GetChar(ts);
1743 i = 0;
1744 if (c == '"') {
1745 while ((c = GetChar(ts)) != EOF && c != '"') {
1746 if (c == '\n') {
1747 UngetChar(ts, c);
1748 goto skipline;
1749 }
1750 if ((c >> 8) != 0 || i >= sizeof filename - 1)
1751 goto skipline;
1752 filename[i++] = (char) c;
1753 }
1754 if (c == '"') {
1755 while ((c = GetChar(ts)) != '\n' &&
1756 ScanAsSpace((jschar)c)) {
1757 continue;
1758 }
1759 }
1760 }
1761 filename[i] = '\0';
1762 if (c == '\n') {
1763 if (i > 0) {
1764 if (ts->flags & TSF_OWNFILENAME)
1765 JS_free(cx, (void *) ts->filename);
1766 ts->filename = JS_strdup(cx, filename);
1767 if (!ts->filename)
1768 goto error;
1769 ts->flags |= TSF_OWNFILENAME;
1770 }
1771 ts->lineno = line;
1772 }
1773 }
1774 UngetChar(ts, c);
1775 }
1776 }
1777
1778 skipline:
1779 /* Optimize line skipping if we are not in an HTML comment. */
1780 if (ts->flags & TSF_IN_HTML_COMMENT) {
1781 while ((c = GetChar(ts)) != EOF && c != '\n') {
1782 if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1783 ts->flags &= ~TSF_IN_HTML_COMMENT;
1784 }
1785 } else {
1786 while ((c = GetChar(ts)) != EOF && c != '\n')
1787 continue;
1788 }
1789 UngetChar(ts, c);
1790 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1791 goto retry;
1792 }
1793
1794 if (MatchChar(ts, '*')) {
1795 while ((c = GetChar(ts)) != EOF &&
1796 !(c == '*' && MatchChar(ts, '/'))) {
1797 /* Ignore all characters until comment close. */
1798 }
1799 if (c == EOF) {
1800 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1801 JSMSG_UNTERMINATED_COMMENT);
1802 goto error;
1803 }
1804 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1805 goto retry;
1806 }
1807
1808 if (ts->flags & TSF_OPERAND) {
1809 uintN flags;
1810 JSBool inCharClass = JS_FALSE;
1811
1812 INIT_TOKENBUF();
1813 for (;;) {
1814 c = GetChar(ts);
1815 if (c == '\n' || c == EOF) {
1816 UngetChar(ts, c);
1817 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1818 JSMSG_UNTERMINATED_REGEXP);
1819 goto error;
1820 }
1821 if (c == '\\') {
1822 ADD_TO_TOKENBUF(c);
1823 c = GetChar(ts);
1824 } else if (c == '[') {
1825 inCharClass = JS_TRUE;
1826 } else if (c == ']') {
1827 inCharClass = JS_FALSE;
1828 } else if (c == '/' && !inCharClass) {
1829 /* For compat with IE, allow unescaped / in char classes. */
1830 break;
1831 }
1832 ADD_TO_TOKENBUF(c);
1833 }
1834 for (flags = 0; ; ) {
1835 c = PeekChar(ts);
1836 if (c == 'g')
1837 flags |= JSREG_GLOB;
1838 else if (c == 'i')
1839 flags |= JSREG_FOLD;
1840 else if (c == 'm')
1841 flags |= JSREG_MULTILINE;
1842 else if (c == 'y')
1843 flags |= JSREG_STICKY;
1844 else
1845 break;
1846 GetChar(ts);
1847 }
1848 c = PeekChar(ts);
1849 if (JS7_ISLET(c)) {
1850 tp->ptr = ts->linebuf.ptr - 1;
1851 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1852 JSMSG_BAD_REGEXP_FLAG);
1853 (void) GetChar(ts);
1854 goto error;
1855 }
1856 /* XXXbe fix jsregexp.c so it doesn't depend on NUL termination */
1857 if (!TOKENBUF_OK())
1858 goto error;
1859 NUL_TERM_TOKENBUF();
1860 tp->t_reflags = flags;
1861 tt = TOK_REGEXP;
1862 break;
1863 }
1864
1865 tp->t_op = JSOP_DIV;
1866 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1867 break;
1868
1869 case '%':
1870 tp->t_op = JSOP_MOD;
1871 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1872 break;
1873
1874 case '~':
1875 tp->t_op = JSOP_BITNOT;
1876 tt = TOK_UNARYOP;
1877 break;
1878
1879 case '+':
1880 if (MatchChar(ts, '=')) {
1881 tp->t_op = JSOP_ADD;
1882 tt = TOK_ASSIGN;
1883 } else if (MatchChar(ts, c)) {
1884 tt = TOK_INC;
1885 } else {
1886 tp->t_op = JSOP_POS;
1887 tt = TOK_PLUS;
1888 }
1889 break;
1890
1891 case '-':
1892 if (MatchChar(ts, '=')) {
1893 tp->t_op = JSOP_SUB;
1894 tt = TOK_ASSIGN;
1895 } else if (MatchChar(ts, c)) {
1896 if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
1897 ts->flags &= ~TSF_IN_HTML_COMMENT;
1898 goto skipline;
1899 }
1900 tt = TOK_DEC;
1901 } else {
1902 tp->t_op = JSOP_NEG;
1903 tt = TOK_MINUS;
1904 }
1905 break;
1906
1907 #if JS_HAS_SHARP_VARS
1908 case '#':
1909 {
1910 uint32 n;
1911
1912 c = GetChar(ts);
1913 if (!JS7_ISDEC(c)) {
1914 UngetChar(ts, c);
1915 goto badchar;
1916 }
1917 n = (uint32)JS7_UNDEC(c);
1918 for (;;) {
1919 c = GetChar(ts);
1920 if (!JS7_ISDEC(c))
1921 break;
1922 n = 10 * n + JS7_UNDEC(c);
1923 if (n >= UINT16_LIMIT) {
1924 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1925 JSMSG_SHARPVAR_TOO_BIG);
1926 goto error;
1927 }
1928 }
1929 tp->t_dval = (jsdouble) n;
1930 if (JS_HAS_STRICT_OPTION(cx) &&
1931 (c == '=' || c == '#')) {
1932 char buf[20];
1933 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1934 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1935 JSREPORT_WARNING |
1936 JSREPORT_STRICT,
1937 JSMSG_DEPRECATED_USAGE,
1938 buf)) {
1939 goto error;
1940 }
1941 }
1942 if (c == '=')
1943 tt = TOK_DEFSHARP;
1944 else if (c == '#')
1945 tt = TOK_USESHARP;
1946 else
1947 goto badchar;
1948 break;
1949 }
1950 #endif /* JS_HAS_SHARP_VARS */
1951
1952 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1953 badchar:
1954 #endif
1955
1956 default:
1957 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1958 JSMSG_ILLEGAL_CHARACTER);
1959 goto error;
1960 }
1961
1962 out:
1963 JS_ASSERT(tt != TOK_EOL);
1964 ts->flags |= TSF_DIRTYLINE;
1965
1966 eol_out:
1967 if (!STRING_BUFFER_OK(&ts->tokenbuf))
1968 tt = TOK_ERROR;
1969 JS_ASSERT(tt < TOK_LIMIT);
1970 tp->pos.end.index = ts->linepos +
1971 PTRDIFF(ts->linebuf.ptr, ts->linebuf.base, jschar) -
1972 ts->ungetpos;
1973 tp->type = tt;
1974 return tt;
1975
1976 error:
1977 tt = TOK_ERROR;
1978 ts->flags |= TSF_ERROR;
1979 goto out;
1980
1981 #undef INIT_TOKENBUF
1982 #undef TOKENBUF_LENGTH
1983 #undef TOKENBUF_OK
1984 #undef TOKENBUF_TO_ATOM
1985 #undef ADD_TO_TOKENBUF
1986 #undef TOKENBUF_BASE
1987 #undef TOKENBUF_CHAR
1988 #undef TRIM_TOKENBUF
1989 #undef NUL_TERM_TOKENBUF
1990 }
1991
1992 void
1993 js_UngetToken(JSTokenStream *ts)
1994 {
1995 JS_ASSERT(ts->lookahead < NTOKENS_MASK);
1996 ts->lookahead++;
1997 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1998 }
1999
2000 JSBool
2001 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
2002 {
2003 if (js_GetToken(cx, ts) == tt)
2004 return JS_TRUE;
2005 js_UngetToken(ts);
2006 return JS_FALSE;
2007 }

  ViewVC Help
Powered by ViewVC 1.1.24