/[jscoverage]/trunk/js/jsscan.cpp
ViewVC logotype

Contents of /trunk/js/jsscan.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 460 - (show annotations)
Sat Sep 26 23:15:22 2009 UTC (12 years, 9 months ago) by siliconforks
File size: 63942 byte(s)
Upgrade to SpiderMonkey from Firefox 3.5.3.

1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS lexical scanner.
43 */
44 #include "jsstddef.h"
45 #include <stdio.h> /* first to avoid trouble on some systems */
46 #include <errno.h>
47 #include <limits.h>
48 #include <math.h>
49 #ifdef HAVE_MEMORY_H
50 #include <memory.h>
51 #endif
52 #include <stdarg.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include "jstypes.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsbit.h"
58 #include "jsutil.h" /* Added by JSIFY */
59 #include "jsdtoa.h"
60 #include "jsprf.h"
61 #include "jsapi.h"
62 #include "jsatom.h"
63 #include "jscntxt.h"
64 #include "jsversion.h"
65 #include "jsemit.h"
66 #include "jsexn.h"
67 #include "jsnum.h"
68 #include "jsopcode.h"
69 #include "jsparse.h"
70 #include "jsregexp.h"
71 #include "jsscan.h"
72 #include "jsscript.h"
73 #include "jsstaticcheck.h"
74
75 #if JS_HAS_XML_SUPPORT
76 #include "jsxml.h"
77 #endif
78
/*
 * Expand every JS_KEYWORD row of jskeyword.tbl into a js_<keyword>_str[]
 * character array holding the keyword's spelling.
 */
79 #define JS_KEYWORD(keyword, type, op, version) \
80 const char js_##keyword##_str[] = #keyword;
81 #include "jskeyword.tbl"
82 #undef JS_KEYWORD
83
/* One entry of the keyword table; filled from a JS_KEYWORD row below. */
84 struct keyword {
85 const char *chars; /* C string with keyword text */
86 JSTokenType tokentype; /* JSTokenType */
87 JSOp op; /* JSOp */
88 JSVersion version; /* JSVersion */
89 };
90
/*
 * The keyword table: jskeyword.tbl is expanded a second time, now producing
 * one struct keyword initializer per row, in the table's row order.
 */
91 static const struct keyword keyword_defs[] = {
92 #define JS_KEYWORD(keyword, type, op, version) \
93 {js_##keyword##_str, type, op, version},
94 #include "jskeyword.tbl"
95 #undef JS_KEYWORD
96 };
97
/* Number of entries in keyword_defs. */
98 #define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
99
100 static const struct keyword *
101 FindKeyword(const jschar *s, size_t length)
102 {
103 register size_t i;
104 const struct keyword *kw;
105 const char *chars;
106
107 JS_ASSERT(length != 0);
108
109 #define JSKW_LENGTH() length
110 #define JSKW_AT(column) s[column]
111 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
112 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
113 #define JSKW_NO_MATCH() goto no_match;
114 #include "jsautokw.h"
115 #undef JSKW_NO_MATCH
116 #undef JSKW_TEST_GUESS
117 #undef JSKW_GOT_MATCH
118 #undef JSKW_AT
119 #undef JSKW_LENGTH
120
121 got_match:
122 return &keyword_defs[i];
123
124 test_guess:
125 kw = &keyword_defs[i];
126 chars = kw->chars;
127 do {
128 if (*s++ != (unsigned char)(*chars++))
129 goto no_match;
130 } while (--length != 0);
131 return kw;
132
133 no_match:
134 return NULL;
135 }
136
137 JSTokenType
138 js_CheckKeyword(const jschar *str, size_t length)
139 {
140 const struct keyword *kw;
141
142 JS_ASSERT(length != 0);
143 kw = FindKeyword(str, length);
144 return kw ? kw->tokentype : TOK_EOF;
145 }
146
147 JS_FRIEND_API(void)
148 js_MapKeywords(void (*mapfun)(const char *))
149 {
150 size_t i;
151
152 for (i = 0; i != KEYWORD_COUNT; ++i)
153 mapfun(keyword_defs[i].chars);
154 }
155
156 JSBool
157 js_IsIdentifier(JSString *str)
158 {
159 size_t length;
160 jschar c, *chars, *end;
161
162 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
163 if (length == 0)
164 return JS_FALSE;
165 c = *chars;
166 if (!JS_ISIDSTART(c))
167 return JS_FALSE;
168 end = chars + length;
169 while (++chars != end) {
170 c = *chars;
171 if (!JS_ISIDENT(c))
172 return JS_FALSE;
173 }
174 return JS_TRUE;
175 }
176
177 #define TBMIN 64
178
179 static JSBool
180 GrowTokenBuf(JSStringBuffer *sb, size_t newlength)
181 {
182 JSContext *cx;
183 jschar *base;
184 ptrdiff_t offset, length;
185 size_t tbsize;
186 JSArenaPool *pool;
187
188 cx = (JSContext*) sb->data;
189 base = sb->base;
190 offset = PTRDIFF(sb->ptr, base, jschar);
191 pool = &cx->tempPool;
192 if (!base) {
193 tbsize = TBMIN * sizeof(jschar);
194 length = TBMIN - 1;
195 JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize);
196 } else {
197 length = PTRDIFF(sb->limit, base, jschar);
198 if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) {
199 base = NULL;
200 } else {
201 tbsize = (length + 1) * sizeof(jschar);
202 length += length + 1;
203 JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize);
204 }
205 }
206 if (!base) {
207 js_ReportOutOfScriptQuota(cx);
208 sb->base = STRING_BUFFER_ERROR_BASE;
209 return JS_FALSE;
210 }
211 sb->base = base;
212 sb->limit = base + length;
213 sb->ptr = base + offset;
214 return JS_TRUE;
215 }
216
217 JSBool
218 js_InitTokenStream(JSContext *cx, JSTokenStream *ts,
219 const jschar *base, size_t length,
220 FILE *fp, const char *filename, uintN lineno)
221 {
222 jschar *buf;
223 size_t nb;
224
225 JS_ASSERT_IF(fp, !base);
226 JS_ASSERT_IF(!base, length == 0);
227 nb = fp
228 ? 2 * JS_LINE_LIMIT * sizeof(jschar)
229 : JS_LINE_LIMIT * sizeof(jschar);
230 JS_ARENA_ALLOCATE_CAST(buf, jschar *, &cx->tempPool, nb);
231 if (!buf) {
232 js_ReportOutOfScriptQuota(cx);
233 return JS_FALSE;
234 }
235 memset(buf, 0, nb);
236 memset(ts, 0, sizeof(*ts));
237 ts->filename = filename;
238 ts->lineno = lineno;
239 ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = buf;
240 if (fp) {
241 ts->file = fp;
242 ts->userbuf.base = buf + JS_LINE_LIMIT;
243 ts->userbuf.ptr = ts->userbuf.limit = ts->userbuf.base + JS_LINE_LIMIT;
244 } else {
245 ts->userbuf.base = (jschar *)base;
246 ts->userbuf.limit = (jschar *)base + length;
247 ts->userbuf.ptr = (jschar *)base;
248 }
249 ts->tokenbuf.grow = GrowTokenBuf;
250 ts->tokenbuf.data = cx;
251 ts->listener = cx->debugHooks->sourceHandler;
252 ts->listenerData = cx->debugHooks->sourceHandlerData;
253 return JS_TRUE;
254 }
255
256 void
257 js_CloseTokenStream(JSContext *cx, JSTokenStream *ts)
258 {
259 if (ts->flags & TSF_OWNFILENAME)
260 JS_free(cx, (void *) ts->filename);
261 }
262
263 JS_FRIEND_API(int)
264 js_fgets(char *buf, int size, FILE *file)
265 {
266 int n, i, c;
267 JSBool crflag;
268
269 n = size - 1;
270 if (n < 0)
271 return -1;
272
273 crflag = JS_FALSE;
274 for (i = 0; i < n && (c = getc(file)) != EOF; i++) {
275 buf[i] = c;
276 if (c == '\n') { /* any \n ends a line */
277 i++; /* keep the \n; we know there is room for \0 */
278 break;
279 }
280 if (crflag) { /* \r not followed by \n ends line at the \r */
281 ungetc(c, file);
282 break; /* and overwrite c in buf with \0 */
283 }
284 crflag = (c == '\r');
285 }
286
287 buf[i] = '\0';
288 return i;
289 }
290
/*
 * GetChar: return the next character of ts, or EOF once input is exhausted
 * (TSF_EOF is then set in ts->flags).
 *
 * Characters pushed back into ts->ungetbuf are returned first.  Otherwise the
 * character is taken from ts->linebuf, which is refilled one line segment at
 * a time from ts->userbuf; when ts->file is set, userbuf itself is refilled
 * from the file with js_fgets.  Segments ending in \r, \r\n, LINE_SEPARATOR
 * or PARA_SEPARATOR are stored in linebuf with a '\n' terminator.
 * ts->lineno is incremented each time '\n' is returned.
 */
291 static int32
292 GetChar(JSTokenStream *ts)
293 {
294 int32 c;
295 ptrdiff_t i, j, len, olen;
296 JSBool crflag;
297 char cbuf[JS_LINE_LIMIT];
298 jschar *ubuf, *nl;
299
/* Pushed-back characters take priority over buffered input. */
300 if (ts->ungetpos != 0) {
301 c = ts->ungetbuf[--ts->ungetpos];
302 } else {
/* linebuf is used up: build the next line segment from userbuf. */
303 if (ts->linebuf.ptr == ts->linebuf.limit) {
304 len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
/* userbuf is used up too: either report EOF or refill it from the file. */
305 if (len <= 0) {
306 if (!ts->file) {
307 ts->flags |= TSF_EOF;
308 return EOF;
309 }
310
311 /* Fill ts->userbuf so that \r and \r\n convert to \n. */
312 crflag = (ts->flags & TSF_CRFLAG) != 0;
/* Read one char fewer when crflag is set; a '\n' may be inserted below. */
313 len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
314 if (len <= 0) {
315 ts->flags |= TSF_EOF;
316 return EOF;
317 }
/* NOTE(review): this olen value is overwritten below before it is read. */
318 olen = len;
319 ubuf = ts->userbuf.base;
320 i = 0;
/* A '\r' ended the previous segment: emit its '\n' unless one follows. */
321 if (crflag) {
322 ts->flags &= ~TSF_CRFLAG;
323 if (cbuf[0] != '\n') {
324 ubuf[i++] = '\n';
325 len++;
326 ts->linepos--;
327 }
328 }
/* Widen the bytes read from the file to jschars, without sign extension. */
329 for (j = 0; i < len; i++, j++)
330 ubuf[i] = (jschar) (unsigned char) cbuf[j];
331 ts->userbuf.limit = ubuf + len;
332 ts->userbuf.ptr = ubuf;
333 }
/* Hand the unread remainder of userbuf (len jschars) to the source listener. */
334 if (ts->listener) {
335 ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
336 &ts->listenerTSData, ts->listenerData);
337 }
338
/* saveEOL is set below when a line had to be split at JS_LINE_LIMIT - 1. */
339 nl = ts->saveEOL;
340 if (!nl) {
341 /*
342 * Any one of \n, \r, or \r\n ends a line (the longest
343 * match wins). Also allow the Unicode line and paragraph
344 * separators.
345 */
346 for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
347 /*
348 * Try to prevent value-testing on most characters by
349 * filtering out characters that aren't 000x or 202x.
350 */
351 if ((*nl & 0xDFD0) == 0) {
352 if (*nl == '\n')
353 break;
354 if (*nl == '\r') {
355 if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
356 nl++;
357 break;
358 }
359 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
360 break;
361 }
362 }
363 }
364
365 /*
366 * If there was a line terminator, copy thru it into linebuf.
367 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
368 */
369 if (nl < ts->userbuf.limit)
370 len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
371 if (len >= JS_LINE_LIMIT) {
372 len = JS_LINE_LIMIT - 1;
373 ts->saveEOL = nl;
374 } else {
375 ts->saveEOL = NULL;
376 }
377 js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
378 ts->userbuf.ptr += len;
/* olen keeps the unadjusted segment length; len may shrink below. */
379 olen = len;
380
381 /*
382 * Make sure linebuf contains \n for EOL (don't do this in
383 * userbuf because the user's string might be readonly).
384 */
385 if (nl < ts->userbuf.limit) {
386 if (*nl == '\r') {
387 if (ts->linebuf.base[len-1] == '\r') {
388 /*
389 * Does the line segment end in \r? We must check
390 * for a \n at the front of the next segment before
391 * storing a \n into linebuf. This case matters
392 * only when we're reading from a file.
393 */
394 if (nl + 1 == ts->userbuf.limit && ts->file) {
395 len--;
396 ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
397 if (len == 0) {
398 /*
399 * This can happen when a segment ends in
400 * \r\r. Start over. ptr == limit in this
401 * case, so we'll fall into buffer-filling
402 * code.
403 */
404 return GetChar(ts);
405 }
406 } else {
407 ts->linebuf.base[len-1] = '\n';
408 }
409 }
410 } else if (*nl == '\n') {
/* \r\n pair copied whole: drop one char and leave a single '\n'. */
411 if (nl > ts->userbuf.base &&
412 nl[-1] == '\r' &&
413 ts->linebuf.base[len-2] == '\r') {
414 len--;
415 JS_ASSERT(ts->linebuf.base[len] == '\n');
416 ts->linebuf.base[len-1] = '\n';
417 }
418 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
419 ts->linebuf.base[len-1] = '\n';
420 }
421 }
422
423 /* Reset linebuf based on adjusted segment length. */
424 ts->linebuf.limit = ts->linebuf.base + len;
425 ts->linebuf.ptr = ts->linebuf.base;
426
427 /* Update position of linebuf within physical userbuf line. */
428 if (!(ts->flags & TSF_NLFLAG))
429 ts->linepos += ts->linelen;
430 else
431 ts->linepos = 0;
432 if (ts->linebuf.limit[-1] == '\n')
433 ts->flags |= TSF_NLFLAG;
434 else
435 ts->flags &= ~TSF_NLFLAG;
436
437 /* Update linelen from original segment length. */
438 ts->linelen = olen;
439 }
440 c = *ts->linebuf.ptr++;
441 }
/* Applies to pushed-back newlines as well; UngetChar decrements lineno. */
442 if (c == '\n')
443 ts->lineno++;
444 return c;
445 }
446
447 static void
448 UngetChar(JSTokenStream *ts, int32 c)
449 {
450 if (c == EOF)
451 return;
452 JS_ASSERT(ts->ungetpos < JS_ARRAY_LENGTH(ts->ungetbuf));
453 if (c == '\n')
454 ts->lineno--;
455 ts->ungetbuf[ts->ungetpos++] = (jschar)c;
456 }
457
458 static int32
459 PeekChar(JSTokenStream *ts)
460 {
461 int32 c;
462
463 c = GetChar(ts);
464 UngetChar(ts, c);
465 return c;
466 }
467
468 /*
469 * Peek n chars ahead into ts. Return true if n chars were read, false if
470 * there weren't enough characters in the input stream. This function cannot
471 * be used to peek into or past a newline.
472 */
473 static JSBool
474 PeekChars(JSTokenStream *ts, intN n, jschar *cp)
475 {
476 intN i, j;
477 int32 c;
478
479 for (i = 0; i < n; i++) {
480 c = GetChar(ts);
481 if (c == EOF)
482 break;
483 if (c == '\n') {
484 UngetChar(ts, c);
485 break;
486 }
487 cp[i] = (jschar)c;
488 }
489 for (j = i - 1; j >= 0; j--)
490 UngetChar(ts, cp[j]);
491 return i == n;
492 }
493
494 static void
495 SkipChars(JSTokenStream *ts, intN n)
496 {
497 while (--n >= 0)
498 GetChar(ts);
499 }
500
501 static JSBool
502 MatchChar(JSTokenStream *ts, int32 expect)
503 {
504 int32 c;
505
506 c = GetChar(ts);
507 if (c == expect)
508 return JS_TRUE;
509 UngetChar(ts, c);
510 return JS_FALSE;
511 }
512
/*
 * Report a compile-time error or warning against the token stream ts.
 *
 * cx          - context whose error reporter and debug hooks are consulted.
 * ts          - token stream supplying filename, line number and line text.
 * pn          - optional parse node; when non-null its start line is used for
 *               the report and its position for the token offset.
 * flags       - JSREPORT_* flags for the report.
 * errorNumber - message number, expanded together with the trailing
 *               variable arguments by js_ExpandErrorArguments.
 *
 * Returns JS_TRUE at once, without reporting, for a JSREPORT_STRICT report
 * when the strict option is off.  Otherwise returns the warning indication
 * produced by js_ExpandErrorArguments, or JS_FALSE when argument expansion or
 * a memory allocation fails.  Unless flags describe a warning, TSF_ERROR is
 * set on ts before returning.
 */
513 JSBool
514 js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
515 uintN flags, uintN errorNumber, ...)
516 {
517 JSErrorReport report;
518 char *message;
519 size_t linelength;
520 jschar *linechars;
521 char *linebytes;
522 va_list ap;
523 JSBool warning, ok;
524 JSTokenPos *tp;
525 uintN index, i;
526 JSErrorReporter onError;
527
528 JS_ASSERT(ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
529
/* Strict-only reports are dropped when the strict option is off. */
530 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
531 return JS_TRUE;
532
533 memset(&report, 0, sizeof report);
534 report.flags = flags;
535 report.errorNumber = errorNumber;
/* NULL-initialized so the cleanup code at "out" can test each pointer. */
536 message = NULL;
537 linechars = NULL;
538 linebytes = NULL;
539
540 MUST_FLOW_THROUGH("out");
541 va_start(ap, errorNumber);
542 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
543 errorNumber, &message, &report, &warning,
544 !(flags & JSREPORT_UC), ap);
545 va_end(ap);
546 if (!ok) {
547 warning = JS_FALSE;
548 goto out;
549 }
550
551 report.filename = ts->filename;
552
553 if (pn) {
554 report.lineno = pn->pn_pos.begin.lineno;
/*
 * The node starts on a different line than the stream's current one: skip
 * the source-line capture below (presumably because linebuf only holds the
 * current line -- confirm).
 */
555 if (report.lineno != ts->lineno)
556 goto report;
557 tp = &pn->pn_pos;
558 } else {
559 /* Point to the current token, not the next one to get. */
560 tp = &ts->tokens[ts->cursor].pos;
561 }
562 report.lineno = ts->lineno;
/* Copy the current line, NUL-terminated, and make a deflated char copy. */
563 linelength = PTRDIFF(ts->linebuf.limit, ts->linebuf.base, jschar);
564 linechars = (jschar *)JS_malloc(cx, (linelength + 1) * sizeof(jschar));
565 if (!linechars) {
566 warning = JS_FALSE;
567 goto out;
568 }
569 memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
570 linechars[linelength] = 0;
571 linebytes = js_DeflateString(cx, linechars, linelength);
572 if (!linebytes) {
573 warning = JS_FALSE;
574 goto out;
575 }
576 report.linebuf = linebytes;
577
578 /*
579 * FIXME: What should instead happen here is that we should
580 * find error-tokens in userbuf, if !ts->file. That will
581 * allow us to deliver a more helpful error message, which
582 * includes all or part of the bad string or bad token. The
583 * code here yields something that looks truncated.
584 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
585 */
/* Offset of the token within linebuf; stays 0 for multi-line tokens. */
586 index = 0;
587 if (tp->begin.lineno == tp->end.lineno) {
/* Token begins before the buffered segment: leave token pointers unset. */
588 if (tp->begin.index < ts->linepos)
589 goto report;
590
591 index = tp->begin.index - ts->linepos;
592 }
593
594 report.tokenptr = report.linebuf + index;
595 report.uclinebuf = linechars;
596 report.uctokenptr = report.uclinebuf + index;
597
598 /*
599 * If there's a runtime exception type associated with this error
600 * number, set that as the pending exception. For errors occurring at
601 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
602 *
603 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
604 * flag will be set in report.flags. Proper behavior for an error
605 * reporter is to ignore a report with this flag for all but top-level
606 * compilation errors. The exception will remain pending, and so long
607 * as the non-top-level "load", "eval", or "compile" native function
608 * returns false, the top-level reporter will eventually receive the
609 * uncaught exception report.
610 *
611 * XXX it'd probably be best if there was only one call to this
612 * function, but there seem to be two error reporter call points.
613 */
614 report:
615 onError = cx->errorReporter;
616
617 /*
618 * Try to raise an exception only if there isn't one already set --
619 * otherwise the exception will describe the last compile-time error,
620 * which is likely spurious.
621 */
622 if (!(ts->flags & TSF_ERROR)) {
623 if (js_ErrorToException(cx, message, &report))
624 onError = NULL;
625 }
626
627 /*
628 * Suppress any compile-time errors that don't occur at the top level.
629 * This may still fail, as interplevel may be zero in contexts where we
630 * don't really want to call the error reporter, as when js is called
631 * by other code which could catch the error.
632 */
633 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
634 onError = NULL;
635
636 if (onError) {
637 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
638
639 /*
640 * If debugErrorHook is present then we give it a chance to veto
641 * sending the error on to the regular error reporter.
642 */
643 if (hook && !hook(cx, message, &report,
644 cx->debugHooks->debugErrorHookData)) {
645 onError = NULL;
646 }
647 }
648 if (onError)
649 (*onError)(cx, message, &report);
650
/* Common exit: release everything allocated for this report. */
651 out:
652 if (linebytes)
653 JS_free(cx, linebytes);
654 if (linechars)
655 JS_free(cx, linechars);
656 if (message)
657 JS_free(cx, message);
658 if (report.ucmessage)
659 JS_free(cx, (void *)report.ucmessage);
660
661 if (report.messageArgs) {
/* Individual arguments are freed here only when JSREPORT_UC is not set. */
662 if (!(flags & JSREPORT_UC)) {
663 i = 0;
664 while (report.messageArgs[i])
665 JS_free(cx, (void *)report.messageArgs[i++]);
666 }
667 JS_free(cx, (void *)report.messageArgs);
668 }
669
670 if (!JSREPORT_IS_WARNING(flags)) {
671 /* Set the error flag to suppress spurious reports. */
672 ts->flags |= TSF_ERROR;
673 }
674
675 return warning;
676 }
677
678 static JSBool
679 GrowStringBuffer(JSStringBuffer *sb, size_t amount)
680 {
681 ptrdiff_t offset = sb->ptr - sb->base;
682 JS_ASSERT(offset >= 0);
683
684 /*
685 * This addition needs an overflow check, but we can defer bounding against
686 * ~size_t(0) / sizeof(jschar) till later to consolidate that test.
687 */
688 size_t newlength = offset + amount + 1;
689 if (size_t(offset) < newlength) {
690 /* Grow by powers of two until 16MB, then grow by that chunk size. */
691 const size_t CHUNK_SIZE_MASK = JS_BITMASK(24);
692
693 if (newlength <= CHUNK_SIZE_MASK)
694 newlength = JS_BIT(JS_CeilingLog2(newlength));
695 else if (newlength & CHUNK_SIZE_MASK)
696 newlength = (newlength | CHUNK_SIZE_MASK) + 1;
697
698 /* Now do the full overflow check. */
699 if (size_t(offset) < newlength && newlength < ~size_t(0) / sizeof(jschar)) {
700 jschar *bp = (jschar *) realloc(sb->base, newlength * sizeof(jschar));
701 if (bp) {
702 sb->base = bp;
703 sb->ptr = bp + offset;
704 sb->limit = bp + newlength - 1;
705 return true;
706 }
707 }
708 }
709
710 /* Either newlength overflow or realloc failure: poison the well. */
711 free(sb->base);
712 sb->base = STRING_BUFFER_ERROR_BASE;
713 return false;
714 }
715
716 static void
717 FreeStringBuffer(JSStringBuffer *sb)
718 {
719 JS_ASSERT(STRING_BUFFER_OK(sb));
720 if (sb->base)
721 free(sb->base);
722 }
723
724 void
725 js_InitStringBuffer(JSStringBuffer *sb)
726 {
727 sb->base = sb->limit = sb->ptr = NULL;
728 sb->data = NULL;
729 sb->grow = GrowStringBuffer;
730 sb->free = FreeStringBuffer;
731 }
732
733 void
734 js_FinishStringBuffer(JSStringBuffer *sb)
735 {
736 sb->free(sb);
737 }
738
739 void
740 js_AppendChar(JSStringBuffer *sb, jschar c)
741 {
742 jschar *bp;
743
744 if (!STRING_BUFFER_OK(sb))
745 return;
746 if (!ENSURE_STRING_BUFFER(sb, 1))
747 return;
748 bp = sb->ptr;
749 *bp++ = c;
750 *bp = 0;
751 sb->ptr = bp;
752 }
753
754 void
755 js_AppendUCString(JSStringBuffer *sb, const jschar *buf, uintN len)
756 {
757 jschar *bp;
758
759 if (!STRING_BUFFER_OK(sb))
760 return;
761 if (len == 0 || !ENSURE_STRING_BUFFER(sb, len))
762 return;
763 bp = sb->ptr;
764 js_strncpy(bp, buf, len);
765 bp += len;
766 *bp = 0;
767 sb->ptr = bp;
768 }
769
770 #if JS_HAS_XML_SUPPORT
771
772 void
773 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count)
774 {
775 jschar *bp;
776
777 if (!STRING_BUFFER_OK(sb) || count == 0)
778 return;
779 if (!ENSURE_STRING_BUFFER(sb, count))
780 return;
781 for (bp = sb->ptr; count; --count)
782 *bp++ = c;
783 *bp = 0;
784 sb->ptr = bp;
785 }
786
787 void
788 js_AppendCString(JSStringBuffer *sb, const char *asciiz)
789 {
790 size_t length;
791 jschar *bp;
792
793 if (!STRING_BUFFER_OK(sb) || *asciiz == '\0')
794 return;
795 length = strlen(asciiz);
796 if (!ENSURE_STRING_BUFFER(sb, length))
797 return;
798 for (bp = sb->ptr; length; --length)
799 *bp++ = (jschar) *asciiz++;
800 *bp = 0;
801 sb->ptr = bp;
802 }
803
804 void
805 js_AppendJSString(JSStringBuffer *sb, JSString *str)
806 {
807 js_AppendUCString(sb, JSSTRING_CHARS(str), JSSTRING_LENGTH(str));
808 }
809
810 static JSBool
811 GetXMLEntity(JSContext *cx, JSTokenStream *ts)
812 {
813 ptrdiff_t offset, length, i;
814 int32 c, d;
815 JSBool ispair;
816 jschar *bp, digit;
817 char *bytes;
818 JSErrNum msg;
819
820 /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
821 offset = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar);
822 js_FastAppendChar(&ts->tokenbuf, '&');
823 if (!STRING_BUFFER_OK(&ts->tokenbuf))
824 return JS_FALSE;
825 while ((c = GetChar(ts)) != ';') {
826 if (c == EOF || c == '\n') {
827 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
828 JSMSG_END_OF_XML_ENTITY);
829 return JS_FALSE;
830 }
831 js_FastAppendChar(&ts->tokenbuf, (jschar) c);
832 if (!STRING_BUFFER_OK(&ts->tokenbuf))
833 return JS_FALSE;
834 }
835
836 /* Let length be the number of jschars after the '&', including the ';'. */
837 length = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) - offset;
838 bp = ts->tokenbuf.base + offset;
839 c = d = 0;
840 ispair = JS_FALSE;
841 if (length > 2 && bp[1] == '#') {
842 /* Match a well-formed XML Character Reference. */
843 i = 2;
844 if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
845 if (length > 9) /* at most 6 hex digits allowed */
846 goto badncr;
847 while (++i < length) {
848 digit = bp[i];
849 if (!JS7_ISHEX(digit))
850 goto badncr;
851 c = (c << 4) + JS7_UNHEX(digit);
852 }
853 } else {
854 while (i < length) {
855 digit = bp[i++];
856 if (!JS7_ISDEC(digit))
857 goto badncr;
858 c = (c * 10) + JS7_UNDEC(digit);
859 if (c < 0)
860 goto badncr;
861 }
862 }
863
864 if (0x10000 <= c && c <= 0x10FFFF) {
865 /* Form a surrogate pair (c, d) -- c is the high surrogate. */
866 d = 0xDC00 + (c & 0x3FF);
867 c = 0xD7C0 + (c >> 10);
868 ispair = JS_TRUE;
869 } else {
870 /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
871 if (c != 0x9 && c != 0xA && c != 0xD &&
872 !(0x20 <= c && c <= 0xD7FF) &&
873 !(0xE000 <= c && c <= 0xFFFD)) {
874 goto badncr;
875 }
876 }
877 } else {
878 /* Try to match one of the five XML 1.0 predefined entities. */
879 switch (length) {
880 case 3:
881 if (bp[2] == 't') {
882 if (bp[1] == 'l')
883 c = '<';
884 else if (bp[1] == 'g')
885 c = '>';
886 }
887 break;
888 case 4:
889 if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
890 c = '&';
891 break;
892 case 5:
893 if (bp[3] == 'o') {
894 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
895 c = '\'';
896 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
897 c = '"';
898 }
899 break;
900 }
901 if (c == 0) {
902 msg = JSMSG_UNKNOWN_XML_ENTITY;
903 goto bad;
904 }
905 }
906
907 /* If we matched, retract ts->tokenbuf and store the entity's value. */
908 *bp++ = (jschar) c;
909 if (ispair)
910 *bp++ = (jschar) d;
911 *bp = 0;
912 ts->tokenbuf.ptr = bp;
913 return JS_TRUE;
914
915 badncr:
916 msg = JSMSG_BAD_XML_NCR;
917 bad:
918 /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
919 JS_ASSERT(STRING_BUFFER_OK(&ts->tokenbuf));
920 JS_ASSERT(PTRDIFF(ts->tokenbuf.ptr, bp, jschar) >= 1);
921 bytes = js_DeflateString(cx, bp + 1,
922 PTRDIFF(ts->tokenbuf.ptr, bp, jschar) - 1);
923 if (bytes) {
924 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
925 msg, bytes);
926 JS_free(cx, bytes);
927 }
928 return JS_FALSE;
929 }
930
931 #endif /* JS_HAS_XML_SUPPORT */
932
933 JSTokenType
934 js_PeekToken(JSContext *cx, JSTokenStream *ts)
935 {
936 JSTokenType tt;
937
938 if (ts->lookahead != 0) {
939 tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
940 } else {
941 tt = js_GetToken(cx, ts);
942 js_UngetToken(ts);
943 }
944 return tt;
945 }
946
947 JSTokenType
948 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
949 {
950 JSTokenType tt;
951
952 if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
953 return TOK_EOL;
954 ts->flags |= TSF_NEWLINES;
955 tt = js_PeekToken(cx, ts);
956 ts->flags &= ~TSF_NEWLINES;
957 return tt;
958 }
959
960 /*
961 * We have encountered a '\': check for a Unicode escape sequence after it,
962 * returning the character code value if we found a Unicode escape sequence.
963 * Otherwise, non-destructively return the original '\'.
964 */
965 static int32
966 GetUnicodeEscape(JSTokenStream *ts)
967 {
968 jschar cp[5];
969 int32 c;
970
971 if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
972 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
973 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
974 {
975 c = (((((JS7_UNHEX(cp[1]) << 4)
976 + JS7_UNHEX(cp[2])) << 4)
977 + JS7_UNHEX(cp[3])) << 4)
978 + JS7_UNHEX(cp[4]);
979 SkipChars(ts, 5);
980 return c;
981 }
982 return '\\';
983 }
984
985 static JSToken *
986 NewToken(JSTokenStream *ts, ptrdiff_t adjust)
987 {
988 JSToken *tp;
989
990 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
991 tp = &CURRENT_TOKEN(ts);
992 tp->ptr = ts->linebuf.ptr + adjust;
993 tp->pos.begin.index = ts->linepos +
994 PTRDIFF(tp->ptr, ts->linebuf.base, jschar) -
995 ts->ungetpos;
996 tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno;
997 return tp;
998 }
999
1000 static JS_ALWAYS_INLINE JSBool
1001 ScanAsSpace(jschar c)
1002 {
1003 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
1004 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
1005 return JS_TRUE;
1006 return JS_FALSE;
1007 }
1008
1009 JSTokenType
1010 js_GetToken(JSContext *cx, JSTokenStream *ts)
1011 {
1012 JSTokenType tt;
1013 int32 c, qc;
1014 JSToken *tp;
1015 JSAtom *atom;
1016 JSBool hadUnicodeEscape;
1017 const struct keyword *kw;
1018 #if JS_HAS_XML_SUPPORT
1019 JSBool inTarget;
1020 size_t targetLength;
1021 ptrdiff_t contentIndex;
1022 #endif
1023
1024 #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base)
1025 #define TOKENBUF_LENGTH() PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar)
1026 #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf)
1027 #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \
1028 ? js_AtomizeChars(cx, \
1029 TOKENBUF_BASE(), \
1030 TOKENBUF_LENGTH(), \
1031 0) \
1032 : NULL)
1033 #define ADD_TO_TOKENBUF(c) JS_BEGIN_MACRO \
1034 js_FastAppendChar(&ts->tokenbuf, jschar(c)); \
1035 if (!TOKENBUF_OK()) \
1036 goto error; \
1037 JS_END_MACRO
1038
1039 /* The following 4 macros should only be used when TOKENBUF_OK() is true. */
1040 #define TOKENBUF_BASE() (ts->tokenbuf.base)
1041 #define TOKENBUF_END() (ts->tokenbuf.ptr)
1042 #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i])
1043 #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
1044 #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
1045
1046 /* Check for a pushed-back token resulting from mismatching lookahead. */
1047 while (ts->lookahead != 0) {
1048 JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
1049 ts->lookahead--;
1050 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1051 tt = CURRENT_TOKEN(ts).type;
1052 if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
1053 return tt;
1054 }
1055
1056 /* If there was a fatal error, keep returning TOK_ERROR. */
1057 if (ts->flags & TSF_ERROR)
1058 return TOK_ERROR;
1059
1060 #if JS_HAS_XML_SUPPORT
1061 if (ts->flags & TSF_XMLTEXTMODE) {
1062 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
1063 tp = NewToken(ts, 0);
1064 INIT_TOKENBUF();
1065 qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
1066
1067 while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
1068 if (c == '&' && qc == '<') {
1069 if (!GetXMLEntity(cx, ts))
1070 goto error;
1071 tt = TOK_XMLTEXT;
1072 continue;
1073 }
1074
1075 if (!JS_ISXMLSPACE(c))
1076 tt = TOK_XMLTEXT;
1077 ADD_TO_TOKENBUF(c);
1078 }
1079 UngetChar(ts, c);
1080
1081 if (TOKENBUF_LENGTH() == 0) {
1082 atom = NULL;
1083 } else {
1084 atom = TOKENBUF_TO_ATOM();
1085 if (!atom)
1086 goto error;
1087 }
1088 tp->pos.end.lineno = (uint16)ts->lineno;
1089 tp->t_op = JSOP_STRING;
1090 tp->t_atom = atom;
1091 goto out;
1092 }
1093
1094 if (ts->flags & TSF_XMLTAGMODE) {
1095 tp = NewToken(ts, 0);
1096 c = GetChar(ts);
1097 if (JS_ISXMLSPACE(c)) {
1098 do {
1099 c = GetChar(ts);
1100 } while (JS_ISXMLSPACE(c));
1101 UngetChar(ts, c);
1102 tt = TOK_XMLSPACE;
1103 goto out;
1104 }
1105
1106 if (c == EOF) {
1107 tt = TOK_EOF;
1108 goto out;
1109 }
1110
1111 INIT_TOKENBUF();
1112 if (JS_ISXMLNSSTART(c)) {
1113 JSBool sawColon = JS_FALSE;
1114
1115 ADD_TO_TOKENBUF(c);
1116 while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
1117 if (c == ':') {
1118 int nextc;
1119
1120 if (sawColon ||
1121 (nextc = PeekChar(ts),
1122 ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
1123 !JS_ISXMLNAME(nextc))) {
1124 js_ReportCompileErrorNumber(cx, ts, NULL,
1125 JSREPORT_ERROR,
1126 JSMSG_BAD_XML_QNAME);
1127 goto error;
1128 }
1129 sawColon = JS_TRUE;
1130 }
1131
1132 ADD_TO_TOKENBUF(c);
1133 }
1134
1135 UngetChar(ts, c);
1136 atom = TOKENBUF_TO_ATOM();
1137 if (!atom)
1138 goto error;
1139 tp->t_op = JSOP_STRING;
1140 tp->t_atom = atom;
1141 tt = TOK_XMLNAME;
1142 goto out;
1143 }
1144
1145 switch (c) {
1146 case '{':
1147 if (ts->flags & TSF_XMLONLYMODE)
1148 goto bad_xml_char;
1149 tt = TOK_LC;
1150 goto out;
1151
1152 case '=':
1153 tt = TOK_ASSIGN;
1154 goto out;
1155
1156 case '"':
1157 case '\'':
1158 qc = c;
1159 while ((c = GetChar(ts)) != qc) {
1160 if (c == EOF) {
1161 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1162 JSMSG_UNTERMINATED_STRING);
1163 goto error;
1164 }
1165
1166 /*
1167 * XML attribute values are double-quoted when pretty-printed,
1168 * so escape " if it is expressed directly in a single-quoted
1169 * attribute value.
1170 */
1171 if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1172 JS_ASSERT(qc == '\'');
1173 js_AppendCString(&ts->tokenbuf, js_quot_entity_str);
1174 continue;
1175 }
1176
1177 if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1178 if (!GetXMLEntity(cx, ts))
1179 goto error;
1180 continue;
1181 }
1182
1183 ADD_TO_TOKENBUF(c);
1184 }
1185 atom = TOKENBUF_TO_ATOM();
1186 if (!atom)
1187 goto error;
1188 tp->pos.end.lineno = (uint16)ts->lineno;
1189 tp->t_op = JSOP_STRING;
1190 tp->t_atom = atom;
1191 tt = TOK_XMLATTR;
1192 goto out;
1193
1194 case '>':
1195 tt = TOK_XMLTAGC;
1196 goto out;
1197
1198 case '/':
1199 if (MatchChar(ts, '>')) {
1200 tt = TOK_XMLPTAGC;
1201 goto out;
1202 }
1203 /* FALL THROUGH */
1204
1205 bad_xml_char:
1206 default:
1207 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1208 JSMSG_BAD_XML_CHARACTER);
1209 goto error;
1210 }
1211 /* NOTREACHED */
1212 }
1213 #endif /* JS_HAS_XML_SUPPORT */
1214
1215 retry:
1216 do {
1217 c = GetChar(ts);
1218 if (c == '\n') {
1219 ts->flags &= ~TSF_DIRTYLINE;
1220 if (ts->flags & TSF_NEWLINES)
1221 break;
1222 }
1223 } while (ScanAsSpace((jschar)c));
1224
1225 tp = NewToken(ts, -1);
1226 if (c == EOF) {
1227 tt = TOK_EOF;
1228 goto out;
1229 }
1230
1231 hadUnicodeEscape = JS_FALSE;
1232 if (JS_ISIDSTART(c) ||
1233 (c == '\\' &&
1234 (qc = GetUnicodeEscape(ts),
1235 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1236 if (hadUnicodeEscape)
1237 c = qc;
1238 INIT_TOKENBUF();
1239 for (;;) {
1240 ADD_TO_TOKENBUF(c);
1241 c = GetChar(ts);
1242 if (c == '\\') {
1243 qc = GetUnicodeEscape(ts);
1244 if (!JS_ISIDENT(qc))
1245 break;
1246 c = qc;
1247 hadUnicodeEscape = JS_TRUE;
1248 } else {
1249 if (!JS_ISIDENT(c))
1250 break;
1251 }
1252 }
1253 UngetChar(ts, c);
1254
1255 /*
1256 * Check for keywords unless we saw Unicode escape or parser asks
1257 * to ignore keywords.
1258 */
1259 if (!hadUnicodeEscape &&
1260 !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1261 TOKENBUF_OK() &&
1262 (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) {
1263 if (kw->tokentype == TOK_RESERVED) {
1264 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1265 JSREPORT_WARNING |
1266 JSREPORT_STRICT,
1267 JSMSG_RESERVED_ID,
1268 kw->chars)) {
1269 goto error;
1270 }
1271 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1272 tt = kw->tokentype;
1273 tp->t_op = (JSOp) kw->op;
1274 goto out;
1275 }
1276 }
1277
1278 atom = TOKENBUF_TO_ATOM();
1279 if (!atom)
1280 goto error;
1281 tp->t_op = JSOP_NAME;
1282 tp->t_atom = atom;
1283 tt = TOK_NAME;
1284 goto out;
1285 }
1286
1287 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1288 jsint radix;
1289 const jschar *endptr;
1290 jsdouble dval;
1291
1292 radix = 10;
1293 INIT_TOKENBUF();
1294
1295 if (c == '0') {
1296 ADD_TO_TOKENBUF(c);
1297 c = GetChar(ts);
1298 if (JS_TOLOWER(c) == 'x') {
1299 ADD_TO_TOKENBUF(c);
1300 c = GetChar(ts);
1301 radix = 16;
1302 } else if (JS7_ISDEC(c)) {
1303 radix = 8;
1304 }
1305 }
1306
1307 while (JS7_ISHEX(c)) {
1308 if (radix < 16) {
1309 if (JS7_ISLET(c))
1310 break;
1311
1312 /*
1313 * We permit 08 and 09 as decimal numbers, which makes our
1314 * behaviour a superset of the ECMA numeric grammar. We might
1315 * not always be so permissive, so we warn about it.
1316 */
1317 if (radix == 8 && c >= '8') {
1318 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1319 JSREPORT_WARNING,
1320 JSMSG_BAD_OCTAL,
1321 c == '8' ? "08" : "09")) {
1322 goto error;
1323 }
1324 radix = 10;
1325 }
1326 }
1327 ADD_TO_TOKENBUF(c);
1328 c = GetChar(ts);
1329 }
1330
1331 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1332 if (c == '.') {
1333 do {
1334 ADD_TO_TOKENBUF(c);
1335 c = GetChar(ts);
1336 } while (JS7_ISDEC(c));
1337 }
1338 if (JS_TOLOWER(c) == 'e') {
1339 ADD_TO_TOKENBUF(c);
1340 c = GetChar(ts);
1341 if (c == '+' || c == '-') {
1342 ADD_TO_TOKENBUF(c);
1343 c = GetChar(ts);
1344 }
1345 if (!JS7_ISDEC(c)) {
1346 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1347 JSMSG_MISSING_EXPONENT);
1348 goto error;
1349 }
1350 do {
1351 ADD_TO_TOKENBUF(c);
1352 c = GetChar(ts);
1353 } while (JS7_ISDEC(c));
1354 }
1355 }
1356
1357 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1358 UngetChar(ts, c);
1359 ADD_TO_TOKENBUF(0);
1360
1361 if (!TOKENBUF_OK())
1362 goto error;
1363 if (radix == 10) {
1364 if (!js_strtod(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1365 &endptr, &dval)) {
1366 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1367 JSMSG_OUT_OF_MEMORY);
1368 goto error;
1369 }
1370 } else {
1371 if (!js_strtointeger(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1372 &endptr, radix, &dval)) {
1373 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1374 JSMSG_OUT_OF_MEMORY);
1375 goto error;
1376 }
1377 }
1378 tp->t_dval = dval;
1379 tt = TOK_NUMBER;
1380 goto out;
1381 }
1382
1383 if (c == '"' || c == '\'') {
1384 qc = c;
1385 INIT_TOKENBUF();
1386 while ((c = GetChar(ts)) != qc) {
1387 if (c == '\n' || c == EOF) {
1388 UngetChar(ts, c);
1389 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1390 JSMSG_UNTERMINATED_STRING);
1391 goto error;
1392 }
1393 if (c == '\\') {
1394 switch (c = GetChar(ts)) {
1395 case 'b': c = '\b'; break;
1396 case 'f': c = '\f'; break;
1397 case 'n': c = '\n'; break;
1398 case 'r': c = '\r'; break;
1399 case 't': c = '\t'; break;
1400 case 'v': c = '\v'; break;
1401
1402 default:
1403 if ('0' <= c && c < '8') {
1404 int32 val = JS7_UNDEC(c);
1405
1406 c = PeekChar(ts);
1407 if ('0' <= c && c < '8') {
1408 val = 8 * val + JS7_UNDEC(c);
1409 GetChar(ts);
1410 c = PeekChar(ts);
1411 if ('0' <= c && c < '8') {
1412 int32 save = val;
1413 val = 8 * val + JS7_UNDEC(c);
1414 if (val <= 0377)
1415 GetChar(ts);
1416 else
1417 val = save;
1418 }
1419 }
1420
1421 c = (jschar)val;
1422 } else if (c == 'u') {
1423 jschar cp[4];
1424 if (PeekChars(ts, 4, cp) &&
1425 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1426 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1427 c = (((((JS7_UNHEX(cp[0]) << 4)
1428 + JS7_UNHEX(cp[1])) << 4)
1429 + JS7_UNHEX(cp[2])) << 4)
1430 + JS7_UNHEX(cp[3]);
1431 SkipChars(ts, 4);
1432 }
1433 } else if (c == 'x') {
1434 jschar cp[2];
1435 if (PeekChars(ts, 2, cp) &&
1436 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1437 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1438 SkipChars(ts, 2);
1439 }
1440 } else if (c == '\n') {
1441 /* ECMA follows C by removing escaped newlines. */
1442 continue;
1443 }
1444 break;
1445 }
1446 }
1447 ADD_TO_TOKENBUF(c);
1448 }
1449 atom = TOKENBUF_TO_ATOM();
1450 if (!atom)
1451 goto error;
1452 tp->pos.end.lineno = (uint16)ts->lineno;
1453 tp->t_op = JSOP_STRING;
1454 tp->t_atom = atom;
1455 tt = TOK_STRING;
1456 goto out;
1457 }
1458
1459 switch (c) {
1460 case '\n': tt = TOK_EOL; goto eol_out;
1461 case ';': tt = TOK_SEMI; break;
1462 case '[': tt = TOK_LB; break;
1463 case ']': tt = TOK_RB; break;
1464 case '{': tt = TOK_LC; break;
1465 case '}': tt = TOK_RC; break;
1466 case '(': tt = TOK_LP; break;
1467 case ')': tt = TOK_RP; break;
1468 case ',': tt = TOK_COMMA; break;
1469 case '?': tt = TOK_HOOK; break;
1470
1471 case '.':
1472 #if JS_HAS_XML_SUPPORT
1473 if (MatchChar(ts, c))
1474 tt = TOK_DBLDOT;
1475 else
1476 #endif
1477 tt = TOK_DOT;
1478 break;
1479
1480 case ':':
1481 #if JS_HAS_XML_SUPPORT
1482 if (MatchChar(ts, c)) {
1483 tt = TOK_DBLCOLON;
1484 break;
1485 }
1486 #endif
1487 /*
1488 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1489 * object initializer, likewise for setter.
1490 */
1491 tp->t_op = JSOP_NOP;
1492 tt = TOK_COLON;
1493 break;
1494
1495 case '|':
1496 if (MatchChar(ts, c)) {
1497 tt = TOK_OR;
1498 } else if (MatchChar(ts, '=')) {
1499 tp->t_op = JSOP_BITOR;
1500 tt = TOK_ASSIGN;
1501 } else {
1502 tt = TOK_BITOR;
1503 }
1504 break;
1505
1506 case '^':
1507 if (MatchChar(ts, '=')) {
1508 tp->t_op = JSOP_BITXOR;
1509 tt = TOK_ASSIGN;
1510 } else {
1511 tt = TOK_BITXOR;
1512 }
1513 break;
1514
1515 case '&':
1516 if (MatchChar(ts, c)) {
1517 tt = TOK_AND;
1518 } else if (MatchChar(ts, '=')) {
1519 tp->t_op = JSOP_BITAND;
1520 tt = TOK_ASSIGN;
1521 } else {
1522 tt = TOK_BITAND;
1523 }
1524 break;
1525
1526 case '=':
1527 if (MatchChar(ts, c)) {
1528 tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1529 tt = TOK_EQOP;
1530 } else {
1531 tp->t_op = JSOP_NOP;
1532 tt = TOK_ASSIGN;
1533 }
1534 break;
1535
1536 case '!':
1537 if (MatchChar(ts, '=')) {
1538 tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1539 tt = TOK_EQOP;
1540 } else {
1541 tp->t_op = JSOP_NOT;
1542 tt = TOK_UNARYOP;
1543 }
1544 break;
1545
1546 #if JS_HAS_XML_SUPPORT
1547 case '@':
1548 tt = TOK_AT;
1549 break;
1550 #endif
1551
1552 case '<':
1553 #if JS_HAS_XML_SUPPORT
1554 /*
1555 * After much testing, it's clear that Postel's advice to protocol
1556 * designers ("be liberal in what you accept, and conservative in what
1557 * you send") invites a natural-law repercussion for JS as "protocol":
1558 *
1559 * "If you are liberal in what you accept, others will utterly fail to
1560 * be conservative in what they send."
1561 *
1562 * Which means you will get <!-- comments to end of line in the middle
1563 * of .js files, and after if conditions whose then statements are on
1564 * the next line, and other wonders. See at least the following bugs:
1565 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1566 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1567 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1568 *
1569 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1570 * an XML comment or CDATA literal. Instead, we always scan <! as the
1571 * start of an HTML comment hack to end of line, used since Netscape 2
1572 * to hide script tag content from script-unaware browsers.
1573 *
1574 * But this still leaves XML resources with certain internal structure
1575 * vulnerable to being loaded as script cross-origin, and some internal
1576 * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1577 * source consists only of XML literals. See:
1578 *
1579 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1580 *
1581 * The check for this is in jsparse.cpp, JSCompiler::compileScript.
1582 */
1583 if ((ts->flags & TSF_OPERAND) &&
1584 (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1585 /* Check for XML comment or CDATA section. */
1586 if (MatchChar(ts, '!')) {
1587 INIT_TOKENBUF();
1588
1589 /* Scan XML comment. */
1590 if (MatchChar(ts, '-')) {
1591 if (!MatchChar(ts, '-'))
1592 goto bad_xml_markup;
1593 while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1594 if (c == EOF)
1595 goto bad_xml_markup;
1596 ADD_TO_TOKENBUF(c);
1597 }
1598 tt = TOK_XMLCOMMENT;
1599 tp->t_op = JSOP_XMLCOMMENT;
1600 goto finish_xml_markup;
1601 }
1602
1603 /* Scan CDATA section. */
1604 if (MatchChar(ts, '[')) {
1605 jschar cp[6];
1606 if (PeekChars(ts, 6, cp) &&
1607 cp[0] == 'C' &&
1608 cp[1] == 'D' &&
1609 cp[2] == 'A' &&
1610 cp[3] == 'T' &&
1611 cp[4] == 'A' &&
1612 cp[5] == '[') {
1613 SkipChars(ts, 6);
1614 while ((c = GetChar(ts)) != ']' ||
1615 !PeekChars(ts, 2, cp) ||
1616 cp[0] != ']' ||
1617 cp[1] != '>') {
1618 if (c == EOF)
1619 goto bad_xml_markup;
1620 ADD_TO_TOKENBUF(c);
1621 }
1622 GetChar(ts); /* discard ] but not > */
1623 tt = TOK_XMLCDATA;
1624 tp->t_op = JSOP_XMLCDATA;
1625 goto finish_xml_markup;
1626 }
1627 goto bad_xml_markup;
1628 }
1629 }
1630
1631 /* Check for processing instruction. */
1632 if (MatchChar(ts, '?')) {
1633 inTarget = JS_TRUE;
1634 targetLength = 0;
1635 contentIndex = -1;
1636
1637 INIT_TOKENBUF();
1638 while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1639 if (c == EOF)
1640 goto bad_xml_markup;
1641 if (inTarget) {
1642 if (JS_ISXMLSPACE(c)) {
1643 if (TOKENBUF_LENGTH() == 0)
1644 goto bad_xml_markup;
1645 inTarget = JS_FALSE;
1646 } else {
1647 if (!((TOKENBUF_LENGTH() == 0)
1648 ? JS_ISXMLNSSTART(c)
1649 : JS_ISXMLNS(c))) {
1650 goto bad_xml_markup;
1651 }
1652 ++targetLength;
1653 }
1654 } else {
1655 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1656 contentIndex = TOKENBUF_LENGTH();
1657 }
1658 ADD_TO_TOKENBUF(c);
1659 }
1660 if (targetLength == 0)
1661 goto bad_xml_markup;
1662 if (!TOKENBUF_OK())
1663 goto error;
1664 if (contentIndex < 0) {
1665 atom = cx->runtime->atomState.emptyAtom;
1666 } else {
1667 atom = js_AtomizeChars(cx,
1668 &TOKENBUF_CHAR(contentIndex),
1669 TOKENBUF_LENGTH() - contentIndex,
1670 0);
1671 if (!atom)
1672 goto error;
1673 }
1674 TRIM_TOKENBUF(targetLength);
1675 tp->t_atom2 = atom;
1676 tt = TOK_XMLPI;
1677
1678 finish_xml_markup:
1679 if (!MatchChar(ts, '>'))
1680 goto bad_xml_markup;
1681 atom = TOKENBUF_TO_ATOM();
1682 if (!atom)
1683 goto error;
1684 tp->t_atom = atom;
1685 tp->pos.end.lineno = (uint16)ts->lineno;
1686 goto out;
1687 }
1688
1689 /* An XML start-of-tag character. */
1690 tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1691 goto out;
1692
1693 bad_xml_markup:
1694 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1695 JSMSG_BAD_XML_MARKUP);
1696 goto error;
1697 }
1698 #endif /* JS_HAS_XML_SUPPORT */
1699
1700 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1701 if (MatchChar(ts, '!')) {
1702 if (MatchChar(ts, '-')) {
1703 if (MatchChar(ts, '-')) {
1704 ts->flags |= TSF_IN_HTML_COMMENT;
1705 goto skipline;
1706 }
1707 UngetChar(ts, '-');
1708 }
1709 UngetChar(ts, '!');
1710 }
1711 if (MatchChar(ts, c)) {
1712 tp->t_op = JSOP_LSH;
1713 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1714 } else {
1715 tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1716 tt = TOK_RELOP;
1717 }
1718 break;
1719
1720 case '>':
1721 if (MatchChar(ts, c)) {
1722 tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1723 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1724 } else {
1725 tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1726 tt = TOK_RELOP;
1727 }
1728 break;
1729
1730 case '*':
1731 tp->t_op = JSOP_MUL;
1732 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1733 break;
1734
1735 case '/':
1736 if (MatchChar(ts, '/')) {
1737 /*
1738 * Hack for source filters such as the Mozilla XUL preprocessor:
1739 * "//@line 123\n" sets the number of the *next* line after the
1740 * comment to 123.
1741 */
1742 if (JS_HAS_ATLINE_OPTION(cx)) {
1743 jschar cp[5];
1744 uintN i, line, temp;
1745 char filename[1024];
1746
1747 if (PeekChars(ts, 5, cp) &&
1748 cp[0] == '@' &&
1749 cp[1] == 'l' &&
1750 cp[2] == 'i' &&
1751 cp[3] == 'n' &&
1752 cp[4] == 'e') {
1753 SkipChars(ts, 5);
1754 while ((c = GetChar(ts)) != '\n' && ScanAsSpace((jschar)c))
1755 continue;
1756 if (JS7_ISDEC(c)) {
1757 line = JS7_UNDEC(c);
1758 while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1759 temp = 10 * line + JS7_UNDEC(c);
1760 if (temp < line) {
1761 /* Ignore overlarge line numbers. */
1762 goto skipline;
1763 }
1764 line = temp;
1765 }
1766 while (c != '\n' && ScanAsSpace((jschar)c))
1767 c = GetChar(ts);
1768 i = 0;
1769 if (c == '"') {
1770 while ((c = GetChar(ts)) != EOF && c != '"') {
1771 if (c == '\n') {
1772 UngetChar(ts, c);
1773 goto skipline;
1774 }
1775 if ((c >> 8) != 0 || i >= sizeof filename - 1)
1776 goto skipline;
1777 filename[i++] = (char) c;
1778 }
1779 if (c == '"') {
1780 while ((c = GetChar(ts)) != '\n' &&
1781 ScanAsSpace((jschar)c)) {
1782 continue;
1783 }
1784 }
1785 }
1786 filename[i] = '\0';
1787 if (c == '\n') {
1788 if (i > 0) {
1789 if (ts->flags & TSF_OWNFILENAME)
1790 JS_free(cx, (void *) ts->filename);
1791 ts->filename = JS_strdup(cx, filename);
1792 if (!ts->filename)
1793 goto error;
1794 ts->flags |= TSF_OWNFILENAME;
1795 }
1796 ts->lineno = line;
1797 }
1798 }
1799 UngetChar(ts, c);
1800 }
1801 }
1802
1803 skipline:
1804 /* Optimize line skipping if we are not in an HTML comment. */
1805 if (ts->flags & TSF_IN_HTML_COMMENT) {
1806 while ((c = GetChar(ts)) != EOF && c != '\n') {
1807 if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1808 ts->flags &= ~TSF_IN_HTML_COMMENT;
1809 }
1810 } else {
1811 while ((c = GetChar(ts)) != EOF && c != '\n')
1812 continue;
1813 }
1814 UngetChar(ts, c);
1815 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1816 goto retry;
1817 }
1818
1819 if (MatchChar(ts, '*')) {
1820 uintN lineno = ts->lineno;
1821 while ((c = GetChar(ts)) != EOF &&
1822 !(c == '*' && MatchChar(ts, '/'))) {
1823 /* Ignore all characters until comment close. */
1824 }
1825 if (c == EOF) {
1826 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1827 JSMSG_UNTERMINATED_COMMENT);
1828 goto error;
1829 }
1830 if ((ts->flags & TSF_NEWLINES) && lineno != ts->lineno) {
1831 ts->flags &= ~TSF_DIRTYLINE;
1832 tt = TOK_EOL;
1833 goto eol_out;
1834 }
1835 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1836 goto retry;
1837 }
1838
1839 if (ts->flags & TSF_OPERAND) {
1840 uintN flags, length;
1841 JSBool inCharClass = JS_FALSE;
1842
1843 INIT_TOKENBUF();
1844 for (;;) {
1845 c = GetChar(ts);
1846 if (c == '\n' || c == EOF) {
1847 UngetChar(ts, c);
1848 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1849 JSMSG_UNTERMINATED_REGEXP);
1850 goto error;
1851 }
1852 if (c == '\\') {
1853 ADD_TO_TOKENBUF(c);
1854 c = GetChar(ts);
1855 } else if (c == '[') {
1856 inCharClass = JS_TRUE;
1857 } else if (c == ']') {
1858 inCharClass = JS_FALSE;
1859 } else if (c == '/' && !inCharClass) {
1860 /* For compat with IE, allow unescaped / in char classes. */
1861 break;
1862 }
1863 ADD_TO_TOKENBUF(c);
1864 }
1865 for (flags = 0, length = TOKENBUF_LENGTH() + 1; ; length++) {
1866 c = PeekChar(ts);
1867 if (c == 'g' && !(flags & JSREG_GLOB))
1868 flags |= JSREG_GLOB;
1869 else if (c == 'i' && !(flags & JSREG_FOLD))
1870 flags |= JSREG_FOLD;
1871 else if (c == 'm' && !(flags & JSREG_MULTILINE))
1872 flags |= JSREG_MULTILINE;
1873 else if (c == 'y' && !(flags & JSREG_STICKY))
1874 flags |= JSREG_STICKY;
1875 else
1876 break;
1877 GetChar(ts);
1878 }
1879 c = PeekChar(ts);
1880 if (JS7_ISLET(c)) {
1881 char buf[2] = { '\0' };
1882 tp->pos.begin.index += length + 1;
1883 buf[0] = (char)c;
1884 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1885 JSMSG_BAD_REGEXP_FLAG, buf);
1886 (void) GetChar(ts);
1887 goto error;
1888 }
1889 /* XXXbe fix jsregexp.c so it doesn't depend on NUL termination */
1890 if (!TOKENBUF_OK())
1891 goto error;
1892 NUL_TERM_TOKENBUF();
1893 tp->t_reflags = flags;
1894 tt = TOK_REGEXP;
1895 break;
1896 }
1897
1898 tp->t_op = JSOP_DIV;
1899 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1900 break;
1901
1902 case '%':
1903 tp->t_op = JSOP_MOD;
1904 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1905 break;
1906
1907 case '~':
1908 tp->t_op = JSOP_BITNOT;
1909 tt = TOK_UNARYOP;
1910 break;
1911
1912 case '+':
1913 if (MatchChar(ts, '=')) {
1914 tp->t_op = JSOP_ADD;
1915 tt = TOK_ASSIGN;
1916 } else if (MatchChar(ts, c)) {
1917 tt = TOK_INC;
1918 } else {
1919 tp->t_op = JSOP_POS;
1920 tt = TOK_PLUS;
1921 }
1922 break;
1923
1924 case '-':
1925 if (MatchChar(ts, '=')) {
1926 tp->t_op = JSOP_SUB;
1927 tt = TOK_ASSIGN;
1928 } else if (MatchChar(ts, c)) {
1929 if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
1930 ts->flags &= ~TSF_IN_HTML_COMMENT;
1931 goto skipline;
1932 }
1933 tt = TOK_DEC;
1934 } else {
1935 tp->t_op = JSOP_NEG;
1936 tt = TOK_MINUS;
1937 }
1938 break;
1939
1940 #if JS_HAS_SHARP_VARS
1941 case '#':
1942 {
1943 uint32 n;
1944
1945 c = GetChar(ts);
1946 if (!JS7_ISDEC(c)) {
1947 UngetChar(ts, c);
1948 goto badchar;
1949 }
1950 n = (uint32)JS7_UNDEC(c);
1951 for (;;) {
1952 c = GetChar(ts);
1953 if (!JS7_ISDEC(c))
1954 break;
1955 n = 10 * n + JS7_UNDEC(c);
1956 if (n >= UINT16_LIMIT) {
1957 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1958 JSMSG_SHARPVAR_TOO_BIG);
1959 goto error;
1960 }
1961 }
1962 tp->t_dval = (jsdouble) n;
1963 if (JS_HAS_STRICT_OPTION(cx) &&
1964 (c == '=' || c == '#')) {
1965 char buf[20];
1966 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1967 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1968 JSREPORT_WARNING |
1969 JSREPORT_STRICT,
1970 JSMSG_DEPRECATED_USAGE,
1971 buf)) {
1972 goto error;
1973 }
1974 }
1975 if (c == '=')
1976 tt = TOK_DEFSHARP;
1977 else if (c == '#')
1978 tt = TOK_USESHARP;
1979 else
1980 goto badchar;
1981 break;
1982 }
1983 #endif /* JS_HAS_SHARP_VARS */
1984
1985 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1986 badchar:
1987 #endif
1988
1989 default:
1990 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1991 JSMSG_ILLEGAL_CHARACTER);
1992 goto error;
1993 }
1994
1995 out:
1996 JS_ASSERT(tt != TOK_EOL);
1997 ts->flags |= TSF_DIRTYLINE;
1998
1999 eol_out:
2000 if (!STRING_BUFFER_OK(&ts->tokenbuf))
2001 tt = TOK_ERROR;
2002 JS_ASSERT(tt < TOK_LIMIT);
2003 tp->pos.end.index = ts->linepos +
2004 PTRDIFF(ts->linebuf.ptr, ts->linebuf.base, jschar) -
2005 ts->ungetpos;
2006 tp->type = tt;
2007 return tt;
2008
2009 error:
2010 tt = TOK_ERROR;
2011 ts->flags |= TSF_ERROR;
2012 goto out;
2013
2014 #undef INIT_TOKENBUF
2015 #undef TOKENBUF_LENGTH
2016 #undef TOKENBUF_OK
2017 #undef TOKENBUF_TO_ATOM
2018 #undef ADD_TO_TOKENBUF
2019 #undef TOKENBUF_BASE
2020 #undef TOKENBUF_CHAR
2021 #undef TRIM_TOKENBUF
2022 #undef NUL_TERM_TOKENBUF
2023 }
2024
2025 void
2026 js_UngetToken(JSTokenStream *ts)
2027 {
2028 JS_ASSERT(ts->lookahead < NTOKENS_MASK);
2029 ts->lookahead++;
2030 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
2031 }
2032
2033 JSBool
2034 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
2035 {
2036 if (js_GetToken(cx, ts) == tt)
2037 return JS_TRUE;
2038 js_UngetToken(ts);
2039 return JS_FALSE;
2040 }

  ViewVC Help
Powered by ViewVC 1.1.24