/[jscoverage]/trunk/js/jsscan.cpp
ViewVC logotype

Contents of /trunk/js/jsscan.cpp

Parent Directory | Revision Log


Revision 399 - (show annotations)
Tue Dec 9 03:37:47 2008 UTC (11 years, 1 month ago) by siliconforks
File size: 62250 byte(s)
Use SpiderMonkey from Firefox 3.1b2.

1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS lexical scanner.
43 */
44 #include "jsstddef.h"
45 #include <stdio.h> /* first to avoid trouble on some systems */
46 #include <errno.h>
47 #include <limits.h>
48 #include <math.h>
49 #ifdef HAVE_MEMORY_H
50 #include <memory.h>
51 #endif
52 #include <stdarg.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include "jstypes.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsutil.h" /* Added by JSIFY */
58 #include "jsdtoa.h"
59 #include "jsprf.h"
60 #include "jsapi.h"
61 #include "jsatom.h"
62 #include "jscntxt.h"
63 #include "jsversion.h"
64 #include "jsemit.h"
65 #include "jsexn.h"
66 #include "jsnum.h"
67 #include "jsopcode.h"
68 #include "jsparse.h"
69 #include "jsregexp.h"
70 #include "jsscan.h"
71 #include "jsscript.h"
72 #include "jsstaticcheck.h"
73
74 #if JS_HAS_XML_SUPPORT
75 #include "jsxml.h"
76 #endif
77
/*
 * First expansion of jskeyword.tbl: each JS_KEYWORD(keyword, ...) entry
 * defines a char array named js_<keyword>_str holding the keyword's spelling.
 */
78 #define JS_KEYWORD(keyword, type, op, version) \
79 const char js_##keyword##_str[] = #keyword;
80 #include "jskeyword.tbl"
81 #undef JS_KEYWORD
82
/* One row of the keyword table built from jskeyword.tbl below. */
83 struct keyword {
84 const char *chars; /* C string with keyword text */
85 JSTokenType tokentype; /* token type reported for this keyword */
86 JSOp op; /* opcode stored in the token's t_op by the scanner */
87 JSVersion version; /* compared against JSVERSION_NUMBER(cx) by the scanner */
88 };
89
/*
 * Second expansion of jskeyword.tbl: one struct keyword initializer per
 * entry, pointing at the js_<keyword>_str array defined above.
 */
90 static const struct keyword keyword_defs[] = {
91 #define JS_KEYWORD(keyword, type, op, version) \
92 {js_##keyword##_str, type, op, version},
93 #include "jskeyword.tbl"
94 #undef JS_KEYWORD
95 };
96
/* Number of rows in keyword_defs. */
97 #define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
98
99 static const struct keyword *
100 FindKeyword(const jschar *s, size_t length)
101 {
102 register size_t i;
103 const struct keyword *kw;
104 const char *chars;
105
106 JS_ASSERT(length != 0);
107
108 #define JSKW_LENGTH() length
109 #define JSKW_AT(column) s[column]
110 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
111 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
112 #define JSKW_NO_MATCH() goto no_match;
113 #include "jsautokw.h"
114 #undef JSKW_NO_MATCH
115 #undef JSKW_TEST_GUESS
116 #undef JSKW_GOT_MATCH
117 #undef JSKW_AT
118 #undef JSKW_LENGTH
119
120 got_match:
121 return &keyword_defs[i];
122
123 test_guess:
124 kw = &keyword_defs[i];
125 chars = kw->chars;
126 do {
127 if (*s++ != (unsigned char)(*chars++))
128 goto no_match;
129 } while (--length != 0);
130 return kw;
131
132 no_match:
133 return NULL;
134 }
135
136 JSTokenType
137 js_CheckKeyword(const jschar *str, size_t length)
138 {
139 const struct keyword *kw;
140
141 JS_ASSERT(length != 0);
142 kw = FindKeyword(str, length);
143 return kw ? kw->tokentype : TOK_EOF;
144 }
145
146 JS_FRIEND_API(void)
147 js_MapKeywords(void (*mapfun)(const char *))
148 {
149 size_t i;
150
151 for (i = 0; i != KEYWORD_COUNT; ++i)
152 mapfun(keyword_defs[i].chars);
153 }
154
155 JSBool
156 js_IsIdentifier(JSString *str)
157 {
158 size_t length;
159 jschar c, *chars, *end;
160
161 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
162 if (length == 0)
163 return JS_FALSE;
164 c = *chars;
165 if (!JS_ISIDSTART(c))
166 return JS_FALSE;
167 end = chars + length;
168 while (++chars != end) {
169 c = *chars;
170 if (!JS_ISIDENT(c))
171 return JS_FALSE;
172 }
173 return JS_TRUE;
174 }
175
/*
 * Size, in jschars, of the first token buffer that GrowTokenBuf allocates;
 * TBMIN - 1 of them lie below the buffer's limit pointer.
 */
176 #define TBMIN 64
177
178 static JSBool
179 GrowTokenBuf(JSStringBuffer *sb, size_t newlength)
180 {
181 JSContext *cx;
182 jschar *base;
183 ptrdiff_t offset, length;
184 size_t tbsize;
185 JSArenaPool *pool;
186
187 cx = (JSContext*) sb->data;
188 base = sb->base;
189 offset = PTRDIFF(sb->ptr, base, jschar);
190 pool = &cx->tempPool;
191 if (!base) {
192 tbsize = TBMIN * sizeof(jschar);
193 length = TBMIN - 1;
194 JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize);
195 } else {
196 length = PTRDIFF(sb->limit, base, jschar);
197 if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) {
198 base = NULL;
199 } else {
200 tbsize = (length + 1) * sizeof(jschar);
201 length += length + 1;
202 JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize);
203 }
204 }
205 if (!base) {
206 js_ReportOutOfScriptQuota(cx);
207 sb->base = STRING_BUFFER_ERROR_BASE;
208 return JS_FALSE;
209 }
210 sb->base = base;
211 sb->limit = base + length;
212 sb->ptr = base + offset;
213 return JS_TRUE;
214 }
215
216 JSBool
217 js_InitTokenStream(JSContext *cx, JSTokenStream *ts,
218 const jschar *base, size_t length,
219 FILE *fp, const char *filename, uintN lineno)
220 {
221 jschar *buf;
222 size_t nb;
223
224 JS_ASSERT_IF(fp, !base);
225 JS_ASSERT_IF(!base, length == 0);
226 nb = fp
227 ? 2 * JS_LINE_LIMIT * sizeof(jschar)
228 : JS_LINE_LIMIT * sizeof(jschar);
229 JS_ARENA_ALLOCATE_CAST(buf, jschar *, &cx->tempPool, nb);
230 if (!buf) {
231 js_ReportOutOfScriptQuota(cx);
232 return JS_FALSE;
233 }
234 memset(buf, 0, nb);
235 memset(ts, 0, sizeof(*ts));
236 ts->filename = filename;
237 ts->lineno = lineno;
238 ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = buf;
239 if (fp) {
240 ts->file = fp;
241 ts->userbuf.base = buf + JS_LINE_LIMIT;
242 ts->userbuf.ptr = ts->userbuf.limit = ts->userbuf.base + JS_LINE_LIMIT;
243 } else {
244 ts->userbuf.base = (jschar *)base;
245 ts->userbuf.limit = (jschar *)base + length;
246 ts->userbuf.ptr = (jschar *)base;
247 }
248 ts->tokenbuf.grow = GrowTokenBuf;
249 ts->tokenbuf.data = cx;
250 ts->listener = cx->debugHooks->sourceHandler;
251 ts->listenerData = cx->debugHooks->sourceHandlerData;
252 return JS_TRUE;
253 }
254
255 void
256 js_CloseTokenStream(JSContext *cx, JSTokenStream *ts)
257 {
258 if (ts->flags & TSF_OWNFILENAME)
259 JS_free(cx, (void *) ts->filename);
260 }
261
262 JS_FRIEND_API(int)
263 js_fgets(char *buf, int size, FILE *file)
264 {
265 int n, i, c;
266 JSBool crflag;
267
268 n = size - 1;
269 if (n < 0)
270 return -1;
271
272 crflag = JS_FALSE;
273 for (i = 0; i < n && (c = getc(file)) != EOF; i++) {
274 buf[i] = c;
275 if (c == '\n') { /* any \n ends a line */
276 i++; /* keep the \n; we know there is room for \0 */
277 break;
278 }
279 if (crflag) { /* \r not followed by \n ends line at the \r */
280 ungetc(c, file);
281 break; /* and overwrite c in buf with \0 */
282 }
283 crflag = (c == '\r');
284 }
285
286 buf[i] = '\0';
287 return i;
288 }
289
290 static int32
291 GetChar(JSTokenStream *ts)
292 {
293 int32 c;
294 ptrdiff_t i, j, len, olen;
295 JSBool crflag;
296 char cbuf[JS_LINE_LIMIT];
297 jschar *ubuf, *nl;
298
299 if (ts->ungetpos != 0) {
300 c = ts->ungetbuf[--ts->ungetpos];
301 } else {
302 if (ts->linebuf.ptr == ts->linebuf.limit) {
303 len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
304 if (len <= 0) {
305 if (!ts->file) {
306 ts->flags |= TSF_EOF;
307 return EOF;
308 }
309
310 /* Fill ts->userbuf so that \r and \r\n convert to \n. */
311 crflag = (ts->flags & TSF_CRFLAG) != 0;
312 len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
313 if (len <= 0) {
314 ts->flags |= TSF_EOF;
315 return EOF;
316 }
317 olen = len;
318 ubuf = ts->userbuf.base;
319 i = 0;
320 if (crflag) {
321 ts->flags &= ~TSF_CRFLAG;
322 if (cbuf[0] != '\n') {
323 ubuf[i++] = '\n';
324 len++;
325 ts->linepos--;
326 }
327 }
328 for (j = 0; i < len; i++, j++)
329 ubuf[i] = (jschar) (unsigned char) cbuf[j];
330 ts->userbuf.limit = ubuf + len;
331 ts->userbuf.ptr = ubuf;
332 }
333 if (ts->listener) {
334 ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
335 &ts->listenerTSData, ts->listenerData);
336 }
337
338 nl = ts->saveEOL;
339 if (!nl) {
340 /*
341 * Any one of \n, \r, or \r\n ends a line (the longest
342 * match wins). Also allow the Unicode line and paragraph
343 * separators.
344 */
345 for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
346 /*
347 * Try to prevent value-testing on most characters by
348 * filtering out characters that aren't 000x or 202x.
349 */
350 if ((*nl & 0xDFD0) == 0) {
351 if (*nl == '\n')
352 break;
353 if (*nl == '\r') {
354 if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
355 nl++;
356 break;
357 }
358 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
359 break;
360 }
361 }
362 }
363
364 /*
365 * If there was a line terminator, copy thru it into linebuf.
366 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
367 */
368 if (nl < ts->userbuf.limit)
369 len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
370 if (len >= JS_LINE_LIMIT) {
371 len = JS_LINE_LIMIT - 1;
372 ts->saveEOL = nl;
373 } else {
374 ts->saveEOL = NULL;
375 }
376 js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
377 ts->userbuf.ptr += len;
378 olen = len;
379
380 /*
381 * Make sure linebuf contains \n for EOL (don't do this in
382 * userbuf because the user's string might be readonly).
383 */
384 if (nl < ts->userbuf.limit) {
385 if (*nl == '\r') {
386 if (ts->linebuf.base[len-1] == '\r') {
387 /*
388 * Does the line segment end in \r? We must check
389 * for a \n at the front of the next segment before
390 * storing a \n into linebuf. This case matters
391 * only when we're reading from a file.
392 */
393 if (nl + 1 == ts->userbuf.limit && ts->file) {
394 len--;
395 ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
396 if (len == 0) {
397 /*
398 * This can happen when a segment ends in
399 * \r\r. Start over. ptr == limit in this
400 * case, so we'll fall into buffer-filling
401 * code.
402 */
403 return GetChar(ts);
404 }
405 } else {
406 ts->linebuf.base[len-1] = '\n';
407 }
408 }
409 } else if (*nl == '\n') {
410 if (nl > ts->userbuf.base &&
411 nl[-1] == '\r' &&
412 ts->linebuf.base[len-2] == '\r') {
413 len--;
414 JS_ASSERT(ts->linebuf.base[len] == '\n');
415 ts->linebuf.base[len-1] = '\n';
416 }
417 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
418 ts->linebuf.base[len-1] = '\n';
419 }
420 }
421
422 /* Reset linebuf based on adjusted segment length. */
423 ts->linebuf.limit = ts->linebuf.base + len;
424 ts->linebuf.ptr = ts->linebuf.base;
425
426 /* Update position of linebuf within physical userbuf line. */
427 if (!(ts->flags & TSF_NLFLAG))
428 ts->linepos += ts->linelen;
429 else
430 ts->linepos = 0;
431 if (ts->linebuf.limit[-1] == '\n')
432 ts->flags |= TSF_NLFLAG;
433 else
434 ts->flags &= ~TSF_NLFLAG;
435
436 /* Update linelen from original segment length. */
437 ts->linelen = olen;
438 }
439 c = *ts->linebuf.ptr++;
440 }
441 if (c == '\n')
442 ts->lineno++;
443 return c;
444 }
445
446 static void
447 UngetChar(JSTokenStream *ts, int32 c)
448 {
449 if (c == EOF)
450 return;
451 JS_ASSERT(ts->ungetpos < JS_ARRAY_LENGTH(ts->ungetbuf));
452 if (c == '\n')
453 ts->lineno--;
454 ts->ungetbuf[ts->ungetpos++] = (jschar)c;
455 }
456
457 static int32
458 PeekChar(JSTokenStream *ts)
459 {
460 int32 c;
461
462 c = GetChar(ts);
463 UngetChar(ts, c);
464 return c;
465 }
466
467 /*
468 * Peek n chars ahead into ts. Return true if n chars were read, false if
469 * there weren't enough characters in the input stream. This function cannot
470 * be used to peek into or past a newline.
471 */
472 static JSBool
473 PeekChars(JSTokenStream *ts, intN n, jschar *cp)
474 {
475 intN i, j;
476 int32 c;
477
478 for (i = 0; i < n; i++) {
479 c = GetChar(ts);
480 if (c == EOF)
481 break;
482 if (c == '\n') {
483 UngetChar(ts, c);
484 break;
485 }
486 cp[i] = (jschar)c;
487 }
488 for (j = i - 1; j >= 0; j--)
489 UngetChar(ts, cp[j]);
490 return i == n;
491 }
492
493 static void
494 SkipChars(JSTokenStream *ts, intN n)
495 {
496 while (--n >= 0)
497 GetChar(ts);
498 }
499
500 static JSBool
501 MatchChar(JSTokenStream *ts, int32 expect)
502 {
503 int32 c;
504
505 c = GetChar(ts);
506 if (c == expect)
507 return JS_TRUE;
508 UngetChar(ts, c);
509 return JS_FALSE;
510 }
511
512 JSBool
513 js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
514 uintN flags, uintN errorNumber, ...)
515 {
516 JSErrorReport report;
517 char *message;
518 size_t linelength;
519 jschar *linechars;
520 char *linebytes;
521 va_list ap;
522 JSBool warning, ok;
523 JSTokenPos *tp;
524 uintN index, i;
525 JSErrorReporter onError;
526
527 JS_ASSERT(ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
528
529 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
530 return JS_TRUE;
531
532 memset(&report, 0, sizeof report);
533 report.flags = flags;
534 report.errorNumber = errorNumber;
535 message = NULL;
536 linechars = NULL;
537 linebytes = NULL;
538
539 MUST_FLOW_THROUGH("out");
540 va_start(ap, errorNumber);
541 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
542 errorNumber, &message, &report, &warning,
543 !(flags & JSREPORT_UC), ap);
544 va_end(ap);
545 if (!ok) {
546 warning = JS_FALSE;
547 goto out;
548 }
549
550 report.filename = ts->filename;
551
552 if (pn) {
553 report.lineno = pn->pn_pos.begin.lineno;
554 if (report.lineno != ts->lineno)
555 goto report;
556 tp = &pn->pn_pos;
557 } else {
558 /* Point to the current token, not the next one to get. */
559 tp = &ts->tokens[ts->cursor].pos;
560 }
561 report.lineno = ts->lineno;
562 linelength = PTRDIFF(ts->linebuf.limit, ts->linebuf.base, jschar);
563 linechars = (jschar *)JS_malloc(cx, (linelength + 1) * sizeof(jschar));
564 if (!linechars) {
565 warning = JS_FALSE;
566 goto out;
567 }
568 memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
569 linechars[linelength] = 0;
570 linebytes = js_DeflateString(cx, linechars, linelength);
571 if (!linebytes) {
572 warning = JS_FALSE;
573 goto out;
574 }
575 report.linebuf = linebytes;
576
577 /*
578 * FIXME: What should instead happen here is that we should
579 * find error-tokens in userbuf, if !ts->file. That will
580 * allow us to deliver a more helpful error message, which
581 * includes all or part of the bad string or bad token. The
582 * code here yields something that looks truncated.
583 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
584 */
585 index = 0;
586 if (tp->begin.lineno == tp->end.lineno) {
587 if (tp->begin.index < ts->linepos)
588 goto report;
589
590 index = tp->begin.index - ts->linepos;
591 }
592
593 report.tokenptr = report.linebuf + index;
594 report.uclinebuf = linechars;
595 report.uctokenptr = report.uclinebuf + index;
596
597 /*
598 * If there's a runtime exception type associated with this error
599 * number, set that as the pending exception. For errors occuring at
600 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
601 *
602 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
603 * flag will be set in report.flags. Proper behavior for an error
604 * reporter is to ignore a report with this flag for all but top-level
605 * compilation errors. The exception will remain pending, and so long
606 * as the non-top-level "load", "eval", or "compile" native function
607 * returns false, the top-level reporter will eventually receive the
608 * uncaught exception report.
609 *
610 * XXX it'd probably be best if there was only one call to this
611 * function, but there seem to be two error reporter call points.
612 */
613 report:
614 onError = cx->errorReporter;
615
616 /*
617 * Try to raise an exception only if there isn't one already set --
618 * otherwise the exception will describe the last compile-time error,
619 * which is likely spurious.
620 */
621 if (!(ts->flags & TSF_ERROR)) {
622 if (js_ErrorToException(cx, message, &report))
623 onError = NULL;
624 }
625
626 /*
627 * Suppress any compile-time errors that don't occur at the top level.
628 * This may still fail, as interplevel may be zero in contexts where we
629 * don't really want to call the error reporter, as when js is called
630 * by other code which could catch the error.
631 */
632 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
633 onError = NULL;
634
635 if (onError) {
636 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
637
638 /*
639 * If debugErrorHook is present then we give it a chance to veto
640 * sending the error on to the regular error reporter.
641 */
642 if (hook && !hook(cx, message, &report,
643 cx->debugHooks->debugErrorHookData)) {
644 onError = NULL;
645 }
646 }
647 if (onError)
648 (*onError)(cx, message, &report);
649
650 out:
651 if (linebytes)
652 JS_free(cx, linebytes);
653 if (linechars)
654 JS_free(cx, linechars);
655 if (message)
656 JS_free(cx, message);
657 if (report.ucmessage)
658 JS_free(cx, (void *)report.ucmessage);
659
660 if (report.messageArgs) {
661 if (!(flags & JSREPORT_UC)) {
662 i = 0;
663 while (report.messageArgs[i])
664 JS_free(cx, (void *)report.messageArgs[i++]);
665 }
666 JS_free(cx, (void *)report.messageArgs);
667 }
668
669 if (!JSREPORT_IS_WARNING(flags)) {
670 /* Set the error flag to suppress spurious reports. */
671 ts->flags |= TSF_ERROR;
672 }
673
674 return warning;
675 }
676
677 static JSBool
678 GrowStringBuffer(JSStringBuffer *sb, size_t newlength)
679 {
680 ptrdiff_t offset;
681 jschar *bp;
682
683 offset = PTRDIFF(sb->ptr, sb->base, jschar);
684 JS_ASSERT(offset >= 0);
685 newlength += offset + 1;
686 if ((size_t)offset < newlength && newlength < ~(size_t)0 / sizeof(jschar))
687 bp = (jschar *) realloc(sb->base, newlength * sizeof(jschar));
688 else
689 bp = NULL;
690 if (!bp) {
691 free(sb->base);
692 sb->base = STRING_BUFFER_ERROR_BASE;
693 return JS_FALSE;
694 }
695 sb->base = bp;
696 sb->ptr = bp + offset;
697 sb->limit = bp + newlength - 1;
698 return JS_TRUE;
699 }
700
701 static void
702 FreeStringBuffer(JSStringBuffer *sb)
703 {
704 JS_ASSERT(STRING_BUFFER_OK(sb));
705 if (sb->base)
706 free(sb->base);
707 }
708
709 void
710 js_InitStringBuffer(JSStringBuffer *sb)
711 {
712 sb->base = sb->limit = sb->ptr = NULL;
713 sb->data = NULL;
714 sb->grow = GrowStringBuffer;
715 sb->free = FreeStringBuffer;
716 }
717
718 void
719 js_FinishStringBuffer(JSStringBuffer *sb)
720 {
721 sb->free(sb);
722 }
723
/*
 * Evaluate to non-zero when n more characters fit below (sb)->limit, calling
 * the buffer's grow callback to make room if they do not yet fit.
 *
 * Every use of both arguments is parenthesized so the macro also expands
 * correctly for arguments that are not plain identifiers, such as &buf.
 * Both arguments may be evaluated more than once.
 */
#define ENSURE_STRING_BUFFER(sb,n)                                            \
    ((sb)->ptr + (n) <= (sb)->limit || (sb)->grow((sb), (n)))
726
727 static void
728 FastAppendChar(JSStringBuffer *sb, jschar c)
729 {
730 if (!STRING_BUFFER_OK(sb))
731 return;
732 if (!ENSURE_STRING_BUFFER(sb, 1))
733 return;
734 *sb->ptr++ = c;
735 }
736
737 void
738 js_AppendChar(JSStringBuffer *sb, jschar c)
739 {
740 jschar *bp;
741
742 if (!STRING_BUFFER_OK(sb))
743 return;
744 if (!ENSURE_STRING_BUFFER(sb, 1))
745 return;
746 bp = sb->ptr;
747 *bp++ = c;
748 *bp = 0;
749 sb->ptr = bp;
750 }
751
752 void
753 js_AppendUCString(JSStringBuffer *sb, const jschar *buf, uintN len)
754 {
755 jschar *bp;
756
757 if (!STRING_BUFFER_OK(sb))
758 return;
759 if (len == 0 || !ENSURE_STRING_BUFFER(sb, len))
760 return;
761 bp = sb->ptr;
762 js_strncpy(bp, buf, len);
763 bp += len;
764 *bp = 0;
765 sb->ptr = bp;
766 }
767
768 #if JS_HAS_XML_SUPPORT
769
770 void
771 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count)
772 {
773 jschar *bp;
774
775 if (!STRING_BUFFER_OK(sb) || count == 0)
776 return;
777 if (!ENSURE_STRING_BUFFER(sb, count))
778 return;
779 for (bp = sb->ptr; count; --count)
780 *bp++ = c;
781 *bp = 0;
782 sb->ptr = bp;
783 }
784
785 void
786 js_AppendCString(JSStringBuffer *sb, const char *asciiz)
787 {
788 size_t length;
789 jschar *bp;
790
791 if (!STRING_BUFFER_OK(sb) || *asciiz == '\0')
792 return;
793 length = strlen(asciiz);
794 if (!ENSURE_STRING_BUFFER(sb, length))
795 return;
796 for (bp = sb->ptr; length; --length)
797 *bp++ = (jschar) *asciiz++;
798 *bp = 0;
799 sb->ptr = bp;
800 }
801
802 void
803 js_AppendJSString(JSStringBuffer *sb, JSString *str)
804 {
805 js_AppendUCString(sb, JSSTRING_CHARS(str), JSSTRING_LENGTH(str));
806 }
807
808 static JSBool
809 GetXMLEntity(JSContext *cx, JSTokenStream *ts)
810 {
811 ptrdiff_t offset, length, i;
812 int32 c, d;
813 JSBool ispair;
814 jschar *bp, digit;
815 char *bytes;
816 JSErrNum msg;
817
818 /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
819 offset = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar);
820 FastAppendChar(&ts->tokenbuf, '&');
821 if (!STRING_BUFFER_OK(&ts->tokenbuf))
822 return JS_FALSE;
823 while ((c = GetChar(ts)) != ';') {
824 if (c == EOF || c == '\n') {
825 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
826 JSMSG_END_OF_XML_ENTITY);
827 return JS_FALSE;
828 }
829 FastAppendChar(&ts->tokenbuf, (jschar) c);
830 if (!STRING_BUFFER_OK(&ts->tokenbuf))
831 return JS_FALSE;
832 }
833
834 /* Let length be the number of jschars after the '&', including the ';'. */
835 length = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) - offset;
836 bp = ts->tokenbuf.base + offset;
837 c = d = 0;
838 ispair = JS_FALSE;
839 if (length > 2 && bp[1] == '#') {
840 /* Match a well-formed XML Character Reference. */
841 i = 2;
842 if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
843 if (length > 9) /* at most 6 hex digits allowed */
844 goto badncr;
845 while (++i < length) {
846 digit = bp[i];
847 if (!JS7_ISHEX(digit))
848 goto badncr;
849 c = (c << 4) + JS7_UNHEX(digit);
850 }
851 } else {
852 while (i < length) {
853 digit = bp[i++];
854 if (!JS7_ISDEC(digit))
855 goto badncr;
856 c = (c * 10) + JS7_UNDEC(digit);
857 if (c < 0)
858 goto badncr;
859 }
860 }
861
862 if (0x10000 <= c && c <= 0x10FFFF) {
863 /* Form a surrogate pair (c, d) -- c is the high surrogate. */
864 d = 0xDC00 + (c & 0x3FF);
865 c = 0xD7C0 + (c >> 10);
866 ispair = JS_TRUE;
867 } else {
868 /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
869 if (c != 0x9 && c != 0xA && c != 0xD &&
870 !(0x20 <= c && c <= 0xD7FF) &&
871 !(0xE000 <= c && c <= 0xFFFD)) {
872 goto badncr;
873 }
874 }
875 } else {
876 /* Try to match one of the five XML 1.0 predefined entities. */
877 switch (length) {
878 case 3:
879 if (bp[2] == 't') {
880 if (bp[1] == 'l')
881 c = '<';
882 else if (bp[1] == 'g')
883 c = '>';
884 }
885 break;
886 case 4:
887 if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
888 c = '&';
889 break;
890 case 5:
891 if (bp[3] == 'o') {
892 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
893 c = '\'';
894 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
895 c = '"';
896 }
897 break;
898 }
899 if (c == 0) {
900 msg = JSMSG_UNKNOWN_XML_ENTITY;
901 goto bad;
902 }
903 }
904
905 /* If we matched, retract ts->tokenbuf and store the entity's value. */
906 *bp++ = (jschar) c;
907 if (ispair)
908 *bp++ = (jschar) d;
909 *bp = 0;
910 ts->tokenbuf.ptr = bp;
911 return JS_TRUE;
912
913 badncr:
914 msg = JSMSG_BAD_XML_NCR;
915 bad:
916 /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
917 JS_ASSERT(STRING_BUFFER_OK(&ts->tokenbuf));
918 JS_ASSERT(PTRDIFF(ts->tokenbuf.ptr, bp, jschar) >= 1);
919 bytes = js_DeflateString(cx, bp + 1,
920 PTRDIFF(ts->tokenbuf.ptr, bp, jschar) - 1);
921 if (bytes) {
922 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
923 msg, bytes);
924 JS_free(cx, bytes);
925 }
926 return JS_FALSE;
927 }
928
929 #endif /* JS_HAS_XML_SUPPORT */
930
931 JSTokenType
932 js_PeekToken(JSContext *cx, JSTokenStream *ts)
933 {
934 JSTokenType tt;
935
936 if (ts->lookahead != 0) {
937 tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
938 } else {
939 tt = js_GetToken(cx, ts);
940 js_UngetToken(ts);
941 }
942 return tt;
943 }
944
945 JSTokenType
946 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
947 {
948 JSTokenType tt;
949
950 if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
951 return TOK_EOL;
952 ts->flags |= TSF_NEWLINES;
953 tt = js_PeekToken(cx, ts);
954 ts->flags &= ~TSF_NEWLINES;
955 return tt;
956 }
957
958 /*
959 * We have encountered a '\': check for a Unicode escape sequence after it,
960 * returning the character code value if we found a Unicode escape sequence.
961 * Otherwise, non-destructively return the original '\'.
962 */
963 static int32
964 GetUnicodeEscape(JSTokenStream *ts)
965 {
966 jschar cp[5];
967 int32 c;
968
969 if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
970 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
971 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
972 {
973 c = (((((JS7_UNHEX(cp[1]) << 4)
974 + JS7_UNHEX(cp[2])) << 4)
975 + JS7_UNHEX(cp[3])) << 4)
976 + JS7_UNHEX(cp[4]);
977 SkipChars(ts, 5);
978 return c;
979 }
980 return '\\';
981 }
982
983 static JSToken *
984 NewToken(JSTokenStream *ts, ptrdiff_t adjust)
985 {
986 JSToken *tp;
987
988 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
989 tp = &CURRENT_TOKEN(ts);
990 tp->ptr = ts->linebuf.ptr + adjust;
991 tp->pos.begin.index = ts->linepos +
992 PTRDIFF(tp->ptr, ts->linebuf.base, jschar) -
993 ts->ungetpos;
994 tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno;
995 return tp;
996 }
997
998 static JS_ALWAYS_INLINE JSBool
999 ScanAsSpace(jschar c)
1000 {
1001 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
1002 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
1003 return JS_TRUE;
1004 return JS_FALSE;
1005 }
1006
1007 JSTokenType
1008 js_GetToken(JSContext *cx, JSTokenStream *ts)
1009 {
1010 JSTokenType tt;
1011 int32 c, qc;
1012 JSToken *tp;
1013 JSAtom *atom;
1014 JSBool hadUnicodeEscape;
1015 const struct keyword *kw;
1016 #if JS_HAS_XML_SUPPORT
1017 JSBool inTarget;
1018 size_t targetLength;
1019 ptrdiff_t contentIndex;
1020 #endif
1021
1022 #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base)
1023 #define TOKENBUF_LENGTH() PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar)
1024 #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf)
1025 #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \
1026 ? js_AtomizeChars(cx, \
1027 TOKENBUF_BASE(), \
1028 TOKENBUF_LENGTH(), \
1029 0) \
1030 : NULL)
1031 #define ADD_TO_TOKENBUF(c) FastAppendChar(&ts->tokenbuf, (jschar) (c))
1032
1033 /* The following 4 macros should only be used when TOKENBUF_OK() is true. */
1034 #define TOKENBUF_BASE() (ts->tokenbuf.base)
1035 #define TOKENBUF_END() (ts->tokenbuf.ptr)
1036 #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i])
1037 #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
1038 #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
1039
1040 /* Check for a pushed-back token resulting from mismatching lookahead. */
1041 while (ts->lookahead != 0) {
1042 JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
1043 ts->lookahead--;
1044 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1045 tt = CURRENT_TOKEN(ts).type;
1046 if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
1047 return tt;
1048 }
1049
1050 /* If there was a fatal error, keep returning TOK_ERROR. */
1051 if (ts->flags & TSF_ERROR)
1052 return TOK_ERROR;
1053
1054 #if JS_HAS_XML_SUPPORT
1055 if (ts->flags & TSF_XMLTEXTMODE) {
1056 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
1057 tp = NewToken(ts, 0);
1058 INIT_TOKENBUF();
1059 qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
1060
1061 while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
1062 if (c == '&' && qc == '<') {
1063 if (!GetXMLEntity(cx, ts))
1064 goto error;
1065 tt = TOK_XMLTEXT;
1066 continue;
1067 }
1068
1069 if (!JS_ISXMLSPACE(c))
1070 tt = TOK_XMLTEXT;
1071 ADD_TO_TOKENBUF(c);
1072 }
1073 UngetChar(ts, c);
1074
1075 if (TOKENBUF_LENGTH() == 0) {
1076 atom = NULL;
1077 } else {
1078 atom = TOKENBUF_TO_ATOM();
1079 if (!atom)
1080 goto error;
1081 }
1082 tp->pos.end.lineno = (uint16)ts->lineno;
1083 tp->t_op = JSOP_STRING;
1084 tp->t_atom = atom;
1085 goto out;
1086 }
1087
1088 if (ts->flags & TSF_XMLTAGMODE) {
1089 tp = NewToken(ts, 0);
1090 c = GetChar(ts);
1091 if (JS_ISXMLSPACE(c)) {
1092 do {
1093 c = GetChar(ts);
1094 } while (JS_ISXMLSPACE(c));
1095 UngetChar(ts, c);
1096 tt = TOK_XMLSPACE;
1097 goto out;
1098 }
1099
1100 if (c == EOF) {
1101 tt = TOK_EOF;
1102 goto out;
1103 }
1104
1105 INIT_TOKENBUF();
1106 if (JS_ISXMLNSSTART(c)) {
1107 JSBool sawColon = JS_FALSE;
1108
1109 ADD_TO_TOKENBUF(c);
1110 while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
1111 if (c == ':') {
1112 int nextc;
1113
1114 if (sawColon ||
1115 (nextc = PeekChar(ts),
1116 ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
1117 !JS_ISXMLNAME(nextc))) {
1118 js_ReportCompileErrorNumber(cx, ts, NULL,
1119 JSREPORT_ERROR,
1120 JSMSG_BAD_XML_QNAME);
1121 goto error;
1122 }
1123 sawColon = JS_TRUE;
1124 }
1125
1126 ADD_TO_TOKENBUF(c);
1127 }
1128
1129 UngetChar(ts, c);
1130 atom = TOKENBUF_TO_ATOM();
1131 if (!atom)
1132 goto error;
1133 tp->t_op = JSOP_STRING;
1134 tp->t_atom = atom;
1135 tt = TOK_XMLNAME;
1136 goto out;
1137 }
1138
1139 switch (c) {
1140 case '{':
1141 if (ts->flags & TSF_XMLONLYMODE)
1142 goto bad_xml_char;
1143 tt = TOK_LC;
1144 goto out;
1145
1146 case '=':
1147 tt = TOK_ASSIGN;
1148 goto out;
1149
1150 case '"':
1151 case '\'':
1152 qc = c;
1153 while ((c = GetChar(ts)) != qc) {
1154 if (c == EOF) {
1155 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1156 JSMSG_UNTERMINATED_STRING);
1157 goto error;
1158 }
1159
1160 /*
1161 * XML attribute values are double-quoted when pretty-printed,
1162 * so escape " if it is expressed directly in a single-quoted
1163 * attribute value.
1164 */
1165 if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1166 JS_ASSERT(qc == '\'');
1167 js_AppendCString(&ts->tokenbuf, js_quot_entity_str);
1168 continue;
1169 }
1170
1171 if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1172 if (!GetXMLEntity(cx, ts))
1173 goto error;
1174 continue;
1175 }
1176
1177 ADD_TO_TOKENBUF(c);
1178 }
1179 atom = TOKENBUF_TO_ATOM();
1180 if (!atom)
1181 goto error;
1182 tp->pos.end.lineno = (uint16)ts->lineno;
1183 tp->t_op = JSOP_STRING;
1184 tp->t_atom = atom;
1185 tt = TOK_XMLATTR;
1186 goto out;
1187
1188 case '>':
1189 tt = TOK_XMLTAGC;
1190 goto out;
1191
1192 case '/':
1193 if (MatchChar(ts, '>')) {
1194 tt = TOK_XMLPTAGC;
1195 goto out;
1196 }
1197 /* FALL THROUGH */
1198
1199 bad_xml_char:
1200 default:
1201 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1202 JSMSG_BAD_XML_CHARACTER);
1203 goto error;
1204 }
1205 /* NOTREACHED */
1206 }
1207 #endif /* JS_HAS_XML_SUPPORT */
1208
1209 retry:
1210 do {
1211 c = GetChar(ts);
1212 if (c == '\n') {
1213 ts->flags &= ~TSF_DIRTYLINE;
1214 if (ts->flags & TSF_NEWLINES)
1215 break;
1216 }
1217 } while (ScanAsSpace((jschar)c));
1218
1219 tp = NewToken(ts, -1);
1220 if (c == EOF) {
1221 tt = TOK_EOF;
1222 goto out;
1223 }
1224
1225 hadUnicodeEscape = JS_FALSE;
1226 if (JS_ISIDSTART(c) ||
1227 (c == '\\' &&
1228 (qc = GetUnicodeEscape(ts),
1229 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1230 if (hadUnicodeEscape)
1231 c = qc;
1232 INIT_TOKENBUF();
1233 for (;;) {
1234 ADD_TO_TOKENBUF(c);
1235 c = GetChar(ts);
1236 if (c == '\\') {
1237 qc = GetUnicodeEscape(ts);
1238 if (!JS_ISIDENT(qc))
1239 break;
1240 c = qc;
1241 hadUnicodeEscape = JS_TRUE;
1242 } else {
1243 if (!JS_ISIDENT(c))
1244 break;
1245 }
1246 }
1247 UngetChar(ts, c);
1248
1249 /*
1250 * Check for keywords unless we saw Unicode escape or parser asks
1251 * to ignore keywords.
1252 */
1253 if (!hadUnicodeEscape &&
1254 !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1255 TOKENBUF_OK() &&
1256 (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) {
1257 if (kw->tokentype == TOK_RESERVED) {
1258 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1259 JSREPORT_WARNING |
1260 JSREPORT_STRICT,
1261 JSMSG_RESERVED_ID,
1262 kw->chars)) {
1263 goto error;
1264 }
1265 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1266 tt = kw->tokentype;
1267 tp->t_op = (JSOp) kw->op;
1268 goto out;
1269 }
1270 }
1271
1272 atom = TOKENBUF_TO_ATOM();
1273 if (!atom)
1274 goto error;
1275 tp->t_op = JSOP_NAME;
1276 tp->t_atom = atom;
1277 tt = TOK_NAME;
1278 goto out;
1279 }
1280
1281 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1282 jsint radix;
1283 const jschar *endptr;
1284 jsdouble dval;
1285
1286 radix = 10;
1287 INIT_TOKENBUF();
1288
1289 if (c == '0') {
1290 ADD_TO_TOKENBUF(c);
1291 c = GetChar(ts);
1292 if (JS_TOLOWER(c) == 'x') {
1293 ADD_TO_TOKENBUF(c);
1294 c = GetChar(ts);
1295 radix = 16;
1296 } else if (JS7_ISDEC(c)) {
1297 radix = 8;
1298 }
1299 }
1300
1301 while (JS7_ISHEX(c)) {
1302 if (radix < 16) {
1303 if (JS7_ISLET(c))
1304 break;
1305
1306 /*
1307 * We permit 08 and 09 as decimal numbers, which makes our
1308 * behaviour a superset of the ECMA numeric grammar. We might
1309 * not always be so permissive, so we warn about it.
1310 */
1311 if (radix == 8 && c >= '8') {
1312 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1313 JSREPORT_WARNING,
1314 JSMSG_BAD_OCTAL,
1315 c == '8' ? "08" : "09")) {
1316 goto error;
1317 }
1318 radix = 10;
1319 }
1320 }
1321 ADD_TO_TOKENBUF(c);
1322 c = GetChar(ts);
1323 }
1324
1325 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1326 if (c == '.') {
1327 do {
1328 ADD_TO_TOKENBUF(c);
1329 c = GetChar(ts);
1330 } while (JS7_ISDEC(c));
1331 }
1332 if (JS_TOLOWER(c) == 'e') {
1333 ADD_TO_TOKENBUF(c);
1334 c = GetChar(ts);
1335 if (c == '+' || c == '-') {
1336 ADD_TO_TOKENBUF(c);
1337 c = GetChar(ts);
1338 }
1339 if (!JS7_ISDEC(c)) {
1340 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1341 JSMSG_MISSING_EXPONENT);
1342 goto error;
1343 }
1344 do {
1345 ADD_TO_TOKENBUF(c);
1346 c = GetChar(ts);
1347 } while (JS7_ISDEC(c));
1348 }
1349 }
1350
1351 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1352 UngetChar(ts, c);
1353 ADD_TO_TOKENBUF(0);
1354
1355 if (!TOKENBUF_OK())
1356 goto error;
1357 if (radix == 10) {
1358 if (!js_strtod(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1359 &endptr, &dval)) {
1360 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1361 JSMSG_OUT_OF_MEMORY);
1362 goto error;
1363 }
1364 } else {
1365 if (!js_strtointeger(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1366 &endptr, radix, &dval)) {
1367 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1368 JSMSG_OUT_OF_MEMORY);
1369 goto error;
1370 }
1371 }
1372 tp->t_dval = dval;
1373 tt = TOK_NUMBER;
1374 goto out;
1375 }
1376
1377 if (c == '"' || c == '\'') {
1378 qc = c;
1379 INIT_TOKENBUF();
1380 while ((c = GetChar(ts)) != qc) {
1381 if (c == '\n' || c == EOF) {
1382 UngetChar(ts, c);
1383 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1384 JSMSG_UNTERMINATED_STRING);
1385 goto error;
1386 }
1387 if (c == '\\') {
1388 switch (c = GetChar(ts)) {
1389 case 'b': c = '\b'; break;
1390 case 'f': c = '\f'; break;
1391 case 'n': c = '\n'; break;
1392 case 'r': c = '\r'; break;
1393 case 't': c = '\t'; break;
1394 case 'v': c = '\v'; break;
1395
1396 default:
1397 if ('0' <= c && c < '8') {
1398 int32 val = JS7_UNDEC(c);
1399
1400 c = PeekChar(ts);
1401 if ('0' <= c && c < '8') {
1402 val = 8 * val + JS7_UNDEC(c);
1403 GetChar(ts);
1404 c = PeekChar(ts);
1405 if ('0' <= c && c < '8') {
1406 int32 save = val;
1407 val = 8 * val + JS7_UNDEC(c);
1408 if (val <= 0377)
1409 GetChar(ts);
1410 else
1411 val = save;
1412 }
1413 }
1414
1415 c = (jschar)val;
1416 } else if (c == 'u') {
1417 jschar cp[4];
1418 if (PeekChars(ts, 4, cp) &&
1419 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1420 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1421 c = (((((JS7_UNHEX(cp[0]) << 4)
1422 + JS7_UNHEX(cp[1])) << 4)
1423 + JS7_UNHEX(cp[2])) << 4)
1424 + JS7_UNHEX(cp[3]);
1425 SkipChars(ts, 4);
1426 }
1427 } else if (c == 'x') {
1428 jschar cp[2];
1429 if (PeekChars(ts, 2, cp) &&
1430 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1431 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1432 SkipChars(ts, 2);
1433 }
1434 } else if (c == '\n') {
1435 /* ECMA follows C by removing escaped newlines. */
1436 continue;
1437 }
1438 break;
1439 }
1440 }
1441 ADD_TO_TOKENBUF(c);
1442 }
1443 atom = TOKENBUF_TO_ATOM();
1444 if (!atom)
1445 goto error;
1446 tp->pos.end.lineno = (uint16)ts->lineno;
1447 tp->t_op = JSOP_STRING;
1448 tp->t_atom = atom;
1449 tt = TOK_STRING;
1450 goto out;
1451 }
1452
1453 switch (c) {
1454 case '\n': tt = TOK_EOL; goto eol_out;
1455 case ';': tt = TOK_SEMI; break;
1456 case '[': tt = TOK_LB; break;
1457 case ']': tt = TOK_RB; break;
1458 case '{': tt = TOK_LC; break;
1459 case '}': tt = TOK_RC; break;
1460 case '(': tt = TOK_LP; break;
1461 case ')': tt = TOK_RP; break;
1462 case ',': tt = TOK_COMMA; break;
1463 case '?': tt = TOK_HOOK; break;
1464
1465 case '.':
1466 #if JS_HAS_XML_SUPPORT
1467 if (MatchChar(ts, c))
1468 tt = TOK_DBLDOT;
1469 else
1470 #endif
1471 tt = TOK_DOT;
1472 break;
1473
1474 case ':':
1475 #if JS_HAS_XML_SUPPORT
1476 if (MatchChar(ts, c)) {
1477 tt = TOK_DBLCOLON;
1478 break;
1479 }
1480 #endif
1481 /*
1482 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1483 * object initializer, likewise for setter.
1484 */
1485 tp->t_op = JSOP_NOP;
1486 tt = TOK_COLON;
1487 break;
1488
1489 case '|':
1490 if (MatchChar(ts, c)) {
1491 tt = TOK_OR;
1492 } else if (MatchChar(ts, '=')) {
1493 tp->t_op = JSOP_BITOR;
1494 tt = TOK_ASSIGN;
1495 } else {
1496 tt = TOK_BITOR;
1497 }
1498 break;
1499
1500 case '^':
1501 if (MatchChar(ts, '=')) {
1502 tp->t_op = JSOP_BITXOR;
1503 tt = TOK_ASSIGN;
1504 } else {
1505 tt = TOK_BITXOR;
1506 }
1507 break;
1508
1509 case '&':
1510 if (MatchChar(ts, c)) {
1511 tt = TOK_AND;
1512 } else if (MatchChar(ts, '=')) {
1513 tp->t_op = JSOP_BITAND;
1514 tt = TOK_ASSIGN;
1515 } else {
1516 tt = TOK_BITAND;
1517 }
1518 break;
1519
1520 case '=':
1521 if (MatchChar(ts, c)) {
1522 tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1523 tt = TOK_EQOP;
1524 } else {
1525 tp->t_op = JSOP_NOP;
1526 tt = TOK_ASSIGN;
1527 }
1528 break;
1529
1530 case '!':
1531 if (MatchChar(ts, '=')) {
1532 tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1533 tt = TOK_EQOP;
1534 } else {
1535 tp->t_op = JSOP_NOT;
1536 tt = TOK_UNARYOP;
1537 }
1538 break;
1539
1540 #if JS_HAS_XML_SUPPORT
1541 case '@':
1542 tt = TOK_AT;
1543 break;
1544 #endif
1545
1546 case '<':
1547 #if JS_HAS_XML_SUPPORT
1548 /*
1549 * After much testing, it's clear that Postel's advice to protocol
1550 * designers ("be liberal in what you accept, and conservative in what
1551 * you send") invites a natural-law repercussion for JS as "protocol":
1552 *
1553 * "If you are liberal in what you accept, others will utterly fail to
1554 * be conservative in what they send."
1555 *
1556 * Which means you will get <!-- comments to end of line in the middle
1557 * of .js files, and after if conditions whose then statements are on
1558 * the next line, and other wonders. See at least the following bugs:
1559 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1560 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1561 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1562 *
1563 * So without JSOPTION_XML, we never scan an XML comment or CDATA
1564 * literal. We always scan <! as the start of an HTML comment hack
1565 * to end of line, used since Netscape 2 to hide script tag content
1566 * from script-unaware browsers.
1567 */
1568 if ((ts->flags & TSF_OPERAND) &&
1569 (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1570 /* Check for XML comment or CDATA section. */
1571 if (MatchChar(ts, '!')) {
1572 INIT_TOKENBUF();
1573
1574 /* Scan XML comment. */
1575 if (MatchChar(ts, '-')) {
1576 if (!MatchChar(ts, '-'))
1577 goto bad_xml_markup;
1578 while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1579 if (c == EOF)
1580 goto bad_xml_markup;
1581 ADD_TO_TOKENBUF(c);
1582 }
1583 tt = TOK_XMLCOMMENT;
1584 tp->t_op = JSOP_XMLCOMMENT;
1585 goto finish_xml_markup;
1586 }
1587
1588 /* Scan CDATA section. */
1589 if (MatchChar(ts, '[')) {
1590 jschar cp[6];
1591 if (PeekChars(ts, 6, cp) &&
1592 cp[0] == 'C' &&
1593 cp[1] == 'D' &&
1594 cp[2] == 'A' &&
1595 cp[3] == 'T' &&
1596 cp[4] == 'A' &&
1597 cp[5] == '[') {
1598 SkipChars(ts, 6);
1599 while ((c = GetChar(ts)) != ']' ||
1600 !PeekChars(ts, 2, cp) ||
1601 cp[0] != ']' ||
1602 cp[1] != '>') {
1603 if (c == EOF)
1604 goto bad_xml_markup;
1605 ADD_TO_TOKENBUF(c);
1606 }
1607 GetChar(ts); /* discard ] but not > */
1608 tt = TOK_XMLCDATA;
1609 tp->t_op = JSOP_XMLCDATA;
1610 goto finish_xml_markup;
1611 }
1612 goto bad_xml_markup;
1613 }
1614 }
1615
1616 /* Check for processing instruction. */
1617 if (MatchChar(ts, '?')) {
1618 inTarget = JS_TRUE;
1619 targetLength = 0;
1620 contentIndex = -1;
1621
1622 INIT_TOKENBUF();
1623 while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1624 if (c == EOF)
1625 goto bad_xml_markup;
1626 if (inTarget) {
1627 if (JS_ISXMLSPACE(c)) {
1628 if (TOKENBUF_LENGTH() == 0)
1629 goto bad_xml_markup;
1630 inTarget = JS_FALSE;
1631 } else {
1632 if (!((TOKENBUF_LENGTH() == 0)
1633 ? JS_ISXMLNSSTART(c)
1634 : JS_ISXMLNS(c))) {
1635 goto bad_xml_markup;
1636 }
1637 ++targetLength;
1638 }
1639 } else {
1640 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1641 contentIndex = TOKENBUF_LENGTH();
1642 }
1643 ADD_TO_TOKENBUF(c);
1644 }
1645 if (targetLength == 0)
1646 goto bad_xml_markup;
1647 if (!TOKENBUF_OK())
1648 goto error;
1649 if (contentIndex < 0) {
1650 atom = cx->runtime->atomState.emptyAtom;
1651 } else {
1652 atom = js_AtomizeChars(cx,
1653 &TOKENBUF_CHAR(contentIndex),
1654 TOKENBUF_LENGTH() - contentIndex,
1655 0);
1656 if (!atom)
1657 goto error;
1658 }
1659 TRIM_TOKENBUF(targetLength);
1660 tp->t_atom2 = atom;
1661 tt = TOK_XMLPI;
1662
1663 finish_xml_markup:
1664 if (!MatchChar(ts, '>'))
1665 goto bad_xml_markup;
1666 atom = TOKENBUF_TO_ATOM();
1667 if (!atom)
1668 goto error;
1669 tp->t_atom = atom;
1670 tp->pos.end.lineno = (uint16)ts->lineno;
1671 goto out;
1672 }
1673
1674 /* An XML start-of-tag character. */
1675 tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1676 goto out;
1677
1678 bad_xml_markup:
1679 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1680 JSMSG_BAD_XML_MARKUP);
1681 goto error;
1682 }
1683 #endif /* JS_HAS_XML_SUPPORT */
1684
1685 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1686 if (MatchChar(ts, '!')) {
1687 if (MatchChar(ts, '-')) {
1688 if (MatchChar(ts, '-')) {
1689 ts->flags |= TSF_IN_HTML_COMMENT;
1690 goto skipline;
1691 }
1692 UngetChar(ts, '-');
1693 }
1694 UngetChar(ts, '!');
1695 }
1696 if (MatchChar(ts, c)) {
1697 tp->t_op = JSOP_LSH;
1698 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1699 } else {
1700 tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1701 tt = TOK_RELOP;
1702 }
1703 break;
1704
1705 case '>':
1706 if (MatchChar(ts, c)) {
1707 tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1708 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1709 } else {
1710 tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1711 tt = TOK_RELOP;
1712 }
1713 break;
1714
1715 case '*':
1716 tp->t_op = JSOP_MUL;
1717 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1718 break;
1719
1720 case '/':
1721 if (MatchChar(ts, '/')) {
1722 /*
1723 * Hack for source filters such as the Mozilla XUL preprocessor:
1724 * "//@line 123\n" sets the number of the *next* line after the
1725 * comment to 123.
1726 */
1727 if (JS_HAS_ATLINE_OPTION(cx)) {
1728 jschar cp[5];
1729 uintN i, line, temp;
1730 char filename[1024];
1731
1732 if (PeekChars(ts, 5, cp) &&
1733 cp[0] == '@' &&
1734 cp[1] == 'l' &&
1735 cp[2] == 'i' &&
1736 cp[3] == 'n' &&
1737 cp[4] == 'e') {
1738 SkipChars(ts, 5);
1739 while ((c = GetChar(ts)) != '\n' && ScanAsSpace((jschar)c))
1740 continue;
1741 if (JS7_ISDEC(c)) {
1742 line = JS7_UNDEC(c);
1743 while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1744 temp = 10 * line + JS7_UNDEC(c);
1745 if (temp < line) {
1746 /* Ignore overlarge line numbers. */
1747 goto skipline;
1748 }
1749 line = temp;
1750 }
1751 while (c != '\n' && ScanAsSpace((jschar)c))
1752 c = GetChar(ts);
1753 i = 0;
1754 if (c == '"') {
1755 while ((c = GetChar(ts)) != EOF && c != '"') {
1756 if (c == '\n') {
1757 UngetChar(ts, c);
1758 goto skipline;
1759 }
1760 if ((c >> 8) != 0 || i >= sizeof filename - 1)
1761 goto skipline;
1762 filename[i++] = (char) c;
1763 }
1764 if (c == '"') {
1765 while ((c = GetChar(ts)) != '\n' &&
1766 ScanAsSpace((jschar)c)) {
1767 continue;
1768 }
1769 }
1770 }
1771 filename[i] = '\0';
1772 if (c == '\n') {
1773 if (i > 0) {
1774 if (ts->flags & TSF_OWNFILENAME)
1775 JS_free(cx, (void *) ts->filename);
1776 ts->filename = JS_strdup(cx, filename);
1777 if (!ts->filename)
1778 goto error;
1779 ts->flags |= TSF_OWNFILENAME;
1780 }
1781 ts->lineno = line;
1782 }
1783 }
1784 UngetChar(ts, c);
1785 }
1786 }
1787
1788 skipline:
1789 /* Optimize line skipping if we are not in an HTML comment. */
1790 if (ts->flags & TSF_IN_HTML_COMMENT) {
1791 while ((c = GetChar(ts)) != EOF && c != '\n') {
1792 if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1793 ts->flags &= ~TSF_IN_HTML_COMMENT;
1794 }
1795 } else {
1796 while ((c = GetChar(ts)) != EOF && c != '\n')
1797 continue;
1798 }
1799 UngetChar(ts, c);
1800 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1801 goto retry;
1802 }
1803
1804 if (MatchChar(ts, '*')) {
1805 while ((c = GetChar(ts)) != EOF &&
1806 !(c == '*' && MatchChar(ts, '/'))) {
1807 /* Ignore all characters until comment close. */
1808 }
1809 if (c == EOF) {
1810 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1811 JSMSG_UNTERMINATED_COMMENT);
1812 goto error;
1813 }
1814 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1815 goto retry;
1816 }
1817
1818 if (ts->flags & TSF_OPERAND) {
1819 uintN flags;
1820 JSBool inCharClass = JS_FALSE;
1821
1822 INIT_TOKENBUF();
1823 for (;;) {
1824 c = GetChar(ts);
1825 if (c == '\n' || c == EOF) {
1826 UngetChar(ts, c);
1827 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1828 JSMSG_UNTERMINATED_REGEXP);
1829 goto error;
1830 }
1831 if (c == '\\') {
1832 ADD_TO_TOKENBUF(c);
1833 c = GetChar(ts);
1834 } else if (c == '[') {
1835 inCharClass = JS_TRUE;
1836 } else if (c == ']') {
1837 inCharClass = JS_FALSE;
1838 } else if (c == '/' && !inCharClass) {
1839 /* For compat with IE, allow unescaped / in char classes. */
1840 break;
1841 }
1842 ADD_TO_TOKENBUF(c);
1843 }
1844 for (flags = 0; ; ) {
1845 c = PeekChar(ts);
1846 if (c == 'g')
1847 flags |= JSREG_GLOB;
1848 else if (c == 'i')
1849 flags |= JSREG_FOLD;
1850 else if (c == 'm')
1851 flags |= JSREG_MULTILINE;
1852 else if (c == 'y')
1853 flags |= JSREG_STICKY;
1854 else
1855 break;
1856 GetChar(ts);
1857 }
1858 c = PeekChar(ts);
1859 if (JS7_ISLET(c)) {
1860 tp->ptr = ts->linebuf.ptr - 1;
1861 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1862 JSMSG_BAD_REGEXP_FLAG);
1863 (void) GetChar(ts);
1864 goto error;
1865 }
1866 /* XXXbe fix jsregexp.c so it doesn't depend on NUL termination */
1867 if (!TOKENBUF_OK())
1868 goto error;
1869 NUL_TERM_TOKENBUF();
1870 tp->t_reflags = flags;
1871 tt = TOK_REGEXP;
1872 break;
1873 }
1874
1875 tp->t_op = JSOP_DIV;
1876 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1877 break;
1878
1879 case '%':
1880 tp->t_op = JSOP_MOD;
1881 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1882 break;
1883
1884 case '~':
1885 tp->t_op = JSOP_BITNOT;
1886 tt = TOK_UNARYOP;
1887 break;
1888
1889 case '+':
1890 if (MatchChar(ts, '=')) {
1891 tp->t_op = JSOP_ADD;
1892 tt = TOK_ASSIGN;
1893 } else if (MatchChar(ts, c)) {
1894 tt = TOK_INC;
1895 } else {
1896 tp->t_op = JSOP_POS;
1897 tt = TOK_PLUS;
1898 }
1899 break;
1900
1901 case '-':
1902 if (MatchChar(ts, '=')) {
1903 tp->t_op = JSOP_SUB;
1904 tt = TOK_ASSIGN;
1905 } else if (MatchChar(ts, c)) {
1906 if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
1907 ts->flags &= ~TSF_IN_HTML_COMMENT;
1908 goto skipline;
1909 }
1910 tt = TOK_DEC;
1911 } else {
1912 tp->t_op = JSOP_NEG;
1913 tt = TOK_MINUS;
1914 }
1915 break;
1916
1917 #if JS_HAS_SHARP_VARS
1918 case '#':
1919 {
1920 uint32 n;
1921
1922 c = GetChar(ts);
1923 if (!JS7_ISDEC(c)) {
1924 UngetChar(ts, c);
1925 goto badchar;
1926 }
1927 n = (uint32)JS7_UNDEC(c);
1928 for (;;) {
1929 c = GetChar(ts);
1930 if (!JS7_ISDEC(c))
1931 break;
1932 n = 10 * n + JS7_UNDEC(c);
1933 if (n >= UINT16_LIMIT) {
1934 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1935 JSMSG_SHARPVAR_TOO_BIG);
1936 goto error;
1937 }
1938 }
1939 tp->t_dval = (jsdouble) n;
1940 if (JS_HAS_STRICT_OPTION(cx) &&
1941 (c == '=' || c == '#')) {
1942 char buf[20];
1943 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1944 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1945 JSREPORT_WARNING |
1946 JSREPORT_STRICT,
1947 JSMSG_DEPRECATED_USAGE,
1948 buf)) {
1949 goto error;
1950 }
1951 }
1952 if (c == '=')
1953 tt = TOK_DEFSHARP;
1954 else if (c == '#')
1955 tt = TOK_USESHARP;
1956 else
1957 goto badchar;
1958 break;
1959 }
1960 #endif /* JS_HAS_SHARP_VARS */
1961
1962 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1963 badchar:
1964 #endif
1965
1966 default:
1967 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1968 JSMSG_ILLEGAL_CHARACTER);
1969 goto error;
1970 }
1971
1972 out:
1973 JS_ASSERT(tt != TOK_EOL);
1974 ts->flags |= TSF_DIRTYLINE;
1975
1976 eol_out:
1977 if (!STRING_BUFFER_OK(&ts->tokenbuf))
1978 tt = TOK_ERROR;
1979 JS_ASSERT(tt < TOK_LIMIT);
1980 tp->pos.end.index = ts->linepos +
1981 PTRDIFF(ts->linebuf.ptr, ts->linebuf.base, jschar) -
1982 ts->ungetpos;
1983 tp->type = tt;
1984 return tt;
1985
1986 error:
1987 tt = TOK_ERROR;
1988 ts->flags |= TSF_ERROR;
1989 goto out;
1990
1991 #undef INIT_TOKENBUF
1992 #undef TOKENBUF_LENGTH
1993 #undef TOKENBUF_OK
1994 #undef TOKENBUF_TO_ATOM
1995 #undef ADD_TO_TOKENBUF
1996 #undef TOKENBUF_BASE
1997 #undef TOKENBUF_CHAR
1998 #undef TRIM_TOKENBUF
1999 #undef NUL_TERM_TOKENBUF
2000 }
2001
2002 void
2003 js_UngetToken(JSTokenStream *ts)
2004 {
2005 JS_ASSERT(ts->lookahead < NTOKENS_MASK);
2006 ts->lookahead++;
2007 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
2008 }
2009
2010 JSBool
2011 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
2012 {
2013 if (js_GetToken(cx, ts) == tt)
2014 return JS_TRUE;
2015 js_UngetToken(ts);
2016 return JS_FALSE;
2017 }

  ViewVC Help
Powered by ViewVC 1.1.24