/[jscoverage]/trunk/js/jsscan.cpp
ViewVC logotype

Contents of /trunk/js/jsscan.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 507 - (show annotations)
Sun Jan 10 07:23:34 2010 UTC (9 years, 5 months ago) by siliconforks
File size: 58407 byte(s)
Update SpiderMonkey from Firefox 3.6rc1.

1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set sw=4 ts=8 et tw=78:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS lexical scanner.
43 */
44 #include <stdio.h> /* first to avoid trouble on some systems */
45 #include <errno.h>
46 #include <limits.h>
47 #include <math.h>
48 #ifdef HAVE_MEMORY_H
49 #include <memory.h>
50 #endif
51 #include <stdarg.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include "jstypes.h"
55 #include "jsstdint.h"
56 #include "jsarena.h" /* Added by JSIFY */
57 #include "jsbit.h"
58 #include "jsutil.h" /* Added by JSIFY */
59 #include "jsdtoa.h"
60 #include "jsprf.h"
61 #include "jsapi.h"
62 #include "jsatom.h"
63 #include "jscntxt.h"
64 #include "jsversion.h"
65 #include "jsemit.h"
66 #include "jsexn.h"
67 #include "jsnum.h"
68 #include "jsopcode.h"
69 #include "jsparse.h"
70 #include "jsregexp.h"
71 #include "jsscan.h"
72 #include "jsscript.h"
73 #include "jsstaticcheck.h"
74 #include "jsvector.h"
75
76 #if JS_HAS_XML_SUPPORT
77 #include "jsxml.h"
78 #endif
79
/*
 * Keyword tables. jskeyword.tbl is included twice, each time with a different
 * definition of JS_KEYWORD. This first pass expands every entry into a
 * js_<keyword>_str character array holding the keyword's spelling.
 */
80 #define JS_KEYWORD(keyword, type, op, version) \
81 const char js_##keyword##_str[] = #keyword;
82 #include "jskeyword.tbl"
83 #undef JS_KEYWORD
84
/* One row of keyword_defs, the table that FindKeyword returns pointers into. */
85 struct keyword {
86 const char *chars; /* C string with keyword text */
87 JSTokenType tokentype; /* JSTokenType */
88 JSOp op; /* JSOp */
89 JSVersion version; /* JSVersion */
90 };
91
/*
 * Second pass over jskeyword.tbl: one initializer row per entry, pointing at
 * the js_<keyword>_str array defined by the first pass.
 */
92 static const struct keyword keyword_defs[] = {
93 #define JS_KEYWORD(keyword, type, op, version) \
94 {js_##keyword##_str, type, op, version},
95 #include "jskeyword.tbl"
96 #undef JS_KEYWORD
97 };
98
/* Number of rows in keyword_defs; used as the loop bound in js_MapKeywords. */
99 #define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
100
/*
 * Look up the keyword spelled by the `length` characters starting at s.
 * Returns a pointer to the matching keyword_defs row, or NULL when there is
 * no match. length must be non-zero.
 *
 * The search itself lives in jsautokw.h, which is not visible here; it is
 * presumably generated code that inspects JSKW_LENGTH() and JSKW_AT(column)
 * and finishes through one of the three JSKW_* exit macros defined below,
 * each of which jumps to a label in this function.
 */
101 static const struct keyword *
102 FindKeyword(const jschar *s, size_t length)
103 {
104 register size_t i;
105 const struct keyword *kw;
106 const char *chars;
107
108 JS_ASSERT(length != 0);
109
/* Glue between the included search code and this function's locals/labels. */
110 #define JSKW_LENGTH() length
111 #define JSKW_AT(column) s[column]
112 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
113 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
114 #define JSKW_NO_MATCH() goto no_match;
115 #include "jsautokw.h"
116 #undef JSKW_NO_MATCH
117 #undef JSKW_TEST_GUESS
118 #undef JSKW_GOT_MATCH
119 #undef JSKW_AT
120 #undef JSKW_LENGTH
121
/* The included code has already established that row i matches. */
122 got_match:
123 return &keyword_defs[i];
124
/*
 * Row i is only a candidate: compare all `length` characters of s against the
 * row's C string. Only `length` characters are compared and the C string's
 * terminator is never examined, so this presumably relies on the included
 * code having chosen a candidate of the right length -- TODO confirm.
 */
125 test_guess:
126 kw = &keyword_defs[i];
127 chars = kw->chars;
128 do {
129 if (*s++ != (unsigned char)(*chars++))
130 goto no_match;
131 } while (--length != 0);
132 return kw;
133
134 no_match:
135 return NULL;
136 }
137
138 JSTokenType
139 js_CheckKeyword(const jschar *str, size_t length)
140 {
141 const struct keyword *kw;
142
143 JS_ASSERT(length != 0);
144 kw = FindKeyword(str, length);
145 return kw ? kw->tokentype : TOK_EOF;
146 }
147
148 JS_FRIEND_API(void)
149 js_MapKeywords(void (*mapfun)(const char *))
150 {
151 size_t i;
152
153 for (i = 0; i != KEYWORD_COUNT; ++i)
154 mapfun(keyword_defs[i].chars);
155 }
156
157 JSBool
158 js_IsIdentifier(JSString *str)
159 {
160 size_t length;
161 jschar c;
162 const jschar *chars, *end;
163
164 str->getCharsAndLength(chars, length);
165 if (length == 0)
166 return JS_FALSE;
167 c = *chars;
168 if (!JS_ISIDSTART(c))
169 return JS_FALSE;
170 end = chars + length;
171 while (++chars != end) {
172 c = *chars;
173 if (!JS_ISIDENT(c))
174 return JS_FALSE;
175 }
176 return JS_TRUE;
177 }
178
179 /* Initialize members that aren't initialized in |init|. */
180 JSTokenStream::JSTokenStream(JSContext *cx)
181 : tokens(), cursor(), lookahead(), ungetpos(), ungetbuf(), flags(), linelen(),
182 linepos(), file(), listenerTSData(), saveEOL(), tokenbuf(cx)
183 {}
184
185 bool
186 JSTokenStream::init(JSContext *cx, const jschar *base, size_t length,
187 FILE *fp, const char *fn, uintN ln)
188 {
189 jschar *buf;
190 size_t nb;
191
192 JS_ASSERT_IF(fp, !base);
193 JS_ASSERT_IF(!base, length == 0);
194 nb = fp
195 ? 2 * JS_LINE_LIMIT * sizeof(jschar)
196 : JS_LINE_LIMIT * sizeof(jschar);
197 JS_ARENA_ALLOCATE_CAST(buf, jschar *, &cx->tempPool, nb);
198 if (!buf) {
199 js_ReportOutOfScriptQuota(cx);
200 return false;
201 }
202 memset(buf, 0, nb);
203
204 /* Initialize members. */
205 filename = fn;
206 lineno = ln;
207 linebuf.base = linebuf.limit = linebuf.ptr = buf;
208 if (fp) {
209 file = fp;
210 userbuf.base = buf + JS_LINE_LIMIT;
211 userbuf.ptr = userbuf.limit = userbuf.base + JS_LINE_LIMIT;
212 } else {
213 userbuf.base = (jschar *)base;
214 userbuf.limit = (jschar *)base + length;
215 userbuf.ptr = (jschar *)base;
216 }
217 listener = cx->debugHooks->sourceHandler;
218 listenerData = cx->debugHooks->sourceHandlerData;
219 return true;
220 }
221
222 void
223 JSTokenStream::close(JSContext *cx)
224 {
225 if (flags & TSF_OWNFILENAME)
226 cx->free((void *) filename);
227 }
228
229 /* Use the fastest available getc. */
230 #if defined(HAVE_GETC_UNLOCKED)
231 # define fast_getc getc_unlocked
232 #elif defined(HAVE__GETC_NOLOCK)
233 # define fast_getc _getc_nolock
234 #else
235 # define fast_getc getc
236 #endif
237
238 JS_FRIEND_API(int)
239 js_fgets(char *buf, int size, FILE *file)
240 {
241 int n, i, c;
242 JSBool crflag;
243
244 n = size - 1;
245 if (n < 0)
246 return -1;
247
248 crflag = JS_FALSE;
249 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
250 buf[i] = c;
251 if (c == '\n') { /* any \n ends a line */
252 i++; /* keep the \n; we know there is room for \0 */
253 break;
254 }
255 if (crflag) { /* \r not followed by \n ends line at the \r */
256 ungetc(c, file);
257 break; /* and overwrite c in buf with \0 */
258 }
259 crflag = (c == '\r');
260 }
261
262 buf[i] = '\0';
263 return i;
264 }
265
/*
 * Return the next character of ts, or EOF (after setting TSF_EOF) when the
 * input is exhausted.
 *
 * Characters pushed back with UngetChar are returned first. Otherwise
 * characters are served from linebuf, which is refilled one line segment at a
 * time from userbuf; for a file-backed stream userbuf is itself refilled via
 * js_fgets. The line terminators \r, \r\n, LINE_SEPARATOR and PARA_SEPARATOR
 * are stored in linebuf as a single '\n'. ts->lineno is incremented whenever
 * the returned character is '\n'.
 */
266 static int32
267 GetChar(JSTokenStream *ts)
268 {
269 int32 c;
270 ptrdiff_t i, j, len, olen;
271 JSBool crflag;
272 char cbuf[JS_LINE_LIMIT];
273 jschar *ubuf, *nl;
274
275 if (ts->ungetpos != 0) {
276 c = ts->ungetbuf[--ts->ungetpos];
277 } else {
/* linebuf is drained: build the next segment before reading from it. */
278 if (ts->linebuf.ptr == ts->linebuf.limit) {
279 len = ts->userbuf.limit - ts->userbuf.ptr;
280 if (len <= 0) {
281 if (!ts->file) {
282 ts->flags |= TSF_EOF;
283 return EOF;
284 }
285
286 /* Fill ts->userbuf so that \r and \r\n convert to \n. */
287 crflag = (ts->flags & TSF_CRFLAG) != 0;
/* Read one char fewer when a '\n' may have to be inserted in front. */
288 len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
289 if (len <= 0) {
290 ts->flags |= TSF_EOF;
291 return EOF;
292 }
293 olen = len;
294 ubuf = ts->userbuf.base;
295 i = 0;
/*
 * The previous segment ended in a bare '\r' whose '\n' was deferred. If this
 * read does not start with '\n', emit that deferred '\n' now.
 */
296 if (crflag) {
297 ts->flags &= ~TSF_CRFLAG;
298 if (cbuf[0] != '\n') {
299 ubuf[i++] = '\n';
300 len++;
301 ts->linepos--;
302 }
303 }
/* Widen the bytes that were read into jschars. */
304 for (j = 0; i < len; i++, j++)
305 ubuf[i] = (jschar) (unsigned char) cbuf[j];
306 ts->userbuf.limit = ubuf + len;
307 ts->userbuf.ptr = ubuf;
308 }
/* Hand the remaining userbuf text to the source listener, if one is set. */
309 if (ts->listener) {
310 ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
311 &ts->listenerTSData, ts->listenerData);
312 }
313
/* saveEOL remembers a terminator found while an over-long line was split. */
314 nl = ts->saveEOL;
315 if (!nl) {
316 /*
317 * Any one of \n, \r, or \r\n ends a line (the longest
318 * match wins). Also allow the Unicode line and paragraph
319 * separators.
320 */
321 for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
322 /*
323 * Try to prevent value-testing on most characters by
324 * filtering out characters that aren't 000x or 202x.
325 */
326 if ((*nl & 0xDFD0) == 0) {
327 if (*nl == '\n')
328 break;
329 if (*nl == '\r') {
330 if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
331 nl++;
332 break;
333 }
334 if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
335 break;
336 }
337 }
338 }
339
340 /*
341 * If there was a line terminator, copy thru it into linebuf.
342 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
343 */
344 if (nl < ts->userbuf.limit)
345 len = (nl - ts->userbuf.ptr) + 1;
346 if (len >= JS_LINE_LIMIT) {
347 len = JS_LINE_LIMIT - 1;
348 ts->saveEOL = nl;
349 } else {
350 ts->saveEOL = NULL;
351 }
352 js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
353 ts->userbuf.ptr += len;
354 olen = len;
355
356 /*
357 * Make sure linebuf contains \n for EOL (don't do this in
358 * userbuf because the user's string might be readonly).
359 */
360 if (nl < ts->userbuf.limit) {
361 if (*nl == '\r') {
362 if (ts->linebuf.base[len-1] == '\r') {
363 /*
364 * Does the line segment end in \r? We must check
365 * for a \n at the front of the next segment before
366 * storing a \n into linebuf. This case matters
367 * only when we're reading from a file.
368 */
369 if (nl + 1 == ts->userbuf.limit && ts->file) {
370 len--;
371 ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
372 if (len == 0) {
373 /*
374 * This can happen when a segment ends in
375 * \r\r. Start over. ptr == limit in this
376 * case, so we'll fall into buffer-filling
377 * code.
378 */
379 return GetChar(ts);
380 }
381 } else {
382 ts->linebuf.base[len-1] = '\n';
383 }
384 }
385 } else if (*nl == '\n') {
/*
 * Collapse a copied "\r\n" pair into a single '\n'.
 * NOTE(review): when len == 1 the index len-2 is -1, so this reads
 * linebuf.base[-1]. That looks reachable when an over-long line was split
 * between its '\r' and '\n' (saveEOL path) -- confirm and guard if so.
 */
386 if (nl > ts->userbuf.base &&
387 nl[-1] == '\r' &&
388 ts->linebuf.base[len-2] == '\r') {
389 len--;
390 JS_ASSERT(ts->linebuf.base[len] == '\n');
391 ts->linebuf.base[len-1] = '\n';
392 }
393 } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
394 ts->linebuf.base[len-1] = '\n';
395 }
396 }
397
398 /* Reset linebuf based on adjusted segment length. */
399 ts->linebuf.limit = ts->linebuf.base + len;
400 ts->linebuf.ptr = ts->linebuf.base;
401
402 /* Update position of linebuf within physical userbuf line. */
403 if (!(ts->flags & TSF_NLFLAG))
404 ts->linepos += ts->linelen;
405 else
406 ts->linepos = 0;
407 if (ts->linebuf.limit[-1] == '\n')
408 ts->flags |= TSF_NLFLAG;
409 else
410 ts->flags &= ~TSF_NLFLAG;
411
412 /* Update linelen from original segment length. */
413 ts->linelen = olen;
414 }
415 c = *ts->linebuf.ptr++;
416 }
417 if (c == '\n')
418 ts->lineno++;
419 return c;
420 }
421
422 static void
423 UngetChar(JSTokenStream *ts, int32 c)
424 {
425 if (c == EOF)
426 return;
427 JS_ASSERT(ts->ungetpos < JS_ARRAY_LENGTH(ts->ungetbuf));
428 if (c == '\n')
429 ts->lineno--;
430 ts->ungetbuf[ts->ungetpos++] = (jschar)c;
431 }
432
433 static int32
434 PeekChar(JSTokenStream *ts)
435 {
436 int32 c;
437
438 c = GetChar(ts);
439 UngetChar(ts, c);
440 return c;
441 }
442
443 /*
444 * Peek n chars ahead into ts. Return true if n chars were read, false if
445 * there weren't enough characters in the input stream. This function cannot
446 * be used to peek into or past a newline.
447 */
448 static JSBool
449 PeekChars(JSTokenStream *ts, intN n, jschar *cp)
450 {
451 intN i, j;
452 int32 c;
453
454 for (i = 0; i < n; i++) {
455 c = GetChar(ts);
456 if (c == EOF)
457 break;
458 if (c == '\n') {
459 UngetChar(ts, c);
460 break;
461 }
462 cp[i] = (jschar)c;
463 }
464 for (j = i - 1; j >= 0; j--)
465 UngetChar(ts, cp[j]);
466 return i == n;
467 }
468
469 static void
470 SkipChars(JSTokenStream *ts, intN n)
471 {
472 while (--n >= 0)
473 GetChar(ts);
474 }
475
476 static JSBool
477 MatchChar(JSTokenStream *ts, int32 expect)
478 {
479 int32 c;
480
481 c = GetChar(ts);
482 if (c == expect)
483 return JS_TRUE;
484 UngetChar(ts, c);
485 return JS_FALSE;
486 }
487
/*
 * Report compile-time error or warning errorNumber for token stream ts.
 *
 * The message is built by js_ExpandErrorArguments from the variadic
 * arguments. The report carries ts->filename. When pn is non-null the line
 * number comes from pn->pn_pos.begin.lineno, and if that differs from
 * ts->lineno the report is sent without any source-line text. Otherwise a
 * copy of linebuf (both as jschars and deflated to bytes) and pointers to the
 * offending token within it are attached.
 *
 * Returns JS_TRUE at once for a JSREPORT_STRICT report when the strict option
 * is off. Otherwise returns the `warning` value produced by
 * js_ExpandErrorArguments, forced to JS_FALSE when argument expansion or one
 * of the line-copy allocations fails. Unless flags describe a warning,
 * TSF_ERROR is set on ts before returning.
 */
488 JSBool
489 js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
490 uintN flags, uintN errorNumber, ...)
491 {
492 JSErrorReport report;
493 char *message;
494 size_t linelength;
495 jschar *linechars;
496 char *linebytes;
497 va_list ap;
498 JSBool warning, ok;
499 JSTokenPos *tp;
500 uintN index, i;
501 JSErrorReporter onError;
502
503 JS_ASSERT(ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
504
/* Strict-mode diagnostics are dropped entirely when strict is not enabled. */
505 if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
506 return JS_TRUE;
507
508 memset(&report, 0, sizeof report);
509 report.flags = flags;
510 report.errorNumber = errorNumber;
511 message = NULL;
512 linechars = NULL;
513 linebytes = NULL;
514
/* From here on every exit goes through "out", which frees what was allocated. */
515 MUST_FLOW_THROUGH("out");
516 va_start(ap, errorNumber);
517 ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
518 errorNumber, &message, &report, &warning,
519 !(flags & JSREPORT_UC), ap);
520 va_end(ap);
521 if (!ok) {
522 warning = JS_FALSE;
523 goto out;
524 }
525
526 report.filename = ts->filename;
527
528 if (pn) {
529 report.lineno = pn->pn_pos.begin.lineno;
/* The node is on a different line than linebuf holds: report without text. */
530 if (report.lineno != ts->lineno)
531 goto report;
532 tp = &pn->pn_pos;
533 } else {
534 /* Point to the current token, not the next one to get. */
535 tp = &ts->tokens[ts->cursor].pos;
536 }
537 report.lineno = ts->lineno;
/* Make a NUL-terminated copy of the current line segment, then deflate it. */
538 linelength = ts->linebuf.limit - ts->linebuf.base;
539 linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
540 if (!linechars) {
541 warning = JS_FALSE;
542 goto out;
543 }
544 memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
545 linechars[linelength] = 0;
546 linebytes = js_DeflateString(cx, linechars, linelength);
547 if (!linebytes) {
548 warning = JS_FALSE;
549 goto out;
550 }
551 report.linebuf = linebytes;
552
553 /*
554 * FIXME: What should instead happen here is that we should
555 * find error-tokens in userbuf, if !ts->file. That will
556 * allow us to deliver a more helpful error message, which
557 * includes all or part of the bad string or bad token. The
558 * code here yields something that looks truncated.
559 * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
560 */
561 index = 0;
562 if (tp->begin.lineno == tp->end.lineno) {
/* Token starts before the segment held in linebuf: leave token pointers unset. */
563 if (tp->begin.index < ts->linepos)
564 goto report;
565
566 index = tp->begin.index - ts->linepos;
567 }
568
569 report.tokenptr = report.linebuf + index;
570 report.uclinebuf = linechars;
571 report.uctokenptr = report.uclinebuf + index;
572
573 /*
574 * If there's a runtime exception type associated with this error
575 * number, set that as the pending exception. For errors occurring at
576 * compile time, this is very likely to be a JSEXN_SYNTAXERR.
577 *
578 * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
579 * flag will be set in report.flags. Proper behavior for an error
580 * reporter is to ignore a report with this flag for all but top-level
581 * compilation errors. The exception will remain pending, and so long
582 * as the non-top-level "load", "eval", or "compile" native function
583 * returns false, the top-level reporter will eventually receive the
584 * uncaught exception report.
585 *
586 * XXX it'd probably be best if there was only one call to this
587 * function, but there seem to be two error reporter call points.
588 */
589 report:
590 onError = cx->errorReporter;
591
592 /*
593 * Try to raise an exception only if there isn't one already set --
594 * otherwise the exception will describe the last compile-time error,
595 * which is likely spurious.
596 */
597 if (!(ts->flags & TSF_ERROR)) {
598 if (js_ErrorToException(cx, message, &report))
599 onError = NULL;
600 }
601
602 /*
603 * Suppress any compile-time errors that don't occur at the top level.
604 * This may still fail, as interpLevel may be zero in contexts where we
605 * don't really want to call the error reporter, as when js is called
606 * by other code which could catch the error.
607 */
608 if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
609 onError = NULL;
610
611 if (onError) {
612 JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
613
614 /*
615 * If debugErrorHook is present then we give it a chance to veto
616 * sending the error on to the regular error reporter.
617 */
618 if (hook && !hook(cx, message, &report,
619 cx->debugHooks->debugErrorHookData)) {
620 onError = NULL;
621 }
622 }
623 if (onError)
624 (*onError)(cx, message, &report);
625
/* Common cleanup: release everything allocated above, whatever path got here. */
626 out:
627 if (linebytes)
628 cx->free(linebytes);
629 if (linechars)
630 cx->free(linechars);
631 if (message)
632 cx->free(message);
633 if (report.ucmessage)
634 cx->free((void *)report.ucmessage);
635
636 if (report.messageArgs) {
/* The individual argument strings are freed only when JSREPORT_UC is not set. */
637 if (!(flags & JSREPORT_UC)) {
638 i = 0;
639 while (report.messageArgs[i])
640 cx->free((void *)report.messageArgs[i++]);
641 }
642 cx->free((void *)report.messageArgs);
643 }
644
645 if (!JSREPORT_IS_WARNING(flags)) {
646 /* Set the error flag to suppress spurious reports. */
647 ts->flags |= TSF_ERROR;
648 }
649
650 return warning;
651 }
652
653 #if JS_HAS_XML_SUPPORT
654
655 static JSBool
656 GetXMLEntity(JSContext *cx, JSTokenStream *ts)
657 {
658 ptrdiff_t offset, length, i;
659 int32 c, d;
660 JSBool ispair;
661 jschar *bp, digit;
662 char *bytes;
663 JSErrNum msg;
664
665 JSCharBuffer &tb = ts->tokenbuf;
666
667 /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
668 offset = tb.length();
669 if (!tb.append('&'))
670 return JS_FALSE;
671 while ((c = GetChar(ts)) != ';') {
672 if (c == EOF || c == '\n') {
673 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
674 JSMSG_END_OF_XML_ENTITY);
675 return JS_FALSE;
676 }
677 if (!tb.append(c))
678 return JS_FALSE;
679 }
680
681 /* Let length be the number of jschars after the '&', including the ';'. */
682 length = tb.length() - offset;
683 bp = tb.begin() + offset;
684 c = d = 0;
685 ispair = JS_FALSE;
686 if (length > 2 && bp[1] == '#') {
687 /* Match a well-formed XML Character Reference. */
688 i = 2;
689 if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
690 if (length > 9) /* at most 6 hex digits allowed */
691 goto badncr;
692 while (++i < length) {
693 digit = bp[i];
694 if (!JS7_ISHEX(digit))
695 goto badncr;
696 c = (c << 4) + JS7_UNHEX(digit);
697 }
698 } else {
699 while (i < length) {
700 digit = bp[i++];
701 if (!JS7_ISDEC(digit))
702 goto badncr;
703 c = (c * 10) + JS7_UNDEC(digit);
704 if (c < 0)
705 goto badncr;
706 }
707 }
708
709 if (0x10000 <= c && c <= 0x10FFFF) {
710 /* Form a surrogate pair (c, d) -- c is the high surrogate. */
711 d = 0xDC00 + (c & 0x3FF);
712 c = 0xD7C0 + (c >> 10);
713 ispair = JS_TRUE;
714 } else {
715 /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
716 if (c != 0x9 && c != 0xA && c != 0xD &&
717 !(0x20 <= c && c <= 0xD7FF) &&
718 !(0xE000 <= c && c <= 0xFFFD)) {
719 goto badncr;
720 }
721 }
722 } else {
723 /* Try to match one of the five XML 1.0 predefined entities. */
724 switch (length) {
725 case 3:
726 if (bp[2] == 't') {
727 if (bp[1] == 'l')
728 c = '<';
729 else if (bp[1] == 'g')
730 c = '>';
731 }
732 break;
733 case 4:
734 if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
735 c = '&';
736 break;
737 case 5:
738 if (bp[3] == 'o') {
739 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
740 c = '\'';
741 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
742 c = '"';
743 }
744 break;
745 }
746 if (c == 0) {
747 msg = JSMSG_UNKNOWN_XML_ENTITY;
748 goto bad;
749 }
750 }
751
752 /* If we matched, retract ts->tokenbuf and store the entity's value. */
753 *bp++ = (jschar) c;
754 if (ispair)
755 *bp++ = (jschar) d;
756 tb.shrinkBy(tb.end() - bp);
757 return JS_TRUE;
758
759 badncr:
760 msg = JSMSG_BAD_XML_NCR;
761 bad:
762 /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
763 JS_ASSERT((tb.end() - bp) >= 1);
764 bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
765 if (bytes) {
766 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
767 msg, bytes);
768 cx->free(bytes);
769 }
770 return JS_FALSE;
771 }
772
773 #endif /* JS_HAS_XML_SUPPORT */
774
775 JSTokenType
776 js_PeekToken(JSContext *cx, JSTokenStream *ts)
777 {
778 JSTokenType tt;
779
780 if (ts->lookahead != 0) {
781 tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
782 } else {
783 tt = js_GetToken(cx, ts);
784 js_UngetToken(ts);
785 }
786 return tt;
787 }
788
789 JSTokenType
790 js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
791 {
792 JSTokenType tt;
793
794 if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
795 return TOK_EOL;
796 ts->flags |= TSF_NEWLINES;
797 tt = js_PeekToken(cx, ts);
798 ts->flags &= ~TSF_NEWLINES;
799 return tt;
800 }
801
802 /*
803 * We have encountered a '\': check for a Unicode escape sequence after it,
804 * returning the character code value if we found a Unicode escape sequence.
805 * Otherwise, non-destructively return the original '\'.
806 */
807 static int32
808 GetUnicodeEscape(JSTokenStream *ts)
809 {
810 jschar cp[5];
811 int32 c;
812
813 if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
814 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
815 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
816 {
817 c = (((((JS7_UNHEX(cp[1]) << 4)
818 + JS7_UNHEX(cp[2])) << 4)
819 + JS7_UNHEX(cp[3])) << 4)
820 + JS7_UNHEX(cp[4]);
821 SkipChars(ts, 5);
822 return c;
823 }
824 return '\\';
825 }
826
827 static JSToken *
828 NewToken(JSTokenStream *ts, ptrdiff_t adjust)
829 {
830 JSToken *tp;
831
832 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
833 tp = &CURRENT_TOKEN(ts);
834 tp->ptr = ts->linebuf.ptr + adjust;
835 tp->pos.begin.index = ts->linepos +
836 (tp->ptr - ts->linebuf.base) -
837 ts->ungetpos;
838 tp->pos.begin.lineno = tp->pos.end.lineno = ts->lineno;
839 return tp;
840 }
841
842 static JS_ALWAYS_INLINE JSBool
843 ScanAsSpace(jschar c)
844 {
845 /* Treat little- and big-endian BOMs as whitespace for compatibility. */
846 if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
847 return JS_TRUE;
848 return JS_FALSE;
849 }
850
851 static JS_ALWAYS_INLINE JSAtom *
852 atomize(JSContext *cx, JSCharBuffer &cb)
853 {
854 return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
855 }
856
857 JSTokenType
858 js_GetToken(JSContext *cx, JSTokenStream *ts)
859 {
860 JSTokenType tt;
861 int32 c, qc;
862 JSToken *tp;
863 JSAtom *atom;
864 JSBool hadUnicodeEscape;
865 const struct keyword *kw;
866 #if JS_HAS_XML_SUPPORT
867 JSBool inTarget;
868 size_t targetLength;
869 ptrdiff_t contentIndex;
870 #endif
871
872 JSCharBuffer &tb = ts->tokenbuf;
873
874 /* Check for a pushed-back token resulting from mismatching lookahead. */
875 while (ts->lookahead != 0) {
876 JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
877 ts->lookahead--;
878 ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
879 tt = CURRENT_TOKEN(ts).type;
880 if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
881 return tt;
882 }
883
884 /* If there was a fatal error, keep returning TOK_ERROR. */
885 if (ts->flags & TSF_ERROR)
886 return TOK_ERROR;
887
888 #if JS_HAS_XML_SUPPORT
889 if (ts->flags & TSF_XMLTEXTMODE) {
890 tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
891 tp = NewToken(ts, 0);
892 tb.clear();
893 qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
894
895 while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
896 if (c == '&' && qc == '<') {
897 if (!GetXMLEntity(cx, ts))
898 goto error;
899 tt = TOK_XMLTEXT;
900 continue;
901 }
902
903 if (!JS_ISXMLSPACE(c))
904 tt = TOK_XMLTEXT;
905 if (!tb.append(c))
906 goto error;
907 }
908 UngetChar(ts, c);
909
910 if (tb.empty()) {
911 atom = NULL;
912 } else {
913 atom = atomize(cx, tb);
914 if (!atom)
915 goto error;
916 }
917 tp->pos.end.lineno = ts->lineno;
918 tp->t_op = JSOP_STRING;
919 tp->t_atom = atom;
920 goto out;
921 }
922
923 if (ts->flags & TSF_XMLTAGMODE) {
924 tp = NewToken(ts, 0);
925 c = GetChar(ts);
926 if (JS_ISXMLSPACE(c)) {
927 do {
928 c = GetChar(ts);
929 } while (JS_ISXMLSPACE(c));
930 UngetChar(ts, c);
931 tt = TOK_XMLSPACE;
932 goto out;
933 }
934
935 if (c == EOF) {
936 tt = TOK_EOF;
937 goto out;
938 }
939
940 tb.clear();
941 if (JS_ISXMLNSSTART(c)) {
942 JSBool sawColon = JS_FALSE;
943
944 if (!tb.append(c))
945 goto error;
946 while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
947 if (c == ':') {
948 int nextc;
949
950 if (sawColon ||
951 (nextc = PeekChar(ts),
952 ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
953 !JS_ISXMLNAME(nextc))) {
954 js_ReportCompileErrorNumber(cx, ts, NULL,
955 JSREPORT_ERROR,
956 JSMSG_BAD_XML_QNAME);
957 goto error;
958 }
959 sawColon = JS_TRUE;
960 }
961
962 if (!tb.append(c))
963 goto error;
964 }
965
966 UngetChar(ts, c);
967 atom = atomize(cx, tb);
968 if (!atom)
969 goto error;
970 tp->t_op = JSOP_STRING;
971 tp->t_atom = atom;
972 tt = TOK_XMLNAME;
973 goto out;
974 }
975
976 switch (c) {
977 case '{':
978 if (ts->flags & TSF_XMLONLYMODE)
979 goto bad_xml_char;
980 tt = TOK_LC;
981 goto out;
982
983 case '=':
984 tt = TOK_ASSIGN;
985 goto out;
986
987 case '"':
988 case '\'':
989 qc = c;
990 while ((c = GetChar(ts)) != qc) {
991 if (c == EOF) {
992 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
993 JSMSG_UNTERMINATED_STRING);
994 goto error;
995 }
996
997 /*
998 * XML attribute values are double-quoted when pretty-printed,
999 * so escape " if it is expressed directly in a single-quoted
1000 * attribute value.
1001 */
1002 if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1003 JS_ASSERT(qc == '\'');
1004 if (!tb.append(js_quot_entity_str,
1005 strlen(js_quot_entity_str)))
1006 goto error;
1007 continue;
1008 }
1009
1010 if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1011 if (!GetXMLEntity(cx, ts))
1012 goto error;
1013 continue;
1014 }
1015
1016 if (!tb.append(c))
1017 goto error;
1018 }
1019 atom = atomize(cx, tb);
1020 if (!atom)
1021 goto error;
1022 tp->pos.end.lineno = ts->lineno;
1023 tp->t_op = JSOP_STRING;
1024 tp->t_atom = atom;
1025 tt = TOK_XMLATTR;
1026 goto out;
1027
1028 case '>':
1029 tt = TOK_XMLTAGC;
1030 goto out;
1031
1032 case '/':
1033 if (MatchChar(ts, '>')) {
1034 tt = TOK_XMLPTAGC;
1035 goto out;
1036 }
1037 /* FALL THROUGH */
1038
1039 bad_xml_char:
1040 default:
1041 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1042 JSMSG_BAD_XML_CHARACTER);
1043 goto error;
1044 }
1045 /* NOTREACHED */
1046 }
1047 #endif /* JS_HAS_XML_SUPPORT */
1048
1049 retry:
1050 do {
1051 c = GetChar(ts);
1052 if (c == '\n') {
1053 ts->flags &= ~TSF_DIRTYLINE;
1054 if (ts->flags & TSF_NEWLINES)
1055 break;
1056 }
1057 } while (ScanAsSpace((jschar)c));
1058
1059 tp = NewToken(ts, -1);
1060 if (c == EOF) {
1061 tt = TOK_EOF;
1062 goto out;
1063 }
1064
1065 hadUnicodeEscape = JS_FALSE;
1066 if (JS_ISIDSTART(c) ||
1067 (c == '\\' &&
1068 (qc = GetUnicodeEscape(ts),
1069 hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1070 if (hadUnicodeEscape)
1071 c = qc;
1072 tb.clear();
1073 for (;;) {
1074 if (!tb.append(c))
1075 goto error;
1076 c = GetChar(ts);
1077 if (c == '\\') {
1078 qc = GetUnicodeEscape(ts);
1079 if (!JS_ISIDENT(qc))
1080 break;
1081 c = qc;
1082 hadUnicodeEscape = JS_TRUE;
1083 } else {
1084 if (!JS_ISIDENT(c))
1085 break;
1086 }
1087 }
1088 UngetChar(ts, c);
1089
1090 /*
1091 * Check for keywords unless we saw Unicode escape or parser asks
1092 * to ignore keywords.
1093 */
1094 if (!hadUnicodeEscape &&
1095 !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1096 (kw = FindKeyword(tb.begin(), tb.length()))) {
1097 if (kw->tokentype == TOK_RESERVED) {
1098 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1099 JSREPORT_WARNING |
1100 JSREPORT_STRICT,
1101 JSMSG_RESERVED_ID,
1102 kw->chars)) {
1103 goto error;
1104 }
1105 } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1106 tt = kw->tokentype;
1107 tp->t_op = (JSOp) kw->op;
1108 goto out;
1109 }
1110 }
1111
1112 atom = atomize(cx, tb);
1113 if (!atom)
1114 goto error;
1115 tp->t_op = JSOP_NAME;
1116 tp->t_atom = atom;
1117 tt = TOK_NAME;
1118 goto out;
1119 }
1120
1121 if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1122 jsint radix;
1123 const jschar *endptr;
1124 jsdouble dval;
1125
1126 radix = 10;
1127 tb.clear();
1128
1129 if (c == '0') {
1130 if (!tb.append(c))
1131 goto error;
1132 c = GetChar(ts);
1133 if (JS_TOLOWER(c) == 'x') {
1134 if (!tb.append(c))
1135 goto error;
1136 c = GetChar(ts);
1137 radix = 16;
1138 } else if (JS7_ISDEC(c)) {
1139 radix = 8;
1140 }
1141 }
1142
1143 while (JS7_ISHEX(c)) {
1144 if (radix < 16) {
1145 if (JS7_ISLET(c))
1146 break;
1147
1148 /*
1149 * We permit 08 and 09 as decimal numbers, which makes our
1150 * behaviour a superset of the ECMA numeric grammar. We might
1151 * not always be so permissive, so we warn about it.
1152 */
1153 if (radix == 8 && c >= '8') {
1154 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1155 JSREPORT_WARNING,
1156 JSMSG_BAD_OCTAL,
1157 c == '8' ? "08" : "09")) {
1158 goto error;
1159 }
1160 radix = 10;
1161 }
1162 }
1163 if (!tb.append(c))
1164 goto error;
1165 c = GetChar(ts);
1166 }
1167
1168 if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1169 if (c == '.') {
1170 do {
1171 if (!tb.append(c))
1172 goto error;
1173 c = GetChar(ts);
1174 } while (JS7_ISDEC(c));
1175 }
1176 if (JS_TOLOWER(c) == 'e') {
1177 if (!tb.append(c))
1178 goto error;
1179 c = GetChar(ts);
1180 if (c == '+' || c == '-') {
1181 if (!tb.append(c))
1182 goto error;
1183 c = GetChar(ts);
1184 }
1185 if (!JS7_ISDEC(c)) {
1186 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1187 JSMSG_MISSING_EXPONENT);
1188 goto error;
1189 }
1190 do {
1191 if (!tb.append(c))
1192 goto error;
1193 c = GetChar(ts);
1194 } while (JS7_ISDEC(c));
1195 }
1196 }
1197
1198 /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1199 UngetChar(ts, c);
1200 if (!tb.append(0))
1201 goto error;
1202
1203 if (radix == 10) {
1204 if (!js_strtod(cx, tb.begin(), tb.end(), &endptr, &dval)) {
1205 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1206 JSMSG_OUT_OF_MEMORY);
1207 goto error;
1208 }
1209 } else {
1210 if (!js_strtointeger(cx, tb.begin(), tb.end(),
1211 &endptr, radix, &dval)) {
1212 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1213 JSMSG_OUT_OF_MEMORY);
1214 goto error;
1215 }
1216 }
1217 tp->t_dval = dval;
1218 tt = TOK_NUMBER;
1219 goto out;
1220 }
1221
1222 if (c == '"' || c == '\'') {
1223 qc = c;
1224 tb.clear();
1225 while ((c = GetChar(ts)) != qc) {
1226 if (c == '\n' || c == EOF) {
1227 UngetChar(ts, c);
1228 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1229 JSMSG_UNTERMINATED_STRING);
1230 goto error;
1231 }
1232 if (c == '\\') {
1233 switch (c = GetChar(ts)) {
1234 case 'b': c = '\b'; break;
1235 case 'f': c = '\f'; break;
1236 case 'n': c = '\n'; break;
1237 case 'r': c = '\r'; break;
1238 case 't': c = '\t'; break;
1239 case 'v': c = '\v'; break;
1240
1241 default:
1242 if ('0' <= c && c < '8') {
1243 int32 val = JS7_UNDEC(c);
1244
1245 c = PeekChar(ts);
1246 if ('0' <= c && c < '8') {
1247 val = 8 * val + JS7_UNDEC(c);
1248 GetChar(ts);
1249 c = PeekChar(ts);
1250 if ('0' <= c && c < '8') {
1251 int32 save = val;
1252 val = 8 * val + JS7_UNDEC(c);
1253 if (val <= 0377)
1254 GetChar(ts);
1255 else
1256 val = save;
1257 }
1258 }
1259
1260 c = (jschar)val;
1261 } else if (c == 'u') {
1262 jschar cp[4];
1263 if (PeekChars(ts, 4, cp) &&
1264 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1265 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1266 c = (((((JS7_UNHEX(cp[0]) << 4)
1267 + JS7_UNHEX(cp[1])) << 4)
1268 + JS7_UNHEX(cp[2])) << 4)
1269 + JS7_UNHEX(cp[3]);
1270 SkipChars(ts, 4);
1271 }
1272 } else if (c == 'x') {
1273 jschar cp[2];
1274 if (PeekChars(ts, 2, cp) &&
1275 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1276 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1277 SkipChars(ts, 2);
1278 }
1279 } else if (c == '\n') {
1280 /* ECMA follows C by removing escaped newlines. */
1281 continue;
1282 }
1283 break;
1284 }
1285 }
1286 if (!tb.append(c))
1287 goto error;
1288 }
1289 atom = atomize(cx, tb);
1290 if (!atom)
1291 goto error;
1292 tp->pos.end.lineno = ts->lineno;
1293 tp->t_op = JSOP_STRING;
1294 tp->t_atom = atom;
1295 tt = TOK_STRING;
1296 goto out;
1297 }
1298
1299 switch (c) {
1300 case '\n': tt = TOK_EOL; goto eol_out;
1301 case ';': tt = TOK_SEMI; break;
1302 case '[': tt = TOK_LB; break;
1303 case ']': tt = TOK_RB; break;
1304 case '{': tt = TOK_LC; break;
1305 case '}': tt = TOK_RC; break;
1306 case '(': tt = TOK_LP; break;
1307 case ')': tt = TOK_RP; break;
1308 case ',': tt = TOK_COMMA; break;
1309 case '?': tt = TOK_HOOK; break;
1310
1311 case '.':
1312 #if JS_HAS_XML_SUPPORT
1313 if (MatchChar(ts, c))
1314 tt = TOK_DBLDOT;
1315 else
1316 #endif
1317 tt = TOK_DOT;
1318 break;
1319
1320 case ':':
1321 #if JS_HAS_XML_SUPPORT
1322 if (MatchChar(ts, c)) {
1323 tt = TOK_DBLCOLON;
1324 break;
1325 }
1326 #endif
1327 /*
1328 * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1329 * object initializer, likewise for setter.
1330 */
1331 tp->t_op = JSOP_NOP;
1332 tt = TOK_COLON;
1333 break;
1334
1335 case '|':
1336 if (MatchChar(ts, c)) {
1337 tt = TOK_OR;
1338 } else if (MatchChar(ts, '=')) {
1339 tp->t_op = JSOP_BITOR;
1340 tt = TOK_ASSIGN;
1341 } else {
1342 tt = TOK_BITOR;
1343 }
1344 break;
1345
1346 case '^':
1347 if (MatchChar(ts, '=')) {
1348 tp->t_op = JSOP_BITXOR;
1349 tt = TOK_ASSIGN;
1350 } else {
1351 tt = TOK_BITXOR;
1352 }
1353 break;
1354
1355 case '&':
1356 if (MatchChar(ts, c)) {
1357 tt = TOK_AND;
1358 } else if (MatchChar(ts, '=')) {
1359 tp->t_op = JSOP_BITAND;
1360 tt = TOK_ASSIGN;
1361 } else {
1362 tt = TOK_BITAND;
1363 }
1364 break;
1365
1366 case '=':
1367 if (MatchChar(ts, c)) {
1368 tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1369 tt = TOK_EQOP;
1370 } else {
1371 tp->t_op = JSOP_NOP;
1372 tt = TOK_ASSIGN;
1373 }
1374 break;
1375
1376 case '!':
1377 if (MatchChar(ts, '=')) {
1378 tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1379 tt = TOK_EQOP;
1380 } else {
1381 tp->t_op = JSOP_NOT;
1382 tt = TOK_UNARYOP;
1383 }
1384 break;
1385
1386 #if JS_HAS_XML_SUPPORT
1387 case '@':
1388 tt = TOK_AT;
1389 break;
1390 #endif
1391
1392 case '<':
1393 #if JS_HAS_XML_SUPPORT
1394 /*
1395 * After much testing, it's clear that Postel's advice to protocol
1396 * designers ("be liberal in what you accept, and conservative in what
1397 * you send") invites a natural-law repercussion for JS as "protocol":
1398 *
1399 * "If you are liberal in what you accept, others will utterly fail to
1400 * be conservative in what they send."
1401 *
1402 * Which means you will get <!-- comments to end of line in the middle
1403 * of .js files, and after if conditions whose then statements are on
1404 * the next line, and other wonders. See at least the following bugs:
1405 * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1406 * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1407 * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1408 *
1409 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1410 * an XML comment or CDATA literal. Instead, we always scan <! as the
1411 * start of an HTML comment hack to end of line, used since Netscape 2
1412 * to hide script tag content from script-unaware browsers.
1413 *
1414 * But this still leaves XML resources with certain internal structure
1415 * vulnerable to being loaded as script cross-origin, and some internal
1416 * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1417 * source consists only of XML literals. See:
1418 *
1419 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1420 *
1421 * The check for this is in jsparse.cpp, JSCompiler::compileScript.
1422 */
1423 if ((ts->flags & TSF_OPERAND) &&
1424 (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1425 /* Check for XML comment or CDATA section. */
1426 if (MatchChar(ts, '!')) {
1427 tb.clear();
1428
1429 /* Scan XML comment. */
1430 if (MatchChar(ts, '-')) {
1431 if (!MatchChar(ts, '-'))
1432 goto bad_xml_markup;
1433 while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1434 if (c == EOF)
1435 goto bad_xml_markup;
1436 if (!tb.append(c))
1437 goto error;
1438 }
1439 tt = TOK_XMLCOMMENT;
1440 tp->t_op = JSOP_XMLCOMMENT;
1441 goto finish_xml_markup;
1442 }
1443
1444 /* Scan CDATA section. */
1445 if (MatchChar(ts, '[')) {
1446 jschar cp[6];
1447 if (PeekChars(ts, 6, cp) &&
1448 cp[0] == 'C' &&
1449 cp[1] == 'D' &&
1450 cp[2] == 'A' &&
1451 cp[3] == 'T' &&
1452 cp[4] == 'A' &&
1453 cp[5] == '[') {
1454 SkipChars(ts, 6);
1455 while ((c = GetChar(ts)) != ']' ||
1456 !PeekChars(ts, 2, cp) ||
1457 cp[0] != ']' ||
1458 cp[1] != '>') {
1459 if (c == EOF)
1460 goto bad_xml_markup;
1461 if (!tb.append(c))
1462 goto error;
1463 }
1464 GetChar(ts); /* discard ] but not > */
1465 tt = TOK_XMLCDATA;
1466 tp->t_op = JSOP_XMLCDATA;
1467 goto finish_xml_markup;
1468 }
1469 goto bad_xml_markup;
1470 }
1471 }
1472
1473 /* Check for processing instruction. */
1474 if (MatchChar(ts, '?')) {
1475 inTarget = JS_TRUE;
1476 targetLength = 0;
1477 contentIndex = -1;
1478
1479 tb.clear();
1480 while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1481 if (c == EOF)
1482 goto bad_xml_markup;
1483 if (inTarget) {
1484 if (JS_ISXMLSPACE(c)) {
1485 if (tb.empty())
1486 goto bad_xml_markup;
1487 inTarget = JS_FALSE;
1488 } else {
1489 if (!(tb.empty()
1490 ? JS_ISXMLNSSTART(c)
1491 : JS_ISXMLNS(c))) {
1492 goto bad_xml_markup;
1493 }
1494 ++targetLength;
1495 }
1496 } else {
1497 if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1498 contentIndex = tb.length();
1499 }
1500 if (!tb.append(c))
1501 goto error;
1502 }
1503 if (targetLength == 0)
1504 goto bad_xml_markup;
1505 if (contentIndex < 0) {
1506 atom = cx->runtime->atomState.emptyAtom;
1507 } else {
1508 atom = js_AtomizeChars(cx,
1509 tb.begin() + contentIndex,
1510 tb.length() - contentIndex,
1511 0);
1512 if (!atom)
1513 goto error;
1514 }
1515 tb.shrinkBy(tb.length() - targetLength);
1516 tp->t_atom2 = atom;
1517 tt = TOK_XMLPI;
1518
1519 finish_xml_markup:
1520 if (!MatchChar(ts, '>'))
1521 goto bad_xml_markup;
1522 atom = atomize(cx, tb);
1523 if (!atom)
1524 goto error;
1525 tp->t_atom = atom;
1526 tp->pos.end.lineno = ts->lineno;
1527 goto out;
1528 }
1529
1530 /* An XML start-of-tag character. */
1531 tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1532 goto out;
1533
1534 bad_xml_markup:
1535 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1536 JSMSG_BAD_XML_MARKUP);
1537 goto error;
1538 }
1539 #endif /* JS_HAS_XML_SUPPORT */
1540
1541 /* NB: treat HTML begin-comment as comment-till-end-of-line */
1542 if (MatchChar(ts, '!')) {
1543 if (MatchChar(ts, '-')) {
1544 if (MatchChar(ts, '-')) {
1545 ts->flags |= TSF_IN_HTML_COMMENT;
1546 goto skipline;
1547 }
1548 UngetChar(ts, '-');
1549 }
1550 UngetChar(ts, '!');
1551 }
1552 if (MatchChar(ts, c)) {
1553 tp->t_op = JSOP_LSH;
1554 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1555 } else {
1556 tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1557 tt = TOK_RELOP;
1558 }
1559 break;
1560
1561 case '>':
1562 if (MatchChar(ts, c)) {
1563 tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1564 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1565 } else {
1566 tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1567 tt = TOK_RELOP;
1568 }
1569 break;
1570
1571 case '*':
1572 tp->t_op = JSOP_MUL;
1573 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1574 break;
1575
1576 case '/':
1577 if (MatchChar(ts, '/')) {
1578 /*
1579 * Hack for source filters such as the Mozilla XUL preprocessor:
1580 * "//@line 123\n" sets the number of the *next* line after the
1581 * comment to 123.
1582 */
1583 if (JS_HAS_ATLINE_OPTION(cx)) {
1584 jschar cp[5];
1585 uintN i, line, temp;
1586 char filename[1024];
1587
1588 if (PeekChars(ts, 5, cp) &&
1589 cp[0] == '@' &&
1590 cp[1] == 'l' &&
1591 cp[2] == 'i' &&
1592 cp[3] == 'n' &&
1593 cp[4] == 'e') {
1594 SkipChars(ts, 5);
1595 while ((c = GetChar(ts)) != '\n' && ScanAsSpace((jschar)c))
1596 continue;
1597 if (JS7_ISDEC(c)) {
1598 line = JS7_UNDEC(c);
1599 while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1600 temp = 10 * line + JS7_UNDEC(c);
1601 if (temp < line) {
1602 /* Ignore overlarge line numbers. */
1603 goto skipline;
1604 }
1605 line = temp;
1606 }
1607 while (c != '\n' && ScanAsSpace((jschar)c))
1608 c = GetChar(ts);
1609 i = 0;
1610 if (c == '"') {
1611 while ((c = GetChar(ts)) != EOF && c != '"') {
1612 if (c == '\n') {
1613 UngetChar(ts, c);
1614 goto skipline;
1615 }
1616 if ((c >> 8) != 0 || i >= sizeof filename - 1)
1617 goto skipline;
1618 filename[i++] = (char) c;
1619 }
1620 if (c == '"') {
1621 while ((c = GetChar(ts)) != '\n' &&
1622 ScanAsSpace((jschar)c)) {
1623 continue;
1624 }
1625 }
1626 }
1627 filename[i] = '\0';
1628 if (c == '\n') {
1629 if (i > 0) {
1630 if (ts->flags & TSF_OWNFILENAME)
1631 cx->free((void *) ts->filename);
1632 ts->filename = JS_strdup(cx, filename);
1633 if (!ts->filename)
1634 goto error;
1635 ts->flags |= TSF_OWNFILENAME;
1636 }
1637 ts->lineno = line;
1638 }
1639 }
1640 UngetChar(ts, c);
1641 }
1642 }
1643
1644 skipline:
1645 /* Optimize line skipping if we are not in an HTML comment. */
1646 if (ts->flags & TSF_IN_HTML_COMMENT) {
1647 while ((c = GetChar(ts)) != EOF && c != '\n') {
1648 if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1649 ts->flags &= ~TSF_IN_HTML_COMMENT;
1650 }
1651 } else {
1652 while ((c = GetChar(ts)) != EOF && c != '\n')
1653 continue;
1654 }
1655 UngetChar(ts, c);
1656 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1657 goto retry;
1658 }
1659
1660 if (MatchChar(ts, '*')) {
1661 uintN lineno = ts->lineno;
1662 while ((c = GetChar(ts)) != EOF &&
1663 !(c == '*' && MatchChar(ts, '/'))) {
1664 /* Ignore all characters until comment close. */
1665 }
1666 if (c == EOF) {
1667 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1668 JSMSG_UNTERMINATED_COMMENT);
1669 goto error;
1670 }
1671 if ((ts->flags & TSF_NEWLINES) && lineno != ts->lineno) {
1672 ts->flags &= ~TSF_DIRTYLINE;
1673 tt = TOK_EOL;
1674 goto eol_out;
1675 }
1676 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1677 goto retry;
1678 }
1679
1680 if (ts->flags & TSF_OPERAND) {
1681 uintN flags, length;
1682 JSBool inCharClass = JS_FALSE;
1683
1684 tb.clear();
1685 for (;;) {
1686 c = GetChar(ts);
1687 if (c == '\n' || c == EOF) {
1688 UngetChar(ts, c);
1689 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1690 JSMSG_UNTERMINATED_REGEXP);
1691 goto error;
1692 }
1693 if (c == '\\') {
1694 if (!tb.append(c))
1695 goto error;
1696 c = GetChar(ts);
1697 } else if (c == '[') {
1698 inCharClass = JS_TRUE;
1699 } else if (c == ']') {
1700 inCharClass = JS_FALSE;
1701 } else if (c == '/' && !inCharClass) {
1702 /* For compat with IE, allow unescaped / in char classes. */
1703 break;
1704 }
1705 if (!tb.append(c))
1706 goto error;
1707 }
1708 for (flags = 0, length = tb.length() + 1; ; length++) {
1709 c = PeekChar(ts);
1710 if (c == 'g' && !(flags & JSREG_GLOB))
1711 flags |= JSREG_GLOB;
1712 else if (c == 'i' && !(flags & JSREG_FOLD))
1713 flags |= JSREG_FOLD;
1714 else if (c == 'm' && !(flags & JSREG_MULTILINE))
1715 flags |= JSREG_MULTILINE;
1716 else if (c == 'y' && !(flags & JSREG_STICKY))
1717 flags |= JSREG_STICKY;
1718 else
1719 break;
1720 GetChar(ts);
1721 }
1722 c = PeekChar(ts);
1723 if (JS7_ISLET(c)) {
1724 char buf[2] = { '\0' };
1725 tp->pos.begin.index += length + 1;
1726 buf[0] = (char)c;
1727 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1728 JSMSG_BAD_REGEXP_FLAG, buf);
1729 (void) GetChar(ts);
1730 goto error;
1731 }
1732 tp->t_reflags = flags;
1733 tt = TOK_REGEXP;
1734 break;
1735 }
1736
1737 tp->t_op = JSOP_DIV;
1738 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1739 break;
1740
1741 case '%':
1742 tp->t_op = JSOP_MOD;
1743 tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1744 break;
1745
1746 case '~':
1747 tp->t_op = JSOP_BITNOT;
1748 tt = TOK_UNARYOP;
1749 break;
1750
1751 case '+':
1752 if (MatchChar(ts, '=')) {
1753 tp->t_op = JSOP_ADD;
1754 tt = TOK_ASSIGN;
1755 } else if (MatchChar(ts, c)) {
1756 tt = TOK_INC;
1757 } else {
1758 tp->t_op = JSOP_POS;
1759 tt = TOK_PLUS;
1760 }
1761 break;
1762
1763 case '-':
1764 if (MatchChar(ts, '=')) {
1765 tp->t_op = JSOP_SUB;
1766 tt = TOK_ASSIGN;
1767 } else if (MatchChar(ts, c)) {
1768 if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
1769 ts->flags &= ~TSF_IN_HTML_COMMENT;
1770 goto skipline;
1771 }
1772 tt = TOK_DEC;
1773 } else {
1774 tp->t_op = JSOP_NEG;
1775 tt = TOK_MINUS;
1776 }
1777 break;
1778
1779 #if JS_HAS_SHARP_VARS
1780 case '#':
1781 {
1782 uint32 n;
1783
1784 c = GetChar(ts);
1785 if (!JS7_ISDEC(c)) {
1786 UngetChar(ts, c);
1787 goto badchar;
1788 }
1789 n = (uint32)JS7_UNDEC(c);
1790 for (;;) {
1791 c = GetChar(ts);
1792 if (!JS7_ISDEC(c))
1793 break;
1794 n = 10 * n + JS7_UNDEC(c);
1795 if (n >= UINT16_LIMIT) {
1796 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1797 JSMSG_SHARPVAR_TOO_BIG);
1798 goto error;
1799 }
1800 }
1801 tp->t_dval = (jsdouble) n;
1802 if (JS_HAS_STRICT_OPTION(cx) &&
1803 (c == '=' || c == '#')) {
1804 char buf[20];
1805 JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1806 if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1807 JSREPORT_WARNING |
1808 JSREPORT_STRICT,
1809 JSMSG_DEPRECATED_USAGE,
1810 buf)) {
1811 goto error;
1812 }
1813 }
1814 if (c == '=')
1815 tt = TOK_DEFSHARP;
1816 else if (c == '#')
1817 tt = TOK_USESHARP;
1818 else
1819 goto badchar;
1820 break;
1821 }
1822 #endif /* JS_HAS_SHARP_VARS */
1823
1824 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1825 badchar:
1826 #endif
1827
1828 default:
1829 js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1830 JSMSG_ILLEGAL_CHARACTER);
1831 goto error;
1832 }
1833
1834 out:
1835 JS_ASSERT(tt != TOK_EOL);
1836 ts->flags |= TSF_DIRTYLINE;
1837
1838 eol_out:
1839 JS_ASSERT(tt < TOK_LIMIT);
1840 tp->pos.end.index = ts->linepos +
1841 (ts->linebuf.ptr - ts->linebuf.base) -
1842 ts->ungetpos;
1843 tp->type = tt;
1844 return tt;
1845
1846 error:
1847 tt = TOK_ERROR;
1848 ts->flags |= TSF_ERROR;
1849 goto out;
1850 }
1851
1852 void
1853 js_UngetToken(JSTokenStream *ts)
1854 {
1855 JS_ASSERT(ts->lookahead < NTOKENS_MASK);
1856 ts->lookahead++;
1857 ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1858 }
1859
1860 JSBool
1861 js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
1862 {
1863 if (js_GetToken(cx, ts) == tt)
1864 return JS_TRUE;
1865 js_UngetToken(ts);
1866 return JS_FALSE;
1867 }

  ViewVC Help
Powered by ViewVC 1.1.24