/[jscoverage]/trunk/js/jsscan.cpp
ViewVC logotype

Annotation of /trunk/js/jsscan.cpp

Parent Directory | Revision Log


Revision 332 - (hide annotations)
Thu Oct 23 19:03:33 2008 UTC (11 years, 2 months ago) by siliconforks
File size: 61935 byte(s)
Add SpiderMonkey from Firefox 3.1b1.

The following directories and files were removed:
correct/, correct.js
liveconnect/
nanojit/
t/
v8/
vprof/
xpconnect/
all JavaScript files (Y.js, call.js, if.js, math-partial-sums.js, md5.js, perfect.js, trace-test.js, trace.js)


1 siliconforks 332 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2     * vim: set sw=4 ts=8 et tw=78:
3     *
4     * ***** BEGIN LICENSE BLOCK *****
5     * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6     *
7     * The contents of this file are subject to the Mozilla Public License Version
8     * 1.1 (the "License"); you may not use this file except in compliance with
9     * the License. You may obtain a copy of the License at
10     * http://www.mozilla.org/MPL/
11     *
12     * Software distributed under the License is distributed on an "AS IS" basis,
13     * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14     * for the specific language governing rights and limitations under the
15     * License.
16     *
17     * The Original Code is Mozilla Communicator client code, released
18     * March 31, 1998.
19     *
20     * The Initial Developer of the Original Code is
21     * Netscape Communications Corporation.
22     * Portions created by the Initial Developer are Copyright (C) 1998
23     * the Initial Developer. All Rights Reserved.
24     *
25     * Contributor(s):
26     *
27     * Alternatively, the contents of this file may be used under the terms of
28     * either of the GNU General Public License Version 2 or later (the "GPL"),
29     * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30     * in which case the provisions of the GPL or the LGPL are applicable instead
31     * of those above. If you wish to allow use of your version of this file only
32     * under the terms of either the GPL or the LGPL, and not to allow others to
33     * use your version of this file under the terms of the MPL, indicate your
34     * decision by deleting the provisions above and replace them with the notice
35     * and other provisions required by the GPL or the LGPL. If you do not delete
36     * the provisions above, a recipient may use your version of this file under
37     * the terms of any one of the MPL, the GPL or the LGPL.
38     *
39     * ***** END LICENSE BLOCK ***** */
40    
41     /*
42     * JS lexical scanner.
43     */
44     #include "jsstddef.h"
45     #include <stdio.h> /* first to avoid trouble on some systems */
46     #include <errno.h>
47     #include <limits.h>
48     #include <math.h>
49     #ifdef HAVE_MEMORY_H
50     #include <memory.h>
51     #endif
52     #include <stdarg.h>
53     #include <stdlib.h>
54     #include <string.h>
55     #include "jstypes.h"
56     #include "jsarena.h" /* Added by JSIFY */
57     #include "jsutil.h" /* Added by JSIFY */
58     #include "jsdtoa.h"
59     #include "jsprf.h"
60     #include "jsapi.h"
61     #include "jsatom.h"
62     #include "jscntxt.h"
63     #include "jsversion.h"
64     #include "jsemit.h"
65     #include "jsexn.h"
66     #include "jsnum.h"
67     #include "jsopcode.h"
68     #include "jsparse.h"
69     #include "jsregexp.h"
70     #include "jsscan.h"
71     #include "jsscript.h"
72     #include "jsstaticcheck.h"
73    
74     #if JS_HAS_XML_SUPPORT
75     #include "jsxml.h"
76     #endif
77    
/*
 * Expand jskeyword.tbl once to define, for every keyword entry, a C string
 * constant named js_<keyword>_str holding the keyword's spelling.
 */
#define JS_KEYWORD(keyword, type, op, version) \
    const char js_##keyword##_str[] = #keyword;
#include "jskeyword.tbl"
#undef JS_KEYWORD
82    
/* One row of the keyword table built from jskeyword.tbl. */
struct keyword {
    const char  *chars;         /* C string with keyword text */
    JSTokenType tokentype;      /* JSTokenType */
    JSOp        op;             /* JSOp */
    JSVersion   version;        /* JSVersion */
};
89    
/*
 * The keyword table: a second expansion of jskeyword.tbl that produces one
 * struct keyword initializer per entry, in table order.  FindKeyword indexes
 * this array with the values supplied by jsautokw.h.
 */
static const struct keyword keyword_defs[] = {
#define JS_KEYWORD(keyword, type, op, version) \
    {js_##keyword##_str, type, op, version},
#include "jskeyword.tbl"
#undef JS_KEYWORD
};

/* Number of entries in keyword_defs. */
#define KEYWORD_COUNT JS_ARRAY_LENGTH(keyword_defs)
98    
99     static const struct keyword *
100     FindKeyword(const jschar *s, size_t length)
101     {
102     register size_t i;
103     const struct keyword *kw;
104     const char *chars;
105    
106     JS_ASSERT(length != 0);
107    
108     #define JSKW_LENGTH() length
109     #define JSKW_AT(column) s[column]
110     #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
111     #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
112     #define JSKW_NO_MATCH() goto no_match;
113     #include "jsautokw.h"
114     #undef JSKW_NO_MATCH
115     #undef JSKW_TEST_GUESS
116     #undef JSKW_GOT_MATCH
117     #undef JSKW_AT
118     #undef JSKW_LENGTH
119    
120     got_match:
121     return &keyword_defs[i];
122    
123     test_guess:
124     kw = &keyword_defs[i];
125     chars = kw->chars;
126     do {
127     if (*s++ != (unsigned char)(*chars++))
128     goto no_match;
129     } while (--length != 0);
130     return kw;
131    
132     no_match:
133     return NULL;
134     }
135    
136     JSTokenType
137     js_CheckKeyword(const jschar *str, size_t length)
138     {
139     const struct keyword *kw;
140    
141     JS_ASSERT(length != 0);
142     kw = FindKeyword(str, length);
143     return kw ? kw->tokentype : TOK_EOF;
144     }
145    
146     JS_FRIEND_API(void)
147     js_MapKeywords(void (*mapfun)(const char *))
148     {
149     size_t i;
150    
151     for (i = 0; i != KEYWORD_COUNT; ++i)
152     mapfun(keyword_defs[i].chars);
153     }
154    
155     JSBool
156     js_IsIdentifier(JSString *str)
157     {
158     size_t length;
159     jschar c, *chars, *end;
160    
161     JSSTRING_CHARS_AND_LENGTH(str, chars, length);
162     if (length == 0)
163     return JS_FALSE;
164     c = *chars;
165     if (!JS_ISIDSTART(c))
166     return JS_FALSE;
167     end = chars + length;
168     while (++chars != end) {
169     c = *chars;
170     if (!JS_ISIDENT(c))
171     return JS_FALSE;
172     }
173     return JS_TRUE;
174     }
175    
176     #define TBMIN 64
177    
178     static JSBool
179     GrowTokenBuf(JSStringBuffer *sb, size_t newlength)
180     {
181     JSContext *cx;
182     jschar *base;
183     ptrdiff_t offset, length;
184     size_t tbsize;
185     JSArenaPool *pool;
186    
187     cx = (JSContext*) sb->data;
188     base = sb->base;
189     offset = PTRDIFF(sb->ptr, base, jschar);
190     pool = &cx->tempPool;
191     if (!base) {
192     tbsize = TBMIN * sizeof(jschar);
193     length = TBMIN - 1;
194     JS_ARENA_ALLOCATE_CAST(base, jschar *, pool, tbsize);
195     } else {
196     length = PTRDIFF(sb->limit, base, jschar);
197     if ((size_t)length >= ~(size_t)0 / sizeof(jschar)) {
198     base = NULL;
199     } else {
200     tbsize = (length + 1) * sizeof(jschar);
201     length += length + 1;
202     JS_ARENA_GROW_CAST(base, jschar *, pool, tbsize, tbsize);
203     }
204     }
205     if (!base) {
206     js_ReportOutOfScriptQuota(cx);
207     sb->base = STRING_BUFFER_ERROR_BASE;
208     return JS_FALSE;
209     }
210     sb->base = base;
211     sb->limit = base + length;
212     sb->ptr = base + offset;
213     return JS_TRUE;
214     }
215    
216     JSBool
217     js_InitTokenStream(JSContext *cx, JSTokenStream *ts,
218     const jschar *base, size_t length,
219     FILE *fp, const char *filename, uintN lineno)
220     {
221     jschar *buf;
222     size_t nb;
223    
224     JS_ASSERT_IF(fp, !base);
225     JS_ASSERT_IF(!base, length == 0);
226     nb = fp
227     ? 2 * JS_LINE_LIMIT * sizeof(jschar)
228     : JS_LINE_LIMIT * sizeof(jschar);
229     JS_ARENA_ALLOCATE_CAST(buf, jschar *, &cx->tempPool, nb);
230     if (!buf) {
231     js_ReportOutOfScriptQuota(cx);
232     return JS_FALSE;
233     }
234     memset(buf, 0, nb);
235     memset(ts, 0, sizeof(*ts));
236     ts->filename = filename;
237     ts->lineno = lineno;
238     ts->linebuf.base = ts->linebuf.limit = ts->linebuf.ptr = buf;
239     if (fp) {
240     ts->file = fp;
241     ts->userbuf.base = buf + JS_LINE_LIMIT;
242     ts->userbuf.ptr = ts->userbuf.limit = ts->userbuf.base + JS_LINE_LIMIT;
243     } else {
244     ts->userbuf.base = (jschar *)base;
245     ts->userbuf.limit = (jschar *)base + length;
246     ts->userbuf.ptr = (jschar *)base;
247     }
248     ts->tokenbuf.grow = GrowTokenBuf;
249     ts->tokenbuf.data = cx;
250     ts->listener = cx->debugHooks->sourceHandler;
251     ts->listenerData = cx->debugHooks->sourceHandlerData;
252     return JS_TRUE;
253     }
254    
255     void
256     js_CloseTokenStream(JSContext *cx, JSTokenStream *ts)
257     {
258     if (ts->flags & TSF_OWNFILENAME)
259     JS_free(cx, (void *) ts->filename);
260     }
261    
262     JS_FRIEND_API(int)
263     js_fgets(char *buf, int size, FILE *file)
264     {
265     int n, i, c;
266     JSBool crflag;
267    
268     n = size - 1;
269     if (n < 0)
270     return -1;
271    
272     crflag = JS_FALSE;
273     for (i = 0; i < n && (c = getc(file)) != EOF; i++) {
274     buf[i] = c;
275     if (c == '\n') { /* any \n ends a line */
276     i++; /* keep the \n; we know there is room for \0 */
277     break;
278     }
279     if (crflag) { /* \r not followed by \n ends line at the \r */
280     ungetc(c, file);
281     break; /* and overwrite c in buf with \0 */
282     }
283     crflag = (c == '\r');
284     }
285    
286     buf[i] = '\0';
287     return i;
288     }
289    
290     static int32
291     GetChar(JSTokenStream *ts)
292     {
293     int32 c;
294     ptrdiff_t i, j, len, olen;
295     JSBool crflag;
296     char cbuf[JS_LINE_LIMIT];
297     jschar *ubuf, *nl;
298    
299     if (ts->ungetpos != 0) {
300     c = ts->ungetbuf[--ts->ungetpos];
301     } else {
302     if (ts->linebuf.ptr == ts->linebuf.limit) {
303     len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
304     if (len <= 0) {
305     if (!ts->file) {
306     ts->flags |= TSF_EOF;
307     return EOF;
308     }
309    
310     /* Fill ts->userbuf so that \r and \r\n convert to \n. */
311     crflag = (ts->flags & TSF_CRFLAG) != 0;
312     len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
313     if (len <= 0) {
314     ts->flags |= TSF_EOF;
315     return EOF;
316     }
317     olen = len;
318     ubuf = ts->userbuf.base;
319     i = 0;
320     if (crflag) {
321     ts->flags &= ~TSF_CRFLAG;
322     if (cbuf[0] != '\n') {
323     ubuf[i++] = '\n';
324     len++;
325     ts->linepos--;
326     }
327     }
328     for (j = 0; i < len; i++, j++)
329     ubuf[i] = (jschar) (unsigned char) cbuf[j];
330     ts->userbuf.limit = ubuf + len;
331     ts->userbuf.ptr = ubuf;
332     }
333     if (ts->listener) {
334     ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
335     &ts->listenerTSData, ts->listenerData);
336     }
337    
338     nl = ts->saveEOL;
339     if (!nl) {
340     /*
341     * Any one of \n, \r, or \r\n ends a line (the longest
342     * match wins). Also allow the Unicode line and paragraph
343     * separators.
344     */
345     for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
346     /*
347     * Try to prevent value-testing on most characters by
348     * filtering out characters that aren't 000x or 202x.
349     */
350     if ((*nl & 0xDFD0) == 0) {
351     if (*nl == '\n')
352     break;
353     if (*nl == '\r') {
354     if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
355     nl++;
356     break;
357     }
358     if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
359     break;
360     }
361     }
362     }
363    
364     /*
365     * If there was a line terminator, copy thru it into linebuf.
366     * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
367     */
368     if (nl < ts->userbuf.limit)
369     len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
370     if (len >= JS_LINE_LIMIT) {
371     len = JS_LINE_LIMIT - 1;
372     ts->saveEOL = nl;
373     } else {
374     ts->saveEOL = NULL;
375     }
376     js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
377     ts->userbuf.ptr += len;
378     olen = len;
379    
380     /*
381     * Make sure linebuf contains \n for EOL (don't do this in
382     * userbuf because the user's string might be readonly).
383     */
384     if (nl < ts->userbuf.limit) {
385     if (*nl == '\r') {
386     if (ts->linebuf.base[len-1] == '\r') {
387     /*
388     * Does the line segment end in \r? We must check
389     * for a \n at the front of the next segment before
390     * storing a \n into linebuf. This case matters
391     * only when we're reading from a file.
392     */
393     if (nl + 1 == ts->userbuf.limit && ts->file) {
394     len--;
395     ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
396     if (len == 0) {
397     /*
398     * This can happen when a segment ends in
399     * \r\r. Start over. ptr == limit in this
400     * case, so we'll fall into buffer-filling
401     * code.
402     */
403     return GetChar(ts);
404     }
405     } else {
406     ts->linebuf.base[len-1] = '\n';
407     }
408     }
409     } else if (*nl == '\n') {
410     if (nl > ts->userbuf.base &&
411     nl[-1] == '\r' &&
412     ts->linebuf.base[len-2] == '\r') {
413     len--;
414     JS_ASSERT(ts->linebuf.base[len] == '\n');
415     ts->linebuf.base[len-1] = '\n';
416     }
417     } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
418     ts->linebuf.base[len-1] = '\n';
419     }
420     }
421    
422     /* Reset linebuf based on adjusted segment length. */
423     ts->linebuf.limit = ts->linebuf.base + len;
424     ts->linebuf.ptr = ts->linebuf.base;
425    
426     /* Update position of linebuf within physical userbuf line. */
427     if (!(ts->flags & TSF_NLFLAG))
428     ts->linepos += ts->linelen;
429     else
430     ts->linepos = 0;
431     if (ts->linebuf.limit[-1] == '\n')
432     ts->flags |= TSF_NLFLAG;
433     else
434     ts->flags &= ~TSF_NLFLAG;
435    
436     /* Update linelen from original segment length. */
437     ts->linelen = olen;
438     }
439     c = *ts->linebuf.ptr++;
440     }
441     if (c == '\n')
442     ts->lineno++;
443     return c;
444     }
445    
446     static void
447     UngetChar(JSTokenStream *ts, int32 c)
448     {
449     if (c == EOF)
450     return;
451     JS_ASSERT(ts->ungetpos < JS_ARRAY_LENGTH(ts->ungetbuf));
452     if (c == '\n')
453     ts->lineno--;
454     ts->ungetbuf[ts->ungetpos++] = (jschar)c;
455     }
456    
457     static int32
458     PeekChar(JSTokenStream *ts)
459     {
460     int32 c;
461    
462     c = GetChar(ts);
463     UngetChar(ts, c);
464     return c;
465     }
466    
467     /*
468     * Peek n chars ahead into ts. Return true if n chars were read, false if
469     * there weren't enough characters in the input stream. This function cannot
470     * be used to peek into or past a newline.
471     */
472     static JSBool
473     PeekChars(JSTokenStream *ts, intN n, jschar *cp)
474     {
475     intN i, j;
476     int32 c;
477    
478     for (i = 0; i < n; i++) {
479     c = GetChar(ts);
480     if (c == EOF)
481     break;
482     if (c == '\n') {
483     UngetChar(ts, c);
484     break;
485     }
486     cp[i] = (jschar)c;
487     }
488     for (j = i - 1; j >= 0; j--)
489     UngetChar(ts, cp[j]);
490     return i == n;
491     }
492    
493     static void
494     SkipChars(JSTokenStream *ts, intN n)
495     {
496     while (--n >= 0)
497     GetChar(ts);
498     }
499    
500     static JSBool
501     MatchChar(JSTokenStream *ts, int32 expect)
502     {
503     int32 c;
504    
505     c = GetChar(ts);
506     if (c == expect)
507     return JS_TRUE;
508     UngetChar(ts, c);
509     return JS_FALSE;
510     }
511    
512     JSBool
513     js_ReportCompileErrorNumber(JSContext *cx, JSTokenStream *ts, JSParseNode *pn,
514     uintN flags, uintN errorNumber, ...)
515     {
516     JSErrorReport report;
517     char *message;
518     size_t linelength;
519     jschar *linechars;
520     char *linebytes;
521     va_list ap;
522     JSBool warning, ok;
523     JSTokenPos *tp;
524     uintN index, i;
525     JSErrorReporter onError;
526    
527     JS_ASSERT(ts->linebuf.limit < ts->linebuf.base + JS_LINE_LIMIT);
528    
529     if ((flags & JSREPORT_STRICT) && !JS_HAS_STRICT_OPTION(cx))
530     return JS_TRUE;
531    
532     memset(&report, 0, sizeof report);
533     report.flags = flags;
534     report.errorNumber = errorNumber;
535     message = NULL;
536     linechars = NULL;
537     linebytes = NULL;
538    
539     MUST_FLOW_THROUGH("out");
540     va_start(ap, errorNumber);
541     ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
542     errorNumber, &message, &report, &warning,
543     !(flags & JSREPORT_UC), ap);
544     va_end(ap);
545     if (!ok) {
546     warning = JS_FALSE;
547     goto out;
548     }
549    
550     report.filename = ts->filename;
551    
552     if (pn) {
553     report.lineno = pn->pn_pos.begin.lineno;
554     if (report.lineno != ts->lineno)
555     goto report;
556     tp = &pn->pn_pos;
557     } else {
558     /* Point to the current token, not the next one to get. */
559     tp = &ts->tokens[ts->cursor].pos;
560     }
561     report.lineno = ts->lineno;
562     linelength = PTRDIFF(ts->linebuf.limit, ts->linebuf.base, jschar);
563     linechars = (jschar *)JS_malloc(cx, (linelength + 1) * sizeof(jschar));
564     if (!linechars) {
565     warning = JS_FALSE;
566     goto out;
567     }
568     memcpy(linechars, ts->linebuf.base, linelength * sizeof(jschar));
569     linechars[linelength] = 0;
570     linebytes = js_DeflateString(cx, linechars, linelength);
571     if (!linebytes) {
572     warning = JS_FALSE;
573     goto out;
574     }
575     report.linebuf = linebytes;
576    
577     /*
578     * FIXME: What should instead happen here is that we should
579     * find error-tokens in userbuf, if !ts->file. That will
580     * allow us to deliver a more helpful error message, which
581     * includes all or part of the bad string or bad token. The
582     * code here yields something that looks truncated.
583     * See https://bugzilla.mozilla.org/show_bug.cgi?id=352970
584     */
585     index = 0;
586     if (tp->begin.lineno == tp->end.lineno) {
587     if (tp->begin.index < ts->linepos)
588     goto report;
589    
590     index = tp->begin.index - ts->linepos;
591     }
592    
593     report.tokenptr = report.linebuf + index;
594     report.uclinebuf = linechars;
595     report.uctokenptr = report.uclinebuf + index;
596    
597     /*
598     * If there's a runtime exception type associated with this error
599     * number, set that as the pending exception. For errors occuring at
600     * compile time, this is very likely to be a JSEXN_SYNTAXERR.
601     *
602     * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
603     * flag will be set in report.flags. Proper behavior for an error
604     * reporter is to ignore a report with this flag for all but top-level
605     * compilation errors. The exception will remain pending, and so long
606     * as the non-top-level "load", "eval", or "compile" native function
607     * returns false, the top-level reporter will eventually receive the
608     * uncaught exception report.
609     *
610     * XXX it'd probably be best if there was only one call to this
611     * function, but there seem to be two error reporter call points.
612     */
613     report:
614     onError = cx->errorReporter;
615    
616     /*
617     * Try to raise an exception only if there isn't one already set --
618     * otherwise the exception will describe the last compile-time error,
619     * which is likely spurious.
620     */
621     if (!(ts->flags & TSF_ERROR)) {
622     if (js_ErrorToException(cx, message, &report))
623     onError = NULL;
624     }
625    
626     /*
627     * Suppress any compile-time errors that don't occur at the top level.
628     * This may still fail, as interplevel may be zero in contexts where we
629     * don't really want to call the error reporter, as when js is called
630     * by other code which could catch the error.
631     */
632     if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
633     onError = NULL;
634    
635     if (onError) {
636     JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
637    
638     /*
639     * If debugErrorHook is present then we give it a chance to veto
640     * sending the error on to the regular error reporter.
641     */
642     if (hook && !hook(cx, message, &report,
643     cx->debugHooks->debugErrorHookData)) {
644     onError = NULL;
645     }
646     }
647     if (onError)
648     (*onError)(cx, message, &report);
649    
650     out:
651     if (linebytes)
652     JS_free(cx, linebytes);
653     if (linechars)
654     JS_free(cx, linechars);
655     if (message)
656     JS_free(cx, message);
657     if (report.ucmessage)
658     JS_free(cx, (void *)report.ucmessage);
659    
660     if (report.messageArgs) {
661     if (!(flags & JSREPORT_UC)) {
662     i = 0;
663     while (report.messageArgs[i])
664     JS_free(cx, (void *)report.messageArgs[i++]);
665     }
666     JS_free(cx, (void *)report.messageArgs);
667     }
668    
669     if (!JSREPORT_IS_WARNING(flags)) {
670     /* Set the error flag to suppress spurious reports. */
671     ts->flags |= TSF_ERROR;
672     }
673    
674     return warning;
675     }
676    
677     static JSBool
678     GrowStringBuffer(JSStringBuffer *sb, size_t newlength)
679     {
680     ptrdiff_t offset;
681     jschar *bp;
682    
683     offset = PTRDIFF(sb->ptr, sb->base, jschar);
684     JS_ASSERT(offset >= 0);
685     newlength += offset + 1;
686     if ((size_t)offset < newlength && newlength < ~(size_t)0 / sizeof(jschar))
687     bp = (jschar *) realloc(sb->base, newlength * sizeof(jschar));
688     else
689     bp = NULL;
690     if (!bp) {
691     free(sb->base);
692     sb->base = STRING_BUFFER_ERROR_BASE;
693     return JS_FALSE;
694     }
695     sb->base = bp;
696     sb->ptr = bp + offset;
697     sb->limit = bp + newlength - 1;
698     return JS_TRUE;
699     }
700    
701     static void
702     FreeStringBuffer(JSStringBuffer *sb)
703     {
704     JS_ASSERT(STRING_BUFFER_OK(sb));
705     if (sb->base)
706     free(sb->base);
707     }
708    
709     void
710     js_InitStringBuffer(JSStringBuffer *sb)
711     {
712     sb->base = sb->limit = sb->ptr = NULL;
713     sb->data = NULL;
714     sb->grow = GrowStringBuffer;
715     sb->free = FreeStringBuffer;
716     }
717    
718     void
719     js_FinishStringBuffer(JSStringBuffer *sb)
720     {
721     sb->free(sb);
722     }
723    
/*
 * True when sb has room for n more jschars, calling sb's grow callback to
 * make room when it does not.  Every use of the sb parameter is
 * parenthesized so that any pointer expression may be passed, not only a
 * plain identifier.  Note that sb and n are evaluated more than once.
 */
#define ENSURE_STRING_BUFFER(sb,n) \
    ((sb)->ptr + (n) <= (sb)->limit || (sb)->grow(sb, n))
726    
727     static void
728     FastAppendChar(JSStringBuffer *sb, jschar c)
729     {
730     if (!STRING_BUFFER_OK(sb))
731     return;
732     if (!ENSURE_STRING_BUFFER(sb, 1))
733     return;
734     *sb->ptr++ = c;
735     }
736    
737     void
738     js_AppendChar(JSStringBuffer *sb, jschar c)
739     {
740     jschar *bp;
741    
742     if (!STRING_BUFFER_OK(sb))
743     return;
744     if (!ENSURE_STRING_BUFFER(sb, 1))
745     return;
746     bp = sb->ptr;
747     *bp++ = c;
748     *bp = 0;
749     sb->ptr = bp;
750     }
751    
752     #if JS_HAS_XML_SUPPORT
753    
754     void
755     js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count)
756     {
757     jschar *bp;
758    
759     if (!STRING_BUFFER_OK(sb) || count == 0)
760     return;
761     if (!ENSURE_STRING_BUFFER(sb, count))
762     return;
763     for (bp = sb->ptr; count; --count)
764     *bp++ = c;
765     *bp = 0;
766     sb->ptr = bp;
767     }
768    
769     void
770     js_AppendCString(JSStringBuffer *sb, const char *asciiz)
771     {
772     size_t length;
773     jschar *bp;
774    
775     if (!STRING_BUFFER_OK(sb) || *asciiz == '\0')
776     return;
777     length = strlen(asciiz);
778     if (!ENSURE_STRING_BUFFER(sb, length))
779     return;
780     for (bp = sb->ptr; length; --length)
781     *bp++ = (jschar) *asciiz++;
782     *bp = 0;
783     sb->ptr = bp;
784     }
785    
786     void
787     js_AppendJSString(JSStringBuffer *sb, JSString *str)
788     {
789     size_t length;
790     jschar *bp;
791    
792     if (!STRING_BUFFER_OK(sb))
793     return;
794     length = JSSTRING_LENGTH(str);
795     if (length == 0 || !ENSURE_STRING_BUFFER(sb, length))
796     return;
797     bp = sb->ptr;
798     js_strncpy(bp, JSSTRING_CHARS(str), length);
799     bp += length;
800     *bp = 0;
801     sb->ptr = bp;
802     }
803    
804     static JSBool
805     GetXMLEntity(JSContext *cx, JSTokenStream *ts)
806     {
807     ptrdiff_t offset, length, i;
808     int32 c, d;
809     JSBool ispair;
810     jschar *bp, digit;
811     char *bytes;
812     JSErrNum msg;
813    
814     /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
815     offset = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar);
816     FastAppendChar(&ts->tokenbuf, '&');
817     while ((c = GetChar(ts)) != ';') {
818     if (c == EOF || c == '\n') {
819     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
820     JSMSG_END_OF_XML_ENTITY);
821     return JS_FALSE;
822     }
823     FastAppendChar(&ts->tokenbuf, (jschar) c);
824     }
825    
826     /* Let length be the number of jschars after the '&', including the ';'. */
827     length = PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar) - offset;
828     bp = ts->tokenbuf.base + offset;
829     c = d = 0;
830     ispair = JS_FALSE;
831     if (length > 2 && bp[1] == '#') {
832     /* Match a well-formed XML Character Reference. */
833     i = 2;
834     if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
835     if (length > 9) /* at most 6 hex digits allowed */
836     goto badncr;
837     while (++i < length) {
838     digit = bp[i];
839     if (!JS7_ISHEX(digit))
840     goto badncr;
841     c = (c << 4) + JS7_UNHEX(digit);
842     }
843     } else {
844     while (i < length) {
845     digit = bp[i++];
846     if (!JS7_ISDEC(digit))
847     goto badncr;
848     c = (c * 10) + JS7_UNDEC(digit);
849     if (c < 0)
850     goto badncr;
851     }
852     }
853    
854     if (0x10000 <= c && c <= 0x10FFFF) {
855     /* Form a surrogate pair (c, d) -- c is the high surrogate. */
856     d = 0xDC00 + (c & 0x3FF);
857     c = 0xD7C0 + (c >> 10);
858     ispair = JS_TRUE;
859     } else {
860     /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
861     if (c != 0x9 && c != 0xA && c != 0xD &&
862     !(0x20 <= c && c <= 0xD7FF) &&
863     !(0xE000 <= c && c <= 0xFFFD)) {
864     goto badncr;
865     }
866     }
867     } else {
868     /* Try to match one of the five XML 1.0 predefined entities. */
869     switch (length) {
870     case 3:
871     if (bp[2] == 't') {
872     if (bp[1] == 'l')
873     c = '<';
874     else if (bp[1] == 'g')
875     c = '>';
876     }
877     break;
878     case 4:
879     if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
880     c = '&';
881     break;
882     case 5:
883     if (bp[3] == 'o') {
884     if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
885     c = '\'';
886     else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
887     c = '"';
888     }
889     break;
890     }
891     if (c == 0) {
892     msg = JSMSG_UNKNOWN_XML_ENTITY;
893     goto bad;
894     }
895     }
896    
897     /* If we matched, retract ts->tokenbuf and store the entity's value. */
898     *bp++ = (jschar) c;
899     if (ispair)
900     *bp++ = (jschar) d;
901     *bp = 0;
902     ts->tokenbuf.ptr = bp;
903     return JS_TRUE;
904    
905     badncr:
906     msg = JSMSG_BAD_XML_NCR;
907     bad:
908     /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
909     bytes = js_DeflateString(cx, bp + 1,
910     PTRDIFF(ts->tokenbuf.ptr, bp, jschar) - 1);
911     if (bytes) {
912     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
913     msg, bytes);
914     JS_free(cx, bytes);
915     }
916     return JS_FALSE;
917     }
918    
919     #endif /* JS_HAS_XML_SUPPORT */
920    
921     JSTokenType
922     js_PeekToken(JSContext *cx, JSTokenStream *ts)
923     {
924     JSTokenType tt;
925    
926     if (ts->lookahead != 0) {
927     tt = ts->tokens[(ts->cursor + ts->lookahead) & NTOKENS_MASK].type;
928     } else {
929     tt = js_GetToken(cx, ts);
930     js_UngetToken(ts);
931     }
932     return tt;
933     }
934    
935     JSTokenType
936     js_PeekTokenSameLine(JSContext *cx, JSTokenStream *ts)
937     {
938     JSTokenType tt;
939    
940     if (!ON_CURRENT_LINE(ts, CURRENT_TOKEN(ts).pos))
941     return TOK_EOL;
942     ts->flags |= TSF_NEWLINES;
943     tt = js_PeekToken(cx, ts);
944     ts->flags &= ~TSF_NEWLINES;
945     return tt;
946     }
947    
948     /*
949     * We have encountered a '\': check for a Unicode escape sequence after it,
950     * returning the character code value if we found a Unicode escape sequence.
951     * Otherwise, non-destructively return the original '\'.
952     */
953     static int32
954     GetUnicodeEscape(JSTokenStream *ts)
955     {
956     jschar cp[5];
957     int32 c;
958    
959     if (PeekChars(ts, 5, cp) && cp[0] == 'u' &&
960     JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
961     JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
962     {
963     c = (((((JS7_UNHEX(cp[1]) << 4)
964     + JS7_UNHEX(cp[2])) << 4)
965     + JS7_UNHEX(cp[3])) << 4)
966     + JS7_UNHEX(cp[4]);
967     SkipChars(ts, 5);
968     return c;
969     }
970     return '\\';
971     }
972    
973     static JSToken *
974     NewToken(JSTokenStream *ts, ptrdiff_t adjust)
975     {
976     JSToken *tp;
977    
978     ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
979     tp = &CURRENT_TOKEN(ts);
980     tp->ptr = ts->linebuf.ptr + adjust;
981     tp->pos.begin.index = ts->linepos +
982     PTRDIFF(tp->ptr, ts->linebuf.base, jschar) -
983     ts->ungetpos;
984     tp->pos.begin.lineno = tp->pos.end.lineno = (uint16)ts->lineno;
985     return tp;
986     }
987    
988     static JS_ALWAYS_INLINE JSBool
989     ScanAsSpace(jschar c)
990     {
991     /* Treat little- and big-endian BOMs as whitespace for compatibility. */
992     if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
993     return JS_TRUE;
994     return JS_FALSE;
995     }
996    
/*
 * js_GetToken: scan the next token from ts and return its type.
 *
 * Order of work, as visible in the body below:
 *   1. Replay tokens pushed back via ts->lookahead (skipping TOK_EOL unless
 *      TSF_NEWLINES is set).
 *   2. If TSF_ERROR is already set on the stream, keep returning TOK_ERROR.
 *   3. With JS_HAS_XML_SUPPORT, handle the special TSF_XMLTEXTMODE and
 *      TSF_XMLTAGMODE scanning modes.
 *   4. Otherwise skip whitespace and comments, then scan an identifier or
 *      keyword, a numeric literal, a string literal, or a punctuator /
 *      operator (including regexp literals and, with XML support, XML
 *      comments, CDATA sections and processing instructions).
 *
 * Parameters:
 *   cx - context used for atomizing, error reporting and option/version
 *        checks.
 *   ts - token stream to read from; its flags, cursor, lookahead count and
 *        tokenbuf are updated.
 *
 * Returns the token type, which is also stored in tp->type together with the
 * end position. Depending on the token, t_op, t_atom, t_atom2, t_dval or
 * t_reflags of the token are filled in. On failure TSF_ERROR is set on ts and
 * TOK_ERROR is returned.
 */
997     JSTokenType
998     js_GetToken(JSContext *cx, JSTokenStream *ts)
999     {
1000     JSTokenType tt;
1001     int32 c, qc;
1002     JSToken *tp;
1003     JSAtom *atom;
1004     JSBool hadUnicodeEscape;
1005     const struct keyword *kw;
1006     #if JS_HAS_XML_SUPPORT
1007     JSBool inTarget;
1008     size_t targetLength;
1009     ptrdiff_t contentIndex;
1010     #endif
1011    
         /*
          * Local helper macros operating on ts->tokenbuf; they are #undef'ed
          * at the end of this function.
          */
1012     #define INIT_TOKENBUF() (ts->tokenbuf.ptr = ts->tokenbuf.base)
1013     #define TOKENBUF_LENGTH() PTRDIFF(ts->tokenbuf.ptr, ts->tokenbuf.base, jschar)
1014     #define TOKENBUF_OK() STRING_BUFFER_OK(&ts->tokenbuf)
1015     #define TOKENBUF_TO_ATOM() (TOKENBUF_OK() \
1016     ? js_AtomizeChars(cx, \
1017     TOKENBUF_BASE(), \
1018     TOKENBUF_LENGTH(), \
1019     0) \
1020     : NULL)
1021     #define ADD_TO_TOKENBUF(c) FastAppendChar(&ts->tokenbuf, (jschar) (c))
1022    
1023     /* The following macros should only be used when TOKENBUF_OK() is true. */
1024     #define TOKENBUF_BASE() (ts->tokenbuf.base)
1025     #define TOKENBUF_END() (ts->tokenbuf.ptr)
1026     #define TOKENBUF_CHAR(i) (ts->tokenbuf.base[i])
1027     #define TRIM_TOKENBUF(i) (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
1028     #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
1029    
1030     /* Check for a pushed-back token resulting from mismatching lookahead. */
         /* A replayed TOK_EOL is only returned when TSF_NEWLINES is set. */
1031     while (ts->lookahead != 0) {
1032     JS_ASSERT(!(ts->flags & TSF_XMLTEXTMODE));
1033     ts->lookahead--;
1034     ts->cursor = (ts->cursor + 1) & NTOKENS_MASK;
1035     tt = CURRENT_TOKEN(ts).type;
1036     if (tt != TOK_EOL || (ts->flags & TSF_NEWLINES))
1037     return tt;
1038     }
1039    
1040     /* If there was a fatal error, keep returning TOK_ERROR. */
1041     if (ts->flags & TSF_ERROR)
1042     return TOK_ERROR;
1043    
1044     #if JS_HAS_XML_SUPPORT
         /*
          * XML text mode: accumulate characters up to '<', EOF, or qc. qc is
          * '{' unless TSF_XMLONLYMODE is set, in which case it is '<' and '&'
          * is expanded through GetXMLEntity.
          */
1045     if (ts->flags & TSF_XMLTEXTMODE) {
1046     tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
1047     tp = NewToken(ts, 0);
1048     INIT_TOKENBUF();
1049     qc = (ts->flags & TSF_XMLONLYMODE) ? '<' : '{';
1050    
1051     while ((c = GetChar(ts)) != qc && c != '<' && c != EOF) {
1052     if (c == '&' && qc == '<') {
1053     if (!GetXMLEntity(cx, ts))
1054     goto error;
1055     tt = TOK_XMLTEXT;
1056     continue;
1057     }
1058    
1059     if (!JS_ISXMLSPACE(c))
1060     tt = TOK_XMLTEXT;
1061     ADD_TO_TOKENBUF(c);
1062     }
1063     UngetChar(ts, c);
1064    
         /* An empty run yields a NULL atom rather than an atomized empty string. */
1065     if (TOKENBUF_LENGTH() == 0) {
1066     atom = NULL;
1067     } else {
1068     atom = TOKENBUF_TO_ATOM();
1069     if (!atom)
1070     goto error;
1071     }
1072     tp->pos.end.lineno = (uint16)ts->lineno;
1073     tp->t_op = JSOP_STRING;
1074     tp->t_atom = atom;
1075     goto out;
1076     }
1077    
         /*
          * XML tag mode: scan one of whitespace, a (possibly qualified) name,
          * '{', '=', a quoted attribute value, '>' or '/>'.
          */
1078     if (ts->flags & TSF_XMLTAGMODE) {
1079     tp = NewToken(ts, 0);
1080     c = GetChar(ts);
1081     if (JS_ISXMLSPACE(c)) {
1082     do {
1083     c = GetChar(ts);
1084     } while (JS_ISXMLSPACE(c));
1085     UngetChar(ts, c);
1086     tt = TOK_XMLSPACE;
1087     goto out;
1088     }
1089    
1090     if (c == EOF) {
1091     tt = TOK_EOF;
1092     goto out;
1093     }
1094    
1095     INIT_TOKENBUF();
1096     if (JS_ISXMLNSSTART(c)) {
1097     JSBool sawColon = JS_FALSE;
1098    
1099     ADD_TO_TOKENBUF(c);
1100     while ((c = GetChar(ts)) != EOF && JS_ISXMLNAME(c)) {
         /*
          * At most one ':' is allowed, and it must be followed by a name
          * character or, outside TSF_XMLONLYMODE, by '{'.
          */
1101     if (c == ':') {
1102     int nextc;
1103    
1104     if (sawColon ||
1105     (nextc = PeekChar(ts),
1106     ((ts->flags & TSF_XMLONLYMODE) || nextc != '{') &&
1107     !JS_ISXMLNAME(nextc))) {
1108     js_ReportCompileErrorNumber(cx, ts, NULL,
1109     JSREPORT_ERROR,
1110     JSMSG_BAD_XML_QNAME);
1111     goto error;
1112     }
1113     sawColon = JS_TRUE;
1114     }
1115    
1116     ADD_TO_TOKENBUF(c);
1117     }
1118    
1119     UngetChar(ts, c);
1120     atom = TOKENBUF_TO_ATOM();
1121     if (!atom)
1122     goto error;
1123     tp->t_op = JSOP_STRING;
1124     tp->t_atom = atom;
1125     tt = TOK_XMLNAME;
1126     goto out;
1127     }
1128    
1129     switch (c) {
1130     case '{':
1131     if (ts->flags & TSF_XMLONLYMODE)
1132     goto bad_xml_char;
1133     tt = TOK_LC;
1134     goto out;
1135    
1136     case '=':
1137     tt = TOK_ASSIGN;
1138     goto out;
1139    
         /* Quoted attribute value; qc remembers which quote opened it. */
1140     case '"':
1141     case '\'':
1142     qc = c;
1143     while ((c = GetChar(ts)) != qc) {
1144     if (c == EOF) {
1145     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1146     JSMSG_UNTERMINATED_STRING);
1147     goto error;
1148     }
1149    
1150     /*
1151     * XML attribute values are double-quoted when pretty-printed,
1152     * so escape " if it is expressed directly in a single-quoted
1153     * attribute value.
1154     */
1155     if (c == '"' && !(ts->flags & TSF_XMLONLYMODE)) {
1156     JS_ASSERT(qc == '\'');
1157     js_AppendCString(&ts->tokenbuf, js_quot_entity_str);
1158     continue;
1159     }
1160    
1161     if (c == '&' && (ts->flags & TSF_XMLONLYMODE)) {
1162     if (!GetXMLEntity(cx, ts))
1163     goto error;
1164     continue;
1165     }
1166    
1167     ADD_TO_TOKENBUF(c);
1168     }
1169     atom = TOKENBUF_TO_ATOM();
1170     if (!atom)
1171     goto error;
1172     tp->pos.end.lineno = (uint16)ts->lineno;
1173     tp->t_op = JSOP_STRING;
1174     tp->t_atom = atom;
1175     tt = TOK_XMLATTR;
1176     goto out;
1177    
1178     case '>':
1179     tt = TOK_XMLTAGC;
1180     goto out;
1181    
1182     case '/':
1183     if (MatchChar(ts, '>')) {
1184     tt = TOK_XMLPTAGC;
1185     goto out;
1186     }
1187     /* FALL THROUGH */
1188    
1189     bad_xml_char:
1190     default:
1191     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1192     JSMSG_BAD_XML_CHARACTER);
1193     goto error;
1194     }
1195     /* NOTREACHED */
1196     }
1197     #endif /* JS_HAS_XML_SUPPORT */
1198    
         /*
          * Ordinary scanning. Skip characters accepted by ScanAsSpace. A newline
          * clears TSF_DIRTYLINE and, when TSF_NEWLINES is set, ends the skip so
          * that it reaches the punctuator switch below as TOK_EOL. Comment
          * handling jumps back here after a comment has been consumed.
          */
1199     retry:
1200     do {
1201     c = GetChar(ts);
1202     if (c == '\n') {
1203     ts->flags &= ~TSF_DIRTYLINE;
1204     if (ts->flags & TSF_NEWLINES)
1205     break;
1206     }
1207     } while (ScanAsSpace((jschar)c));
1208    
1209     tp = NewToken(ts, -1);
1210     if (c == EOF) {
1211     tt = TOK_EOF;
1212     goto out;
1213     }
1214    
         /*
          * Identifier or keyword. A backslash is handed to GetUnicodeEscape and
          * its result qc is used in place of c when it is a valid identifier
          * character; hadUnicodeEscape records that this happened.
          */
1215     hadUnicodeEscape = JS_FALSE;
1216     if (JS_ISIDSTART(c) ||
1217     (c == '\\' &&
1218     (qc = GetUnicodeEscape(ts),
1219     hadUnicodeEscape = JS_ISIDSTART(qc)))) {
1220     if (hadUnicodeEscape)
1221     c = qc;
1222     INIT_TOKENBUF();
1223     for (;;) {
1224     ADD_TO_TOKENBUF(c);
1225     c = GetChar(ts);
1226     if (c == '\\') {
1227     qc = GetUnicodeEscape(ts);
1228     if (!JS_ISIDENT(qc))
1229     break;
1230     c = qc;
1231     hadUnicodeEscape = JS_TRUE;
1232     } else {
1233     if (!JS_ISIDENT(c))
1234     break;
1235     }
1236     }
1237     UngetChar(ts, c);
1238    
1239     /*
1240     * Check for keywords unless we saw Unicode escape or parser asks
1241     * to ignore keywords.
1242     */
1243     if (!hadUnicodeEscape &&
1244     !(ts->flags & TSF_KEYWORD_IS_NAME) &&
1245     TOKENBUF_OK() &&
1246     (kw = FindKeyword(TOKENBUF_BASE(), TOKENBUF_LENGTH()))) {
         /*
          * TOK_RESERVED words only produce a strict warning and are then
          * scanned as names; other keywords count only if the context's
          * version is at least kw->version.
          */
1247     if (kw->tokentype == TOK_RESERVED) {
1248     if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1249     JSREPORT_WARNING |
1250     JSREPORT_STRICT,
1251     JSMSG_RESERVED_ID,
1252     kw->chars)) {
1253     goto error;
1254     }
1255     } else if (kw->version <= JSVERSION_NUMBER(cx)) {
1256     tt = kw->tokentype;
1257     tp->t_op = (JSOp) kw->op;
1258     goto out;
1259     }
1260     }
1261    
1262     atom = TOKENBUF_TO_ATOM();
1263     if (!atom)
1264     goto error;
1265     tp->t_op = JSOP_NAME;
1266     tp->t_atom = atom;
1267     tt = TOK_NAME;
1268     goto out;
1269     }
1270    
         /*
          * Numeric literal. A leading "0x"/"0X" selects radix 16, a leading '0'
          * followed by a decimal digit selects radix 8, anything else is
          * radix 10.
          */
1271     if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(PeekChar(ts)))) {
1272     jsint radix;
1273     const jschar *endptr;
1274     jsdouble dval;
1275    
1276     radix = 10;
1277     INIT_TOKENBUF();
1278    
1279     if (c == '0') {
1280     ADD_TO_TOKENBUF(c);
1281     c = GetChar(ts);
1282     if (JS_TOLOWER(c) == 'x') {
1283     ADD_TO_TOKENBUF(c);
1284     c = GetChar(ts);
1285     radix = 16;
1286     } else if (JS7_ISDEC(c)) {
1287     radix = 8;
1288     }
1289     }
1290    
         /* Collect digits; letters end the run unless the radix is 16. */
1291     while (JS7_ISHEX(c)) {
1292     if (radix < 16) {
1293     if (JS7_ISLET(c))
1294     break;
1295    
1296     /*
1297     * We permit 08 and 09 as decimal numbers, which makes our
1298     * behaviour a superset of the ECMA numeric grammar. We might
1299     * not always be so permissive, so we warn about it.
1300     */
1301     if (radix == 8 && c >= '8') {
1302     if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1303     JSREPORT_WARNING,
1304     JSMSG_BAD_OCTAL,
1305     c == '8' ? "08" : "09")) {
1306     goto error;
1307     }
1308     radix = 10;
1309     }
1310     }
1311     ADD_TO_TOKENBUF(c);
1312     c = GetChar(ts);
1313     }
1314    
         /* Optional fraction and exponent, decimal literals only. */
1315     if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1316     if (c == '.') {
1317     do {
1318     ADD_TO_TOKENBUF(c);
1319     c = GetChar(ts);
1320     } while (JS7_ISDEC(c));
1321     }
1322     if (JS_TOLOWER(c) == 'e') {
1323     ADD_TO_TOKENBUF(c);
1324     c = GetChar(ts);
1325     if (c == '+' || c == '-') {
1326     ADD_TO_TOKENBUF(c);
1327     c = GetChar(ts);
1328     }
1329     if (!JS7_ISDEC(c)) {
1330     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1331     JSMSG_MISSING_EXPONENT);
1332     goto error;
1333     }
1334     do {
1335     ADD_TO_TOKENBUF(c);
1336     c = GetChar(ts);
1337     } while (JS7_ISDEC(c));
1338     }
1339     }
1340    
1341     /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1342     UngetChar(ts, c);
1343     ADD_TO_TOKENBUF(0);
1344    
1345     if (!TOKENBUF_OK())
1346     goto error;
1347     if (radix == 10) {
1348     if (!js_strtod(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1349     &endptr, &dval)) {
1350     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1351     JSMSG_OUT_OF_MEMORY);
1352     goto error;
1353     }
1354     } else {
1355     if (!js_strtointeger(cx, TOKENBUF_BASE(), TOKENBUF_END(),
1356     &endptr, radix, &dval)) {
1357     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1358     JSMSG_OUT_OF_MEMORY);
1359     goto error;
1360     }
1361     }
1362     tp->t_dval = dval;
1363     tt = TOK_NUMBER;
1364     goto out;
1365     }
1366    
         /*
          * String literal delimited by qc. An unescaped newline or EOF before
          * the closing quote is an error.
          */
1367     if (c == '"' || c == '\'') {
1368     qc = c;
1369     INIT_TOKENBUF();
1370     while ((c = GetChar(ts)) != qc) {
1371     if (c == '\n' || c == EOF) {
1372     UngetChar(ts, c);
1373     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1374     JSMSG_UNTERMINATED_STRING);
1375     goto error;
1376     }
1377     if (c == '\\') {
1378     switch (c = GetChar(ts)) {
1379     case 'b': c = '\b'; break;
1380     case 'f': c = '\f'; break;
1381     case 'n': c = '\n'; break;
1382     case 'r': c = '\r'; break;
1383     case 't': c = '\t'; break;
1384     case 'v': c = '\v'; break;
1385    
1386     default:
         /*
          * Octal escape of up to three digits; the third digit is consumed
          * only if the value stays <= 0377.
          */
1387     if ('0' <= c && c < '8') {
1388     int32 val = JS7_UNDEC(c);
1389    
1390     c = PeekChar(ts);
1391     if ('0' <= c && c < '8') {
1392     val = 8 * val + JS7_UNDEC(c);
1393     GetChar(ts);
1394     c = PeekChar(ts);
1395     if ('0' <= c && c < '8') {
1396     int32 save = val;
1397     val = 8 * val + JS7_UNDEC(c);
1398     if (val <= 0377)
1399     GetChar(ts);
1400     else
1401     val = save;
1402     }
1403     }
1404    
1405     c = (jschar)val;
         /*
          * \uXXXX and \xXX are decoded only when all hex digits are present;
          * otherwise c keeps the letter and the backslash is dropped.
          */
1406     } else if (c == 'u') {
1407     jschar cp[4];
1408     if (PeekChars(ts, 4, cp) &&
1409     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1410     JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1411     c = (((((JS7_UNHEX(cp[0]) << 4)
1412     + JS7_UNHEX(cp[1])) << 4)
1413     + JS7_UNHEX(cp[2])) << 4)
1414     + JS7_UNHEX(cp[3]);
1415     SkipChars(ts, 4);
1416     }
1417     } else if (c == 'x') {
1418     jschar cp[2];
1419     if (PeekChars(ts, 2, cp) &&
1420     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1421     c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1422     SkipChars(ts, 2);
1423     }
1424     } else if (c == '\n') {
1425     /* ECMA follows C by removing escaped newlines. */
1426     continue;
1427     }
1428     break;
1429     }
1430     }
1431     ADD_TO_TOKENBUF(c);
1432     }
1433     atom = TOKENBUF_TO_ATOM();
1434     if (!atom)
1435     goto error;
1436     tp->pos.end.lineno = (uint16)ts->lineno;
1437     tp->t_op = JSOP_STRING;
1438     tp->t_atom = atom;
1439     tt = TOK_STRING;
1440     goto out;
1441     }
1442    
         /*
          * Punctuators and operators. Cases that 'break' fall out of the switch
          * to the 'out' label; '\n' goes to eol_out so that TSF_DIRTYLINE is not
          * set for it.
          */
1443     switch (c) {
1444     case '\n': tt = TOK_EOL; goto eol_out;
1445     case ';': tt = TOK_SEMI; break;
1446     case '[': tt = TOK_LB; break;
1447     case ']': tt = TOK_RB; break;
1448     case '{': tt = TOK_LC; break;
1449     case '}': tt = TOK_RC; break;
1450     case '(': tt = TOK_LP; break;
1451     case ')': tt = TOK_RP; break;
1452     case ',': tt = TOK_COMMA; break;
1453     case '?': tt = TOK_HOOK; break;
1454    
1455     case '.':
1456     #if JS_HAS_XML_SUPPORT
1457     if (MatchChar(ts, c))
1458     tt = TOK_DBLDOT;
1459     else
1460     #endif
1461     tt = TOK_DOT;
1462     break;
1463    
1464     case ':':
1465     #if JS_HAS_XML_SUPPORT
1466     if (MatchChar(ts, c)) {
1467     tt = TOK_DBLCOLON;
1468     break;
1469     }
1470     #endif
1471     /*
1472     * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1473     * object initializer, likewise for setter.
1474     */
1475     tp->t_op = JSOP_NOP;
1476     tt = TOK_COLON;
1477     break;
1478    
         /* Compound assignments return TOK_ASSIGN with the operation in t_op. */
1479     case '|':
1480     if (MatchChar(ts, c)) {
1481     tt = TOK_OR;
1482     } else if (MatchChar(ts, '=')) {
1483     tp->t_op = JSOP_BITOR;
1484     tt = TOK_ASSIGN;
1485     } else {
1486     tt = TOK_BITOR;
1487     }
1488     break;
1489    
1490     case '^':
1491     if (MatchChar(ts, '=')) {
1492     tp->t_op = JSOP_BITXOR;
1493     tt = TOK_ASSIGN;
1494     } else {
1495     tt = TOK_BITXOR;
1496     }
1497     break;
1498    
1499     case '&':
1500     if (MatchChar(ts, c)) {
1501     tt = TOK_AND;
1502     } else if (MatchChar(ts, '=')) {
1503     tp->t_op = JSOP_BITAND;
1504     tt = TOK_ASSIGN;
1505     } else {
1506     tt = TOK_BITAND;
1507     }
1508     break;
1509    
1510     case '=':
1511     if (MatchChar(ts, c)) {
1512     tp->t_op = MatchChar(ts, c) ? JSOP_STRICTEQ : JSOP_EQ;
1513     tt = TOK_EQOP;
1514     } else {
1515     tp->t_op = JSOP_NOP;
1516     tt = TOK_ASSIGN;
1517     }
1518     break;
1519    
1520     case '!':
1521     if (MatchChar(ts, '=')) {
1522     tp->t_op = MatchChar(ts, '=') ? JSOP_STRICTNE : JSOP_NE;
1523     tt = TOK_EQOP;
1524     } else {
1525     tp->t_op = JSOP_NOT;
1526     tt = TOK_UNARYOP;
1527     }
1528     break;
1529    
1530     #if JS_HAS_XML_SUPPORT
1531     case '@':
1532     tt = TOK_AT;
1533     break;
1534     #endif
1535    
1536     case '<':
1537     #if JS_HAS_XML_SUPPORT
1538     /*
1539     * After much testing, it's clear that Postel's advice to protocol
1540     * designers ("be liberal in what you accept, and conservative in what
1541     * you send") invites a natural-law repercussion for JS as "protocol":
1542     *
1543     * "If you are liberal in what you accept, others will utterly fail to
1544     * be conservative in what they send."
1545     *
1546     * Which means you will get <!-- comments to end of line in the middle
1547     * of .js files, and after if conditions whose then statements are on
1548     * the next line, and other wonders. See at least the following bugs:
1549     * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1550     * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1551     * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1552     *
1553     * So without JSOPTION_XML, we never scan an XML comment or CDATA
1554     * literal. We always scan <! as the start of an HTML comment hack
1555     * to end of line, used since Netscape 2 to hide script tag content
1556     * from script-unaware browsers.
1557     */
1558     if ((ts->flags & TSF_OPERAND) &&
1559     (JS_HAS_XML_OPTION(cx) || PeekChar(ts) != '!')) {
1560     /* Check for XML comment or CDATA section. */
1561     if (MatchChar(ts, '!')) {
1562     INIT_TOKENBUF();
1563    
1564     /* Scan XML comment. */
1565     if (MatchChar(ts, '-')) {
1566     if (!MatchChar(ts, '-'))
1567     goto bad_xml_markup;
1568     while ((c = GetChar(ts)) != '-' || !MatchChar(ts, '-')) {
1569     if (c == EOF)
1570     goto bad_xml_markup;
1571     ADD_TO_TOKENBUF(c);
1572     }
1573     tt = TOK_XMLCOMMENT;
1574     tp->t_op = JSOP_XMLCOMMENT;
1575     goto finish_xml_markup;
1576     }
1577    
1578     /* Scan CDATA section. */
1579     if (MatchChar(ts, '[')) {
1580     jschar cp[6];
1581     if (PeekChars(ts, 6, cp) &&
1582     cp[0] == 'C' &&
1583     cp[1] == 'D' &&
1584     cp[2] == 'A' &&
1585     cp[3] == 'T' &&
1586     cp[4] == 'A' &&
1587     cp[5] == '[') {
1588     SkipChars(ts, 6);
1589     while ((c = GetChar(ts)) != ']' ||
1590     !PeekChars(ts, 2, cp) ||
1591     cp[0] != ']' ||
1592     cp[1] != '>') {
1593     if (c == EOF)
1594     goto bad_xml_markup;
1595     ADD_TO_TOKENBUF(c);
1596     }
1597     GetChar(ts); /* discard ] but not > */
1598     tt = TOK_XMLCDATA;
1599     tp->t_op = JSOP_XMLCDATA;
1600     goto finish_xml_markup;
1601     }
1602     goto bad_xml_markup;
1603     }
1604     }
1605    
1606     /* Check for processing instruction. */
         /*
          * The whole "target content" text is buffered; targetLength counts the
          * target's characters and contentIndex is the tokenbuf offset of the
          * first non-space character after the target (-1 if there is none).
          */
1607     if (MatchChar(ts, '?')) {
1608     inTarget = JS_TRUE;
1609     targetLength = 0;
1610     contentIndex = -1;
1611    
1612     INIT_TOKENBUF();
1613     while ((c = GetChar(ts)) != '?' || PeekChar(ts) != '>') {
1614     if (c == EOF)
1615     goto bad_xml_markup;
1616     if (inTarget) {
1617     if (JS_ISXMLSPACE(c)) {
1618     if (TOKENBUF_LENGTH() == 0)
1619     goto bad_xml_markup;
1620     inTarget = JS_FALSE;
1621     } else {
1622     if (!((TOKENBUF_LENGTH() == 0)
1623     ? JS_ISXMLNSSTART(c)
1624     : JS_ISXMLNS(c))) {
1625     goto bad_xml_markup;
1626     }
1627     ++targetLength;
1628     }
1629     } else {
1630     if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1631     contentIndex = TOKENBUF_LENGTH();
1632     }
1633     ADD_TO_TOKENBUF(c);
1634     }
1635     if (targetLength == 0)
1636     goto bad_xml_markup;
1637     if (!TOKENBUF_OK())
1638     goto error;
1639     if (contentIndex < 0) {
1640     atom = cx->runtime->atomState.emptyAtom;
1641     } else {
1642     atom = js_AtomizeChars(cx,
1643     &TOKENBUF_CHAR(contentIndex),
1644     TOKENBUF_LENGTH() - contentIndex,
1645     0);
1646     if (!atom)
1647     goto error;
1648     }
         /*
          * Cut tokenbuf back to the target so that finish_xml_markup atomizes
          * only the target into t_atom; the content atom goes in t_atom2.
          */
1649     TRIM_TOKENBUF(targetLength);
1650     tp->t_atom2 = atom;
1651     tt = TOK_XMLPI;
1652    
         /* Shared tail for XML comment, CDATA and PI: require the closing '>'. */
1653     finish_xml_markup:
1654     if (!MatchChar(ts, '>'))
1655     goto bad_xml_markup;
1656     atom = TOKENBUF_TO_ATOM();
1657     if (!atom)
1658     goto error;
1659     tp->t_atom = atom;
1660     tp->pos.end.lineno = (uint16)ts->lineno;
1661     goto out;
1662     }
1663    
1664     /* An XML start-of-tag character. */
1665     tt = MatchChar(ts, '/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1666     goto out;
1667    
1668     bad_xml_markup:
1669     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1670     JSMSG_BAD_XML_MARKUP);
1671     goto error;
1672     }
1673     #endif /* JS_HAS_XML_SUPPORT */
1674    
1675     /* NB: treat HTML begin-comment as comment-till-end-of-line */
         /* A partial match of "!--" is pushed back character by character. */
1676     if (MatchChar(ts, '!')) {
1677     if (MatchChar(ts, '-')) {
1678     if (MatchChar(ts, '-')) {
1679     ts->flags |= TSF_IN_HTML_COMMENT;
1680     goto skipline;
1681     }
1682     UngetChar(ts, '-');
1683     }
1684     UngetChar(ts, '!');
1685     }
1686     if (MatchChar(ts, c)) {
1687     tp->t_op = JSOP_LSH;
1688     tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1689     } else {
1690     tp->t_op = MatchChar(ts, '=') ? JSOP_LE : JSOP_LT;
1691     tt = TOK_RELOP;
1692     }
1693     break;
1694    
1695     case '>':
1696     if (MatchChar(ts, c)) {
1697     tp->t_op = MatchChar(ts, c) ? JSOP_URSH : JSOP_RSH;
1698     tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_SHOP;
1699     } else {
1700     tp->t_op = MatchChar(ts, '=') ? JSOP_GE : JSOP_GT;
1701     tt = TOK_RELOP;
1702     }
1703     break;
1704    
1705     case '*':
1706     tp->t_op = JSOP_MUL;
1707     tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_STAR;
1708     break;
1709    
         /*
          * '/' starts a line comment, a block comment, a regexp literal (only
          * when TSF_OPERAND is set), or a division operator.
          */
1710     case '/':
1711     if (MatchChar(ts, '/')) {
1712     /*
1713     * Hack for source filters such as the Mozilla XUL preprocessor:
1714     * "//@line 123\n" sets the number of the *next* line after the
1715     * comment to 123.
1716     */
1717     if (JS_HAS_ATLINE_OPTION(cx)) {
1718     jschar cp[5];
1719     uintN i, line, temp;
1720     char filename[1024];
1721    
1722     if (PeekChars(ts, 5, cp) &&
1723     cp[0] == '@' &&
1724     cp[1] == 'l' &&
1725     cp[2] == 'i' &&
1726     cp[3] == 'n' &&
1727     cp[4] == 'e') {
1728     SkipChars(ts, 5);
1729     while ((c = GetChar(ts)) != '\n' && ScanAsSpace((jschar)c))
1730     continue;
1731     if (JS7_ISDEC(c)) {
1732     line = JS7_UNDEC(c);
1733     while ((c = GetChar(ts)) != EOF && JS7_ISDEC(c)) {
1734     temp = 10 * line + JS7_UNDEC(c);
1735     if (temp < line) {
1736     /* Ignore overlarge line numbers. */
1737     goto skipline;
1738     }
1739     line = temp;
1740     }
1741     while (c != '\n' && ScanAsSpace((jschar)c))
1742     c = GetChar(ts);
1743     i = 0;
         /*
          * Optional double-quoted file name. A name that contains a character
          * above 0xff or does not fit in filename[] makes the directive be
          * ignored (the rest of the line is skipped as a plain comment).
          */
1744     if (c == '"') {
1745     while ((c = GetChar(ts)) != EOF && c != '"') {
1746     if (c == '\n') {
1747     UngetChar(ts, c);
1748     goto skipline;
1749     }
1750     if ((c >> 8) != 0 || i >= sizeof filename - 1)
1751     goto skipline;
1752     filename[i++] = (char) c;
1753     }
1754     if (c == '"') {
1755     while ((c = GetChar(ts)) != '\n' &&
1756     ScanAsSpace((jschar)c)) {
1757     continue;
1758     }
1759     }
1760     }
1761     filename[i] = '\0';
         /* The directive takes effect only if nothing else precedes the newline. */
1762     if (c == '\n') {
1763     if (i > 0) {
1764     if (ts->flags & TSF_OWNFILENAME)
1765     JS_free(cx, (void *) ts->filename);
1766     ts->filename = JS_strdup(cx, filename);
1767     if (!ts->filename)
1768     goto error;
1769     ts->flags |= TSF_OWNFILENAME;
1770     }
1771     ts->lineno = line;
1772     }
1773     }
1774     UngetChar(ts, c);
1775     }
1776     }
1777    
1778     skipline:
1779     /* Optimize line skipping if we are not in an HTML comment. */
1780     if (ts->flags & TSF_IN_HTML_COMMENT) {
1781     while ((c = GetChar(ts)) != EOF && c != '\n') {
1782     if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
1783     ts->flags &= ~TSF_IN_HTML_COMMENT;
1784     }
1785     } else {
1786     while ((c = GetChar(ts)) != EOF && c != '\n')
1787     continue;
1788     }
1789     UngetChar(ts, c);
         /*
          * Step the cursor back before rescanning.
          * NOTE(review): presumably this undoes the cursor advance made by
          * NewToken above, since retry calls NewToken again -- confirm.
          */
1790     ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1791     goto retry;
1792     }
1793    
1794     if (MatchChar(ts, '*')) {
1795     while ((c = GetChar(ts)) != EOF &&
1796     !(c == '*' && MatchChar(ts, '/'))) {
1797     /* Ignore all characters until comment close. */
1798     }
1799     if (c == EOF) {
1800     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1801     JSMSG_UNTERMINATED_COMMENT);
1802     goto error;
1803     }
1804     ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1805     goto retry;
1806     }
1807    
         /*
          * Regexp literal. The body is copied into tokenbuf up to the closing
          * '/', keeping backslash escapes verbatim; inCharClass tracks whether
          * the scan is inside [...].
          */
1808     if (ts->flags & TSF_OPERAND) {
1809     uintN flags;
1810     JSBool inCharClass = JS_FALSE;
1811    
1812     INIT_TOKENBUF();
1813     for (;;) {
1814     c = GetChar(ts);
1815     if (c == '\n' || c == EOF) {
1816     UngetChar(ts, c);
1817     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1818     JSMSG_UNTERMINATED_REGEXP);
1819     goto error;
1820     }
1821     if (c == '\\') {
1822     ADD_TO_TOKENBUF(c);
1823     c = GetChar(ts);
1824     } else if (c == '[') {
1825     inCharClass = JS_TRUE;
1826     } else if (c == ']') {
1827     inCharClass = JS_FALSE;
1828     } else if (c == '/' && !inCharClass) {
1829     /* For compat with IE, allow unescaped / in char classes. */
1830     break;
1831     }
1832     ADD_TO_TOKENBUF(c);
1833     }
         /* Consume any run of the flag letters g, i, m and y. */
1834     for (flags = 0; ; ) {
1835     c = PeekChar(ts);
1836     if (c == 'g')
1837     flags |= JSREG_GLOB;
1838     else if (c == 'i')
1839     flags |= JSREG_FOLD;
1840     else if (c == 'm')
1841     flags |= JSREG_MULTILINE;
1842     else if (c == 'y')
1843     flags |= JSREG_STICKY;
1844     else
1845     break;
1846     GetChar(ts);
1847     }
         /* Any other letter directly after the flags is reported as a bad flag. */
1848     c = PeekChar(ts);
1849     if (JS7_ISLET(c)) {
1850     tp->ptr = ts->linebuf.ptr - 1;
1851     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1852     JSMSG_BAD_REGEXP_FLAG);
1853     (void) GetChar(ts);
1854     goto error;
1855     }
1856     /* XXXbe fix jsregexp.c so it doesn't depend on NUL termination */
1857     if (!TOKENBUF_OK())
1858     goto error;
1859     NUL_TERM_TOKENBUF();
1860     tp->t_reflags = flags;
1861     tt = TOK_REGEXP;
1862     break;
1863     }
1864    
1865     tp->t_op = JSOP_DIV;
1866     tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1867     break;
1868    
1869     case '%':
1870     tp->t_op = JSOP_MOD;
1871     tt = MatchChar(ts, '=') ? TOK_ASSIGN : TOK_DIVOP;
1872     break;
1873    
1874     case '~':
1875     tp->t_op = JSOP_BITNOT;
1876     tt = TOK_UNARYOP;
1877     break;
1878    
1879     case '+':
1880     if (MatchChar(ts, '=')) {
1881     tp->t_op = JSOP_ADD;
1882     tt = TOK_ASSIGN;
1883     } else if (MatchChar(ts, c)) {
1884     tt = TOK_INC;
1885     } else {
1886     tp->t_op = JSOP_POS;
1887     tt = TOK_PLUS;
1888     }
1889     break;
1890    
1891     case '-':
1892     if (MatchChar(ts, '=')) {
1893     tp->t_op = JSOP_SUB;
1894     tt = TOK_ASSIGN;
1895     } else if (MatchChar(ts, c)) {
         /*
          * "-->" with no earlier token on the line (TSF_DIRTYLINE clear) is
          * treated as a comment running to end of line and clears
          * TSF_IN_HTML_COMMENT.
          */
1896     if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
1897     ts->flags &= ~TSF_IN_HTML_COMMENT;
1898     goto skipline;
1899     }
1900     tt = TOK_DEC;
1901     } else {
1902     tp->t_op = JSOP_NEG;
1903     tt = TOK_MINUS;
1904     }
1905     break;
1906    
1907     #if JS_HAS_SHARP_VARS
         /*
          * Sharp variable: '#' digits followed by '=' (TOK_DEFSHARP) or '#'
          * (TOK_USESHARP). The number, which must stay below UINT16_LIMIT, is
          * stored in t_dval.
          */
1908     case '#':
1909     {
1910     uint32 n;
1911    
1912     c = GetChar(ts);
1913     if (!JS7_ISDEC(c)) {
1914     UngetChar(ts, c);
1915     goto badchar;
1916     }
1917     n = (uint32)JS7_UNDEC(c);
1918     for (;;) {
1919     c = GetChar(ts);
1920     if (!JS7_ISDEC(c))
1921     break;
1922     n = 10 * n + JS7_UNDEC(c);
1923     if (n >= UINT16_LIMIT) {
1924     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1925     JSMSG_SHARPVAR_TOO_BIG);
1926     goto error;
1927     }
1928     }
1929     tp->t_dval = (jsdouble) n;
1930     if (JS_HAS_STRICT_OPTION(cx) &&
1931     (c == '=' || c == '#')) {
1932     char buf[20];
1933     JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1934     if (!js_ReportCompileErrorNumber(cx, ts, NULL,
1935     JSREPORT_WARNING |
1936     JSREPORT_STRICT,
1937     JSMSG_DEPRECATED_USAGE,
1938     buf)) {
1939     goto error;
1940     }
1941     }
1942     if (c == '=')
1943     tt = TOK_DEFSHARP;
1944     else if (c == '#')
1945     tt = TOK_USESHARP;
1946     else
1947     goto badchar;
1948     break;
1949     }
1950     #endif /* JS_HAS_SHARP_VARS */
1951    
1952     #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1953     badchar:
1954     #endif
1955    
1956     default:
1957     js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
1958     JSMSG_ILLEGAL_CHARACTER);
1959     goto error;
1960     }
1961    
         /*
          * Common exit for every token except TOK_EOL: mark the line dirty,
          * i.e. a token has been seen on it.
          */
1962     out:
1963     JS_ASSERT(tt != TOK_EOL);
1964     ts->flags |= TSF_DIRTYLINE;
1965    
         /*
          * Shared tail: a failed tokenbuf turns any token into TOK_ERROR; then
          * the end index and the token type are recorded.
          */
1966     eol_out:
1967     if (!STRING_BUFFER_OK(&ts->tokenbuf))
1968     tt = TOK_ERROR;
1969     JS_ASSERT(tt < TOK_LIMIT);
1970     tp->pos.end.index = ts->linepos +
1971     PTRDIFF(ts->linebuf.ptr, ts->linebuf.base, jschar) -
1972     ts->ungetpos;
1973     tp->type = tt;
1974     return tt;
1975    
         /* Error exit: make the failure sticky on the stream, then finish via 'out'. */
1976     error:
1977     tt = TOK_ERROR;
1978     ts->flags |= TSF_ERROR;
1979     goto out;
1980    
         /*
          * NOTE(review): TOKENBUF_END is #define'd at the top of this function
          * but has no matching #undef in the list below.
          */
1981     #undef INIT_TOKENBUF
1982     #undef TOKENBUF_LENGTH
1983     #undef TOKENBUF_OK
1984     #undef TOKENBUF_TO_ATOM
1985     #undef ADD_TO_TOKENBUF
1986     #undef TOKENBUF_BASE
1987     #undef TOKENBUF_CHAR
1988     #undef TRIM_TOKENBUF
1989     #undef NUL_TERM_TOKENBUF
1990     }
1991    
1992     void
1993     js_UngetToken(JSTokenStream *ts)
1994     {
1995     JS_ASSERT(ts->lookahead < NTOKENS_MASK);
1996     ts->lookahead++;
1997     ts->cursor = (ts->cursor - 1) & NTOKENS_MASK;
1998     }
1999    
2000     JSBool
2001     js_MatchToken(JSContext *cx, JSTokenStream *ts, JSTokenType tt)
2002     {
2003     if (js_GetToken(cx, ts) == tt)
2004     return JS_TRUE;
2005     js_UngetToken(ts);
2006     return JS_FALSE;
2007     }

  ViewVC Help
Powered by ViewVC 1.1.24