/[jscoverage]/trunk/highlight.cpp
ViewVC logotype

Annotation of /trunk/highlight.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 213 - (hide annotations)
Fri Oct 3 02:25:47 2008 UTC (10 years, 10 months ago) by siliconforks
Original Path: trunk/highlight.c
File MIME type: text/plain
File size: 12690 byte(s)
Rewrite for improved performance.
1 siliconforks 179 /*
2     highlight.c - JavaScript syntax highlighting
3     Copyright (C) 2008 siliconforks.com
4    
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14    
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18     */
19    
20     #include <config.h>
21    
22     #include "highlight.h"
23    
24 siliconforks 213 #include <assert.h>
25 siliconforks 179 #include <stdlib.h>
26     #include <string.h>
27    
28     #include <jslock.h>
29     #include <jsscan.h>
30    
31     #include "util.h"
32    
/*
 * Highlighting categories.  Each run of source characters is assigned one
 * of these; get_class_name() maps a category to the CSS class used for it.
 */
enum Class {
  CLASS_NONE,         /* plain text: no <span> is emitted for it */
  CLASS_COMMENT,      /* line and block comments */
  CLASS_REGEXP,       /* regular expression literals */
  CLASS_NUMBER,       /* numeric literals */
  CLASS_STRING,       /* string literals */
  CLASS_SPECIALCHAR,  /* backslash escapes inside string literals */
  CLASS_KEYWORD,      /* keywords and keyword-like literals */
  CLASS_TYPE,         /* the `void' operator */
  CLASS_SYMBOL,       /* operators and punctuation */
  CLASS_CBRACKET      /* curly braces */
};

/*
 * Return the CSS class name used for a highlighting category.
 *
 * Several categories deliberately share one name: regexps, numbers and
 * strings all map to "s"; keywords, types, symbols and curly brackets all
 * map to "k".
 *
 * CLASS_NONE has no class name (plain text is never wrapped in a span), so
 * passing it, or any value outside the enumeration, aborts the program.
 *
 * The parameter is deliberately not called `class': that identifier is a
 * reserved word in C++.
 */
static const char * get_class_name(enum Class highlight_class) {
  switch (highlight_class) {
  case CLASS_COMMENT:
    return "c";
  case CLASS_REGEXP:
  case CLASS_NUMBER:
  case CLASS_STRING:
    return "s";
  case CLASS_SPECIALCHAR:
    return "t";
  case CLASS_KEYWORD:
  case CLASS_TYPE:
  case CLASS_SYMBOL:
  case CLASS_CBRACKET:
    return "k";
  case CLASS_NONE:
  default:
    break;
  }

  /* no class name exists for this value: programming error */
  abort();
  return NULL;  /* not reached; gives every path of a non-void function a return */
}
83    
84 siliconforks 213 static const char * g_id;
85     static const jschar * g_characters;
86     static size_t g_num_characters;
87     static Stream * g_output;
88     static size_t character_offset;
89     static uint16_t line_num;
90     static uint16_t column_num;
91     static enum Class current_class;
92 siliconforks 179
93 siliconforks 213 static void output_character(jschar c, enum Class class) {
94     if (class != current_class) {
95     /* output the end tag */
96     if (current_class != CLASS_NONE) {
97     Stream_write_string(g_output, "</span>");
98     }
99    
100     current_class = class;
101    
102     /* output the start tag */
103     if (current_class != CLASS_NONE) {
104     Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));
105     }
106 siliconforks 179 }
107    
108 siliconforks 213 switch (c) {
109     case '&':
110     Stream_write_string(g_output, "&amp;");
111     break;
112     case '<':
113     Stream_write_string(g_output, "&lt;");
114     break;
115     case '>':
116     Stream_write_string(g_output, "&gt;");
117     break;
118     case '\n':
119     Stream_write_char(g_output, c);
120     break;
121     default:
122     if (c == '\t' || (32 <= c && c <= 126)) {
123     Stream_write_char(g_output, c);
124 siliconforks 179 }
125 siliconforks 213 else {
126     Stream_printf(g_output, "&#%d;", c);
127     }
128     break;
129 siliconforks 179 }
130     }
131    
132 siliconforks 213 static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {
133 siliconforks 179 enum State {
134     STATE_NORMAL,
135     STATE_LINE_COMMENT,
136     STATE_MULTILINE_COMMENT
137     };
138    
139     enum State state = STATE_NORMAL;
140 siliconforks 213 while (character_offset < g_num_characters) {
141     if (end_line != 0 && line_num > end_line) {
142     break;
143 siliconforks 179 }
144 siliconforks 213 else if (line_num == end_line && column_num >= end_column) {
145     break;
146 siliconforks 179 }
147    
148 siliconforks 213 jschar c = g_characters[character_offset];
149     if (c == '\0') {
150     fatal("%s: script contains NULL character", g_id);
151 siliconforks 179 }
152 siliconforks 213
153     switch (state) {
154     case STATE_NORMAL:
155     if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
156     state = STATE_LINE_COMMENT;
157 siliconforks 179 }
158 siliconforks 213 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
159     state = STATE_MULTILINE_COMMENT;
160     output_character('/', CLASS_COMMENT);
161     output_character('*', CLASS_COMMENT);
162     character_offset += 2;
163     if (column_num >= UINT16_MAX - 1) {
164     fatal("%s: script contains line with more than 65,535 characters", g_id);
165     }
166     column_num += 2;
167     continue;
168     }
169     break;
170     case STATE_LINE_COMMENT:
171 siliconforks 179 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
172 siliconforks 213 state = STATE_NORMAL;
173 siliconforks 179 }
174 siliconforks 213 break;
175     case STATE_MULTILINE_COMMENT:
176     if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
177     output_character('*', CLASS_COMMENT);
178     output_character('/', CLASS_COMMENT);
179     state = STATE_NORMAL;
180     character_offset += 2;
181     if (column_num >= UINT16_MAX - 1) {
182     fatal("%s: script contains line with more than 65,535 characters", g_id);
183     }
184     column_num += 2;
185     continue;
186 siliconforks 179 }
187 siliconforks 213 break;
188 siliconforks 179 }
189    
190 siliconforks 213 character_offset++;
191     if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
192     if (line_num == UINT16_MAX) {
193     fatal("%s: script contains more than 65,535 lines", g_id);
194 siliconforks 179 }
195 siliconforks 213 line_num++;
196     column_num = 0;
197     if (c == '\r' && character_offset < g_num_characters && g_characters[character_offset] == '\n') {
198     character_offset++;
199     }
200     output_character('\n', CLASS_NONE);
201 siliconforks 179 }
202 siliconforks 213 else {
203     if (column_num == UINT16_MAX) {
204     fatal("%s: script contains line with more than 65,535 characters", g_id);
205 siliconforks 179 }
206 siliconforks 213 column_num++;
207     if (state == STATE_NORMAL) {
208     output_character(c, CLASS_NONE);
209     }
210     else {
211     output_character(c, CLASS_COMMENT);
212     }
213 siliconforks 179 }
214     }
215 siliconforks 213 }
216 siliconforks 179
217 siliconforks 213 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
218     g_id = id;
219     g_characters = characters;
220     g_num_characters = num_characters;
221     g_output = output;
222    
223     character_offset = 0;
224     line_num = 1;
225     column_num = 0;
226     current_class = CLASS_NONE;
227    
228 siliconforks 179 /* tokenize the JavaScript */
229     JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);
230     if (token_stream == NULL) {
231     fatal("cannot create token stream from JavaScript file %s", id);
232     }
233    
234     /* see js_ParseTokenStream in jsparse.c */
235     JSObject * chain = NULL;
236     JSContext * cx = context;
237     JSStackFrame *fp, frame;
238    
239     /*
240     * Push a compiler frame if we have no frames, or if the top frame is a
241     * lightweight function activation, or if its scope chain doesn't match
242     * the one passed to us.
243     */
244     fp = cx->fp;
245     if (!fp || !fp->varobj || fp->scopeChain != chain) {
246     memset(&frame, 0, sizeof frame);
247     frame.varobj = frame.scopeChain = chain;
248     if (cx->options & JSOPTION_VAROBJFIX) {
249     while ((chain = JS_GetParent(cx, chain)) != NULL)
250     frame.varobj = chain;
251     }
252     frame.down = fp;
253     if (fp)
254     frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);
255     cx->fp = &frame;
256     }
257    
258     /*
259     * Protect atoms from being collected by a GC activation, which might
260     * - nest on this thread due to out of memory (the so-called "last ditch"
261     * GC attempted within js_NewGCThing), or
262     * - run for any reason on another thread if this thread is suspended on
263     * an object lock before it finishes generating bytecode into a script
264     * protected from the GC by a root or a stack frame reference.
265     */
266     JS_KEEP_ATOMS(cx->runtime);
267    
268     for (;;) {
269     JSTokenType tt = js_GetToken(context, token_stream);
270    
271     if (tt == TOK_ERROR) {
272     fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
273     }
274    
275     if (tt == TOK_EOF) {
276 siliconforks 213 mark_nontoken_chars(0, 0);
277 siliconforks 179 break;
278     }
279    
280     /* mark the chars before the token */
281     JSToken t = CURRENT_TOKEN(token_stream);
282 siliconforks 213 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
283 siliconforks 179
284     /* mark the token */
285     enum Class class;
286     switch (tt) {
287     case TOK_ERROR:
288     case TOK_EOF:
289     abort();
290     case TOK_EOL:
291     class = CLASS_NONE;
292     token_stream->flags |= TSF_OPERAND;
293     break;
294     case TOK_SEMI:
295     case TOK_COMMA:
296     case TOK_ASSIGN:
297     case TOK_HOOK:
298     case TOK_COLON:
299     case TOK_OR:
300     case TOK_AND:
301     case TOK_BITOR:
302     case TOK_BITXOR:
303     case TOK_BITAND:
304     case TOK_EQOP:
305     case TOK_RELOP:
306     case TOK_SHOP:
307     case TOK_PLUS:
308     case TOK_MINUS:
309     case TOK_STAR:
310     case TOK_DIVOP:
311     class = CLASS_SYMBOL;
312     token_stream->flags |= TSF_OPERAND;
313     break;
314     case TOK_UNARYOP:
315     switch (t.t_op) {
316     case JSOP_NEG:
317     case JSOP_POS:
318     case JSOP_NOT:
319     case JSOP_BITNOT:
320     class = CLASS_SYMBOL;
321     token_stream->flags |= TSF_OPERAND;
322     break;
323     case JSOP_TYPEOF:
324     class = CLASS_KEYWORD;
325     token_stream->flags |= TSF_OPERAND;
326     break;
327     case JSOP_VOID:
328     class = CLASS_TYPE;
329     token_stream->flags |= TSF_OPERAND;
330     break;
331     default:
332     abort();
333     }
334     break;
335     case TOK_INC:
336     case TOK_DEC:
337     case TOK_DOT:
338     case TOK_LB:
339     class = CLASS_SYMBOL;
340     token_stream->flags |= TSF_OPERAND;
341     break;
342     case TOK_RB:
343     class = CLASS_SYMBOL;
344     token_stream->flags &= ~TSF_OPERAND;
345     break;
346     case TOK_LC:
347     class = CLASS_CBRACKET;
348     token_stream->flags |= TSF_OPERAND;
349     break;
350     case TOK_RC:
351     class = CLASS_CBRACKET;
352     token_stream->flags &= ~TSF_OPERAND;
353     break;
354     case TOK_LP:
355     class = CLASS_SYMBOL;
356     token_stream->flags |= TSF_OPERAND;
357     break;
358     case TOK_RP:
359     class = CLASS_SYMBOL;
360     token_stream->flags &= ~TSF_OPERAND;
361     break;
362     case TOK_NAME:
363     class = CLASS_NONE;
364     token_stream->flags &= ~TSF_OPERAND;
365     if (js_PeekToken(context, token_stream) == TOK_LP) {
366     /* function */
367     class = CLASS_NONE;
368     }
369     break;
370     case TOK_NUMBER:
371     class = CLASS_NUMBER;
372     token_stream->flags &= ~TSF_OPERAND;
373     break;
374     case TOK_STRING:
375     class = CLASS_STRING;
376     token_stream->flags &= ~TSF_OPERAND;
377     break;
378     case TOK_OBJECT:
379     class = CLASS_REGEXP;
380     token_stream->flags &= ~TSF_OPERAND;
381     break;
382     case TOK_PRIMARY:
383     switch (t.t_op) {
384     case JSOP_TRUE:
385     case JSOP_FALSE:
386     case JSOP_NULL:
387     case JSOP_THIS:
388     class = CLASS_KEYWORD;
389     token_stream->flags &= ~TSF_OPERAND;
390     break;
391     default:
392     abort();
393     }
394     break;
395     case TOK_FUNCTION:
396     class = CLASS_KEYWORD;
397     token_stream->flags |= TSF_OPERAND;
398     break;
399     case TOK_EXPORT:
400     case TOK_IMPORT:
401     abort();
402     break;
403     case TOK_IF:
404     case TOK_ELSE:
405     case TOK_SWITCH:
406     case TOK_CASE:
407     case TOK_DEFAULT:
408     case TOK_WHILE:
409     case TOK_DO:
410     case TOK_FOR:
411     case TOK_BREAK:
412     case TOK_CONTINUE:
413     case TOK_IN:
414     case TOK_VAR:
415     case TOK_WITH:
416     case TOK_RETURN:
417     case TOK_NEW:
418     case TOK_DELETE:
419     token_stream->flags |= TSF_OPERAND;
420     class = CLASS_KEYWORD;
421     break;
422     case TOK_DEFSHARP:
423     case TOK_USESHARP:
424     abort();
425     break;
426     case TOK_TRY:
427     case TOK_CATCH:
428     case TOK_FINALLY:
429     case TOK_THROW:
430     case TOK_INSTANCEOF:
431     case TOK_DEBUGGER:
432     token_stream->flags |= TSF_OPERAND;
433     class = CLASS_KEYWORD;
434     break;
435     case TOK_XMLSTAGO:
436     case TOK_XMLETAGO:
437     case TOK_XMLPTAGC:
438     case TOK_XMLTAGC:
439     case TOK_XMLNAME:
440     case TOK_XMLATTR:
441     case TOK_XMLSPACE:
442     case TOK_XMLTEXT:
443     case TOK_XMLCOMMENT:
444     case TOK_XMLCDATA:
445     case TOK_XMLPI:
446     case TOK_AT:
447     case TOK_DBLCOLON:
448     case TOK_ANYNAME:
449     case TOK_DBLDOT:
450     case TOK_FILTER:
451     case TOK_XMLELEM:
452     case TOK_XMLLIST:
453     case TOK_RESERVED:
454     case TOK_LIMIT:
455     abort();
456     break;
457     default:
458     abort();
459     break;
460     }
461 siliconforks 213
462     assert(t.pos.begin.lineno == t.pos.end.lineno);
463     if (t.pos.begin.index > t.pos.end.index) {
464     fatal("%s: script contains line with more than 65,535 characters", id);
465     }
466     for (uint16_t i = t.pos.begin.index; i < t.pos.end.index; i++) {
467     assert(character_offset < num_characters);
468     jschar c = characters[character_offset];
469     if (tt == TOK_STRING && c == '\\') {
470     output_character(c, CLASS_SPECIALCHAR);
471     character_offset++;
472     i++;
473     assert(character_offset < num_characters);
474     c = characters[character_offset];
475     output_character(c, CLASS_SPECIALCHAR);
476     character_offset++;
477 siliconforks 179 }
478 siliconforks 213 else {
479     output_character(c, class);
480     character_offset++;
481     }
482 siliconforks 179 }
483    
484     line_num = t.pos.end.lineno;
485     column_num = t.pos.end.index;
486     }
487    
488 siliconforks 213 if (current_class != CLASS_NONE) {
489     output_character('\n', CLASS_NONE);
490 siliconforks 179 }
491    
492     /* cleanup */
493     JS_UNKEEP_ATOMS(cx->runtime);
494     context->fp = fp;
495     }

  ViewVC Help
Powered by ViewVC 1.1.24