/[jscoverage]/trunk/highlight.cpp
ViewVC logotype

Annotation of /trunk/highlight.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 179 - (hide annotations)
Sun Sep 21 18:35:21 2008 UTC (10 years, 7 months ago) by siliconforks
Original Path: trunk/highlight.c
File MIME type: text/plain
File size: 13832 byte(s)
Do source code highlighting during instrumentation.

1 siliconforks 179 /*
2     highlight.c - JavaScript syntax highlighting
3     Copyright (C) 2008 siliconforks.com
4    
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14    
15     You should have received a copy of the GNU General Public License along
16     with this program; if not, write to the Free Software Foundation, Inc.,
17     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18     */
19    
20     #include <config.h>
21    
22     #include "highlight.h"
23    
24     #include <stdlib.h>
25     #include <string.h>
26    
27     #include <jslock.h>
28     #include <jsscan.h>
29    
30     #include "util.h"
31    
/*
 * Highlighting category assigned to every character of the script.
 * CLASS_NONE is the zero value and means "no <span> is emitted"; the other
 * values are translated to CSS class names by get_class_name.
 */
enum Class {
  CLASS_NONE        = 0,
  CLASS_COMMENT     = 1,
  CLASS_REGEXP      = 2,
  CLASS_NUMBER      = 3,
  CLASS_STRING      = 4,
  CLASS_SPECIALCHAR = 5,  /* escape sequences inside string literals */
  CLASS_KEYWORD     = 6,
  CLASS_TYPE        = 7,
  CLASS_SYMBOL      = 8,  /* operators and punctuation */
  CLASS_CBRACKET    = 9   /* curly brackets */
};
44    
45     static const char * get_class_name(enum Class class) {
46     switch (class) {
47     case CLASS_NONE:
48     abort();
49     break;
50     case CLASS_COMMENT:
51     return "c";
52     break;
53     case CLASS_REGEXP:
54     return "s";
55     break;
56     case CLASS_NUMBER:
57     return "s";
58     break;
59     case CLASS_STRING:
60     return "s";
61     break;
62     case CLASS_SPECIALCHAR:
63     return "t";
64     break;
65     case CLASS_KEYWORD:
66     return "k";
67     break;
68     case CLASS_TYPE:
69     return "k";
70     break;
71     case CLASS_SYMBOL:
72     return "k";
73     break;
74     case CLASS_CBRACKET:
75     return "k";
76     break;
77     default:
78     abort();
79     break;
80     }
81     }
82    
83     static enum Class ** classes = NULL;
84     static jschar ** lines = NULL;
85    
86     static uint16_t num_characters_in_line(jschar * line) {
87     uint16_t result = 0;
88     while (line[result] != '\0') {
89     result++;
90     }
91     return result;
92     }
93    
94     static void mark_token_chars(enum Class class, uint16_t start_line, uint16_t start_column, uint16_t end_line, uint16_t end_column) {
95     for (uint16_t i = start_line; i <= end_line; i++) {
96     uint16_t c1 = i == start_line? start_column: 0;
97     uint16_t c2 = i == end_line? end_column: num_characters_in_line(lines[i - 1]);
98     for (uint16_t j = c1; j < c2; j++) {
99     classes[i - 1][j] = class;
100     }
101     }
102     }
103    
104     static void mark_nontoken_chars(uint16_t start_line, uint16_t start_column, uint16_t end_line, uint16_t end_column) {
105     enum State {
106     STATE_NORMAL,
107     STATE_LINE_COMMENT,
108     STATE_MULTILINE_COMMENT
109     };
110    
111     enum State state = STATE_NORMAL;
112     for (uint16_t i = start_line; i <= end_line; i++) {
113     uint16_t c1 = i == start_line? start_column: 0;
114     uint16_t c2 = i == end_line? end_column: num_characters_in_line(lines[i - 1]);
115     for (uint16_t j = c1; j < c2; j++) {
116     jschar c = lines[i - 1][j];
117     switch (state) {
118     case STATE_NORMAL:
119     if (c == '/' && j + 1 < c2 && lines[i - 1][j + 1] == '/') {
120     state = STATE_LINE_COMMENT;
121     classes[i - 1][j] = CLASS_COMMENT;
122     }
123     else if (c == '/' && j + 1 < c2 && lines[i - 1][j + 1] == '*') {
124     state = STATE_MULTILINE_COMMENT;
125     classes[i - 1][j] = CLASS_COMMENT;
126     j++;
127     classes[i - 1][j] = CLASS_COMMENT;
128     }
129     else {
130     classes[i - 1][j] = CLASS_NONE;
131     }
132     break;
133     case STATE_LINE_COMMENT:
134     if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
135     state = STATE_NORMAL;
136     classes[i - 1][j] = CLASS_NONE;
137     }
138     else {
139     classes[i - 1][j] = CLASS_COMMENT;
140     }
141     break;
142     case STATE_MULTILINE_COMMENT:
143     classes[i - 1][j] = CLASS_COMMENT;
144     if (c == '*' && j + 1 < c2 && lines[i - 1][j + 1] == '/') {
145     j++;
146     classes[i - 1][j] = CLASS_COMMENT;
147     state = STATE_NORMAL;
148     }
149     break;
150     }
151     }
152     /* end of the line */
153     if (state == STATE_LINE_COMMENT) {
154     state = STATE_NORMAL;
155     }
156     }
157     }
158    
159     void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
160     /* count the lines - see GetChar in jsscan.c */
161     size_t i = 0;
162     uint16_t num_lines = 0;
163     while (i < num_characters) {
164     if (num_lines == UINT16_MAX) {
165     fatal("%s: script has more than 65535 lines", id);
166     }
167     num_lines++;
168     jschar c;
169     while (i < num_characters) {
170     c = characters[i];
171     if (c == '\0') {
172     fatal("%s: script contains NULL character", id);
173     }
174     if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
175     break;
176     }
177     i++;
178     }
179     if (i < num_characters) {
180     i++;
181     if (c == '\r' && i < num_characters && characters[i] == '\n') {
182     i++;
183     }
184     }
185     }
186    
187     lines = xnew(jschar *, num_lines);
188     classes = xnew(enum Class *, num_lines);
189    
190     uint16_t line_num = 0;
191     i = 0;
192     while (i < num_characters) {
193     size_t line_start = i;
194     jschar c;
195     while (i < num_characters) {
196     c = characters[i];
197     if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
198     break;
199     }
200     i++;
201     }
202     size_t line_end = i;
203     if (i < num_characters) {
204     i++;
205     if (c == '\r' && i < num_characters && characters[i] == '\n') {
206     i++;
207     }
208     }
209     size_t line_length = line_end - line_start;
210     if (line_length >= UINT16_MAX) {
211     fatal("%s: script has line with 65535 characters or more", id);
212     }
213     jschar * line = xnew(jschar, line_length + 1);
214     memcpy(line, characters + line_start, sizeof(jschar) * line_length);
215     line[line_length] = '\0';
216     lines[line_num] = line;
217     classes[line_num] = xnew(enum Class, line_length);
218     line_num++;
219     }
220    
221     /* tokenize the JavaScript */
222     JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);
223     if (token_stream == NULL) {
224     fatal("cannot create token stream from JavaScript file %s", id);
225     }
226    
227     /* see js_ParseTokenStream in jsparse.c */
228     JSObject * chain = NULL;
229     JSContext * cx = context;
230     JSStackFrame *fp, frame;
231    
232     /*
233     * Push a compiler frame if we have no frames, or if the top frame is a
234     * lightweight function activation, or if its scope chain doesn't match
235     * the one passed to us.
236     */
237     fp = cx->fp;
238     if (!fp || !fp->varobj || fp->scopeChain != chain) {
239     memset(&frame, 0, sizeof frame);
240     frame.varobj = frame.scopeChain = chain;
241     if (cx->options & JSOPTION_VAROBJFIX) {
242     while ((chain = JS_GetParent(cx, chain)) != NULL)
243     frame.varobj = chain;
244     }
245     frame.down = fp;
246     if (fp)
247     frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);
248     cx->fp = &frame;
249     }
250    
251     /*
252     * Protect atoms from being collected by a GC activation, which might
253     * - nest on this thread due to out of memory (the so-called "last ditch"
254     * GC attempted within js_NewGCThing), or
255     * - run for any reason on another thread if this thread is suspended on
256     * an object lock before it finishes generating bytecode into a script
257     * protected from the GC by a root or a stack frame reference.
258     */
259     JS_KEEP_ATOMS(cx->runtime);
260    
261     line_num = 1;
262     uint16_t column_num = 0;
263     for (;;) {
264     JSTokenType tt = js_GetToken(context, token_stream);
265    
266     if (tt == TOK_ERROR) {
267     fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
268     }
269    
270     if (tt == TOK_EOF) {
271     /* it seems t.pos is invalid for TOK_EOF??? */
272     /* mark the remaining chars */
273     if (num_lines == 0) {
274     break;
275     }
276     uint16_t end_line = num_lines;
277     uint16_t end_column = num_characters_in_line(lines[num_lines - 1]);
278     mark_nontoken_chars(line_num, column_num, end_line, end_column);
279     break;
280     }
281    
282     /* mark the chars before the token */
283     JSToken t = CURRENT_TOKEN(token_stream);
284     mark_nontoken_chars(line_num, column_num, t.pos.begin.lineno, t.pos.begin.index);
285    
286     /* mark the token */
287     enum Class class;
288     switch (tt) {
289     case TOK_ERROR:
290     case TOK_EOF:
291     abort();
292     case TOK_EOL:
293     class = CLASS_NONE;
294     token_stream->flags |= TSF_OPERAND;
295     break;
296     case TOK_SEMI:
297     case TOK_COMMA:
298     case TOK_ASSIGN:
299     case TOK_HOOK:
300     case TOK_COLON:
301     case TOK_OR:
302     case TOK_AND:
303     case TOK_BITOR:
304     case TOK_BITXOR:
305     case TOK_BITAND:
306     case TOK_EQOP:
307     case TOK_RELOP:
308     case TOK_SHOP:
309     case TOK_PLUS:
310     case TOK_MINUS:
311     case TOK_STAR:
312     case TOK_DIVOP:
313     class = CLASS_SYMBOL;
314     token_stream->flags |= TSF_OPERAND;
315     break;
316     case TOK_UNARYOP:
317     switch (t.t_op) {
318     case JSOP_NEG:
319     case JSOP_POS:
320     case JSOP_NOT:
321     case JSOP_BITNOT:
322     class = CLASS_SYMBOL;
323     token_stream->flags |= TSF_OPERAND;
324     break;
325     case JSOP_TYPEOF:
326     class = CLASS_KEYWORD;
327     token_stream->flags |= TSF_OPERAND;
328     break;
329     case JSOP_VOID:
330     class = CLASS_TYPE;
331     token_stream->flags |= TSF_OPERAND;
332     break;
333     default:
334     abort();
335     }
336     break;
337     case TOK_INC:
338     case TOK_DEC:
339     case TOK_DOT:
340     case TOK_LB:
341     class = CLASS_SYMBOL;
342     token_stream->flags |= TSF_OPERAND;
343     break;
344     case TOK_RB:
345     class = CLASS_SYMBOL;
346     token_stream->flags &= ~TSF_OPERAND;
347     break;
348     case TOK_LC:
349     class = CLASS_CBRACKET;
350     token_stream->flags |= TSF_OPERAND;
351     break;
352     case TOK_RC:
353     class = CLASS_CBRACKET;
354     token_stream->flags &= ~TSF_OPERAND;
355     break;
356     case TOK_LP:
357     class = CLASS_SYMBOL;
358     token_stream->flags |= TSF_OPERAND;
359     break;
360     case TOK_RP:
361     class = CLASS_SYMBOL;
362     token_stream->flags &= ~TSF_OPERAND;
363     break;
364     case TOK_NAME:
365     class = CLASS_NONE;
366     token_stream->flags &= ~TSF_OPERAND;
367     if (js_PeekToken(context, token_stream) == TOK_LP) {
368     /* function */
369     class = CLASS_NONE;
370     }
371     break;
372     case TOK_NUMBER:
373     class = CLASS_NUMBER;
374     token_stream->flags &= ~TSF_OPERAND;
375     break;
376     case TOK_STRING:
377     class = CLASS_STRING;
378     token_stream->flags &= ~TSF_OPERAND;
379     break;
380     case TOK_OBJECT:
381     class = CLASS_REGEXP;
382     token_stream->flags &= ~TSF_OPERAND;
383     break;
384     case TOK_PRIMARY:
385     switch (t.t_op) {
386     case JSOP_TRUE:
387     case JSOP_FALSE:
388     case JSOP_NULL:
389     case JSOP_THIS:
390     class = CLASS_KEYWORD;
391     token_stream->flags &= ~TSF_OPERAND;
392     break;
393     default:
394     abort();
395     }
396     break;
397     case TOK_FUNCTION:
398     class = CLASS_KEYWORD;
399     token_stream->flags |= TSF_OPERAND;
400     break;
401     case TOK_EXPORT:
402     case TOK_IMPORT:
403     abort();
404     break;
405     case TOK_IF:
406     case TOK_ELSE:
407     case TOK_SWITCH:
408     case TOK_CASE:
409     case TOK_DEFAULT:
410     case TOK_WHILE:
411     case TOK_DO:
412     case TOK_FOR:
413     case TOK_BREAK:
414     case TOK_CONTINUE:
415     case TOK_IN:
416     case TOK_VAR:
417     case TOK_WITH:
418     case TOK_RETURN:
419     case TOK_NEW:
420     case TOK_DELETE:
421     token_stream->flags |= TSF_OPERAND;
422     class = CLASS_KEYWORD;
423     break;
424     case TOK_DEFSHARP:
425     case TOK_USESHARP:
426     abort();
427     break;
428     case TOK_TRY:
429     case TOK_CATCH:
430     case TOK_FINALLY:
431     case TOK_THROW:
432     case TOK_INSTANCEOF:
433     case TOK_DEBUGGER:
434     token_stream->flags |= TSF_OPERAND;
435     class = CLASS_KEYWORD;
436     break;
437     case TOK_XMLSTAGO:
438     case TOK_XMLETAGO:
439     case TOK_XMLPTAGC:
440     case TOK_XMLTAGC:
441     case TOK_XMLNAME:
442     case TOK_XMLATTR:
443     case TOK_XMLSPACE:
444     case TOK_XMLTEXT:
445     case TOK_XMLCOMMENT:
446     case TOK_XMLCDATA:
447     case TOK_XMLPI:
448     case TOK_AT:
449     case TOK_DBLCOLON:
450     case TOK_ANYNAME:
451     case TOK_DBLDOT:
452     case TOK_FILTER:
453     case TOK_XMLELEM:
454     case TOK_XMLLIST:
455     case TOK_RESERVED:
456     case TOK_LIMIT:
457     abort();
458     break;
459     default:
460     abort();
461     break;
462     }
463     mark_token_chars(class, t.pos.begin.lineno, t.pos.begin.index, t.pos.end.lineno, t.pos.end.index);
464     if (tt == TOK_STRING) {
465     for (uint16_t i = t.pos.begin.index + 1; i < t.pos.end.index - 1; i++) {
466     jschar c = lines[t.pos.begin.lineno - 1][i];
467     if (c == '\\') {
468     mark_token_chars(CLASS_SPECIALCHAR, t.pos.begin.lineno, i, t.pos.begin.lineno, i + 2);
469     i++;
470     }
471     }
472     }
473    
474     line_num = t.pos.end.lineno;
475     column_num = t.pos.end.index;
476     }
477    
478     /* output the highlighted code */
479     enum Class class = CLASS_NONE;
480     for (uint16_t i = 0; i < num_lines; i++) {
481     uint16_t length = num_characters_in_line(lines[i]);
482     for (uint16_t j = 0; j < length; j++) {
483     jschar c = lines[i][j];
484     if (classes[i][j] != class) {
485     if (class != CLASS_NONE) {
486     Stream_write_string(output, "</span>");
487     }
488     class = classes[i][j];
489     if (class != CLASS_NONE) {
490     Stream_printf(output, "<span class=\"%s\">", get_class_name(class));
491     }
492     }
493     if (c == '&') {
494     Stream_write_string(output, "&amp;");
495     }
496     else if (c == '<') {
497     Stream_write_string(output, "&lt;");
498     }
499     else if (c == '>') {
500     Stream_write_string(output, "&gt;");
501     }
502     else if (c == '\t' || (32 <= c && c <= 126)) {
503     Stream_write_char(output, c);
504     }
505     else {
506     Stream_printf(output, "&#%d;", c);
507     }
508     }
509     if (class != CLASS_NONE) {
510     Stream_write_string(output, "</span>");
511     class = CLASS_NONE;
512     }
513     Stream_write_char(output, '\n');
514     }
515    
516     for (uint16_t i = 0; i < num_lines; i++) {
517     free(lines[i]);
518     free(classes[i]);
519     }
520     free(lines);
521     free(classes);
522    
523     /* cleanup */
524     JS_UNKEEP_ATOMS(cx->runtime);
525     context->fp = fp;
526     }

  ViewVC Help
Powered by ViewVC 1.1.24