/[jscoverage]/trunk/highlight.cpp
ViewVC logotype

Contents of /trunk/highlight.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 292 - (show annotations)
Sun Oct 12 16:41:36 2008 UTC (10 years, 1 month ago) by siliconforks
Original Path: trunk/highlight.c
File MIME type: text/plain
File size: 12796 byte(s)
Fix bug setting TSF_OPERAND incorrectly with postfix increment/decrement.
1 /*
2 highlight.c - JavaScript syntax highlighting
3 Copyright (C) 2008 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include <jslock.h>
29 #include <jsscan.h>
30
31 #include "util.h"
32
/*
Highlighting categories.  Every character written to the output belongs to
exactly one of these; get_class_name maps a category to the CSS class used
for its <span> element.
*/
enum Class {
  CLASS_NONE = 0,         /* plain text: written without an enclosing <span> */
  CLASS_COMMENT = 1,      /* line and block comments */
  CLASS_REGEXP = 2,       /* regular expression literals (TOK_OBJECT) */
  CLASS_NUMBER = 3,       /* numeric literals */
  CLASS_STRING = 4,       /* string literals */
  CLASS_SPECIALCHAR = 5,  /* backslash escapes inside string literals */
  CLASS_KEYWORD = 6,      /* keywords and the literals true/false/null/this */
  CLASS_TYPE = 7,         /* the void operator */
  CLASS_SYMBOL = 8,       /* operators and punctuation */
  CLASS_CBRACKET = 9      /* curly brackets */
};
45
46 static const char * get_class_name(enum Class class) {
47 switch (class) {
48 case CLASS_NONE:
49 abort();
50 break;
51 case CLASS_COMMENT:
52 return "c";
53 break;
54 case CLASS_REGEXP:
55 return "s";
56 break;
57 case CLASS_NUMBER:
58 return "s";
59 break;
60 case CLASS_STRING:
61 return "s";
62 break;
63 case CLASS_SPECIALCHAR:
64 return "t";
65 break;
66 case CLASS_KEYWORD:
67 return "k";
68 break;
69 case CLASS_TYPE:
70 return "k";
71 break;
72 case CLASS_SYMBOL:
73 return "k";
74 break;
75 case CLASS_CBRACKET:
76 return "k";
77 break;
78 default:
79 abort();
80 break;
81 }
82 }
83
84 static const char * g_id;
85 static const jschar * g_characters;
86 static size_t g_num_characters;
87 static Stream * g_output;
88 static size_t character_offset;
89 static uint16_t line_num;
90 static uint16_t column_num;
91 static enum Class current_class;
92
93 static void output_character(jschar c, enum Class class) {
94 if (class != current_class) {
95 /* output the end tag */
96 if (current_class != CLASS_NONE) {
97 Stream_write_string(g_output, "</span>");
98 }
99
100 current_class = class;
101
102 /* output the start tag */
103 if (current_class != CLASS_NONE) {
104 Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));
105 }
106 }
107
108 switch (c) {
109 case '&':
110 Stream_write_string(g_output, "&amp;");
111 break;
112 case '<':
113 Stream_write_string(g_output, "&lt;");
114 break;
115 case '>':
116 Stream_write_string(g_output, "&gt;");
117 break;
118 case '\t':
119 case '\n':
120 Stream_write_char(g_output, c);
121 break;
122 default:
123 if (32 <= c && c <= 126) {
124 Stream_write_char(g_output, c);
125 }
126 else {
127 Stream_printf(g_output, "&#%d;", c);
128 }
129 break;
130 }
131 }
132
133 static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {
134 enum State {
135 STATE_NORMAL,
136 STATE_LINE_COMMENT,
137 STATE_MULTILINE_COMMENT
138 };
139
140 enum State state = STATE_NORMAL;
141 while (character_offset < g_num_characters) {
142 if (end_line != 0 && line_num > end_line) {
143 break;
144 }
145 else if (line_num == end_line && column_num >= end_column) {
146 break;
147 }
148
149 jschar c = g_characters[character_offset];
150 if (c == '\0') {
151 fatal("%s: script contains NULL character", g_id);
152 }
153
154 switch (state) {
155 case STATE_NORMAL:
156 if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
157 state = STATE_LINE_COMMENT;
158 }
159 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
160 state = STATE_MULTILINE_COMMENT;
161 output_character('/', CLASS_COMMENT);
162 output_character('*', CLASS_COMMENT);
163 character_offset += 2;
164 if (column_num >= UINT16_MAX - 1) {
165 fatal("%s: script contains line with more than 65,535 characters", g_id);
166 }
167 column_num += 2;
168 continue;
169 }
170 break;
171 case STATE_LINE_COMMENT:
172 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
173 state = STATE_NORMAL;
174 }
175 break;
176 case STATE_MULTILINE_COMMENT:
177 if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
178 output_character('*', CLASS_COMMENT);
179 output_character('/', CLASS_COMMENT);
180 state = STATE_NORMAL;
181 character_offset += 2;
182 if (column_num >= UINT16_MAX - 1) {
183 fatal("%s: script contains line with more than 65,535 characters", g_id);
184 }
185 column_num += 2;
186 continue;
187 }
188 break;
189 }
190
191 character_offset++;
192 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
193 if (line_num == UINT16_MAX) {
194 fatal("%s: script contains more than 65,535 lines", g_id);
195 }
196 line_num++;
197 column_num = 0;
198 if (c == '\r' && character_offset < g_num_characters && g_characters[character_offset] == '\n') {
199 character_offset++;
200 }
201 output_character('\n', CLASS_NONE);
202 }
203 else {
204 if (column_num == UINT16_MAX) {
205 fatal("%s: script contains line with more than 65,535 characters", g_id);
206 }
207 column_num++;
208 if (state == STATE_NORMAL) {
209 output_character(c, CLASS_NONE);
210 }
211 else {
212 output_character(c, CLASS_COMMENT);
213 }
214 }
215 }
216 }
217
218 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
219 g_id = id;
220 g_characters = characters;
221 g_num_characters = num_characters;
222 g_output = output;
223
224 character_offset = 0;
225 line_num = 1;
226 column_num = 0;
227 current_class = CLASS_NONE;
228
229 /* tokenize the JavaScript */
230 JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);
231 if (token_stream == NULL) {
232 fatal("cannot create token stream from JavaScript file %s", id);
233 }
234
235 /* see js_ParseTokenStream in jsparse.c */
236 JSObject * chain = NULL;
237 JSContext * cx = context;
238 JSStackFrame *fp, frame;
239
240 /*
241 * Push a compiler frame if we have no frames, or if the top frame is a
242 * lightweight function activation, or if its scope chain doesn't match
243 * the one passed to us.
244 */
245 fp = cx->fp;
246 if (!fp || !fp->varobj || fp->scopeChain != chain) {
247 memset(&frame, 0, sizeof frame);
248 frame.varobj = frame.scopeChain = chain;
249 if (cx->options & JSOPTION_VAROBJFIX) {
250 while ((chain = JS_GetParent(cx, chain)) != NULL)
251 frame.varobj = chain;
252 }
253 frame.down = fp;
254 if (fp)
255 frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);
256 cx->fp = &frame;
257 }
258
259 /*
260 * Protect atoms from being collected by a GC activation, which might
261 * - nest on this thread due to out of memory (the so-called "last ditch"
262 * GC attempted within js_NewGCThing), or
263 * - run for any reason on another thread if this thread is suspended on
264 * an object lock before it finishes generating bytecode into a script
265 * protected from the GC by a root or a stack frame reference.
266 */
267 JS_KEEP_ATOMS(cx->runtime);
268
269 for (;;) {
270 JSTokenType tt = js_GetToken(context, token_stream);
271
272 if (tt == TOK_ERROR) {
273 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
274 }
275
276 if (tt == TOK_EOF) {
277 mark_nontoken_chars(0, 0);
278 break;
279 }
280
281 /* mark the chars before the token */
282 JSToken t = CURRENT_TOKEN(token_stream);
283 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
284
285 /* mark the token */
286 enum Class class;
287 switch (tt) {
288 case TOK_ERROR:
289 case TOK_EOF:
290 abort();
291 case TOK_EOL:
292 class = CLASS_NONE;
293 token_stream->flags |= TSF_OPERAND;
294 break;
295 case TOK_SEMI:
296 case TOK_COMMA:
297 case TOK_ASSIGN:
298 case TOK_HOOK:
299 case TOK_COLON:
300 case TOK_OR:
301 case TOK_AND:
302 case TOK_BITOR:
303 case TOK_BITXOR:
304 case TOK_BITAND:
305 case TOK_EQOP:
306 case TOK_RELOP:
307 case TOK_SHOP:
308 case TOK_PLUS:
309 case TOK_MINUS:
310 case TOK_STAR:
311 case TOK_DIVOP:
312 class = CLASS_SYMBOL;
313 token_stream->flags |= TSF_OPERAND;
314 break;
315 case TOK_UNARYOP:
316 switch (t.t_op) {
317 case JSOP_NEG:
318 case JSOP_POS:
319 case JSOP_NOT:
320 case JSOP_BITNOT:
321 class = CLASS_SYMBOL;
322 token_stream->flags |= TSF_OPERAND;
323 break;
324 case JSOP_TYPEOF:
325 class = CLASS_KEYWORD;
326 token_stream->flags |= TSF_OPERAND;
327 break;
328 case JSOP_VOID:
329 class = CLASS_TYPE;
330 token_stream->flags |= TSF_OPERAND;
331 break;
332 default:
333 abort();
334 }
335 break;
336 case TOK_INC:
337 case TOK_DEC:
338 class = CLASS_SYMBOL;
339 /* token_stream->flags does not change w.r.t. TSF_OPERAND */
340 break;
341 case TOK_DOT:
342 case TOK_LB:
343 class = CLASS_SYMBOL;
344 token_stream->flags |= TSF_OPERAND;
345 break;
346 case TOK_RB:
347 class = CLASS_SYMBOL;
348 token_stream->flags &= ~TSF_OPERAND;
349 break;
350 case TOK_LC:
351 class = CLASS_CBRACKET;
352 token_stream->flags |= TSF_OPERAND;
353 break;
354 case TOK_RC:
355 class = CLASS_CBRACKET;
356 token_stream->flags &= ~TSF_OPERAND;
357 break;
358 case TOK_LP:
359 class = CLASS_SYMBOL;
360 token_stream->flags |= TSF_OPERAND;
361 break;
362 case TOK_RP:
363 class = CLASS_SYMBOL;
364 token_stream->flags &= ~TSF_OPERAND;
365 break;
366 case TOK_NAME:
367 class = CLASS_NONE;
368 token_stream->flags &= ~TSF_OPERAND;
369 if (js_PeekToken(context, token_stream) == TOK_LP) {
370 /* function */
371 class = CLASS_NONE;
372 }
373 break;
374 case TOK_NUMBER:
375 class = CLASS_NUMBER;
376 token_stream->flags &= ~TSF_OPERAND;
377 break;
378 case TOK_STRING:
379 class = CLASS_STRING;
380 token_stream->flags &= ~TSF_OPERAND;
381 break;
382 case TOK_OBJECT:
383 class = CLASS_REGEXP;
384 token_stream->flags &= ~TSF_OPERAND;
385 break;
386 case TOK_PRIMARY:
387 switch (t.t_op) {
388 case JSOP_TRUE:
389 case JSOP_FALSE:
390 case JSOP_NULL:
391 case JSOP_THIS:
392 class = CLASS_KEYWORD;
393 token_stream->flags &= ~TSF_OPERAND;
394 break;
395 default:
396 abort();
397 }
398 break;
399 case TOK_FUNCTION:
400 class = CLASS_KEYWORD;
401 token_stream->flags |= TSF_OPERAND;
402 break;
403 case TOK_EXPORT:
404 case TOK_IMPORT:
405 abort();
406 break;
407 case TOK_IF:
408 case TOK_ELSE:
409 case TOK_SWITCH:
410 case TOK_CASE:
411 case TOK_DEFAULT:
412 case TOK_WHILE:
413 case TOK_DO:
414 case TOK_FOR:
415 case TOK_BREAK:
416 case TOK_CONTINUE:
417 case TOK_IN:
418 case TOK_VAR:
419 case TOK_WITH:
420 case TOK_RETURN:
421 case TOK_NEW:
422 case TOK_DELETE:
423 token_stream->flags |= TSF_OPERAND;
424 class = CLASS_KEYWORD;
425 break;
426 case TOK_DEFSHARP:
427 case TOK_USESHARP:
428 abort();
429 break;
430 case TOK_TRY:
431 case TOK_CATCH:
432 case TOK_FINALLY:
433 case TOK_THROW:
434 case TOK_INSTANCEOF:
435 case TOK_DEBUGGER:
436 token_stream->flags |= TSF_OPERAND;
437 class = CLASS_KEYWORD;
438 break;
439 case TOK_XMLSTAGO:
440 case TOK_XMLETAGO:
441 case TOK_XMLPTAGC:
442 case TOK_XMLTAGC:
443 case TOK_XMLNAME:
444 case TOK_XMLATTR:
445 case TOK_XMLSPACE:
446 case TOK_XMLTEXT:
447 case TOK_XMLCOMMENT:
448 case TOK_XMLCDATA:
449 case TOK_XMLPI:
450 case TOK_AT:
451 case TOK_DBLCOLON:
452 case TOK_ANYNAME:
453 case TOK_DBLDOT:
454 case TOK_FILTER:
455 case TOK_XMLELEM:
456 case TOK_XMLLIST:
457 case TOK_RESERVED:
458 case TOK_LIMIT:
459 abort();
460 break;
461 default:
462 abort();
463 break;
464 }
465
466 assert(t.pos.begin.lineno == t.pos.end.lineno);
467 if (t.pos.begin.index > t.pos.end.index) {
468 fatal("%s: script contains line with more than 65,535 characters", id);
469 }
470 for (uint16_t i = t.pos.begin.index; i < t.pos.end.index; i++) {
471 assert(character_offset < num_characters);
472 jschar c = characters[character_offset];
473 if (tt == TOK_STRING && c == '\\') {
474 output_character(c, CLASS_SPECIALCHAR);
475 character_offset++;
476 i++;
477 assert(character_offset < num_characters);
478 c = characters[character_offset];
479 output_character(c, CLASS_SPECIALCHAR);
480 character_offset++;
481 }
482 else {
483 output_character(c, class);
484 character_offset++;
485 }
486 }
487
488 line_num = t.pos.end.lineno;
489 column_num = t.pos.end.index;
490 }
491
492 if (current_class != CLASS_NONE) {
493 output_character('\n', CLASS_NONE);
494 }
495
496 /* cleanup */
497 JS_UNKEEP_ATOMS(cx->runtime);
498 context->fp = fp;
499 }

  ViewVC Help
Powered by ViewVC 1.1.24