/[jscoverage]/trunk/highlight.cpp
ViewVC logotype

Contents of /trunk/highlight.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 477 - (show annotations)
Thu Oct 8 20:08:39 2009 UTC (9 years ago) by siliconforks
File size: 12141 byte(s)
Exit gracefully (do not call "abort") when encountering unknown token.

1 /*
2 highlight.cpp - JavaScript syntax highlighting
3 Copyright (C) 2008, 2009 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include <jslock.h>
29 #include <jsscan.h>
30
31 #include "util.h"
32
33 #ifndef UINT32_MAX
34 #define UINT32_MAX ((uint32_t) (-1))
35 #endif
36
/*
 * Highlighting categories for emitted characters.  Each category other
 * than CLASS_NONE is translated to a CSS class name by get_class_name();
 * CLASS_NONE means the character is written outside any <span>.
 */
enum Class {
  CLASS_NONE = 0,         /* not highlighted */
  CLASS_COMMENT = 1,      /* line and block comments */
  CLASS_REGEXP = 2,       /* regular expression literals */
  CLASS_NUMBER = 3,       /* numeric literals */
  CLASS_STRING = 4,       /* string literals */
  CLASS_SPECIALCHAR = 5,  /* backslash escapes inside string literals */
  CLASS_KEYWORD = 6,      /* keywords and keyword-like primaries */
  CLASS_TYPE = 7,         /* used for the "void" operator */
  CLASS_SYMBOL = 8,       /* operators and punctuation */
  CLASS_CBRACKET = 9      /* curly brackets */
};
49
50 static const char * get_class_name(enum Class c) {
51 switch (c) {
52 case CLASS_NONE:
53 abort();
54 break;
55 case CLASS_COMMENT:
56 return "c";
57 break;
58 case CLASS_REGEXP:
59 return "s";
60 break;
61 case CLASS_NUMBER:
62 return "s";
63 break;
64 case CLASS_STRING:
65 return "s";
66 break;
67 case CLASS_SPECIALCHAR:
68 return "t";
69 break;
70 case CLASS_KEYWORD:
71 return "k";
72 break;
73 case CLASS_TYPE:
74 return "k";
75 break;
76 case CLASS_SYMBOL:
77 return "k";
78 break;
79 case CLASS_CBRACKET:
80 return "k";
81 break;
82 default:
83 abort();
84 break;
85 }
86 }
87
88 static const char * g_id;
89 static const jschar * g_characters;
90 static size_t g_num_characters;
91 static Stream * g_output;
92 static size_t character_offset;
93 static uint32_t line_num;
94 static uint32_t column_num;
95 static enum Class current_class;
96
97 static void output_character(jschar c, enum Class class_) {
98 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
99 class_ = CLASS_NONE;
100 }
101
102 if (class_ != current_class) {
103 /* output the end tag */
104 if (current_class != CLASS_NONE) {
105 Stream_write_string(g_output, "</span>");
106 }
107
108 current_class = class_;
109
110 /* output the start tag */
111 if (current_class != CLASS_NONE) {
112 Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class_));
113 }
114 }
115
116 if (column_num == UINT32_MAX) {
117 fatal("%s: script contains a line with more than 65,535 columns", g_id);
118 }
119 column_num++;
120 switch (c) {
121 case '&':
122 Stream_write_string(g_output, "&amp;");
123 break;
124 case '<':
125 Stream_write_string(g_output, "&lt;");
126 break;
127 case '>':
128 Stream_write_string(g_output, "&gt;");
129 break;
130 case '\t':
131 Stream_write_char(g_output, c);
132 break;
133 case '\r':
134 case '\n':
135 case 0x2028:
136 case 0x2029:
137 if (c == '\r' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '\n') {
138 break;
139 }
140 Stream_write_char(g_output, '\n');
141 column_num = 0;
142 if (line_num == UINT32_MAX) {
143 fatal("%s: script contains more than 65,535 lines", g_id);
144 }
145 line_num++;
146 break;
147 default:
148 if (32 <= c && c <= 126) {
149 Stream_write_char(g_output, c);
150 }
151 else {
152 Stream_printf(g_output, "&#%d;", c);
153 }
154 break;
155 }
156 character_offset++;
157 }
158
159 static void mark_nontoken_chars(uint32_t end_line, uint32_t end_column) {
160 enum State {
161 STATE_NORMAL,
162 STATE_LINE_COMMENT,
163 STATE_MULTILINE_COMMENT
164 };
165
166 enum State state = STATE_NORMAL;
167 while (character_offset < g_num_characters) {
168 if (end_line != 0 && line_num > end_line) {
169 break;
170 }
171 else if (line_num == end_line && column_num >= end_column) {
172 break;
173 }
174
175 jschar c = g_characters[character_offset];
176 if (c == '\0') {
177 fatal("%s: script contains NULL character", g_id);
178 }
179
180 switch (state) {
181 case STATE_NORMAL:
182 if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
183 state = STATE_LINE_COMMENT;
184 }
185 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
186 state = STATE_MULTILINE_COMMENT;
187 output_character('/', CLASS_COMMENT);
188 output_character('*', CLASS_COMMENT);
189 continue;
190 }
191 break;
192 case STATE_LINE_COMMENT:
193 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
194 state = STATE_NORMAL;
195 }
196 break;
197 case STATE_MULTILINE_COMMENT:
198 if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
199 output_character('*', CLASS_COMMENT);
200 output_character('/', CLASS_COMMENT);
201 state = STATE_NORMAL;
202 continue;
203 }
204 break;
205 }
206
207 if (state == STATE_NORMAL) {
208 output_character(c, CLASS_NONE);
209 }
210 else {
211 output_character(c, CLASS_COMMENT);
212 }
213 }
214 }
215
216 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
217 g_id = id;
218 g_characters = characters;
219 g_num_characters = num_characters;
220 g_output = output;
221
222 character_offset = 0;
223 line_num = 1;
224 column_num = 0;
225 current_class = CLASS_NONE;
226
227 /* tokenize the JavaScript */
228 JSTokenStream token_stream;
229 if (! js_InitTokenStream(context, &token_stream, characters, num_characters, NULL, NULL, 1)) {
230 fatal("cannot create token stream from JavaScript file %s", id);
231 }
232
233 for (;;) {
234 JSTokenType tt = js_GetToken(context, &token_stream);
235
236 if (tt == TOK_ERROR) {
237 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
238 }
239
240 if (tt == TOK_EOF) {
241 mark_nontoken_chars(0, 0);
242 break;
243 }
244
245 /* mark the chars before the token */
246 JSToken t = CURRENT_TOKEN(&token_stream);
247 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
248
249 /* mark the token */
250 enum Class class_;
251 switch (tt) {
252 case TOK_ERROR:
253 case TOK_EOF:
254 abort();
255 case TOK_EOL:
256 class_ = CLASS_NONE;
257 token_stream.flags |= TSF_OPERAND;
258 break;
259 case TOK_SEMI:
260 case TOK_COMMA:
261 case TOK_ASSIGN:
262 case TOK_HOOK:
263 case TOK_COLON:
264 case TOK_OR:
265 case TOK_AND:
266 case TOK_BITOR:
267 case TOK_BITXOR:
268 case TOK_BITAND:
269 case TOK_EQOP:
270 case TOK_RELOP:
271 case TOK_SHOP:
272 case TOK_PLUS:
273 case TOK_MINUS:
274 case TOK_STAR:
275 case TOK_DIVOP:
276 class_ = CLASS_SYMBOL;
277 token_stream.flags |= TSF_OPERAND;
278 break;
279 case TOK_UNARYOP:
280 switch (t.t_op) {
281 case JSOP_NEG:
282 case JSOP_POS:
283 case JSOP_NOT:
284 case JSOP_BITNOT:
285 class_ = CLASS_SYMBOL;
286 token_stream.flags |= TSF_OPERAND;
287 break;
288 case JSOP_TYPEOF:
289 class_ = CLASS_KEYWORD;
290 token_stream.flags |= TSF_OPERAND;
291 break;
292 case JSOP_VOID:
293 class_ = CLASS_TYPE;
294 token_stream.flags |= TSF_OPERAND;
295 break;
296 default:
297 fatal_source(id, t.pos.begin.lineno, "unknown TOK_UNARYOP (%d)", t.t_op);
298 break;
299 }
300 break;
301 case TOK_INC:
302 case TOK_DEC:
303 class_ = CLASS_SYMBOL;
304 /* token_stream.flags does not change w.r.t. TSF_OPERAND */
305 break;
306 case TOK_DOT:
307 case TOK_LB:
308 class_ = CLASS_SYMBOL;
309 token_stream.flags |= TSF_OPERAND;
310 break;
311 case TOK_RB:
312 class_ = CLASS_SYMBOL;
313 token_stream.flags &= ~TSF_OPERAND;
314 break;
315 case TOK_LC:
316 class_ = CLASS_CBRACKET;
317 token_stream.flags |= TSF_OPERAND;
318 break;
319 case TOK_RC:
320 class_ = CLASS_CBRACKET;
321 token_stream.flags &= ~TSF_OPERAND;
322 break;
323 case TOK_LP:
324 class_ = CLASS_SYMBOL;
325 token_stream.flags |= TSF_OPERAND;
326 break;
327 case TOK_RP:
328 class_ = CLASS_SYMBOL;
329 token_stream.flags &= ~TSF_OPERAND;
330 break;
331 case TOK_NAME:
332 class_ = CLASS_NONE;
333 token_stream.flags &= ~TSF_OPERAND;
334 if (js_PeekToken(context, &token_stream) == TOK_LP) {
335 /* function */
336 class_ = CLASS_NONE;
337 }
338 break;
339 case TOK_NUMBER:
340 class_ = CLASS_NUMBER;
341 token_stream.flags &= ~TSF_OPERAND;
342 break;
343 case TOK_STRING:
344 class_ = CLASS_STRING;
345 token_stream.flags &= ~TSF_OPERAND;
346 break;
347 case TOK_REGEXP:
348 class_ = CLASS_REGEXP;
349 token_stream.flags &= ~TSF_OPERAND;
350 break;
351 case TOK_PRIMARY:
352 switch (t.t_op) {
353 case JSOP_TRUE:
354 case JSOP_FALSE:
355 case JSOP_NULL:
356 case JSOP_THIS:
357 class_ = CLASS_KEYWORD;
358 token_stream.flags &= ~TSF_OPERAND;
359 break;
360 default:
361 fatal_source(id, t.pos.begin.lineno, "unknown TOK_PRIMARY (%d)", t.t_op);
362 break;
363 }
364 break;
365 case TOK_FUNCTION:
366 class_ = CLASS_KEYWORD;
367 token_stream.flags |= TSF_OPERAND;
368 break;
369 case TOK_IF:
370 case TOK_ELSE:
371 case TOK_SWITCH:
372 case TOK_CASE:
373 case TOK_DEFAULT:
374 case TOK_WHILE:
375 case TOK_DO:
376 case TOK_FOR:
377 case TOK_BREAK:
378 case TOK_CONTINUE:
379 case TOK_IN:
380 case TOK_VAR:
381 case TOK_WITH:
382 case TOK_RETURN:
383 case TOK_NEW:
384 case TOK_DELETE:
385 token_stream.flags |= TSF_OPERAND;
386 class_ = CLASS_KEYWORD;
387 break;
388 case TOK_DEFSHARP:
389 case TOK_USESHARP:
390 fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
391 break;
392 case TOK_TRY:
393 case TOK_CATCH:
394 case TOK_FINALLY:
395 case TOK_THROW:
396 case TOK_INSTANCEOF:
397 case TOK_DEBUGGER:
398 token_stream.flags |= TSF_OPERAND;
399 class_ = CLASS_KEYWORD;
400 break;
401 case TOK_XMLSTAGO:
402 case TOK_XMLETAGO:
403 case TOK_XMLPTAGC:
404 case TOK_XMLTAGC:
405 case TOK_XMLNAME:
406 case TOK_XMLATTR:
407 case TOK_XMLSPACE:
408 case TOK_XMLTEXT:
409 case TOK_XMLCOMMENT:
410 case TOK_XMLCDATA:
411 case TOK_XMLPI:
412 case TOK_AT:
413 case TOK_DBLCOLON:
414 case TOK_ANYNAME:
415 case TOK_DBLDOT:
416 case TOK_FILTER:
417 case TOK_XMLELEM:
418 case TOK_XMLLIST:
419 fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
420 break;
421 case TOK_YIELD:
422 token_stream.flags |= TSF_OPERAND;
423 class_ = CLASS_KEYWORD;
424 break;
425 case TOK_ARRAYCOMP:
426 case TOK_ARRAYPUSH:
427 case TOK_LEXICALSCOPE:
428 fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
429 break;
430 case TOK_LET:
431 token_stream.flags |= TSF_OPERAND;
432 class_ = CLASS_KEYWORD;
433 break;
434 case TOK_SEQ:
435 case TOK_FORHEAD:
436 case TOK_RESERVED:
437 case TOK_LIMIT:
438 fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
439 break;
440 default:
441 fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
442 break;
443 }
444
445 uint32_t start_line = t.pos.begin.lineno;
446 uint32_t end_line = t.pos.end.lineno;
447 uint32_t start_column = t.pos.begin.index;
448 uint32_t end_column = t.pos.end.index;
449 assert(line_num == start_line);
450 assert(column_num == start_column);
451 if (start_line == end_line && start_column >= end_column) {
452 fatal("%s: script contains line with more than 65,535 characters", id);
453 }
454 for (;;) {
455 assert(character_offset < num_characters);
456 jschar c = characters[character_offset];
457 if (tt == TOK_STRING && c == '\\') {
458 output_character(c, CLASS_SPECIALCHAR);
459 assert(character_offset < num_characters);
460 c = characters[character_offset];
461 output_character(c, CLASS_SPECIALCHAR);
462 }
463 else {
464 output_character(c, class_);
465 }
466
467 if (line_num > end_line) {
468 break;
469 }
470 else if (line_num == end_line && column_num >= end_column) {
471 break;
472 }
473 }
474
475 assert(line_num == end_line);
476 assert(column_num = end_column);
477 }
478
479 if (current_class != CLASS_NONE) {
480 output_character('\n', CLASS_NONE);
481 }
482
483 js_CloseTokenStream(context, &token_stream);
484 }

Properties

Name Value
svn:mergeinfo

  ViewVC Help
Powered by ViewVC 1.1.24