/[jscoverage]/trunk/highlight.c
ViewVC logotype

Contents of /trunk/highlight.c

Parent Directory | Revision Log


Revision 343 - (show annotations)
Fri Oct 24 16:15:25 2008 UTC (9 years, 9 months ago) by siliconforks
File MIME type: text/plain
File size: 11841 byte(s)
Add support for array comprehensions from JS 1.7.
1 /*
2 highlight.c - JavaScript syntax highlighting
3 Copyright (C) 2008 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include <jslock.h>
29 #include <jsscan.h>
30
31 #include "util.h"
32
/*
 * Highlighting categories that can be attached to a run of output
 * characters.  CLASS_NONE means the characters are written without any
 * surrounding <span>; get_class_name() maps every other member to the CSS
 * class name used in the generated markup.
 */
enum Class {
  CLASS_NONE        = 0,  /* plain text, no <span> */
  CLASS_COMMENT     = 1,  /* line and block comments */
  CLASS_REGEXP      = 2,  /* regular expression literals */
  CLASS_NUMBER      = 3,  /* numeric literals */
  CLASS_STRING      = 4,  /* string literals */
  CLASS_SPECIALCHAR = 5,  /* backslash escapes inside string literals */
  CLASS_KEYWORD     = 6,  /* reserved words */
  CLASS_TYPE        = 7,  /* used for the "void" operator */
  CLASS_SYMBOL      = 8,  /* operators and punctuation */
  CLASS_CBRACKET    = 9   /* curly brackets */
};
45
46 static const char * get_class_name(enum Class class) {
47 switch (class) {
48 case CLASS_NONE:
49 abort();
50 break;
51 case CLASS_COMMENT:
52 return "c";
53 break;
54 case CLASS_REGEXP:
55 return "s";
56 break;
57 case CLASS_NUMBER:
58 return "s";
59 break;
60 case CLASS_STRING:
61 return "s";
62 break;
63 case CLASS_SPECIALCHAR:
64 return "t";
65 break;
66 case CLASS_KEYWORD:
67 return "k";
68 break;
69 case CLASS_TYPE:
70 return "k";
71 break;
72 case CLASS_SYMBOL:
73 return "k";
74 break;
75 case CLASS_CBRACKET:
76 return "k";
77 break;
78 default:
79 abort();
80 break;
81 }
82 }
83
84 static const char * g_id;
85 static const jschar * g_characters;
86 static size_t g_num_characters;
87 static Stream * g_output;
88 static size_t character_offset;
89 static uint16_t line_num;
90 static uint16_t column_num;
91 static enum Class current_class;
92
93 static void output_character(jschar c, enum Class class) {
94 if (class != current_class) {
95 /* output the end tag */
96 if (current_class != CLASS_NONE) {
97 Stream_write_string(g_output, "</span>");
98 }
99
100 current_class = class;
101
102 /* output the start tag */
103 if (current_class != CLASS_NONE) {
104 Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));
105 }
106 }
107
108 switch (c) {
109 case '&':
110 Stream_write_string(g_output, "&amp;");
111 break;
112 case '<':
113 Stream_write_string(g_output, "&lt;");
114 break;
115 case '>':
116 Stream_write_string(g_output, "&gt;");
117 break;
118 case '\t':
119 case '\n':
120 Stream_write_char(g_output, c);
121 break;
122 default:
123 if (32 <= c && c <= 126) {
124 Stream_write_char(g_output, c);
125 }
126 else {
127 Stream_printf(g_output, "&#%d;", c);
128 }
129 break;
130 }
131 }
132
133 static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {
134 enum State {
135 STATE_NORMAL,
136 STATE_LINE_COMMENT,
137 STATE_MULTILINE_COMMENT
138 };
139
140 enum State state = STATE_NORMAL;
141 while (character_offset < g_num_characters) {
142 if (end_line != 0 && line_num > end_line) {
143 break;
144 }
145 else if (line_num == end_line && column_num >= end_column) {
146 break;
147 }
148
149 jschar c = g_characters[character_offset];
150 if (c == '\0') {
151 fatal("%s: script contains NULL character", g_id);
152 }
153
154 switch (state) {
155 case STATE_NORMAL:
156 if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
157 state = STATE_LINE_COMMENT;
158 }
159 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
160 state = STATE_MULTILINE_COMMENT;
161 output_character('/', CLASS_COMMENT);
162 output_character('*', CLASS_COMMENT);
163 character_offset += 2;
164 if (column_num >= UINT16_MAX - 1) {
165 fatal("%s: script contains line with more than 65,535 characters", g_id);
166 }
167 column_num += 2;
168 continue;
169 }
170 break;
171 case STATE_LINE_COMMENT:
172 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
173 state = STATE_NORMAL;
174 }
175 break;
176 case STATE_MULTILINE_COMMENT:
177 if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
178 output_character('*', CLASS_COMMENT);
179 output_character('/', CLASS_COMMENT);
180 state = STATE_NORMAL;
181 character_offset += 2;
182 if (column_num >= UINT16_MAX - 1) {
183 fatal("%s: script contains line with more than 65,535 characters", g_id);
184 }
185 column_num += 2;
186 continue;
187 }
188 break;
189 }
190
191 character_offset++;
192 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
193 if (line_num == UINT16_MAX) {
194 fatal("%s: script contains more than 65,535 lines", g_id);
195 }
196 line_num++;
197 column_num = 0;
198 if (c == '\r' && character_offset < g_num_characters && g_characters[character_offset] == '\n') {
199 character_offset++;
200 }
201 output_character('\n', CLASS_NONE);
202 }
203 else {
204 if (column_num == UINT16_MAX) {
205 fatal("%s: script contains line with more than 65,535 characters", g_id);
206 }
207 column_num++;
208 if (state == STATE_NORMAL) {
209 output_character(c, CLASS_NONE);
210 }
211 else {
212 output_character(c, CLASS_COMMENT);
213 }
214 }
215 }
216 }
217
218 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
219 g_id = id;
220 g_characters = characters;
221 g_num_characters = num_characters;
222 g_output = output;
223
224 character_offset = 0;
225 line_num = 1;
226 column_num = 0;
227 current_class = CLASS_NONE;
228
229 /* tokenize the JavaScript */
230 JSTokenStream token_stream;
231 if (! js_InitTokenStream(context, &token_stream, characters, num_characters, NULL, NULL, 1)) {
232 fatal("cannot create token stream from JavaScript file %s", id);
233 }
234
235 for (;;) {
236 JSTokenType tt = js_GetToken(context, &token_stream);
237
238 if (tt == TOK_ERROR) {
239 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
240 }
241
242 if (tt == TOK_EOF) {
243 mark_nontoken_chars(0, 0);
244 break;
245 }
246
247 /* mark the chars before the token */
248 JSToken t = CURRENT_TOKEN(&token_stream);
249 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
250
251 /* mark the token */
252 enum Class class;
253 switch (tt) {
254 case TOK_ERROR:
255 case TOK_EOF:
256 abort();
257 case TOK_EOL:
258 class = CLASS_NONE;
259 token_stream.flags |= TSF_OPERAND;
260 break;
261 case TOK_SEMI:
262 case TOK_COMMA:
263 case TOK_ASSIGN:
264 case TOK_HOOK:
265 case TOK_COLON:
266 case TOK_OR:
267 case TOK_AND:
268 case TOK_BITOR:
269 case TOK_BITXOR:
270 case TOK_BITAND:
271 case TOK_EQOP:
272 case TOK_RELOP:
273 case TOK_SHOP:
274 case TOK_PLUS:
275 case TOK_MINUS:
276 case TOK_STAR:
277 case TOK_DIVOP:
278 class = CLASS_SYMBOL;
279 token_stream.flags |= TSF_OPERAND;
280 break;
281 case TOK_UNARYOP:
282 switch (t.t_op) {
283 case JSOP_NEG:
284 case JSOP_POS:
285 case JSOP_NOT:
286 case JSOP_BITNOT:
287 class = CLASS_SYMBOL;
288 token_stream.flags |= TSF_OPERAND;
289 break;
290 case JSOP_TYPEOF:
291 class = CLASS_KEYWORD;
292 token_stream.flags |= TSF_OPERAND;
293 break;
294 case JSOP_VOID:
295 class = CLASS_TYPE;
296 token_stream.flags |= TSF_OPERAND;
297 break;
298 default:
299 abort();
300 }
301 break;
302 case TOK_INC:
303 case TOK_DEC:
304 class = CLASS_SYMBOL;
305 /* token_stream.flags does not change w.r.t. TSF_OPERAND */
306 break;
307 case TOK_DOT:
308 case TOK_LB:
309 class = CLASS_SYMBOL;
310 token_stream.flags |= TSF_OPERAND;
311 break;
312 case TOK_RB:
313 class = CLASS_SYMBOL;
314 token_stream.flags &= ~TSF_OPERAND;
315 break;
316 case TOK_LC:
317 class = CLASS_CBRACKET;
318 token_stream.flags |= TSF_OPERAND;
319 break;
320 case TOK_RC:
321 class = CLASS_CBRACKET;
322 token_stream.flags &= ~TSF_OPERAND;
323 break;
324 case TOK_LP:
325 class = CLASS_SYMBOL;
326 token_stream.flags |= TSF_OPERAND;
327 break;
328 case TOK_RP:
329 class = CLASS_SYMBOL;
330 token_stream.flags &= ~TSF_OPERAND;
331 break;
332 case TOK_NAME:
333 class = CLASS_NONE;
334 token_stream.flags &= ~TSF_OPERAND;
335 if (js_PeekToken(context, &token_stream) == TOK_LP) {
336 /* function */
337 class = CLASS_NONE;
338 }
339 break;
340 case TOK_NUMBER:
341 class = CLASS_NUMBER;
342 token_stream.flags &= ~TSF_OPERAND;
343 break;
344 case TOK_STRING:
345 class = CLASS_STRING;
346 token_stream.flags &= ~TSF_OPERAND;
347 break;
348 case TOK_REGEXP:
349 class = CLASS_REGEXP;
350 token_stream.flags &= ~TSF_OPERAND;
351 break;
352 case TOK_PRIMARY:
353 switch (t.t_op) {
354 case JSOP_TRUE:
355 case JSOP_FALSE:
356 case JSOP_NULL:
357 case JSOP_THIS:
358 class = CLASS_KEYWORD;
359 token_stream.flags &= ~TSF_OPERAND;
360 break;
361 default:
362 abort();
363 }
364 break;
365 case TOK_FUNCTION:
366 class = CLASS_KEYWORD;
367 token_stream.flags |= TSF_OPERAND;
368 break;
369 case TOK_IF:
370 case TOK_ELSE:
371 case TOK_SWITCH:
372 case TOK_CASE:
373 case TOK_DEFAULT:
374 case TOK_WHILE:
375 case TOK_DO:
376 case TOK_FOR:
377 case TOK_BREAK:
378 case TOK_CONTINUE:
379 case TOK_IN:
380 case TOK_VAR:
381 case TOK_WITH:
382 case TOK_RETURN:
383 case TOK_NEW:
384 case TOK_DELETE:
385 token_stream.flags |= TSF_OPERAND;
386 class = CLASS_KEYWORD;
387 break;
388 case TOK_DEFSHARP:
389 case TOK_USESHARP:
390 abort();
391 break;
392 case TOK_TRY:
393 case TOK_CATCH:
394 case TOK_FINALLY:
395 case TOK_THROW:
396 case TOK_INSTANCEOF:
397 case TOK_DEBUGGER:
398 token_stream.flags |= TSF_OPERAND;
399 class = CLASS_KEYWORD;
400 break;
401 case TOK_XMLSTAGO:
402 case TOK_XMLETAGO:
403 case TOK_XMLPTAGC:
404 case TOK_XMLTAGC:
405 case TOK_XMLNAME:
406 case TOK_XMLATTR:
407 case TOK_XMLSPACE:
408 case TOK_XMLTEXT:
409 case TOK_XMLCOMMENT:
410 case TOK_XMLCDATA:
411 case TOK_XMLPI:
412 case TOK_AT:
413 case TOK_DBLCOLON:
414 case TOK_ANYNAME:
415 case TOK_DBLDOT:
416 case TOK_FILTER:
417 case TOK_XMLELEM:
418 case TOK_XMLLIST:
419 abort();
420 break;
421 case TOK_YIELD:
422 token_stream.flags |= TSF_OPERAND;
423 class = CLASS_KEYWORD;
424 break;
425 case TOK_ARRAYCOMP:
426 case TOK_ARRAYPUSH:
427 case TOK_LEXICALSCOPE:
428 abort();
429 break;
430 case TOK_LET:
431 token_stream.flags |= TSF_OPERAND;
432 class = CLASS_KEYWORD;
433 break;
434 case TOK_BODY:
435 case TOK_RESERVED:
436 case TOK_LIMIT:
437 abort();
438 break;
439 default:
440 abort();
441 break;
442 }
443
444 if (t.pos.begin.lineno != t.pos.end.lineno) {
445 fatal("%s: line %u: token spans multiple lines", id, t.pos.begin.lineno);
446 }
447 if (t.pos.begin.index > t.pos.end.index) {
448 fatal("%s: script contains line with more than 65,535 characters", id);
449 }
450 for (uint16_t i = t.pos.begin.index; i < t.pos.end.index; i++) {
451 assert(character_offset < num_characters);
452 jschar c = characters[character_offset];
453 if (tt == TOK_STRING && c == '\\') {
454 output_character(c, CLASS_SPECIALCHAR);
455 character_offset++;
456 i++;
457 assert(character_offset < num_characters);
458 c = characters[character_offset];
459 output_character(c, CLASS_SPECIALCHAR);
460 character_offset++;
461 }
462 else {
463 output_character(c, class);
464 character_offset++;
465 }
466 }
467
468 line_num = t.pos.end.lineno;
469 column_num = t.pos.end.index;
470 }
471
472 if (current_class != CLASS_NONE) {
473 output_character('\n', CLASS_NONE);
474 }
475
476 js_CloseTokenStream(context, &token_stream);
477 }

  ViewVC Help
Powered by ViewVC 1.1.24