/[jscoverage]/trunk/highlight.c
ViewVC logotype

Contents of /trunk/highlight.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 427 - (show annotations)
Wed Feb 18 16:08:33 2009 UTC (9 years, 9 months ago) by siliconforks
File MIME type: text/plain
File size: 11618 byte(s)
Update Copyright year.
1 /*
2 highlight.c - JavaScript syntax highlighting
3 Copyright (C) 2008, 2009 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include <jslock.h>
29 #include <jsscan.h>
30
31 #include "util.h"
32
/*
Highlighting categories that can be attached to an output character.
CLASS_NONE marks text that is written outside of any <span> element;
each of the other values is translated to a CSS class name by
get_class_name().
*/
enum Class {
  CLASS_NONE        = 0,  /* plain text, no <span> */
  CLASS_COMMENT     = 1,  /* line and block comments */
  CLASS_REGEXP      = 2,  /* regular expression literals */
  CLASS_NUMBER      = 3,  /* numeric literals */
  CLASS_STRING      = 4,  /* string literals */
  CLASS_SPECIALCHAR = 5,  /* backslash escapes inside string literals */
  CLASS_KEYWORD     = 6,  /* keywords and keyword-like primaries */
  CLASS_TYPE        = 7,  /* used for the "void" operator */
  CLASS_SYMBOL      = 8,  /* operators and punctuation */
  CLASS_CBRACKET    = 9   /* curly brackets */
};
45
46 static const char * get_class_name(enum Class class) {
47 switch (class) {
48 case CLASS_NONE:
49 abort();
50 break;
51 case CLASS_COMMENT:
52 return "c";
53 break;
54 case CLASS_REGEXP:
55 return "s";
56 break;
57 case CLASS_NUMBER:
58 return "s";
59 break;
60 case CLASS_STRING:
61 return "s";
62 break;
63 case CLASS_SPECIALCHAR:
64 return "t";
65 break;
66 case CLASS_KEYWORD:
67 return "k";
68 break;
69 case CLASS_TYPE:
70 return "k";
71 break;
72 case CLASS_SYMBOL:
73 return "k";
74 break;
75 case CLASS_CBRACKET:
76 return "k";
77 break;
78 default:
79 abort();
80 break;
81 }
82 }
83
84 static const char * g_id;
85 static const jschar * g_characters;
86 static size_t g_num_characters;
87 static Stream * g_output;
88 static size_t character_offset;
89 static uint16_t line_num;
90 static uint16_t column_num;
91 static enum Class current_class;
92
93 static void output_character(jschar c, enum Class class) {
94 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
95 class = CLASS_NONE;
96 }
97
98 if (class != current_class) {
99 /* output the end tag */
100 if (current_class != CLASS_NONE) {
101 Stream_write_string(g_output, "</span>");
102 }
103
104 current_class = class;
105
106 /* output the start tag */
107 if (current_class != CLASS_NONE) {
108 Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));
109 }
110 }
111
112 if (column_num == UINT16_MAX) {
113 fatal("%s: script contains a line with more than 65,535 columns", g_id);
114 }
115 column_num++;
116 switch (c) {
117 case '&':
118 Stream_write_string(g_output, "&amp;");
119 break;
120 case '<':
121 Stream_write_string(g_output, "&lt;");
122 break;
123 case '>':
124 Stream_write_string(g_output, "&gt;");
125 break;
126 case '\t':
127 Stream_write_char(g_output, c);
128 break;
129 case '\r':
130 case '\n':
131 case 0x2028:
132 case 0x2029:
133 if (c == '\r' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '\n') {
134 break;
135 }
136 Stream_write_char(g_output, '\n');
137 column_num = 0;
138 if (line_num == UINT16_MAX) {
139 fatal("%s: script contains more than 65,535 lines", g_id);
140 }
141 line_num++;
142 break;
143 default:
144 if (32 <= c && c <= 126) {
145 Stream_write_char(g_output, c);
146 }
147 else {
148 Stream_printf(g_output, "&#%d;", c);
149 }
150 break;
151 }
152 character_offset++;
153 }
154
155 static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {
156 enum State {
157 STATE_NORMAL,
158 STATE_LINE_COMMENT,
159 STATE_MULTILINE_COMMENT
160 };
161
162 enum State state = STATE_NORMAL;
163 while (character_offset < g_num_characters) {
164 if (end_line != 0 && line_num > end_line) {
165 break;
166 }
167 else if (line_num == end_line && column_num >= end_column) {
168 break;
169 }
170
171 jschar c = g_characters[character_offset];
172 if (c == '\0') {
173 fatal("%s: script contains NULL character", g_id);
174 }
175
176 switch (state) {
177 case STATE_NORMAL:
178 if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
179 state = STATE_LINE_COMMENT;
180 }
181 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
182 state = STATE_MULTILINE_COMMENT;
183 output_character('/', CLASS_COMMENT);
184 output_character('*', CLASS_COMMENT);
185 continue;
186 }
187 break;
188 case STATE_LINE_COMMENT:
189 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
190 state = STATE_NORMAL;
191 }
192 break;
193 case STATE_MULTILINE_COMMENT:
194 if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
195 output_character('*', CLASS_COMMENT);
196 output_character('/', CLASS_COMMENT);
197 state = STATE_NORMAL;
198 continue;
199 }
200 break;
201 }
202
203 if (state == STATE_NORMAL) {
204 output_character(c, CLASS_NONE);
205 }
206 else {
207 output_character(c, CLASS_COMMENT);
208 }
209 }
210 }
211
212 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
213 g_id = id;
214 g_characters = characters;
215 g_num_characters = num_characters;
216 g_output = output;
217
218 character_offset = 0;
219 line_num = 1;
220 column_num = 0;
221 current_class = CLASS_NONE;
222
223 /* tokenize the JavaScript */
224 JSTokenStream token_stream;
225 if (! js_InitTokenStream(context, &token_stream, characters, num_characters, NULL, NULL, 1)) {
226 fatal("cannot create token stream from JavaScript file %s", id);
227 }
228
229 for (;;) {
230 JSTokenType tt = js_GetToken(context, &token_stream);
231
232 if (tt == TOK_ERROR) {
233 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
234 }
235
236 if (tt == TOK_EOF) {
237 mark_nontoken_chars(0, 0);
238 break;
239 }
240
241 /* mark the chars before the token */
242 JSToken t = CURRENT_TOKEN(&token_stream);
243 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
244
245 /* mark the token */
246 enum Class class;
247 switch (tt) {
248 case TOK_ERROR:
249 case TOK_EOF:
250 abort();
251 case TOK_EOL:
252 class = CLASS_NONE;
253 token_stream.flags |= TSF_OPERAND;
254 break;
255 case TOK_SEMI:
256 case TOK_COMMA:
257 case TOK_ASSIGN:
258 case TOK_HOOK:
259 case TOK_COLON:
260 case TOK_OR:
261 case TOK_AND:
262 case TOK_BITOR:
263 case TOK_BITXOR:
264 case TOK_BITAND:
265 case TOK_EQOP:
266 case TOK_RELOP:
267 case TOK_SHOP:
268 case TOK_PLUS:
269 case TOK_MINUS:
270 case TOK_STAR:
271 case TOK_DIVOP:
272 class = CLASS_SYMBOL;
273 token_stream.flags |= TSF_OPERAND;
274 break;
275 case TOK_UNARYOP:
276 switch (t.t_op) {
277 case JSOP_NEG:
278 case JSOP_POS:
279 case JSOP_NOT:
280 case JSOP_BITNOT:
281 class = CLASS_SYMBOL;
282 token_stream.flags |= TSF_OPERAND;
283 break;
284 case JSOP_TYPEOF:
285 class = CLASS_KEYWORD;
286 token_stream.flags |= TSF_OPERAND;
287 break;
288 case JSOP_VOID:
289 class = CLASS_TYPE;
290 token_stream.flags |= TSF_OPERAND;
291 break;
292 default:
293 abort();
294 }
295 break;
296 case TOK_INC:
297 case TOK_DEC:
298 class = CLASS_SYMBOL;
299 /* token_stream.flags does not change w.r.t. TSF_OPERAND */
300 break;
301 case TOK_DOT:
302 case TOK_LB:
303 class = CLASS_SYMBOL;
304 token_stream.flags |= TSF_OPERAND;
305 break;
306 case TOK_RB:
307 class = CLASS_SYMBOL;
308 token_stream.flags &= ~TSF_OPERAND;
309 break;
310 case TOK_LC:
311 class = CLASS_CBRACKET;
312 token_stream.flags |= TSF_OPERAND;
313 break;
314 case TOK_RC:
315 class = CLASS_CBRACKET;
316 token_stream.flags &= ~TSF_OPERAND;
317 break;
318 case TOK_LP:
319 class = CLASS_SYMBOL;
320 token_stream.flags |= TSF_OPERAND;
321 break;
322 case TOK_RP:
323 class = CLASS_SYMBOL;
324 token_stream.flags &= ~TSF_OPERAND;
325 break;
326 case TOK_NAME:
327 class = CLASS_NONE;
328 token_stream.flags &= ~TSF_OPERAND;
329 if (js_PeekToken(context, &token_stream) == TOK_LP) {
330 /* function */
331 class = CLASS_NONE;
332 }
333 break;
334 case TOK_NUMBER:
335 class = CLASS_NUMBER;
336 token_stream.flags &= ~TSF_OPERAND;
337 break;
338 case TOK_STRING:
339 class = CLASS_STRING;
340 token_stream.flags &= ~TSF_OPERAND;
341 break;
342 case TOK_REGEXP:
343 class = CLASS_REGEXP;
344 token_stream.flags &= ~TSF_OPERAND;
345 break;
346 case TOK_PRIMARY:
347 switch (t.t_op) {
348 case JSOP_TRUE:
349 case JSOP_FALSE:
350 case JSOP_NULL:
351 case JSOP_THIS:
352 class = CLASS_KEYWORD;
353 token_stream.flags &= ~TSF_OPERAND;
354 break;
355 default:
356 abort();
357 }
358 break;
359 case TOK_FUNCTION:
360 class = CLASS_KEYWORD;
361 token_stream.flags |= TSF_OPERAND;
362 break;
363 case TOK_IF:
364 case TOK_ELSE:
365 case TOK_SWITCH:
366 case TOK_CASE:
367 case TOK_DEFAULT:
368 case TOK_WHILE:
369 case TOK_DO:
370 case TOK_FOR:
371 case TOK_BREAK:
372 case TOK_CONTINUE:
373 case TOK_IN:
374 case TOK_VAR:
375 case TOK_WITH:
376 case TOK_RETURN:
377 case TOK_NEW:
378 case TOK_DELETE:
379 token_stream.flags |= TSF_OPERAND;
380 class = CLASS_KEYWORD;
381 break;
382 case TOK_DEFSHARP:
383 case TOK_USESHARP:
384 abort();
385 break;
386 case TOK_TRY:
387 case TOK_CATCH:
388 case TOK_FINALLY:
389 case TOK_THROW:
390 case TOK_INSTANCEOF:
391 case TOK_DEBUGGER:
392 token_stream.flags |= TSF_OPERAND;
393 class = CLASS_KEYWORD;
394 break;
395 case TOK_XMLSTAGO:
396 case TOK_XMLETAGO:
397 case TOK_XMLPTAGC:
398 case TOK_XMLTAGC:
399 case TOK_XMLNAME:
400 case TOK_XMLATTR:
401 case TOK_XMLSPACE:
402 case TOK_XMLTEXT:
403 case TOK_XMLCOMMENT:
404 case TOK_XMLCDATA:
405 case TOK_XMLPI:
406 case TOK_AT:
407 case TOK_DBLCOLON:
408 case TOK_ANYNAME:
409 case TOK_DBLDOT:
410 case TOK_FILTER:
411 case TOK_XMLELEM:
412 case TOK_XMLLIST:
413 abort();
414 break;
415 case TOK_YIELD:
416 token_stream.flags |= TSF_OPERAND;
417 class = CLASS_KEYWORD;
418 break;
419 case TOK_ARRAYCOMP:
420 case TOK_ARRAYPUSH:
421 case TOK_LEXICALSCOPE:
422 abort();
423 break;
424 case TOK_LET:
425 token_stream.flags |= TSF_OPERAND;
426 class = CLASS_KEYWORD;
427 break;
428 case TOK_SEQ:
429 case TOK_FORHEAD:
430 case TOK_RESERVED:
431 case TOK_LIMIT:
432 abort();
433 break;
434 default:
435 abort();
436 break;
437 }
438
439 uint16_t start_line = t.pos.begin.lineno;
440 uint16_t end_line = t.pos.end.lineno;
441 uint16_t start_column = t.pos.begin.index;
442 uint16_t end_column = t.pos.end.index;
443 assert(line_num == start_line);
444 assert(column_num == start_column);
445 if (start_line == end_line && start_column >= end_column) {
446 fatal("%s: script contains line with more than 65,535 characters", id);
447 }
448 for (;;) {
449 assert(character_offset < num_characters);
450 jschar c = characters[character_offset];
451 if (tt == TOK_STRING && c == '\\') {
452 output_character(c, CLASS_SPECIALCHAR);
453 assert(character_offset < num_characters);
454 c = characters[character_offset];
455 output_character(c, CLASS_SPECIALCHAR);
456 }
457 else {
458 output_character(c, class);
459 }
460
461 if (line_num > end_line) {
462 break;
463 }
464 else if (line_num == end_line && column_num >= end_column) {
465 break;
466 }
467 }
468
469 assert(line_num == end_line);
470 assert(column_num = end_column);
471 }
472
473 if (current_class != CLASS_NONE) {
474 output_character('\n', CLASS_NONE);
475 }
476
477 js_CloseTokenStream(context, &token_stream);
478 }

  ViewVC Help
Powered by ViewVC 1.1.24