/[jscoverage]/trunk/highlight.cpp
ViewVC logotype

Contents of /trunk/highlight.cpp

Parent Directory | Revision Log


Revision 473 - (show annotations)
Sun Oct 4 04:48:09 2009 UTC (9 years, 1 month ago) by siliconforks
File size: 11706 byte(s)
Remove limitation of 65535 lines per file and characters per line.

1 /*
2 highlight.cpp - JavaScript syntax highlighting
3 Copyright (C) 2008, 2009 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include <jslock.h>
29 #include <jsscan.h>
30
31 #include "util.h"
32
33 #ifndef UINT32_MAX
34 #define UINT32_MAX ((uint32_t) (-1))
35 #endif
36
/*
Highlighting categories assigned to source characters.  CLASS_NONE means
"no markup"; every other value is mapped to a CSS class name by
get_class_name().
*/
enum Class {
  CLASS_NONE,
  CLASS_COMMENT,
  CLASS_REGEXP,
  CLASS_NUMBER,
  CLASS_STRING,
  CLASS_SPECIALCHAR,
  CLASS_KEYWORD,
  CLASS_TYPE,
  CLASS_SYMBOL,
  CLASS_CBRACKET
};

/*
Returns the CSS class name used in the generated <span> for the given
highlighting category.  Several categories share one name.

CLASS_NONE has no class name: passing it (or any value that is not a member
of enum Class) aborts the program.
*/
static const char * get_class_name(enum Class c) {
  switch (c) {
  case CLASS_COMMENT:
    return "c";
  case CLASS_REGEXP:
  case CLASS_NUMBER:
  case CLASS_STRING:
    return "s";
  case CLASS_SPECIALCHAR:
    return "t";
  case CLASS_KEYWORD:
  case CLASS_TYPE:
  case CLASS_SYMBOL:
  case CLASS_CBRACKET:
    return "k";
  case CLASS_NONE:
  default:
    /* no markup exists for these values; handled below */
    break;
  }
  abort();
}
87
88 static const char * g_id;
89 static const jschar * g_characters;
90 static size_t g_num_characters;
91 static Stream * g_output;
92 static size_t character_offset;
93 static uint32_t line_num;
94 static uint32_t column_num;
95 static enum Class current_class;
96
97 static void output_character(jschar c, enum Class class_) {
98 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
99 class_ = CLASS_NONE;
100 }
101
102 if (class_ != current_class) {
103 /* output the end tag */
104 if (current_class != CLASS_NONE) {
105 Stream_write_string(g_output, "</span>");
106 }
107
108 current_class = class_;
109
110 /* output the start tag */
111 if (current_class != CLASS_NONE) {
112 Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class_));
113 }
114 }
115
116 if (column_num == UINT32_MAX) {
117 fatal("%s: script contains a line with more than 65,535 columns", g_id);
118 }
119 column_num++;
120 switch (c) {
121 case '&':
122 Stream_write_string(g_output, "&amp;");
123 break;
124 case '<':
125 Stream_write_string(g_output, "&lt;");
126 break;
127 case '>':
128 Stream_write_string(g_output, "&gt;");
129 break;
130 case '\t':
131 Stream_write_char(g_output, c);
132 break;
133 case '\r':
134 case '\n':
135 case 0x2028:
136 case 0x2029:
137 if (c == '\r' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '\n') {
138 break;
139 }
140 Stream_write_char(g_output, '\n');
141 column_num = 0;
142 if (line_num == UINT32_MAX) {
143 fatal("%s: script contains more than 65,535 lines", g_id);
144 }
145 line_num++;
146 break;
147 default:
148 if (32 <= c && c <= 126) {
149 Stream_write_char(g_output, c);
150 }
151 else {
152 Stream_printf(g_output, "&#%d;", c);
153 }
154 break;
155 }
156 character_offset++;
157 }
158
159 static void mark_nontoken_chars(uint32_t end_line, uint32_t end_column) {
160 enum State {
161 STATE_NORMAL,
162 STATE_LINE_COMMENT,
163 STATE_MULTILINE_COMMENT
164 };
165
166 enum State state = STATE_NORMAL;
167 while (character_offset < g_num_characters) {
168 if (end_line != 0 && line_num > end_line) {
169 break;
170 }
171 else if (line_num == end_line && column_num >= end_column) {
172 break;
173 }
174
175 jschar c = g_characters[character_offset];
176 if (c == '\0') {
177 fatal("%s: script contains NULL character", g_id);
178 }
179
180 switch (state) {
181 case STATE_NORMAL:
182 if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
183 state = STATE_LINE_COMMENT;
184 }
185 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
186 state = STATE_MULTILINE_COMMENT;
187 output_character('/', CLASS_COMMENT);
188 output_character('*', CLASS_COMMENT);
189 continue;
190 }
191 break;
192 case STATE_LINE_COMMENT:
193 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
194 state = STATE_NORMAL;
195 }
196 break;
197 case STATE_MULTILINE_COMMENT:
198 if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
199 output_character('*', CLASS_COMMENT);
200 output_character('/', CLASS_COMMENT);
201 state = STATE_NORMAL;
202 continue;
203 }
204 break;
205 }
206
207 if (state == STATE_NORMAL) {
208 output_character(c, CLASS_NONE);
209 }
210 else {
211 output_character(c, CLASS_COMMENT);
212 }
213 }
214 }
215
216 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
217 g_id = id;
218 g_characters = characters;
219 g_num_characters = num_characters;
220 g_output = output;
221
222 character_offset = 0;
223 line_num = 1;
224 column_num = 0;
225 current_class = CLASS_NONE;
226
227 /* tokenize the JavaScript */
228 JSTokenStream token_stream;
229 if (! js_InitTokenStream(context, &token_stream, characters, num_characters, NULL, NULL, 1)) {
230 fatal("cannot create token stream from JavaScript file %s", id);
231 }
232
233 for (;;) {
234 JSTokenType tt = js_GetToken(context, &token_stream);
235
236 if (tt == TOK_ERROR) {
237 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
238 }
239
240 if (tt == TOK_EOF) {
241 mark_nontoken_chars(0, 0);
242 break;
243 }
244
245 /* mark the chars before the token */
246 JSToken t = CURRENT_TOKEN(&token_stream);
247 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
248
249 /* mark the token */
250 enum Class class_;
251 switch (tt) {
252 case TOK_ERROR:
253 case TOK_EOF:
254 abort();
255 case TOK_EOL:
256 class_ = CLASS_NONE;
257 token_stream.flags |= TSF_OPERAND;
258 break;
259 case TOK_SEMI:
260 case TOK_COMMA:
261 case TOK_ASSIGN:
262 case TOK_HOOK:
263 case TOK_COLON:
264 case TOK_OR:
265 case TOK_AND:
266 case TOK_BITOR:
267 case TOK_BITXOR:
268 case TOK_BITAND:
269 case TOK_EQOP:
270 case TOK_RELOP:
271 case TOK_SHOP:
272 case TOK_PLUS:
273 case TOK_MINUS:
274 case TOK_STAR:
275 case TOK_DIVOP:
276 class_ = CLASS_SYMBOL;
277 token_stream.flags |= TSF_OPERAND;
278 break;
279 case TOK_UNARYOP:
280 switch (t.t_op) {
281 case JSOP_NEG:
282 case JSOP_POS:
283 case JSOP_NOT:
284 case JSOP_BITNOT:
285 class_ = CLASS_SYMBOL;
286 token_stream.flags |= TSF_OPERAND;
287 break;
288 case JSOP_TYPEOF:
289 class_ = CLASS_KEYWORD;
290 token_stream.flags |= TSF_OPERAND;
291 break;
292 case JSOP_VOID:
293 class_ = CLASS_TYPE;
294 token_stream.flags |= TSF_OPERAND;
295 break;
296 default:
297 abort();
298 }
299 break;
300 case TOK_INC:
301 case TOK_DEC:
302 class_ = CLASS_SYMBOL;
303 /* token_stream.flags does not change w.r.t. TSF_OPERAND */
304 break;
305 case TOK_DOT:
306 case TOK_LB:
307 class_ = CLASS_SYMBOL;
308 token_stream.flags |= TSF_OPERAND;
309 break;
310 case TOK_RB:
311 class_ = CLASS_SYMBOL;
312 token_stream.flags &= ~TSF_OPERAND;
313 break;
314 case TOK_LC:
315 class_ = CLASS_CBRACKET;
316 token_stream.flags |= TSF_OPERAND;
317 break;
318 case TOK_RC:
319 class_ = CLASS_CBRACKET;
320 token_stream.flags &= ~TSF_OPERAND;
321 break;
322 case TOK_LP:
323 class_ = CLASS_SYMBOL;
324 token_stream.flags |= TSF_OPERAND;
325 break;
326 case TOK_RP:
327 class_ = CLASS_SYMBOL;
328 token_stream.flags &= ~TSF_OPERAND;
329 break;
330 case TOK_NAME:
331 class_ = CLASS_NONE;
332 token_stream.flags &= ~TSF_OPERAND;
333 if (js_PeekToken(context, &token_stream) == TOK_LP) {
334 /* function */
335 class_ = CLASS_NONE;
336 }
337 break;
338 case TOK_NUMBER:
339 class_ = CLASS_NUMBER;
340 token_stream.flags &= ~TSF_OPERAND;
341 break;
342 case TOK_STRING:
343 class_ = CLASS_STRING;
344 token_stream.flags &= ~TSF_OPERAND;
345 break;
346 case TOK_REGEXP:
347 class_ = CLASS_REGEXP;
348 token_stream.flags &= ~TSF_OPERAND;
349 break;
350 case TOK_PRIMARY:
351 switch (t.t_op) {
352 case JSOP_TRUE:
353 case JSOP_FALSE:
354 case JSOP_NULL:
355 case JSOP_THIS:
356 class_ = CLASS_KEYWORD;
357 token_stream.flags &= ~TSF_OPERAND;
358 break;
359 default:
360 abort();
361 }
362 break;
363 case TOK_FUNCTION:
364 class_ = CLASS_KEYWORD;
365 token_stream.flags |= TSF_OPERAND;
366 break;
367 case TOK_IF:
368 case TOK_ELSE:
369 case TOK_SWITCH:
370 case TOK_CASE:
371 case TOK_DEFAULT:
372 case TOK_WHILE:
373 case TOK_DO:
374 case TOK_FOR:
375 case TOK_BREAK:
376 case TOK_CONTINUE:
377 case TOK_IN:
378 case TOK_VAR:
379 case TOK_WITH:
380 case TOK_RETURN:
381 case TOK_NEW:
382 case TOK_DELETE:
383 token_stream.flags |= TSF_OPERAND;
384 class_ = CLASS_KEYWORD;
385 break;
386 case TOK_DEFSHARP:
387 case TOK_USESHARP:
388 abort();
389 break;
390 case TOK_TRY:
391 case TOK_CATCH:
392 case TOK_FINALLY:
393 case TOK_THROW:
394 case TOK_INSTANCEOF:
395 case TOK_DEBUGGER:
396 token_stream.flags |= TSF_OPERAND;
397 class_ = CLASS_KEYWORD;
398 break;
399 case TOK_XMLSTAGO:
400 case TOK_XMLETAGO:
401 case TOK_XMLPTAGC:
402 case TOK_XMLTAGC:
403 case TOK_XMLNAME:
404 case TOK_XMLATTR:
405 case TOK_XMLSPACE:
406 case TOK_XMLTEXT:
407 case TOK_XMLCOMMENT:
408 case TOK_XMLCDATA:
409 case TOK_XMLPI:
410 case TOK_AT:
411 case TOK_DBLCOLON:
412 case TOK_ANYNAME:
413 case TOK_DBLDOT:
414 case TOK_FILTER:
415 case TOK_XMLELEM:
416 case TOK_XMLLIST:
417 abort();
418 break;
419 case TOK_YIELD:
420 token_stream.flags |= TSF_OPERAND;
421 class_ = CLASS_KEYWORD;
422 break;
423 case TOK_ARRAYCOMP:
424 case TOK_ARRAYPUSH:
425 case TOK_LEXICALSCOPE:
426 abort();
427 break;
428 case TOK_LET:
429 token_stream.flags |= TSF_OPERAND;
430 class_ = CLASS_KEYWORD;
431 break;
432 case TOK_SEQ:
433 case TOK_FORHEAD:
434 case TOK_RESERVED:
435 case TOK_LIMIT:
436 abort();
437 break;
438 default:
439 abort();
440 break;
441 }
442
443 uint32_t start_line = t.pos.begin.lineno;
444 uint32_t end_line = t.pos.end.lineno;
445 uint32_t start_column = t.pos.begin.index;
446 uint32_t end_column = t.pos.end.index;
447 assert(line_num == start_line);
448 assert(column_num == start_column);
449 if (start_line == end_line && start_column >= end_column) {
450 fatal("%s: script contains line with more than 65,535 characters", id);
451 }
452 for (;;) {
453 assert(character_offset < num_characters);
454 jschar c = characters[character_offset];
455 if (tt == TOK_STRING && c == '\\') {
456 output_character(c, CLASS_SPECIALCHAR);
457 assert(character_offset < num_characters);
458 c = characters[character_offset];
459 output_character(c, CLASS_SPECIALCHAR);
460 }
461 else {
462 output_character(c, class_);
463 }
464
465 if (line_num > end_line) {
466 break;
467 }
468 else if (line_num == end_line && column_num >= end_column) {
469 break;
470 }
471 }
472
473 assert(line_num == end_line);
474 assert(column_num = end_column);
475 }
476
477 if (current_class != CLASS_NONE) {
478 output_character('\n', CLASS_NONE);
479 }
480
481 js_CloseTokenStream(context, &token_stream);
482 }

Properties

Name Value
svn:mergeinfo

  ViewVC Help
Powered by ViewVC 1.1.24