/[jscoverage]/trunk/highlight.c
ViewVC logotype

Contents of /trunk/highlight.c

Parent Directory | Revision Log


Revision 213 - (show annotations)
Fri Oct 3 02:25:47 2008 UTC (11 years, 2 months ago) by siliconforks
File MIME type: text/plain
File size: 12690 byte(s)
Rewrite for improved performance.
1 /*
2 highlight.c - JavaScript syntax highlighting
3 Copyright (C) 2008 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <assert.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include <jslock.h>
29 #include <jsscan.h>
30
31 #include "util.h"
32
/*
 * Highlighting category assigned to each character that is written out.
 * CLASS_NONE means "not inside any <span>"; every other value is mapped to
 * a CSS class name by get_class_name().
 */
enum Class {
  CLASS_NONE = 0,         /* plain text, no span */
  CLASS_COMMENT = 1,      /* line and block comments */
  CLASS_REGEXP = 2,       /* regular expression literals */
  CLASS_NUMBER = 3,       /* numeric literals */
  CLASS_STRING = 4,       /* string literals */
  CLASS_SPECIALCHAR = 5,  /* backslash escapes inside string literals */
  CLASS_KEYWORD = 6,      /* keywords */
  CLASS_TYPE = 7,         /* currently used only for the "void" operator */
  CLASS_SYMBOL = 8,       /* operators and punctuation */
  CLASS_CBRACKET = 9      /* curly brackets */
};
45
46 static const char * get_class_name(enum Class class) {
47 switch (class) {
48 case CLASS_NONE:
49 abort();
50 break;
51 case CLASS_COMMENT:
52 return "c";
53 break;
54 case CLASS_REGEXP:
55 return "s";
56 break;
57 case CLASS_NUMBER:
58 return "s";
59 break;
60 case CLASS_STRING:
61 return "s";
62 break;
63 case CLASS_SPECIALCHAR:
64 return "t";
65 break;
66 case CLASS_KEYWORD:
67 return "k";
68 break;
69 case CLASS_TYPE:
70 return "k";
71 break;
72 case CLASS_SYMBOL:
73 return "k";
74 break;
75 case CLASS_CBRACKET:
76 return "k";
77 break;
78 default:
79 abort();
80 break;
81 }
82 }
83
84 static const char * g_id;
85 static const jschar * g_characters;
86 static size_t g_num_characters;
87 static Stream * g_output;
88 static size_t character_offset;
89 static uint16_t line_num;
90 static uint16_t column_num;
91 static enum Class current_class;
92
93 static void output_character(jschar c, enum Class class) {
94 if (class != current_class) {
95 /* output the end tag */
96 if (current_class != CLASS_NONE) {
97 Stream_write_string(g_output, "</span>");
98 }
99
100 current_class = class;
101
102 /* output the start tag */
103 if (current_class != CLASS_NONE) {
104 Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));
105 }
106 }
107
108 switch (c) {
109 case '&':
110 Stream_write_string(g_output, "&amp;");
111 break;
112 case '<':
113 Stream_write_string(g_output, "&lt;");
114 break;
115 case '>':
116 Stream_write_string(g_output, "&gt;");
117 break;
118 case '\n':
119 Stream_write_char(g_output, c);
120 break;
121 default:
122 if (c == '\t' || (32 <= c && c <= 126)) {
123 Stream_write_char(g_output, c);
124 }
125 else {
126 Stream_printf(g_output, "&#%d;", c);
127 }
128 break;
129 }
130 }
131
132 static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {
133 enum State {
134 STATE_NORMAL,
135 STATE_LINE_COMMENT,
136 STATE_MULTILINE_COMMENT
137 };
138
139 enum State state = STATE_NORMAL;
140 while (character_offset < g_num_characters) {
141 if (end_line != 0 && line_num > end_line) {
142 break;
143 }
144 else if (line_num == end_line && column_num >= end_column) {
145 break;
146 }
147
148 jschar c = g_characters[character_offset];
149 if (c == '\0') {
150 fatal("%s: script contains NULL character", g_id);
151 }
152
153 switch (state) {
154 case STATE_NORMAL:
155 if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
156 state = STATE_LINE_COMMENT;
157 }
158 else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
159 state = STATE_MULTILINE_COMMENT;
160 output_character('/', CLASS_COMMENT);
161 output_character('*', CLASS_COMMENT);
162 character_offset += 2;
163 if (column_num >= UINT16_MAX - 1) {
164 fatal("%s: script contains line with more than 65,535 characters", g_id);
165 }
166 column_num += 2;
167 continue;
168 }
169 break;
170 case STATE_LINE_COMMENT:
171 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
172 state = STATE_NORMAL;
173 }
174 break;
175 case STATE_MULTILINE_COMMENT:
176 if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
177 output_character('*', CLASS_COMMENT);
178 output_character('/', CLASS_COMMENT);
179 state = STATE_NORMAL;
180 character_offset += 2;
181 if (column_num >= UINT16_MAX - 1) {
182 fatal("%s: script contains line with more than 65,535 characters", g_id);
183 }
184 column_num += 2;
185 continue;
186 }
187 break;
188 }
189
190 character_offset++;
191 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
192 if (line_num == UINT16_MAX) {
193 fatal("%s: script contains more than 65,535 lines", g_id);
194 }
195 line_num++;
196 column_num = 0;
197 if (c == '\r' && character_offset < g_num_characters && g_characters[character_offset] == '\n') {
198 character_offset++;
199 }
200 output_character('\n', CLASS_NONE);
201 }
202 else {
203 if (column_num == UINT16_MAX) {
204 fatal("%s: script contains line with more than 65,535 characters", g_id);
205 }
206 column_num++;
207 if (state == STATE_NORMAL) {
208 output_character(c, CLASS_NONE);
209 }
210 else {
211 output_character(c, CLASS_COMMENT);
212 }
213 }
214 }
215 }
216
217 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
218 g_id = id;
219 g_characters = characters;
220 g_num_characters = num_characters;
221 g_output = output;
222
223 character_offset = 0;
224 line_num = 1;
225 column_num = 0;
226 current_class = CLASS_NONE;
227
228 /* tokenize the JavaScript */
229 JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);
230 if (token_stream == NULL) {
231 fatal("cannot create token stream from JavaScript file %s", id);
232 }
233
234 /* see js_ParseTokenStream in jsparse.c */
235 JSObject * chain = NULL;
236 JSContext * cx = context;
237 JSStackFrame *fp, frame;
238
239 /*
240 * Push a compiler frame if we have no frames, or if the top frame is a
241 * lightweight function activation, or if its scope chain doesn't match
242 * the one passed to us.
243 */
244 fp = cx->fp;
245 if (!fp || !fp->varobj || fp->scopeChain != chain) {
246 memset(&frame, 0, sizeof frame);
247 frame.varobj = frame.scopeChain = chain;
248 if (cx->options & JSOPTION_VAROBJFIX) {
249 while ((chain = JS_GetParent(cx, chain)) != NULL)
250 frame.varobj = chain;
251 }
252 frame.down = fp;
253 if (fp)
254 frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);
255 cx->fp = &frame;
256 }
257
258 /*
259 * Protect atoms from being collected by a GC activation, which might
260 * - nest on this thread due to out of memory (the so-called "last ditch"
261 * GC attempted within js_NewGCThing), or
262 * - run for any reason on another thread if this thread is suspended on
263 * an object lock before it finishes generating bytecode into a script
264 * protected from the GC by a root or a stack frame reference.
265 */
266 JS_KEEP_ATOMS(cx->runtime);
267
268 for (;;) {
269 JSTokenType tt = js_GetToken(context, token_stream);
270
271 if (tt == TOK_ERROR) {
272 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
273 }
274
275 if (tt == TOK_EOF) {
276 mark_nontoken_chars(0, 0);
277 break;
278 }
279
280 /* mark the chars before the token */
281 JSToken t = CURRENT_TOKEN(token_stream);
282 mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
283
284 /* mark the token */
285 enum Class class;
286 switch (tt) {
287 case TOK_ERROR:
288 case TOK_EOF:
289 abort();
290 case TOK_EOL:
291 class = CLASS_NONE;
292 token_stream->flags |= TSF_OPERAND;
293 break;
294 case TOK_SEMI:
295 case TOK_COMMA:
296 case TOK_ASSIGN:
297 case TOK_HOOK:
298 case TOK_COLON:
299 case TOK_OR:
300 case TOK_AND:
301 case TOK_BITOR:
302 case TOK_BITXOR:
303 case TOK_BITAND:
304 case TOK_EQOP:
305 case TOK_RELOP:
306 case TOK_SHOP:
307 case TOK_PLUS:
308 case TOK_MINUS:
309 case TOK_STAR:
310 case TOK_DIVOP:
311 class = CLASS_SYMBOL;
312 token_stream->flags |= TSF_OPERAND;
313 break;
314 case TOK_UNARYOP:
315 switch (t.t_op) {
316 case JSOP_NEG:
317 case JSOP_POS:
318 case JSOP_NOT:
319 case JSOP_BITNOT:
320 class = CLASS_SYMBOL;
321 token_stream->flags |= TSF_OPERAND;
322 break;
323 case JSOP_TYPEOF:
324 class = CLASS_KEYWORD;
325 token_stream->flags |= TSF_OPERAND;
326 break;
327 case JSOP_VOID:
328 class = CLASS_TYPE;
329 token_stream->flags |= TSF_OPERAND;
330 break;
331 default:
332 abort();
333 }
334 break;
335 case TOK_INC:
336 case TOK_DEC:
337 case TOK_DOT:
338 case TOK_LB:
339 class = CLASS_SYMBOL;
340 token_stream->flags |= TSF_OPERAND;
341 break;
342 case TOK_RB:
343 class = CLASS_SYMBOL;
344 token_stream->flags &= ~TSF_OPERAND;
345 break;
346 case TOK_LC:
347 class = CLASS_CBRACKET;
348 token_stream->flags |= TSF_OPERAND;
349 break;
350 case TOK_RC:
351 class = CLASS_CBRACKET;
352 token_stream->flags &= ~TSF_OPERAND;
353 break;
354 case TOK_LP:
355 class = CLASS_SYMBOL;
356 token_stream->flags |= TSF_OPERAND;
357 break;
358 case TOK_RP:
359 class = CLASS_SYMBOL;
360 token_stream->flags &= ~TSF_OPERAND;
361 break;
362 case TOK_NAME:
363 class = CLASS_NONE;
364 token_stream->flags &= ~TSF_OPERAND;
365 if (js_PeekToken(context, token_stream) == TOK_LP) {
366 /* function */
367 class = CLASS_NONE;
368 }
369 break;
370 case TOK_NUMBER:
371 class = CLASS_NUMBER;
372 token_stream->flags &= ~TSF_OPERAND;
373 break;
374 case TOK_STRING:
375 class = CLASS_STRING;
376 token_stream->flags &= ~TSF_OPERAND;
377 break;
378 case TOK_OBJECT:
379 class = CLASS_REGEXP;
380 token_stream->flags &= ~TSF_OPERAND;
381 break;
382 case TOK_PRIMARY:
383 switch (t.t_op) {
384 case JSOP_TRUE:
385 case JSOP_FALSE:
386 case JSOP_NULL:
387 case JSOP_THIS:
388 class = CLASS_KEYWORD;
389 token_stream->flags &= ~TSF_OPERAND;
390 break;
391 default:
392 abort();
393 }
394 break;
395 case TOK_FUNCTION:
396 class = CLASS_KEYWORD;
397 token_stream->flags |= TSF_OPERAND;
398 break;
399 case TOK_EXPORT:
400 case TOK_IMPORT:
401 abort();
402 break;
403 case TOK_IF:
404 case TOK_ELSE:
405 case TOK_SWITCH:
406 case TOK_CASE:
407 case TOK_DEFAULT:
408 case TOK_WHILE:
409 case TOK_DO:
410 case TOK_FOR:
411 case TOK_BREAK:
412 case TOK_CONTINUE:
413 case TOK_IN:
414 case TOK_VAR:
415 case TOK_WITH:
416 case TOK_RETURN:
417 case TOK_NEW:
418 case TOK_DELETE:
419 token_stream->flags |= TSF_OPERAND;
420 class = CLASS_KEYWORD;
421 break;
422 case TOK_DEFSHARP:
423 case TOK_USESHARP:
424 abort();
425 break;
426 case TOK_TRY:
427 case TOK_CATCH:
428 case TOK_FINALLY:
429 case TOK_THROW:
430 case TOK_INSTANCEOF:
431 case TOK_DEBUGGER:
432 token_stream->flags |= TSF_OPERAND;
433 class = CLASS_KEYWORD;
434 break;
435 case TOK_XMLSTAGO:
436 case TOK_XMLETAGO:
437 case TOK_XMLPTAGC:
438 case TOK_XMLTAGC:
439 case TOK_XMLNAME:
440 case TOK_XMLATTR:
441 case TOK_XMLSPACE:
442 case TOK_XMLTEXT:
443 case TOK_XMLCOMMENT:
444 case TOK_XMLCDATA:
445 case TOK_XMLPI:
446 case TOK_AT:
447 case TOK_DBLCOLON:
448 case TOK_ANYNAME:
449 case TOK_DBLDOT:
450 case TOK_FILTER:
451 case TOK_XMLELEM:
452 case TOK_XMLLIST:
453 case TOK_RESERVED:
454 case TOK_LIMIT:
455 abort();
456 break;
457 default:
458 abort();
459 break;
460 }
461
462 assert(t.pos.begin.lineno == t.pos.end.lineno);
463 if (t.pos.begin.index > t.pos.end.index) {
464 fatal("%s: script contains line with more than 65,535 characters", id);
465 }
466 for (uint16_t i = t.pos.begin.index; i < t.pos.end.index; i++) {
467 assert(character_offset < num_characters);
468 jschar c = characters[character_offset];
469 if (tt == TOK_STRING && c == '\\') {
470 output_character(c, CLASS_SPECIALCHAR);
471 character_offset++;
472 i++;
473 assert(character_offset < num_characters);
474 c = characters[character_offset];
475 output_character(c, CLASS_SPECIALCHAR);
476 character_offset++;
477 }
478 else {
479 output_character(c, class);
480 character_offset++;
481 }
482 }
483
484 line_num = t.pos.end.lineno;
485 column_num = t.pos.end.index;
486 }
487
488 if (current_class != CLASS_NONE) {
489 output_character('\n', CLASS_NONE);
490 }
491
492 /* cleanup */
493 JS_UNKEEP_ATOMS(cx->runtime);
494 context->fp = fp;
495 }

  ViewVC Help
Powered by ViewVC 1.1.24