/[jscoverage]/trunk/highlight.c
ViewVC logotype

Contents of /trunk/highlight.c

Parent Directory | Revision Log


Revision 179 - (show annotations)
Sun Sep 21 18:35:21 2008 UTC (11 years ago) by siliconforks
File MIME type: text/plain
File size: 13832 byte(s)
Do source code highlighting during instrumentation.

1 /*
2 highlight.c - JavaScript syntax highlighting
3 Copyright (C) 2008 siliconforks.com
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <config.h>
21
22 #include "highlight.h"
23
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include <jslock.h>
28 #include <jsscan.h>
29
30 #include "util.h"
31
/*
 * Highlighting category recorded for every character of the script.
 *
 * CLASS_NONE marks characters that are written out without any surrounding
 * <span>; every other value is translated to a CSS class name by
 * get_class_name().
 */
enum Class {
  CLASS_NONE = 0,        /* plain text: no markup */
  CLASS_COMMENT = 1,     /* line and block comments */
  CLASS_REGEXP = 2,      /* regular expression literals */
  CLASS_NUMBER = 3,      /* numeric literals */
  CLASS_STRING = 4,      /* string literals */
  CLASS_SPECIALCHAR = 5, /* backslash escapes inside string literals */
  CLASS_KEYWORD = 6,     /* keywords such as if, function, typeof */
  CLASS_TYPE = 7,        /* currently only used for void */
  CLASS_SYMBOL = 8,      /* operators and punctuation */
  CLASS_CBRACKET = 9     /* curly brackets */
};
44
45 static const char * get_class_name(enum Class class) {
46 switch (class) {
47 case CLASS_NONE:
48 abort();
49 break;
50 case CLASS_COMMENT:
51 return "c";
52 break;
53 case CLASS_REGEXP:
54 return "s";
55 break;
56 case CLASS_NUMBER:
57 return "s";
58 break;
59 case CLASS_STRING:
60 return "s";
61 break;
62 case CLASS_SPECIALCHAR:
63 return "t";
64 break;
65 case CLASS_KEYWORD:
66 return "k";
67 break;
68 case CLASS_TYPE:
69 return "k";
70 break;
71 case CLASS_SYMBOL:
72 return "k";
73 break;
74 case CLASS_CBRACKET:
75 return "k";
76 break;
77 default:
78 abort();
79 break;
80 }
81 }
82
83 static enum Class ** classes = NULL;
84 static jschar ** lines = NULL;
85
86 static uint16_t num_characters_in_line(jschar * line) {
87 uint16_t result = 0;
88 while (line[result] != '\0') {
89 result++;
90 }
91 return result;
92 }
93
94 static void mark_token_chars(enum Class class, uint16_t start_line, uint16_t start_column, uint16_t end_line, uint16_t end_column) {
95 for (uint16_t i = start_line; i <= end_line; i++) {
96 uint16_t c1 = i == start_line? start_column: 0;
97 uint16_t c2 = i == end_line? end_column: num_characters_in_line(lines[i - 1]);
98 for (uint16_t j = c1; j < c2; j++) {
99 classes[i - 1][j] = class;
100 }
101 }
102 }
103
104 static void mark_nontoken_chars(uint16_t start_line, uint16_t start_column, uint16_t end_line, uint16_t end_column) {
105 enum State {
106 STATE_NORMAL,
107 STATE_LINE_COMMENT,
108 STATE_MULTILINE_COMMENT
109 };
110
111 enum State state = STATE_NORMAL;
112 for (uint16_t i = start_line; i <= end_line; i++) {
113 uint16_t c1 = i == start_line? start_column: 0;
114 uint16_t c2 = i == end_line? end_column: num_characters_in_line(lines[i - 1]);
115 for (uint16_t j = c1; j < c2; j++) {
116 jschar c = lines[i - 1][j];
117 switch (state) {
118 case STATE_NORMAL:
119 if (c == '/' && j + 1 < c2 && lines[i - 1][j + 1] == '/') {
120 state = STATE_LINE_COMMENT;
121 classes[i - 1][j] = CLASS_COMMENT;
122 }
123 else if (c == '/' && j + 1 < c2 && lines[i - 1][j + 1] == '*') {
124 state = STATE_MULTILINE_COMMENT;
125 classes[i - 1][j] = CLASS_COMMENT;
126 j++;
127 classes[i - 1][j] = CLASS_COMMENT;
128 }
129 else {
130 classes[i - 1][j] = CLASS_NONE;
131 }
132 break;
133 case STATE_LINE_COMMENT:
134 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
135 state = STATE_NORMAL;
136 classes[i - 1][j] = CLASS_NONE;
137 }
138 else {
139 classes[i - 1][j] = CLASS_COMMENT;
140 }
141 break;
142 case STATE_MULTILINE_COMMENT:
143 classes[i - 1][j] = CLASS_COMMENT;
144 if (c == '*' && j + 1 < c2 && lines[i - 1][j + 1] == '/') {
145 j++;
146 classes[i - 1][j] = CLASS_COMMENT;
147 state = STATE_NORMAL;
148 }
149 break;
150 }
151 }
152 /* end of the line */
153 if (state == STATE_LINE_COMMENT) {
154 state = STATE_NORMAL;
155 }
156 }
157 }
158
159 void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
160 /* count the lines - see GetChar in jsscan.c */
161 size_t i = 0;
162 uint16_t num_lines = 0;
163 while (i < num_characters) {
164 if (num_lines == UINT16_MAX) {
165 fatal("%s: script has more than 65535 lines", id);
166 }
167 num_lines++;
168 jschar c;
169 while (i < num_characters) {
170 c = characters[i];
171 if (c == '\0') {
172 fatal("%s: script contains NULL character", id);
173 }
174 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
175 break;
176 }
177 i++;
178 }
179 if (i < num_characters) {
180 i++;
181 if (c == '\r' && i < num_characters && characters[i] == '\n') {
182 i++;
183 }
184 }
185 }
186
187 lines = xnew(jschar *, num_lines);
188 classes = xnew(enum Class *, num_lines);
189
190 uint16_t line_num = 0;
191 i = 0;
192 while (i < num_characters) {
193 size_t line_start = i;
194 jschar c;
195 while (i < num_characters) {
196 c = characters[i];
197 if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
198 break;
199 }
200 i++;
201 }
202 size_t line_end = i;
203 if (i < num_characters) {
204 i++;
205 if (c == '\r' && i < num_characters && characters[i] == '\n') {
206 i++;
207 }
208 }
209 size_t line_length = line_end - line_start;
210 if (line_length >= UINT16_MAX) {
211 fatal("%s: script has line with 65535 characters or more", id);
212 }
213 jschar * line = xnew(jschar, line_length + 1);
214 memcpy(line, characters + line_start, sizeof(jschar) * line_length);
215 line[line_length] = '\0';
216 lines[line_num] = line;
217 classes[line_num] = xnew(enum Class, line_length);
218 line_num++;
219 }
220
221 /* tokenize the JavaScript */
222 JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);
223 if (token_stream == NULL) {
224 fatal("cannot create token stream from JavaScript file %s", id);
225 }
226
227 /* see js_ParseTokenStream in jsparse.c */
228 JSObject * chain = NULL;
229 JSContext * cx = context;
230 JSStackFrame *fp, frame;
231
232 /*
233 * Push a compiler frame if we have no frames, or if the top frame is a
234 * lightweight function activation, or if its scope chain doesn't match
235 * the one passed to us.
236 */
237 fp = cx->fp;
238 if (!fp || !fp->varobj || fp->scopeChain != chain) {
239 memset(&frame, 0, sizeof frame);
240 frame.varobj = frame.scopeChain = chain;
241 if (cx->options & JSOPTION_VAROBJFIX) {
242 while ((chain = JS_GetParent(cx, chain)) != NULL)
243 frame.varobj = chain;
244 }
245 frame.down = fp;
246 if (fp)
247 frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);
248 cx->fp = &frame;
249 }
250
251 /*
252 * Protect atoms from being collected by a GC activation, which might
253 * - nest on this thread due to out of memory (the so-called "last ditch"
254 * GC attempted within js_NewGCThing), or
255 * - run for any reason on another thread if this thread is suspended on
256 * an object lock before it finishes generating bytecode into a script
257 * protected from the GC by a root or a stack frame reference.
258 */
259 JS_KEEP_ATOMS(cx->runtime);
260
261 line_num = 1;
262 uint16_t column_num = 0;
263 for (;;) {
264 JSTokenType tt = js_GetToken(context, token_stream);
265
266 if (tt == TOK_ERROR) {
267 fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
268 }
269
270 if (tt == TOK_EOF) {
271 /* it seems t.pos is invalid for TOK_EOF??? */
272 /* mark the remaining chars */
273 if (num_lines == 0) {
274 break;
275 }
276 uint16_t end_line = num_lines;
277 uint16_t end_column = num_characters_in_line(lines[num_lines - 1]);
278 mark_nontoken_chars(line_num, column_num, end_line, end_column);
279 break;
280 }
281
282 /* mark the chars before the token */
283 JSToken t = CURRENT_TOKEN(token_stream);
284 mark_nontoken_chars(line_num, column_num, t.pos.begin.lineno, t.pos.begin.index);
285
286 /* mark the token */
287 enum Class class;
288 switch (tt) {
289 case TOK_ERROR:
290 case TOK_EOF:
291 abort();
292 case TOK_EOL:
293 class = CLASS_NONE;
294 token_stream->flags |= TSF_OPERAND;
295 break;
296 case TOK_SEMI:
297 case TOK_COMMA:
298 case TOK_ASSIGN:
299 case TOK_HOOK:
300 case TOK_COLON:
301 case TOK_OR:
302 case TOK_AND:
303 case TOK_BITOR:
304 case TOK_BITXOR:
305 case TOK_BITAND:
306 case TOK_EQOP:
307 case TOK_RELOP:
308 case TOK_SHOP:
309 case TOK_PLUS:
310 case TOK_MINUS:
311 case TOK_STAR:
312 case TOK_DIVOP:
313 class = CLASS_SYMBOL;
314 token_stream->flags |= TSF_OPERAND;
315 break;
316 case TOK_UNARYOP:
317 switch (t.t_op) {
318 case JSOP_NEG:
319 case JSOP_POS:
320 case JSOP_NOT:
321 case JSOP_BITNOT:
322 class = CLASS_SYMBOL;
323 token_stream->flags |= TSF_OPERAND;
324 break;
325 case JSOP_TYPEOF:
326 class = CLASS_KEYWORD;
327 token_stream->flags |= TSF_OPERAND;
328 break;
329 case JSOP_VOID:
330 class = CLASS_TYPE;
331 token_stream->flags |= TSF_OPERAND;
332 break;
333 default:
334 abort();
335 }
336 break;
337 case TOK_INC:
338 case TOK_DEC:
339 case TOK_DOT:
340 case TOK_LB:
341 class = CLASS_SYMBOL;
342 token_stream->flags |= TSF_OPERAND;
343 break;
344 case TOK_RB:
345 class = CLASS_SYMBOL;
346 token_stream->flags &= ~TSF_OPERAND;
347 break;
348 case TOK_LC:
349 class = CLASS_CBRACKET;
350 token_stream->flags |= TSF_OPERAND;
351 break;
352 case TOK_RC:
353 class = CLASS_CBRACKET;
354 token_stream->flags &= ~TSF_OPERAND;
355 break;
356 case TOK_LP:
357 class = CLASS_SYMBOL;
358 token_stream->flags |= TSF_OPERAND;
359 break;
360 case TOK_RP:
361 class = CLASS_SYMBOL;
362 token_stream->flags &= ~TSF_OPERAND;
363 break;
364 case TOK_NAME:
365 class = CLASS_NONE;
366 token_stream->flags &= ~TSF_OPERAND;
367 if (js_PeekToken(context, token_stream) == TOK_LP) {
368 /* function */
369 class = CLASS_NONE;
370 }
371 break;
372 case TOK_NUMBER:
373 class = CLASS_NUMBER;
374 token_stream->flags &= ~TSF_OPERAND;
375 break;
376 case TOK_STRING:
377 class = CLASS_STRING;
378 token_stream->flags &= ~TSF_OPERAND;
379 break;
380 case TOK_OBJECT:
381 class = CLASS_REGEXP;
382 token_stream->flags &= ~TSF_OPERAND;
383 break;
384 case TOK_PRIMARY:
385 switch (t.t_op) {
386 case JSOP_TRUE:
387 case JSOP_FALSE:
388 case JSOP_NULL:
389 case JSOP_THIS:
390 class = CLASS_KEYWORD;
391 token_stream->flags &= ~TSF_OPERAND;
392 break;
393 default:
394 abort();
395 }
396 break;
397 case TOK_FUNCTION:
398 class = CLASS_KEYWORD;
399 token_stream->flags |= TSF_OPERAND;
400 break;
401 case TOK_EXPORT:
402 case TOK_IMPORT:
403 abort();
404 break;
405 case TOK_IF:
406 case TOK_ELSE:
407 case TOK_SWITCH:
408 case TOK_CASE:
409 case TOK_DEFAULT:
410 case TOK_WHILE:
411 case TOK_DO:
412 case TOK_FOR:
413 case TOK_BREAK:
414 case TOK_CONTINUE:
415 case TOK_IN:
416 case TOK_VAR:
417 case TOK_WITH:
418 case TOK_RETURN:
419 case TOK_NEW:
420 case TOK_DELETE:
421 token_stream->flags |= TSF_OPERAND;
422 class = CLASS_KEYWORD;
423 break;
424 case TOK_DEFSHARP:
425 case TOK_USESHARP:
426 abort();
427 break;
428 case TOK_TRY:
429 case TOK_CATCH:
430 case TOK_FINALLY:
431 case TOK_THROW:
432 case TOK_INSTANCEOF:
433 case TOK_DEBUGGER:
434 token_stream->flags |= TSF_OPERAND;
435 class = CLASS_KEYWORD;
436 break;
437 case TOK_XMLSTAGO:
438 case TOK_XMLETAGO:
439 case TOK_XMLPTAGC:
440 case TOK_XMLTAGC:
441 case TOK_XMLNAME:
442 case TOK_XMLATTR:
443 case TOK_XMLSPACE:
444 case TOK_XMLTEXT:
445 case TOK_XMLCOMMENT:
446 case TOK_XMLCDATA:
447 case TOK_XMLPI:
448 case TOK_AT:
449 case TOK_DBLCOLON:
450 case TOK_ANYNAME:
451 case TOK_DBLDOT:
452 case TOK_FILTER:
453 case TOK_XMLELEM:
454 case TOK_XMLLIST:
455 case TOK_RESERVED:
456 case TOK_LIMIT:
457 abort();
458 break;
459 default:
460 abort();
461 break;
462 }
463 mark_token_chars(class, t.pos.begin.lineno, t.pos.begin.index, t.pos.end.lineno, t.pos.end.index);
464 if (tt == TOK_STRING) {
465 for (uint16_t i = t.pos.begin.index + 1; i < t.pos.end.index - 1; i++) {
466 jschar c = lines[t.pos.begin.lineno - 1][i];
467 if (c == '\\') {
468 mark_token_chars(CLASS_SPECIALCHAR, t.pos.begin.lineno, i, t.pos.begin.lineno, i + 2);
469 i++;
470 }
471 }
472 }
473
474 line_num = t.pos.end.lineno;
475 column_num = t.pos.end.index;
476 }
477
478 /* output the highlighted code */
479 enum Class class = CLASS_NONE;
480 for (uint16_t i = 0; i < num_lines; i++) {
481 uint16_t length = num_characters_in_line(lines[i]);
482 for (uint16_t j = 0; j < length; j++) {
483 jschar c = lines[i][j];
484 if (classes[i][j] != class) {
485 if (class != CLASS_NONE) {
486 Stream_write_string(output, "</span>");
487 }
488 class = classes[i][j];
489 if (class != CLASS_NONE) {
490 Stream_printf(output, "<span class=\"%s\">", get_class_name(class));
491 }
492 }
493 if (c == '&') {
494 Stream_write_string(output, "&amp;");
495 }
496 else if (c == '<') {
497 Stream_write_string(output, "&lt;");
498 }
499 else if (c == '>') {
500 Stream_write_string(output, "&gt;");
501 }
502 else if (c == '\t' || (32 <= c && c <= 126)) {
503 Stream_write_char(output, c);
504 }
505 else {
506 Stream_printf(output, "&#%d;", c);
507 }
508 }
509 if (class != CLASS_NONE) {
510 Stream_write_string(output, "</span>");
511 class = CLASS_NONE;
512 }
513 Stream_write_char(output, '\n');
514 }
515
516 for (uint16_t i = 0; i < num_lines; i++) {
517 free(lines[i]);
518 free(classes[i]);
519 }
520 free(lines);
521 free(classes);
522
523 /* cleanup */
524 JS_UNKEEP_ATOMS(cx->runtime);
525 context->fp = fp;
526 }

  ViewVC Help
Powered by ViewVC 1.1.24