/[jscoverage]/trunk/highlight.c
ViewVC logotype

Diff of /trunk/highlight.c

Parent Directory | Revision Log | View Patch

revision 179 by siliconforks, Sun Sep 21 18:35:21 2008 UTC | revision 347 by siliconforks, Fri Oct 24 16:16:41 2008 UTC
# Line 21  Line 21 
21    
22  #include "highlight.h"  #include "highlight.h"
23    
24    #include <assert.h>
25  #include <stdlib.h>  #include <stdlib.h>
26  #include <string.h>  #include <string.h>
27    
# Line 80  Line 81 
81    }    }
82  }  }
83    
84  static enum Class ** classes = NULL;  static const char * g_id;
85  static jschar ** lines = NULL;  static const jschar * g_characters;
86    static size_t g_num_characters;
87    static Stream * g_output;
88    static size_t character_offset;
89    static uint16_t line_num;
90    static uint16_t column_num;
91    static enum Class current_class;
92    
93    static void output_character(jschar c, enum Class class) {
94      if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
95        class = CLASS_NONE;
96      }
97    
98      if (class != current_class) {
99        /* output the end tag */
100        if (current_class != CLASS_NONE) {
101          Stream_write_string(g_output, "</span>");
102        }
103    
104  static uint16_t num_characters_in_line(jschar * line) {      current_class = class;
105    uint16_t result = 0;  
106    while (line[result] != '\0') {      /* output the start tag */
107      result++;      if (current_class != CLASS_NONE) {
108          Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));
109        }
110    }    }
   return result;  
 }  
111    
112  static void mark_token_chars(enum Class class, uint16_t start_line, uint16_t start_column, uint16_t end_line, uint16_t end_column) {    if (column_num == UINT16_MAX) {
113    for (uint16_t i = start_line; i <= end_line; i++) {      fatal("%s: script contains a line with more than 65,535 columns", g_id);
114      uint16_t c1 = i == start_line? start_column: 0;    }
115      uint16_t c2 = i == end_line? end_column: num_characters_in_line(lines[i - 1]);    column_num++;
116      for (uint16_t j = c1; j < c2; j++) {    switch (c) {
117        classes[i - 1][j] = class;    case '&':
118        Stream_write_string(g_output, "&amp;");
119        break;
120      case '<':
121        Stream_write_string(g_output, "&lt;");
122        break;
123      case '>':
124        Stream_write_string(g_output, "&gt;");
125        break;
126      case '\t':
127        Stream_write_char(g_output, c);
128        break;
129      case '\r':
130      case '\n':
131      case 0x2028:
132      case 0x2029:
133        if (c == '\r' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '\n') {
134          break;
135        }
136        Stream_write_char(g_output, '\n');
137        column_num = 0;
138        if (line_num == UINT16_MAX) {
139          fatal("%s: script contains more than 65,535 lines", g_id);
140        }
141        line_num++;
142        break;
143      default:
144        if (32 <= c && c <= 126) {
145          Stream_write_char(g_output, c);
146        }
147        else {
148          Stream_printf(g_output, "&#%d;", c);
149      }      }
150        break;
151    }    }
152      character_offset++;
153  }  }
154    
155  static void mark_nontoken_chars(uint16_t start_line, uint16_t start_column, uint16_t end_line, uint16_t end_column) {  static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {
156    enum State {    enum State {
157      STATE_NORMAL,      STATE_NORMAL,
158      STATE_LINE_COMMENT,      STATE_LINE_COMMENT,
# Line 109  Line 160 
160    };    };
161    
162    enum State state = STATE_NORMAL;    enum State state = STATE_NORMAL;
163    for (uint16_t i = start_line; i <= end_line; i++) {    while (character_offset < g_num_characters) {
164      uint16_t c1 = i == start_line? start_column: 0;      if (end_line != 0 && line_num > end_line) {
165      uint16_t c2 = i == end_line? end_column: num_characters_in_line(lines[i - 1]);        break;
     for (uint16_t j = c1; j < c2; j++) {  
       jschar c = lines[i - 1][j];  
       switch (state) {  
       case STATE_NORMAL:  
         if (c == '/' && j + 1 < c2 && lines[i - 1][j + 1] == '/') {  
           state = STATE_LINE_COMMENT;  
           classes[i - 1][j] = CLASS_COMMENT;  
         }  
         else if (c == '/' && j + 1 < c2 && lines[i - 1][j + 1] == '*') {  
           state = STATE_MULTILINE_COMMENT;  
           classes[i - 1][j] = CLASS_COMMENT;  
           j++;  
           classes[i - 1][j] = CLASS_COMMENT;  
         }  
         else {  
           classes[i - 1][j] = CLASS_NONE;  
         }  
         break;  
       case STATE_LINE_COMMENT:  
         if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {  
           state = STATE_NORMAL;  
           classes[i - 1][j] = CLASS_NONE;  
         }  
         else {  
           classes[i - 1][j] = CLASS_COMMENT;  
         }  
         break;  
       case STATE_MULTILINE_COMMENT:  
         classes[i - 1][j] = CLASS_COMMENT;  
         if (c == '*' && j + 1 < c2 && lines[i - 1][j + 1] == '/') {  
           j++;  
           classes[i - 1][j] = CLASS_COMMENT;  
           state = STATE_NORMAL;  
         }  
         break;  
       }  
166      }      }
167      /* end of the line */      else if (line_num == end_line && column_num >= end_column) {
168      if (state == STATE_LINE_COMMENT) {        break;
       state = STATE_NORMAL;  
169      }      }
   }  
 }  
170    
171  void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {      jschar c = g_characters[character_offset];
172    /* count the lines - see GetChar in jsscan.c */      if (c == '\0') {
173    size_t i = 0;        fatal("%s: script contains NULL character", g_id);
174    uint16_t num_lines = 0;      }
175    while (i < num_characters) {  
176      if (num_lines == UINT16_MAX) {      switch (state) {
177        fatal("%s: script has more than 65535 lines", id);      case STATE_NORMAL:
178      }        if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
179      num_lines++;          state = STATE_LINE_COMMENT;
180      jschar c;        }
181      while (i < num_characters) {        else if (c == '/' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '*') {
182        c = characters[i];          state = STATE_MULTILINE_COMMENT;
183        if (c == '\0') {          output_character('/', CLASS_COMMENT);
184          fatal("%s: script contains NULL character", id);          output_character('*', CLASS_COMMENT);
185            continue;
186        }        }
187          break;
188        case STATE_LINE_COMMENT:
189        if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {        if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
190          break;          state = STATE_NORMAL;
191        }        }
192        i++;        break;
193      }      case STATE_MULTILINE_COMMENT:
194      if (i < num_characters) {        if (c == '*' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '/') {
195        i++;          output_character('*', CLASS_COMMENT);
196        if (c == '\r' && i < num_characters && characters[i] == '\n') {          output_character('/', CLASS_COMMENT);
197          i++;          state = STATE_NORMAL;
198            continue;
199        }        }
200          break;
201      }      }
   }  
   
   lines = xnew(jschar *, num_lines);  
   classes = xnew(enum Class *, num_lines);  
202    
203    uint16_t line_num = 0;      if (state == STATE_NORMAL) {
204    i = 0;        output_character(c, CLASS_NONE);
205    while (i < num_characters) {      }
206      size_t line_start = i;      else {
207      jschar c;        output_character(c, CLASS_COMMENT);
     while (i < num_characters) {  
       c = characters[i];  
       if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {  
         break;  
       }  
       i++;  
208      }      }
     size_t line_end = i;  
     if (i < num_characters) {  
       i++;  
       if (c == '\r' && i < num_characters && characters[i] == '\n') {  
         i++;  
       }  
     }  
     size_t line_length = line_end - line_start;  
     if (line_length >= UINT16_MAX) {  
       fatal("%s: script has line with 65535 characters or more", id);  
     }  
     jschar * line = xnew(jschar, line_length + 1);  
     memcpy(line, characters + line_start, sizeof(jschar) * line_length);  
     line[line_length] = '\0';  
     lines[line_num] = line;  
     classes[line_num] = xnew(enum Class, line_length);  
     line_num++;  
209    }    }
210    }
211    
212    void jscoverage_highlight_js(JSContext * context, const char * id, const jschar * characters, size_t num_characters, Stream * output) {
213      g_id = id;
214      g_characters = characters;
215      g_num_characters = num_characters;
216      g_output = output;
217    
218      character_offset = 0;
219      line_num = 1;
220      column_num = 0;
221      current_class = CLASS_NONE;
222    
223    /* tokenize the JavaScript */    /* tokenize the JavaScript */
224    JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);    JSTokenStream token_stream;
225    if (token_stream == NULL) {    if (! js_InitTokenStream(context, &token_stream, characters, num_characters, NULL, NULL, 1)) {
226      fatal("cannot create token stream from JavaScript file %s", id);      fatal("cannot create token stream from JavaScript file %s", id);
227    }    }
228    
     /* see js_ParseTokenStream in jsparse.c */  
     JSObject * chain = NULL;  
     JSContext * cx = context;  
     JSStackFrame *fp, frame;  
   
     /*  
      * Push a compiler frame if we have no frames, or if the top frame is a  
      * lightweight function activation, or if its scope chain doesn't match  
      * the one passed to us.  
      */  
     fp = cx->fp;  
     if (!fp || !fp->varobj || fp->scopeChain != chain) {  
         memset(&frame, 0, sizeof frame);  
         frame.varobj = frame.scopeChain = chain;  
         if (cx->options & JSOPTION_VAROBJFIX) {  
             while ((chain = JS_GetParent(cx, chain)) != NULL)  
                 frame.varobj = chain;  
         }  
         frame.down = fp;  
         if (fp)  
             frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);  
         cx->fp = &frame;  
     }  
   
     /*  
      * Protect atoms from being collected by a GC activation, which might  
      * - nest on this thread due to out of memory (the so-called "last ditch"  
      *   GC attempted within js_NewGCThing), or  
      * - run for any reason on another thread if this thread is suspended on  
      *   an object lock before it finishes generating bytecode into a script  
      *   protected from the GC by a root or a stack frame reference.  
      */  
     JS_KEEP_ATOMS(cx->runtime);  
   
   line_num = 1;  
   uint16_t column_num = 0;  
229    for (;;) {    for (;;) {
230      JSTokenType tt = js_GetToken(context, token_stream);      JSTokenType tt = js_GetToken(context, &token_stream);
231    
232      if (tt == TOK_ERROR) {      if (tt == TOK_ERROR) {
233        fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);        fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
234      }      }
235    
236      if (tt == TOK_EOF) {      if (tt == TOK_EOF) {
237        /* it seems t.pos is invalid for TOK_EOF??? */        mark_nontoken_chars(0, 0);
       /* mark the remaining chars */  
       if (num_lines == 0) {  
         break;  
       }  
       uint16_t end_line = num_lines;  
       uint16_t end_column = num_characters_in_line(lines[num_lines - 1]);  
       mark_nontoken_chars(line_num, column_num, end_line, end_column);  
238        break;        break;
239      }      }
240    
241      /* mark the chars before the token */      /* mark the chars before the token */
242      JSToken t = CURRENT_TOKEN(token_stream);      JSToken t = CURRENT_TOKEN(&token_stream);
243      mark_nontoken_chars(line_num, column_num, t.pos.begin.lineno, t.pos.begin.index);      mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
244    
245      /* mark the token */      /* mark the token */
246      enum Class class;      enum Class class;
# Line 291  Line 250 
250        abort();        abort();
251      case TOK_EOL:      case TOK_EOL:
252        class = CLASS_NONE;        class = CLASS_NONE;
253        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
254        break;        break;
255      case TOK_SEMI:      case TOK_SEMI:
256      case TOK_COMMA:      case TOK_COMMA:
# Line 311  Line 270 
270      case TOK_STAR:      case TOK_STAR:
271      case TOK_DIVOP:      case TOK_DIVOP:
272        class = CLASS_SYMBOL;        class = CLASS_SYMBOL;
273        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
274        break;        break;
275      case TOK_UNARYOP:      case TOK_UNARYOP:
276        switch (t.t_op) {        switch (t.t_op) {
# Line 320  Line 279 
279        case JSOP_NOT:        case JSOP_NOT:
280        case JSOP_BITNOT:        case JSOP_BITNOT:
281          class = CLASS_SYMBOL;          class = CLASS_SYMBOL;
282          token_stream->flags |= TSF_OPERAND;          token_stream.flags |= TSF_OPERAND;
283          break;          break;
284        case JSOP_TYPEOF:        case JSOP_TYPEOF:
285          class = CLASS_KEYWORD;          class = CLASS_KEYWORD;
286          token_stream->flags |= TSF_OPERAND;          token_stream.flags |= TSF_OPERAND;
287          break;          break;
288        case JSOP_VOID:        case JSOP_VOID:
289          class = CLASS_TYPE;          class = CLASS_TYPE;
290          token_stream->flags |= TSF_OPERAND;          token_stream.flags |= TSF_OPERAND;
291          break;          break;
292        default:        default:
293          abort();          abort();
# Line 336  Line 295 
295        break;        break;
296      case TOK_INC:      case TOK_INC:
297      case TOK_DEC:      case TOK_DEC:
298          class = CLASS_SYMBOL;
299          /* token_stream.flags does not change w.r.t. TSF_OPERAND */
300          break;
301      case TOK_DOT:      case TOK_DOT:
302      case TOK_LB:      case TOK_LB:
303        class = CLASS_SYMBOL;        class = CLASS_SYMBOL;
304        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
305        break;        break;
306      case TOK_RB:      case TOK_RB:
307        class = CLASS_SYMBOL;        class = CLASS_SYMBOL;
308        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
309        break;        break;
310      case TOK_LC:      case TOK_LC:
311        class = CLASS_CBRACKET;        class = CLASS_CBRACKET;
312        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
313        break;        break;
314      case TOK_RC:      case TOK_RC:
315        class = CLASS_CBRACKET;        class = CLASS_CBRACKET;
316        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
317        break;        break;
318      case TOK_LP:      case TOK_LP:
319        class = CLASS_SYMBOL;        class = CLASS_SYMBOL;
320        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
321        break;        break;
322      case TOK_RP:      case TOK_RP:
323        class = CLASS_SYMBOL;        class = CLASS_SYMBOL;
324        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
325        break;        break;
326      case TOK_NAME:      case TOK_NAME:
327        class = CLASS_NONE;        class = CLASS_NONE;
328        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
329        if (js_PeekToken(context, token_stream) == TOK_LP) {        if (js_PeekToken(context, &token_stream) == TOK_LP) {
330          /* function */          /* function */
331          class = CLASS_NONE;          class = CLASS_NONE;
332        }        }
333        break;        break;
334      case TOK_NUMBER:      case TOK_NUMBER:
335        class = CLASS_NUMBER;        class = CLASS_NUMBER;
336        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
337        break;        break;
338      case TOK_STRING:      case TOK_STRING:
339        class = CLASS_STRING;        class = CLASS_STRING;
340        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
341        break;        break;
342      case TOK_OBJECT:      case TOK_REGEXP:
343        class = CLASS_REGEXP;        class = CLASS_REGEXP;
344        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
345        break;        break;
346      case TOK_PRIMARY:      case TOK_PRIMARY:
347        switch (t.t_op) {        switch (t.t_op) {
# Line 388  Line 350 
350        case JSOP_NULL:        case JSOP_NULL:
351        case JSOP_THIS:        case JSOP_THIS:
352          class = CLASS_KEYWORD;          class = CLASS_KEYWORD;
353          token_stream->flags &= ~TSF_OPERAND;          token_stream.flags &= ~TSF_OPERAND;
354          break;          break;
355        default:        default:
356          abort();          abort();
# Line 396  Line 358 
358        break;        break;
359      case TOK_FUNCTION:      case TOK_FUNCTION:
360        class = CLASS_KEYWORD;        class = CLASS_KEYWORD;
361        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
       break;  
     case TOK_EXPORT:  
     case TOK_IMPORT:  
       abort();  
362        break;        break;
363      case TOK_IF:      case TOK_IF:
364      case TOK_ELSE:      case TOK_ELSE:
# Line 418  Line 376 
376      case TOK_RETURN:      case TOK_RETURN:
377      case TOK_NEW:      case TOK_NEW:
378      case TOK_DELETE:      case TOK_DELETE:
379        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
380        class = CLASS_KEYWORD;        class = CLASS_KEYWORD;
381        break;        break;
382      case TOK_DEFSHARP:      case TOK_DEFSHARP:
# Line 431  Line 389 
389      case TOK_THROW:      case TOK_THROW:
390      case TOK_INSTANCEOF:      case TOK_INSTANCEOF:
391      case TOK_DEBUGGER:      case TOK_DEBUGGER:
392        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
393        class = CLASS_KEYWORD;        class = CLASS_KEYWORD;
394        break;        break;
395      case TOK_XMLSTAGO:      case TOK_XMLSTAGO:
# Line 452  Line 410 
410      case TOK_FILTER:      case TOK_FILTER:
411      case TOK_XMLELEM:      case TOK_XMLELEM:
412      case TOK_XMLLIST:      case TOK_XMLLIST:
413          abort();
414          break;
415        case TOK_YIELD:
416          token_stream.flags |= TSF_OPERAND;
417          class = CLASS_KEYWORD;
418          break;
419        case TOK_ARRAYCOMP:
420        case TOK_ARRAYPUSH:
421        case TOK_LEXICALSCOPE:
422          abort();
423          break;
424        case TOK_LET:
425          token_stream.flags |= TSF_OPERAND;
426          class = CLASS_KEYWORD;
427          break;
428        case TOK_BODY:
429      case TOK_RESERVED:      case TOK_RESERVED:
430      case TOK_LIMIT:      case TOK_LIMIT:
431        abort();        abort();
# Line 460  Line 434 
434        abort();        abort();
435        break;        break;
436      }      }
     mark_token_chars(class, t.pos.begin.lineno, t.pos.begin.index, t.pos.end.lineno, t.pos.end.index);  
     if (tt == TOK_STRING) {  
       for (uint16_t i = t.pos.begin.index + 1; i < t.pos.end.index - 1; i++) {  
         jschar c = lines[t.pos.begin.lineno - 1][i];  
         if (c == '\\') {  
           mark_token_chars(CLASS_SPECIALCHAR, t.pos.begin.lineno, i, t.pos.begin.lineno, i + 2);  
           i++;  
         }  
       }  
     }  
   
     line_num = t.pos.end.lineno;  
     column_num = t.pos.end.index;  
   }  
437    
438    /* output the highlighted code */      uint16_t start_line = t.pos.begin.lineno;
439    enum Class class = CLASS_NONE;      uint16_t end_line = t.pos.end.lineno;
440    for (uint16_t i = 0; i < num_lines; i++) {      uint16_t start_column = t.pos.begin.index;
441      uint16_t length = num_characters_in_line(lines[i]);      uint16_t end_column = t.pos.end.index;
442      for (uint16_t j = 0; j < length; j++) {      assert(line_num == start_line);
443        jschar c = lines[i][j];      assert(column_num == start_column);
444        if (classes[i][j] != class) {      if (start_line == end_line && start_column >= end_column) {
445          if (class != CLASS_NONE) {        fatal("%s: script contains line with more than 65,535 characters", id);
446            Stream_write_string(output, "</span>");      }
447          }      for (;;) {
448          class = classes[i][j];        assert(character_offset < num_characters);
449          if (class != CLASS_NONE) {        jschar c = characters[character_offset];
450            Stream_printf(output, "<span class=\"%s\">", get_class_name(class));        if (tt == TOK_STRING && c == '\\') {
451          }          output_character(c, CLASS_SPECIALCHAR);
452        }          assert(character_offset < num_characters);
453        if (c == '&') {          c = characters[character_offset];
454          Stream_write_string(output, "&amp;");          output_character(c, CLASS_SPECIALCHAR);
       }  
       else if (c == '<') {  
         Stream_write_string(output, "&lt;");  
455        }        }
456        else if (c == '>') {        else {
457          Stream_write_string(output, "&gt;");          output_character(c, class);
458        }        }
459        else if (c == '\t' || (32 <= c && c <= 126)) {  
460          Stream_write_char(output, c);        if (line_num > end_line) {
461            break;
462        }        }
463        else {        else if (line_num == end_line && column_num >= end_column) {
464          Stream_printf(output, "&#%d;", c);          break;
465        }        }
466      }      }
467      if (class != CLASS_NONE) {  
468        Stream_write_string(output, "</span>");      assert(line_num == end_line);
469        class = CLASS_NONE;      assert(column_num = end_column);
     }  
     Stream_write_char(output, '\n');  
470    }    }
471    
472    for (uint16_t i = 0; i < num_lines; i++) {    if (current_class != CLASS_NONE) {
473      free(lines[i]);      output_character('\n', CLASS_NONE);
     free(classes[i]);  
474    }    }
   free(lines);  
   free(classes);  
475    
476    /* cleanup */    js_CloseTokenStream(context, &token_stream);
   JS_UNKEEP_ATOMS(cx->runtime);  
   context->fp = fp;  
477  }  }

Legend:
Removed from v.179  
changed lines
  Added in v.347

  ViewVC Help
Powered by ViewVC 1.1.24