/[jscoverage]/trunk/highlight.cpp
ViewVC logotype

Diff of /trunk/highlight.cpp

Parent Directory | Revision Log | View Patch

trunk/highlight.c revision 215 by siliconforks, Fri Oct 3 02:26:18 2008 UTC | trunk/highlight.cpp revision 507 by siliconforks, Sun Jan 10 07:23:34 2010 UTC
# Line 1  Line 1 
1  /*  /*
2      highlight.c - JavaScript syntax highlighting      highlight.cpp - JavaScript syntax highlighting
3      Copyright (C) 2008 siliconforks.com      Copyright (C) 2008, 2009, 2010 siliconforks.com
4    
5      This program is free software; you can redistribute it and/or modify      This program is free software; you can redistribute it and/or modify
6      it under the terms of the GNU General Public License as published by      it under the terms of the GNU General Public License as published by
# Line 30  Line 30 
30    
31  #include "util.h"  #include "util.h"
32    
33    #ifndef UINT32_MAX
34    #define UINT32_MAX ((uint32_t) (-1))
35    #endif
36    
37  enum Class {  enum Class {
38    CLASS_NONE,    CLASS_NONE,
39    CLASS_COMMENT,    CLASS_COMMENT,
# Line 43  Line 47 
47    CLASS_CBRACKET    CLASS_CBRACKET
48  };  };
49    
50  static const char * get_class_name(enum Class class) {  static const char * get_class_name(enum Class c) {
51    switch (class) {    switch (c) {
52    case CLASS_NONE:    case CLASS_NONE:
53      abort();      abort();
54      break;      break;
# Line 86  Line 90 
90  static size_t g_num_characters;  static size_t g_num_characters;
91  static Stream * g_output;  static Stream * g_output;
92  static size_t character_offset;  static size_t character_offset;
93  static uint16_t line_num;  static uint32_t line_num;
94  static uint16_t column_num;  static uint32_t column_num;
95  static enum Class current_class;  static enum Class current_class;
96    
97  static void output_character(jschar c, enum Class class) {  static void output_character(jschar c, enum Class class_) {
98    if (class != current_class) {    if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {
99        class_ = CLASS_NONE;
100      }
101    
102      if (class_ != current_class) {
103      /* output the end tag */      /* output the end tag */
104      if (current_class != CLASS_NONE) {      if (current_class != CLASS_NONE) {
105        Stream_write_string(g_output, "</span>");        Stream_write_string(g_output, "</span>");
106      }      }
107    
108      current_class = class;      current_class = class_;
109    
110      /* output the start tag */      /* output the start tag */
111      if (current_class != CLASS_NONE) {      if (current_class != CLASS_NONE) {
112        Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class));        Stream_printf(g_output, "<span class=\"%s\">", get_class_name(class_));
113      }      }
114    }    }
115    
116      if (column_num == UINT32_MAX) {
117        fatal("%s: script contains a line with more than 65,535 columns", g_id);
118      }
119      column_num++;
120    switch (c) {    switch (c) {
121    case '&':    case '&':
122      Stream_write_string(g_output, "&amp;");      Stream_write_string(g_output, "&amp;");
# Line 116  Line 128 
128      Stream_write_string(g_output, "&gt;");      Stream_write_string(g_output, "&gt;");
129      break;      break;
130    case '\t':    case '\t':
   case '\n':  
131      Stream_write_char(g_output, c);      Stream_write_char(g_output, c);
132      break;      break;
133      case '\r':
134      case '\n':
135      case 0x2028:
136      case 0x2029:
137        if (c == '\r' && character_offset + 1 < g_num_characters && g_characters[character_offset + 1] == '\n') {
138          break;
139        }
140        Stream_write_char(g_output, '\n');
141        column_num = 0;
142        if (line_num == UINT32_MAX) {
143          fatal("%s: script contains more than 65,535 lines", g_id);
144        }
145        line_num++;
146        break;
147    default:    default:
148      if (32 <= c && c <= 126) {      if (32 <= c && c <= 126) {
149        Stream_write_char(g_output, c);        Stream_write_char(g_output, c);
# Line 128  Line 153 
153      }      }
154      break;      break;
155    }    }
156      character_offset++;
157  }  }
158    
159  static void mark_nontoken_chars(uint16_t end_line, uint16_t end_column) {  static void mark_nontoken_chars(uint32_t end_line, uint32_t end_column) {
160    enum State {    enum State {
161      STATE_NORMAL,      STATE_NORMAL,
162      STATE_LINE_COMMENT,      STATE_LINE_COMMENT,
# Line 160  Line 186 
186          state = STATE_MULTILINE_COMMENT;          state = STATE_MULTILINE_COMMENT;
187          output_character('/', CLASS_COMMENT);          output_character('/', CLASS_COMMENT);
188          output_character('*', CLASS_COMMENT);          output_character('*', CLASS_COMMENT);
         character_offset += 2;  
         if (column_num >= UINT16_MAX - 1) {  
           fatal("%s: script contains line with more than 65,535 characters", g_id);  
         }  
         column_num += 2;  
189          continue;          continue;
190        }        }
191        break;        break;
# Line 178  Line 199 
199          output_character('*', CLASS_COMMENT);          output_character('*', CLASS_COMMENT);
200          output_character('/', CLASS_COMMENT);          output_character('/', CLASS_COMMENT);
201          state = STATE_NORMAL;          state = STATE_NORMAL;
         character_offset += 2;  
         if (column_num >= UINT16_MAX - 1) {  
           fatal("%s: script contains line with more than 65,535 characters", g_id);  
         }  
         column_num += 2;  
202          continue;          continue;
203        }        }
204        break;        break;
205      }      }
206    
207      character_offset++;      if (state == STATE_NORMAL) {
208      if (c == '\r' || c == '\n' || c == 0x2028 || c == 0x2029) {        output_character(c, CLASS_NONE);
       if (line_num == UINT16_MAX) {  
         fatal("%s: script contains more than 65,535 lines", g_id);  
       }  
       line_num++;  
       column_num = 0;  
       if (c == '\r' && character_offset < g_num_characters && g_characters[character_offset] == '\n') {  
         character_offset++;  
       }  
       output_character('\n', CLASS_NONE);  
209      }      }
210      else {      else {
211        if (column_num == UINT16_MAX) {        output_character(c, CLASS_COMMENT);
         fatal("%s: script contains line with more than 65,535 characters", g_id);  
       }  
       column_num++;  
       if (state == STATE_NORMAL) {  
         output_character(c, CLASS_NONE);  
       }  
       else {  
         output_character(c, CLASS_COMMENT);  
       }  
212      }      }
213    }    }
214  }  }
# Line 227  Line 225 
225    current_class = CLASS_NONE;    current_class = CLASS_NONE;
226    
227    /* tokenize the JavaScript */    /* tokenize the JavaScript */
228    JSTokenStream * token_stream = js_NewTokenStream(context, characters, num_characters, NULL, 1, NULL);    JSTokenStream token_stream(context);
229    if (token_stream == NULL) {    if (! token_stream.init(context, characters, num_characters, NULL, NULL, 1)) {
230      fatal("cannot create token stream from JavaScript file %s", id);      fatal("cannot create token stream from JavaScript file %s", id);
231    }    }
232    
     /* see js_ParseTokenStream in jsparse.c */  
     JSObject * chain = NULL;  
     JSContext * cx = context;  
     JSStackFrame *fp, frame;  
   
     /*  
      * Push a compiler frame if we have no frames, or if the top frame is a  
      * lightweight function activation, or if its scope chain doesn't match  
      * the one passed to us.  
      */  
     fp = cx->fp;  
     if (!fp || !fp->varobj || fp->scopeChain != chain) {  
         memset(&frame, 0, sizeof frame);  
         frame.varobj = frame.scopeChain = chain;  
         if (cx->options & JSOPTION_VAROBJFIX) {  
             while ((chain = JS_GetParent(cx, chain)) != NULL)  
                 frame.varobj = chain;  
         }  
         frame.down = fp;  
         if (fp)  
             frame.flags = fp->flags & (JSFRAME_SPECIAL | JSFRAME_COMPILE_N_GO);  
         cx->fp = &frame;  
     }  
   
     /*  
      * Protect atoms from being collected by a GC activation, which might  
      * - nest on this thread due to out of memory (the so-called "last ditch"  
      *   GC attempted within js_NewGCThing), or  
      * - run for any reason on another thread if this thread is suspended on  
      *   an object lock before it finishes generating bytecode into a script  
      *   protected from the GC by a root or a stack frame reference.  
      */  
     JS_KEEP_ATOMS(cx->runtime);  
   
233    for (;;) {    for (;;) {
234      JSTokenType tt = js_GetToken(context, token_stream);      JSTokenType tt = js_GetToken(context, &token_stream);
235    
236      if (tt == TOK_ERROR) {      if (tt == TOK_ERROR) {
237        fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);        fatal("JavaScript parse error: %s: line = %d, col = %d\n", id, line_num, column_num);
# Line 279  Line 243 
243      }      }
244    
245      /* mark the chars before the token */      /* mark the chars before the token */
246      JSToken t = CURRENT_TOKEN(token_stream);      JSToken t = CURRENT_TOKEN(&token_stream);
247      mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);      mark_nontoken_chars(t.pos.begin.lineno, t.pos.begin.index);
248    
249      /* mark the token */      /* mark the token */
250      enum Class class;      enum Class class_;
251      switch (tt) {      switch (tt) {
252      case TOK_ERROR:      case TOK_ERROR:
253      case TOK_EOF:      case TOK_EOF:
254        abort();        abort();
255      case TOK_EOL:      case TOK_EOL:
256        class = CLASS_NONE;        class_ = CLASS_NONE;
257        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
258        break;        break;
259      case TOK_SEMI:      case TOK_SEMI:
260      case TOK_COMMA:      case TOK_COMMA:
# Line 309  Line 273 
273      case TOK_MINUS:      case TOK_MINUS:
274      case TOK_STAR:      case TOK_STAR:
275      case TOK_DIVOP:      case TOK_DIVOP:
276        class = CLASS_SYMBOL;        class_ = CLASS_SYMBOL;
277        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
278        break;        break;
279      case TOK_UNARYOP:      case TOK_UNARYOP:
280        switch (t.t_op) {        switch (t.t_op) {
# Line 318  Line 282 
282        case JSOP_POS:        case JSOP_POS:
283        case JSOP_NOT:        case JSOP_NOT:
284        case JSOP_BITNOT:        case JSOP_BITNOT:
285          class = CLASS_SYMBOL;          class_ = CLASS_SYMBOL;
286          token_stream->flags |= TSF_OPERAND;          token_stream.flags |= TSF_OPERAND;
287          break;          break;
288        case JSOP_TYPEOF:        case JSOP_TYPEOF:
289          class = CLASS_KEYWORD;          class_ = CLASS_KEYWORD;
290          token_stream->flags |= TSF_OPERAND;          token_stream.flags |= TSF_OPERAND;
291          break;          break;
292        case JSOP_VOID:        case JSOP_VOID:
293          class = CLASS_TYPE;          class_ = CLASS_TYPE;
294          token_stream->flags |= TSF_OPERAND;          token_stream.flags |= TSF_OPERAND;
295          break;          break;
296        default:        default:
297          abort();          fatal_source(id, t.pos.begin.lineno, "unknown TOK_UNARYOP (%d)", t.t_op);
298            break;
299        }        }
300        break;        break;
301      case TOK_INC:      case TOK_INC:
302      case TOK_DEC:      case TOK_DEC:
303          class_ = CLASS_SYMBOL;
304          /* token_stream.flags does not change w.r.t. TSF_OPERAND */
305          break;
306      case TOK_DOT:      case TOK_DOT:
307      case TOK_LB:      case TOK_LB:
308        class = CLASS_SYMBOL;        class_ = CLASS_SYMBOL;
309        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
310        break;        break;
311      case TOK_RB:      case TOK_RB:
312        class = CLASS_SYMBOL;        class_ = CLASS_SYMBOL;
313        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
314        break;        break;
315      case TOK_LC:      case TOK_LC:
316        class = CLASS_CBRACKET;        class_ = CLASS_CBRACKET;
317        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
318        break;        break;
319      case TOK_RC:      case TOK_RC:
320        class = CLASS_CBRACKET;        class_ = CLASS_CBRACKET;
321        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
322        break;        break;
323      case TOK_LP:      case TOK_LP:
324        class = CLASS_SYMBOL;        class_ = CLASS_SYMBOL;
325        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
326        break;        break;
327      case TOK_RP:      case TOK_RP:
328        class = CLASS_SYMBOL;        class_ = CLASS_SYMBOL;
329        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
330        break;        break;
331      case TOK_NAME:      case TOK_NAME:
332        class = CLASS_NONE;        class_ = CLASS_NONE;
333        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
334        if (js_PeekToken(context, token_stream) == TOK_LP) {        if (js_PeekToken(context, &token_stream) == TOK_LP) {
335          /* function */          /* function */
336          class = CLASS_NONE;          class_ = CLASS_NONE;
337        }        }
338        break;        break;
339      case TOK_NUMBER:      case TOK_NUMBER:
340        class = CLASS_NUMBER;        class_ = CLASS_NUMBER;
341        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
342        break;        break;
343      case TOK_STRING:      case TOK_STRING:
344        class = CLASS_STRING;        class_ = CLASS_STRING;
345        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
346        break;        break;
347      case TOK_OBJECT:      case TOK_REGEXP:
348        class = CLASS_REGEXP;        class_ = CLASS_REGEXP;
349        token_stream->flags &= ~TSF_OPERAND;        token_stream.flags &= ~TSF_OPERAND;
350        break;        break;
351      case TOK_PRIMARY:      case TOK_PRIMARY:
352        switch (t.t_op) {        switch (t.t_op) {
# Line 386  Line 354 
354        case JSOP_FALSE:        case JSOP_FALSE:
355        case JSOP_NULL:        case JSOP_NULL:
356        case JSOP_THIS:        case JSOP_THIS:
357          class = CLASS_KEYWORD;          class_ = CLASS_KEYWORD;
358          token_stream->flags &= ~TSF_OPERAND;          token_stream.flags &= ~TSF_OPERAND;
359          break;          break;
360        default:        default:
361          abort();          fatal_source(id, t.pos.begin.lineno, "unknown TOK_PRIMARY (%d)", t.t_op);
362            break;
363        }        }
364        break;        break;
365      case TOK_FUNCTION:      case TOK_FUNCTION:
366        class = CLASS_KEYWORD;        class_ = CLASS_KEYWORD;
367        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
       break;  
     case TOK_EXPORT:  
     case TOK_IMPORT:  
       abort();  
368        break;        break;
369      case TOK_IF:      case TOK_IF:
370      case TOK_ELSE:      case TOK_ELSE:
# Line 417  Line 382 
382      case TOK_RETURN:      case TOK_RETURN:
383      case TOK_NEW:      case TOK_NEW:
384      case TOK_DELETE:      case TOK_DELETE:
385        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
386        class = CLASS_KEYWORD;        class_ = CLASS_KEYWORD;
387        break;        break;
388      case TOK_DEFSHARP:      case TOK_DEFSHARP:
389      case TOK_USESHARP:      case TOK_USESHARP:
390        abort();        fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
391        break;        break;
392      case TOK_TRY:      case TOK_TRY:
393      case TOK_CATCH:      case TOK_CATCH:
# Line 430  Line 395 
395      case TOK_THROW:      case TOK_THROW:
396      case TOK_INSTANCEOF:      case TOK_INSTANCEOF:
397      case TOK_DEBUGGER:      case TOK_DEBUGGER:
398        token_stream->flags |= TSF_OPERAND;        token_stream.flags |= TSF_OPERAND;
399        class = CLASS_KEYWORD;        class_ = CLASS_KEYWORD;
400        break;        break;
401      case TOK_XMLSTAGO:      case TOK_XMLSTAGO:
402      case TOK_XMLETAGO:      case TOK_XMLETAGO:
# Line 451  Line 416 
416      case TOK_FILTER:      case TOK_FILTER:
417      case TOK_XMLELEM:      case TOK_XMLELEM:
418      case TOK_XMLLIST:      case TOK_XMLLIST:
419          fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
420          break;
421        case TOK_YIELD:
422          token_stream.flags |= TSF_OPERAND;
423          class_ = CLASS_KEYWORD;
424          break;
425        case TOK_ARRAYCOMP:
426        case TOK_ARRAYPUSH:
427        case TOK_LEXICALSCOPE:
428          fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
429          break;
430        case TOK_LET:
431          token_stream.flags |= TSF_OPERAND;
432          class_ = CLASS_KEYWORD;
433          break;
434        case TOK_SEQ:
435        case TOK_FORHEAD:
436      case TOK_RESERVED:      case TOK_RESERVED:
437      case TOK_LIMIT:      case TOK_LIMIT:
438        abort();        fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
439        break;        break;
440      default:      default:
441        abort();        fatal_source(id, t.pos.begin.lineno, "unknown token (%d)", tt);
442        break;        break;
443      }      }
444    
445      assert(t.pos.begin.lineno == t.pos.end.lineno);      uint32_t start_line = t.pos.begin.lineno;
446      if (t.pos.begin.index > t.pos.end.index) {      uint32_t end_line = t.pos.end.lineno;
447        uint32_t start_column = t.pos.begin.index;
448        uint32_t end_column = t.pos.end.index;
449        assert(line_num == start_line);
450        assert(column_num == start_column);
451        if (start_line == end_line && start_column >= end_column) {
452        fatal("%s: script contains line with more than 65,535 characters", id);        fatal("%s: script contains line with more than 65,535 characters", id);
453      }      }
454      for (uint16_t i = t.pos.begin.index; i < t.pos.end.index; i++) {      for (;;) {
455        assert(character_offset < num_characters);        assert(character_offset < num_characters);
456        jschar c = characters[character_offset];        jschar c = characters[character_offset];
457        if (tt == TOK_STRING && c == '\\') {        if (tt == TOK_STRING && c == '\\') {
458          output_character(c, CLASS_SPECIALCHAR);          output_character(c, CLASS_SPECIALCHAR);
         character_offset++;  
         i++;  
459          assert(character_offset < num_characters);          assert(character_offset < num_characters);
460          c = characters[character_offset];          c = characters[character_offset];
461          output_character(c, CLASS_SPECIALCHAR);          output_character(c, CLASS_SPECIALCHAR);
         character_offset++;  
462        }        }
463        else {        else {
464          output_character(c, class);          output_character(c, class_);
465          character_offset++;        }
466    
467          if (line_num > end_line) {
468            break;
469          }
470          else if (line_num == end_line && column_num >= end_column) {
471            break;
472        }        }
473      }      }
474    
475      line_num = t.pos.end.lineno;      assert(line_num == end_line);
476      column_num = t.pos.end.index;      assert(column_num = end_column);
477    }    }
478    
479    if (current_class != CLASS_NONE) {    if (current_class != CLASS_NONE) {
480      output_character('\n', CLASS_NONE);      output_character('\n', CLASS_NONE);
481    }    }
482    
483    /* cleanup */    token_stream.close(context);
   JS_UNKEEP_ATOMS(cx->runtime);  
   context->fp = fp;  
484  }  }

Legend:
Removed from v.215
Changed lines
Added in v.507

  ViewVC Help
Powered by ViewVC 1.1.24