1 |
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
2 |
* vim: set sw=4 ts=8 et tw=80: |
3 |
* |
4 |
* ***** BEGIN LICENSE BLOCK ***** |
5 |
* Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
6 |
* |
7 |
* The contents of this file are subject to the Mozilla Public License Version |
8 |
* 1.1 (the "License"); you may not use this file except in compliance with |
9 |
* the License. You may obtain a copy of the License at |
10 |
* http://www.mozilla.org/MPL/ |
11 |
* |
12 |
* Software distributed under the License is distributed on an "AS IS" basis, |
13 |
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
14 |
* for the specific language governing rights and limitations under the |
15 |
* License. |
16 |
* |
17 |
* The Original Code is String Switch Generator for JavaScript Keywords, |
18 |
* released 2005-12-09. |
19 |
* |
20 |
* The Initial Developer of the Original Code is |
21 |
* Igor Bukanov. |
22 |
* Portions created by the Initial Developer are Copyright (C) 2005-2006 |
23 |
* the Initial Developer. All Rights Reserved. |
24 |
* |
25 |
* Contributor(s): |
26 |
* |
27 |
* Alternatively, the contents of this file may be used under the terms of |
28 |
* either of the GNU General Public License Version 2 or later (the "GPL"), |
29 |
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
30 |
* in which case the provisions of the GPL or the LGPL are applicable instead |
31 |
* of those above. If you wish to allow use of your version of this file only |
32 |
* under the terms of either the GPL or the LGPL, and not to allow others to |
33 |
* use your version of this file under the terms of the MPL, indicate your |
34 |
* decision by deleting the provisions above and replace them with the notice |
35 |
* and other provisions required by the GPL or the LGPL. If you do not delete |
36 |
* the provisions above, a recipient may use your version of this file under |
37 |
* the terms of any one of the MPL, the GPL or the LGPL. |
38 |
* |
39 |
* ***** END LICENSE BLOCK ***** */ |
40 |
|
41 |
#include "jsstddef.h" |
42 |
#include <assert.h> |
43 |
#include <stdio.h> |
44 |
#include <stdlib.h> |
45 |
#include <string.h> |
46 |
#include <stdarg.h> |
47 |
#include <ctype.h> |
48 |
|
49 |
#include "jsversion.h" |
50 |
|
51 |
const char * const keyword_list[] = { |
52 |
#define JS_KEYWORD(keyword, type, op, version) #keyword, |
53 |
#include "jskeyword.tbl" |
54 |
#undef JS_KEYWORD |
55 |
}; |
56 |
|
57 |
struct gen_opt { |
58 |
FILE *output; /* output file for generated source */ |
59 |
unsigned use_if_threshold; /* max number of choices to generate |
60 |
"if" selector instead of "switch" */ |
61 |
unsigned char_tail_test_threshold; /* max number of unprocessed columns |
62 |
to use inlined char compare |
63 |
for remaining chars and not generic |
64 |
string compare code */ |
65 |
unsigned indent_level; /* current source identation level */ |
66 |
}; |
67 |
|
68 |
static unsigned column_to_compare; |
69 |
|
70 |
static int |
71 |
length_comparator(const void *a, const void *b) |
72 |
{ |
73 |
const char *str1 = keyword_list[*(unsigned *)a]; |
74 |
const char *str2 = keyword_list[*(unsigned *)b]; |
75 |
return (int)strlen(str1) - (int)strlen(str2); |
76 |
} |
77 |
|
78 |
static int |
79 |
column_comparator(const void *a, const void *b) |
80 |
{ |
81 |
const char *str1 = keyword_list[*(unsigned *)a]; |
82 |
const char *str2 = keyword_list[*(unsigned *)b]; |
83 |
return (int)str1[column_to_compare] - (int)str2[column_to_compare]; |
84 |
} |
85 |
|
86 |
static unsigned |
87 |
count_different_lengths(unsigned indexes[], unsigned nelem) |
88 |
{ |
89 |
unsigned nlength, current_length, i, l; |
90 |
|
91 |
current_length = 0; |
92 |
nlength = 0; |
93 |
for (i = 0; i != nelem; ++i) { |
94 |
l = (unsigned)strlen(keyword_list[indexes[i]]); |
95 |
assert(l != 0); |
96 |
if (current_length != l) { |
97 |
++nlength; |
98 |
current_length = l; |
99 |
} |
100 |
} |
101 |
return nlength; |
102 |
} |
103 |
|
104 |
static void |
105 |
find_char_span_and_count(unsigned indexes[], unsigned nelem, unsigned column, |
106 |
unsigned *span_result, unsigned *count_result) |
107 |
{ |
108 |
unsigned i, count; |
109 |
unsigned char c, prev, minc, maxc; |
110 |
|
111 |
assert(nelem != 0); |
112 |
minc = maxc = prev = (unsigned char)keyword_list[indexes[0]][column]; |
113 |
count = 1; |
114 |
for (i = 1; i != nelem; ++i) { |
115 |
c = (unsigned char)keyword_list[indexes[i]][column]; |
116 |
if (prev != c) { |
117 |
prev = c; |
118 |
++count; |
119 |
if (minc > c) { |
120 |
minc = c; |
121 |
} else if (maxc < c) { |
122 |
maxc = c; |
123 |
} |
124 |
} |
125 |
} |
126 |
|
127 |
*span_result = maxc - minc + 1; |
128 |
*count_result = count; |
129 |
} |
130 |
|
131 |
static unsigned |
132 |
find_optimal_switch_column(struct gen_opt *opt, |
133 |
unsigned indexes[], unsigned nelem, |
134 |
unsigned columns[], unsigned unprocessed_columns, |
135 |
int *use_if_result) |
136 |
{ |
137 |
unsigned i; |
138 |
unsigned span, min_span, min_span_index; |
139 |
unsigned nchar, min_nchar, min_nchar_index; |
140 |
|
141 |
assert(unprocessed_columns != 0); |
142 |
i = 0; |
143 |
min_nchar = min_span = (unsigned)-1; |
144 |
min_nchar_index = min_span_index = 0; |
145 |
do { |
146 |
column_to_compare = columns[i]; |
147 |
qsort(indexes, nelem, sizeof(indexes[0]), column_comparator); |
148 |
find_char_span_and_count(indexes, nelem, column_to_compare, |
149 |
&span, &nchar); |
150 |
assert(span != 0); |
151 |
if (span == 1) { |
152 |
assert(nchar == 1); |
153 |
*use_if_result = 1; |
154 |
return 1; |
155 |
} |
156 |
assert(nchar != 1); |
157 |
if (min_span > span) { |
158 |
min_span = span; |
159 |
min_span_index = i; |
160 |
} |
161 |
if (min_nchar > nchar) { |
162 |
min_nchar = nchar; |
163 |
min_nchar_index = i; |
164 |
} |
165 |
} while (++i != unprocessed_columns); |
166 |
|
167 |
if (min_nchar <= opt->use_if_threshold) { |
168 |
*use_if_result = 1; |
169 |
i = min_nchar_index; |
170 |
} else { |
171 |
*use_if_result = 0; |
172 |
i = min_span_index; |
173 |
} |
174 |
|
175 |
/* |
176 |
* Restore order corresponding to i if it was destroyed by |
177 |
* subsequent sort. |
178 |
*/ |
179 |
if (i != unprocessed_columns - 1) { |
180 |
column_to_compare = columns[i]; |
181 |
qsort(indexes, nelem, sizeof(indexes[0]), column_comparator); |
182 |
} |
183 |
|
184 |
return i; |
185 |
} |
186 |
|
187 |
|
188 |
static void |
189 |
p(struct gen_opt *opt, const char *format, ...) |
190 |
{ |
191 |
va_list ap; |
192 |
|
193 |
va_start(ap, format); |
194 |
vfprintf(opt->output, format, ap); |
195 |
va_end(ap); |
196 |
} |
197 |
|
198 |
/* Size for '\xxx' where xxx is octal escape */ |
199 |
#define MIN_QUOTED_CHAR_BUFFER 7 |
200 |
|
201 |
static char * |
202 |
qchar(char c, char *quoted_buffer) |
203 |
{ |
204 |
char *s; |
205 |
|
206 |
s = quoted_buffer; |
207 |
*s++ = '\''; |
208 |
switch (c) { |
209 |
case '\n': c = 'n'; goto one_char_escape; |
210 |
case '\r': c = 'r'; goto one_char_escape; |
211 |
case '\t': c = 't'; goto one_char_escape; |
212 |
case '\f': c = 't'; goto one_char_escape; |
213 |
case '\0': c = '0'; goto one_char_escape; |
214 |
case '\'': goto one_char_escape; |
215 |
one_char_escape: |
216 |
*s++ = '\\'; |
217 |
break; |
218 |
default: |
219 |
if (!isprint(c)) { |
220 |
*s++ = '\\'; |
221 |
*s++ = (char)('0' + (0x3 & (((unsigned char)c) >> 6))); |
222 |
*s++ = (char)('0' + (0x7 & (((unsigned char)c) >> 3))); |
223 |
c = (char)('0' + (0x7 & ((unsigned char)c))); |
224 |
} |
225 |
} |
226 |
*s++ = c; |
227 |
*s++ = '\''; |
228 |
*s = '\0'; |
229 |
assert(s + 1 <= quoted_buffer + MIN_QUOTED_CHAR_BUFFER); |
230 |
return quoted_buffer; |
231 |
} |
232 |
|
233 |
static void |
234 |
nl(struct gen_opt *opt) |
235 |
{ |
236 |
putc('\n', opt->output); |
237 |
} |
238 |
|
239 |
static void |
240 |
indent(struct gen_opt *opt) |
241 |
{ |
242 |
unsigned n = opt->indent_level; |
243 |
while (n != 0) { |
244 |
--n; |
245 |
fputs(" ", opt->output); |
246 |
} |
247 |
} |
248 |
|
249 |
static void |
250 |
line(struct gen_opt *opt, const char *format, ...) |
251 |
{ |
252 |
va_list ap; |
253 |
|
254 |
indent(opt); |
255 |
va_start(ap, format); |
256 |
vfprintf(opt->output, format, ap); |
257 |
va_end(ap); |
258 |
nl(opt); |
259 |
} |
260 |
|
261 |
static void |
262 |
generate_letter_switch_r(struct gen_opt *opt, |
263 |
unsigned indexes[], unsigned nelem, |
264 |
unsigned columns[], unsigned unprocessed_columns) |
265 |
{ |
266 |
char qbuf[MIN_QUOTED_CHAR_BUFFER]; |
267 |
|
268 |
assert(nelem != 0); |
269 |
if (nelem == 1) { |
270 |
unsigned kw_index = indexes[0]; |
271 |
const char *keyword = keyword_list[kw_index]; |
272 |
|
273 |
if (unprocessed_columns == 0) { |
274 |
line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword); |
275 |
} else if (unprocessed_columns > opt->char_tail_test_threshold) { |
276 |
line(opt, "JSKW_TEST_GUESS(%u) /* %s */", kw_index, keyword); |
277 |
} else { |
278 |
unsigned i, column; |
279 |
|
280 |
indent(opt); p(opt, "if ("); |
281 |
for (i = 0; i != unprocessed_columns; ++i) { |
282 |
column = columns[i]; |
283 |
qchar(keyword[column], qbuf); |
284 |
p(opt, "%sJSKW_AT(%u)==%s", (i == 0) ? "" : " && ", |
285 |
column, qbuf); |
286 |
} |
287 |
p(opt, ") {"); nl(opt); |
288 |
++opt->indent_level; |
289 |
line(opt, "JSKW_GOT_MATCH(%u) /* %s */", kw_index, keyword); |
290 |
--opt->indent_level; |
291 |
line(opt, "}"); |
292 |
line(opt, "JSKW_NO_MATCH()"); |
293 |
} |
294 |
} else { |
295 |
unsigned optimal_column_index, optimal_column; |
296 |
unsigned i; |
297 |
int use_if; |
298 |
char current; |
299 |
|
300 |
assert(unprocessed_columns != 0); |
301 |
optimal_column_index = find_optimal_switch_column(opt, indexes, nelem, |
302 |
columns, |
303 |
unprocessed_columns, |
304 |
&use_if); |
305 |
optimal_column = columns[optimal_column_index]; |
306 |
columns[optimal_column_index] = columns[unprocessed_columns - 1]; |
307 |
|
308 |
if (!use_if) |
309 |
line(opt, "switch (JSKW_AT(%u)) {", optimal_column); |
310 |
|
311 |
current = keyword_list[indexes[0]][optimal_column]; |
312 |
for (i = 0; i != nelem;) { |
313 |
unsigned same_char_begin = i; |
314 |
char next = current; |
315 |
|
316 |
for (++i; i != nelem; ++i) { |
317 |
next = keyword_list[indexes[i]][optimal_column]; |
318 |
if (next != current) |
319 |
break; |
320 |
} |
321 |
qchar(current, qbuf); |
322 |
if (use_if) { |
323 |
line(opt, "if (JSKW_AT(%u) == %s) {", optimal_column, qbuf); |
324 |
} else { |
325 |
line(opt, " case %s:", qbuf); |
326 |
} |
327 |
++opt->indent_level; |
328 |
generate_letter_switch_r(opt, indexes + same_char_begin, |
329 |
i - same_char_begin, |
330 |
columns, unprocessed_columns - 1); |
331 |
--opt->indent_level; |
332 |
if (use_if) { |
333 |
line(opt, "}"); |
334 |
} |
335 |
current = next; |
336 |
} |
337 |
|
338 |
if (!use_if) { |
339 |
line(opt, "}"); |
340 |
} |
341 |
|
342 |
columns[optimal_column_index] = optimal_column; |
343 |
|
344 |
line(opt, "JSKW_NO_MATCH()"); |
345 |
} |
346 |
} |
347 |
|
348 |
static void |
349 |
generate_letter_switch(struct gen_opt *opt, |
350 |
unsigned indexes[], unsigned nelem, |
351 |
unsigned current_length) |
352 |
{ |
353 |
unsigned *columns; |
354 |
unsigned i; |
355 |
|
356 |
columns = (unsigned *) malloc(sizeof(columns[0]) * current_length); |
357 |
if (!columns) { |
358 |
perror("malloc"); |
359 |
exit(EXIT_FAILURE); |
360 |
} |
361 |
for (i = 0; i != current_length; ++i) { |
362 |
columns[i] = i; |
363 |
} |
364 |
generate_letter_switch_r(opt, indexes, nelem, columns, current_length); |
365 |
free(columns); |
366 |
} |
367 |
|
368 |
|
369 |
static void |
370 |
generate_switch(struct gen_opt *opt) |
371 |
{ |
372 |
unsigned *indexes; |
373 |
unsigned nlength; |
374 |
unsigned i, current; |
375 |
int use_if; |
376 |
unsigned nelem; |
377 |
|
378 |
nelem = sizeof(keyword_list)/sizeof(keyword_list[0]); |
379 |
|
380 |
line(opt, "/*"); |
381 |
line(opt, " * Generating switch for the list of %u entries:", nelem); |
382 |
for (i = 0; i != nelem; ++i) { |
383 |
line(opt, " * %s", keyword_list[i]); |
384 |
} |
385 |
line(opt, " */"); |
386 |
|
387 |
indexes = (unsigned *) malloc(sizeof(indexes[0]) * nelem); |
388 |
if (!indexes) { |
389 |
perror("malloc"); |
390 |
exit(EXIT_FAILURE); |
391 |
} |
392 |
for (i = 0; i != nelem; ++i) |
393 |
indexes[i] = i; |
394 |
qsort(indexes, nelem, sizeof(indexes[i]), length_comparator); |
395 |
nlength = count_different_lengths(indexes, nelem); |
396 |
|
397 |
use_if = (nlength <= opt->use_if_threshold); |
398 |
|
399 |
if (!use_if) |
400 |
line(opt, "switch (JSKW_LENGTH()) {"); |
401 |
|
402 |
current = (unsigned)strlen(keyword_list[indexes[0]]); |
403 |
for (i = 0; i != nelem;) { |
404 |
unsigned same_length_begin = i; |
405 |
unsigned next = current; |
406 |
|
407 |
for (++i; i != nelem; ++i) { |
408 |
next = (unsigned)strlen(keyword_list[indexes[i]]); |
409 |
if (next != current) |
410 |
break; |
411 |
} |
412 |
if (use_if) { |
413 |
line(opt, "if (JSKW_LENGTH() == %u) {", current); |
414 |
} else { |
415 |
line(opt, " case %u:", current); |
416 |
} |
417 |
++opt->indent_level; |
418 |
generate_letter_switch(opt, indexes + same_length_begin, |
419 |
i - same_length_begin, |
420 |
current); |
421 |
--opt->indent_level; |
422 |
if (use_if) { |
423 |
line(opt, "}"); |
424 |
} |
425 |
current = next; |
426 |
} |
427 |
if (!use_if) |
428 |
line(opt, "}"); |
429 |
line(opt, "JSKW_NO_MATCH()"); |
430 |
free(indexes); |
431 |
} |
432 |
|
433 |
int main(int argc, char **argv) |
434 |
{ |
435 |
struct gen_opt opt; |
436 |
|
437 |
if (argc < 2) { |
438 |
opt.output = stdout; |
439 |
} else { |
440 |
opt.output = fopen(argv[1], "w"); |
441 |
if (!opt.output) { |
442 |
perror("fopen"); |
443 |
exit(EXIT_FAILURE); |
444 |
} |
445 |
} |
446 |
opt.indent_level = 1; |
447 |
opt.use_if_threshold = 3; |
448 |
opt.char_tail_test_threshold = 4; |
449 |
|
450 |
generate_switch(&opt); |
451 |
|
452 |
if (opt.output != stdout) { |
453 |
if (fclose(opt.output)) { |
454 |
perror("fclose"); |
455 |
exit(EXIT_FAILURE); |
456 |
} |
457 |
} |
458 |
|
459 |
return EXIT_SUCCESS; |
460 |
} |