/[jscoverage]/trunk/js/jsstr.cpp
ViewVC logotype

Contents of /trunk/js/jsstr.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 332 - (show annotations)
Thu Oct 23 19:03:33 2008 UTC (11 years ago) by siliconforks
File size: 177482 byte(s)
Add SpiderMonkey from Firefox 3.1b1.

The following directories and files were removed:
correct/, correct.js
liveconnect/
nanojit/
t/
v8/
vprof/
xpconnect/
all JavaScript files (Y.js, call.js, if.js, math-partial-sums.js, md5.js, perfect.js, trace-test.js, trace.js)


1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sw=4 et tw=80:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS string type implementation.
43 *
44 * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
45 * native methods store strings (possibly newborn) converted from their 'this'
46 * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
47 * conversions at their index (argv[0], argv[1]). This is a legitimate method
48 * of rooting things that might lose their newborn root due to subsequent GC
49 * allocations in the same native method.
50 */
51 #include "jsstddef.h"
52 #include <stdlib.h>
53 #include <string.h>
54 #include "jstypes.h"
55 #include "jsutil.h" /* Added by JSIFY */
56 #include "jshash.h" /* Added by JSIFY */
57 #include "jsprf.h"
58 #include "jsapi.h"
59 #include "jsarray.h"
60 #include "jsatom.h"
61 #include "jsbool.h"
62 #include "jscntxt.h"
63 #include "jsversion.h"
64 #include "jsgc.h"
65 #include "jsinterp.h"
66 #include "jslock.h"
67 #include "jsnum.h"
68 #include "jsobj.h"
69 #include "jsopcode.h"
70 #include "jsregexp.h"
71 #include "jsscope.h"
72 #include "jsstr.h"
73 #include "jsbit.h"
74
75 #define JSSTRDEP_RECURSION_LIMIT 100
76
77 size_t
78 js_MinimizeDependentStrings(JSString *str, int level, JSString **basep)
79 {
80 JSString *base;
81 size_t start, length;
82
83 JS_ASSERT(JSSTRING_IS_DEPENDENT(str));
84 base = JSSTRDEP_BASE(str);
85 start = JSSTRDEP_START(str);
86 if (JSSTRING_IS_DEPENDENT(base)) {
87 if (level < JSSTRDEP_RECURSION_LIMIT) {
88 start += js_MinimizeDependentStrings(base, level + 1, &base);
89 } else {
90 do {
91 start += JSSTRDEP_START(base);
92 base = JSSTRDEP_BASE(base);
93 } while (JSSTRING_IS_DEPENDENT(base));
94 }
95 if (start == 0) {
96 JS_ASSERT(JSSTRDEP_IS_PREFIX(str));
97 JSPREFIX_SET_BASE(str, base);
98 } else if (start <= JSSTRDEP_START_MASK) {
99 length = JSSTRDEP_LENGTH(str);
100 JSSTRDEP_INIT(str, base, start, length);
101 }
102 }
103 *basep = base;
104 return start;
105 }
106
107 jschar *
108 js_GetDependentStringChars(JSString *str)
109 {
110 size_t start;
111 JSString *base;
112
113 start = js_MinimizeDependentStrings(str, 0, &base);
114 JS_ASSERT(start < JSFLATSTR_LENGTH(base));
115 return JSFLATSTR_CHARS(base) + start;
116 }
117
118 const jschar *
119 js_GetStringChars(JSContext *cx, JSString *str)
120 {
121 if (!js_MakeStringImmutable(cx, str))
122 return NULL;
123 return JSFLATSTR_CHARS(str);
124 }
125
126 JSString * JS_FASTCALL
127 js_ConcatStrings(JSContext *cx, JSString *left, JSString *right)
128 {
129 size_t rn, ln, lrdist, n;
130 jschar *rs, *ls, *s;
131 JSString *ldep; /* non-null if left should become dependent */
132 JSString *str;
133
134 JSSTRING_CHARS_AND_LENGTH(right, rs, rn);
135 if (rn == 0)
136 return left;
137
138 JSSTRING_CHARS_AND_LENGTH(left, ls, ln);
139 if (ln == 0)
140 return right;
141
142 if (!JSSTRING_IS_MUTABLE(left)) {
143 /* We must copy if left does not own a buffer to realloc. */
144 s = (jschar *) JS_malloc(cx, (ln + rn + 1) * sizeof(jschar));
145 if (!s)
146 return NULL;
147 js_strncpy(s, ls, ln);
148 ldep = NULL;
149 } else {
150 /* We can realloc left's space and make it depend on our result. */
151 JS_ASSERT(JSSTRING_IS_FLAT(left));
152 s = (jschar *) JS_realloc(cx, ls, (ln + rn + 1) * sizeof(jschar));
153 if (!s)
154 return NULL;
155
156 /* Take care: right could depend on left! */
157 lrdist = (size_t)(rs - ls);
158 if (lrdist < ln)
159 rs = s + lrdist;
160 left->u.chars = ls = s;
161 ldep = left;
162 }
163
164 js_strncpy(s + ln, rs, rn);
165 n = ln + rn;
166 s[n] = 0;
167 str = js_NewString(cx, s, n);
168 if (!str) {
169 /* Out of memory: clean up any space we (re-)allocated. */
170 if (!ldep) {
171 JS_free(cx, s);
172 } else {
173 s = (jschar *) JS_realloc(cx, ls, (ln + 1) * sizeof(jschar));
174 if (s)
175 left->u.chars = s;
176 }
177 } else {
178 JSFLATSTR_SET_MUTABLE(str);
179
180 /* Morph left into a dependent prefix if we realloc'd its buffer. */
181 if (ldep) {
182 JSPREFIX_INIT(ldep, str, ln);
183 #ifdef DEBUG
184 {
185 JSRuntime *rt = cx->runtime;
186 JS_RUNTIME_METER(rt, liveDependentStrings);
187 JS_RUNTIME_METER(rt, totalDependentStrings);
188 JS_LOCK_RUNTIME_VOID(rt,
189 (rt->strdepLengthSum += (double)ln,
190 rt->strdepLengthSquaredSum += (double)ln * (double)ln));
191 }
192 #endif
193 }
194 }
195
196 return str;
197 }
198
199 const jschar *
200 js_UndependString(JSContext *cx, JSString *str)
201 {
202 size_t n, size;
203 jschar *s;
204
205 if (JSSTRING_IS_DEPENDENT(str)) {
206 n = JSSTRDEP_LENGTH(str);
207 size = (n + 1) * sizeof(jschar);
208 s = (jschar *) JS_malloc(cx, size);
209 if (!s)
210 return NULL;
211
212 js_strncpy(s, JSSTRDEP_CHARS(str), n);
213 s[n] = 0;
214 JSFLATSTR_INIT(str, s, n);
215
216 #ifdef DEBUG
217 {
218 JSRuntime *rt = cx->runtime;
219 JS_RUNTIME_UNMETER(rt, liveDependentStrings);
220 JS_RUNTIME_UNMETER(rt, totalDependentStrings);
221 JS_LOCK_RUNTIME_VOID(rt,
222 (rt->strdepLengthSum -= (double)n,
223 rt->strdepLengthSquaredSum -= (double)n * (double)n));
224 }
225 #endif
226 }
227
228 return JSFLATSTR_CHARS(str);
229 }
230
231 JSBool
232 js_MakeStringImmutable(JSContext *cx, JSString *str)
233 {
234 if (JSSTRING_IS_DEPENDENT(str) && !js_UndependString(cx, str)) {
235 JS_RUNTIME_METER(cx->runtime, badUndependStrings);
236 return JS_FALSE;
237 }
238 JSFLATSTR_CLEAR_MUTABLE(str);
239 return JS_TRUE;
240 }
241
242 static JSString *
243 ArgToRootedString(JSContext *cx, uintN argc, jsval *vp, uintN arg)
244 {
245 JSObject *obj;
246 JSString *str;
247
248 if (arg >= argc)
249 return ATOM_TO_STRING(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
250 vp += 2 + arg;
251
252 if (JSVAL_IS_OBJECT(*vp)) {
253 obj = JSVAL_TO_OBJECT(*vp);
254 if (!obj)
255 return ATOM_TO_STRING(cx->runtime->atomState.nullAtom);
256 if (!OBJ_DEFAULT_VALUE(cx, obj, JSTYPE_STRING, vp))
257 return NULL;
258 }
259 if (JSVAL_IS_STRING(*vp))
260 return JSVAL_TO_STRING(*vp);
261 if (JSVAL_IS_INT(*vp)) {
262 str = js_NumberToString(cx, JSVAL_TO_INT(*vp));
263 } else if (JSVAL_IS_DOUBLE(*vp)) {
264 str = js_NumberToString(cx, *JSVAL_TO_DOUBLE(*vp));
265 } else if (JSVAL_IS_BOOLEAN(*vp)) {
266 return ATOM_TO_STRING(cx->runtime->atomState.booleanAtoms[
267 JSVAL_TO_BOOLEAN(*vp)? 1 : 0]);
268 } else {
269 JS_ASSERT(JSVAL_IS_VOID(*vp));
270 return ATOM_TO_STRING(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
271 }
272 if (str)
273 *vp = STRING_TO_JSVAL(str);
274 return str;
275 }
276
277 /*
278 * Forward declarations for the URI encode/decode natives and the UTF-8
 * helper routine.  The four natives are declared here because the
 * string_functions table below refers to them; their definitions are not
 * part of this section of the file.
279 */
280 static JSBool
281 str_decodeURI(JSContext *cx, uintN argc, jsval *vp);
282
283 static JSBool
284 str_decodeURI_Component(JSContext *cx, uintN argc, jsval *vp);
285
286 static JSBool
287 str_encodeURI(JSContext *cx, uintN argc, jsval *vp);
288
289 static JSBool
290 str_encodeURI_Component(JSContext *cx, uintN argc, jsval *vp);
291
292 static uint32
293 Utf8ToOneUcs4Char(const uint8 *utf8Buffer, int utf8Length);
294
295 /*
296 * Contributions from the String class to the set of methods defined for the
297 * global object. escape and unescape used to be defined in the Mocha library,
298 * but as ECMA decided to spec them, they've been moved to the core engine
299 * and made ECMA-compliant. (Incomplete escapes are interpreted as literal
300 * characters by unescape.)
301 */
302
303 /*
304 * Stuff to emulate the old libmocha escape, which took a second argument
305 * giving the type of escape to perform. Retained for compatibility, and
306 * copied here to avoid reliance on net.h, mkparse.c/NET_EscapeBytes.
307 */
308
/* Escape-type bits: the values stored in urlCharType and accepted as mask. */
309 #define URL_XALPHAS ((uint8) 1)
310 #define URL_XPALPHAS ((uint8) 2)
311 #define URL_PATH ((uint8) 4)
312
/*
 * Per-character table of escape-type bits.  A character is emitted unescaped
 * when its entry shares a bit with the mask in use.  Only the first 129
 * entries are listed; the remaining entries of the 256-element array are
 * implicitly zero.
 */
313 static const uint8 urlCharType[256] =
314 /* Bit 0 xalpha -- the alphas
315 * Bit 1 xpalpha -- as xalpha but
316 * converts spaces to plus and plus to %2B
317 * Bit 2 ... path -- as xalphas but doesn't escape '/'
318 */
319 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
320 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */
321 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */
322 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
323 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
324 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
325 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
326 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
327 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{|}~ DEL */
328 0, };
329
330 /* This matches the ECMA escape set when mask is 7 (default.) */
331
/*
 * Nonzero if character C may be emitted unescaped under mask.  C is
 * truncated to uint8 for the table lookup, so js_str_escape checks the
 * character's range (ch < 128) itself before using this macro.
 */
332 #define IS_OK(C, mask) (urlCharType[((uint8) (C))] & (mask))
333
334 /* See ECMA-262 Edition 3 B.2.1 */
335 JSBool
336 js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
337 {
338 JSString *str;
339 size_t i, ni, length, newlength;
340 const jschar *chars;
341 jschar *newchars;
342 jschar ch;
343 jsint mask;
344 jsdouble d;
345 const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
346 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
347
348 mask = URL_XALPHAS | URL_XPALPHAS | URL_PATH;
349 if (argc > 1) {
350 d = js_ValueToNumber(cx, &argv[1]);
351 if (JSVAL_IS_NULL(argv[1]))
352 return JS_FALSE;
353 if (!JSDOUBLE_IS_FINITE(d) ||
354 (mask = (jsint)d) != d ||
355 mask & ~(URL_XALPHAS | URL_XPALPHAS | URL_PATH))
356 {
357 char numBuf[12];
358 JS_snprintf(numBuf, sizeof numBuf, "%lx", (unsigned long) mask);
359 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
360 JSMSG_BAD_STRING_MASK, numBuf);
361 return JS_FALSE;
362 }
363 }
364
365 str = ArgToRootedString(cx, argc, argv - 2, 0);
366 if (!str)
367 return JS_FALSE;
368
369 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
370 newlength = length;
371
372 /* Take a first pass and see how big the result string will need to be. */
373 for (i = 0; i < length; i++) {
374 if ((ch = chars[i]) < 128 && IS_OK(ch, mask))
375 continue;
376 if (ch < 256) {
377 if (mask == URL_XPALPHAS && ch == ' ')
378 continue; /* The character will be encoded as '+' */
379 newlength += 2; /* The character will be encoded as %XX */
380 } else {
381 newlength += 5; /* The character will be encoded as %uXXXX */
382 }
383
384 /*
385 * This overflow test works because newlength is incremented by at
386 * most 5 on each iteration.
387 */
388 if (newlength < length) {
389 js_ReportAllocationOverflow(cx);
390 return JS_FALSE;
391 }
392 }
393
394 if (newlength >= ~(size_t)0 / sizeof(jschar)) {
395 js_ReportAllocationOverflow(cx);
396 return JS_FALSE;
397 }
398
399 newchars = (jschar *) JS_malloc(cx, (newlength + 1) * sizeof(jschar));
400 if (!newchars)
401 return JS_FALSE;
402 for (i = 0, ni = 0; i < length; i++) {
403 if ((ch = chars[i]) < 128 && IS_OK(ch, mask)) {
404 newchars[ni++] = ch;
405 } else if (ch < 256) {
406 if (mask == URL_XPALPHAS && ch == ' ') {
407 newchars[ni++] = '+'; /* convert spaces to pluses */
408 } else {
409 newchars[ni++] = '%';
410 newchars[ni++] = digits[ch >> 4];
411 newchars[ni++] = digits[ch & 0xF];
412 }
413 } else {
414 newchars[ni++] = '%';
415 newchars[ni++] = 'u';
416 newchars[ni++] = digits[ch >> 12];
417 newchars[ni++] = digits[(ch & 0xF00) >> 8];
418 newchars[ni++] = digits[(ch & 0xF0) >> 4];
419 newchars[ni++] = digits[ch & 0xF];
420 }
421 }
422 JS_ASSERT(ni == newlength);
423 newchars[newlength] = 0;
424
425 str = js_NewString(cx, newchars, newlength);
426 if (!str) {
427 JS_free(cx, newchars);
428 return JS_FALSE;
429 }
430 *rval = STRING_TO_JSVAL(str);
431 return JS_TRUE;
432 }
433 #undef IS_OK
434
435 static JSBool
436 str_escape(JSContext *cx, uintN argc, jsval *vp)
437 {
438 JSObject *obj;
439
440 obj = JS_THIS_OBJECT(cx, vp);
441 return obj && js_str_escape(cx, obj, argc, vp + 2, vp);
442 }
443
444 /* See ECMA-262 Edition 3 B.2.2 */
445 static JSBool
446 str_unescape(JSContext *cx, uintN argc, jsval *vp)
447 {
448 JSString *str;
449 size_t i, ni, length;
450 const jschar *chars;
451 jschar *newchars;
452 jschar ch;
453
454 str = ArgToRootedString(cx, argc, vp, 0);
455 if (!str)
456 return JS_FALSE;
457
458 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
459
460 /* Don't bother allocating less space for the new string. */
461 newchars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
462 if (!newchars)
463 return JS_FALSE;
464 ni = i = 0;
465 while (i < length) {
466 ch = chars[i++];
467 if (ch == '%') {
468 if (i + 1 < length &&
469 JS7_ISHEX(chars[i]) && JS7_ISHEX(chars[i + 1]))
470 {
471 ch = JS7_UNHEX(chars[i]) * 16 + JS7_UNHEX(chars[i + 1]);
472 i += 2;
473 } else if (i + 4 < length && chars[i] == 'u' &&
474 JS7_ISHEX(chars[i + 1]) && JS7_ISHEX(chars[i + 2]) &&
475 JS7_ISHEX(chars[i + 3]) && JS7_ISHEX(chars[i + 4]))
476 {
477 ch = (((((JS7_UNHEX(chars[i + 1]) << 4)
478 + JS7_UNHEX(chars[i + 2])) << 4)
479 + JS7_UNHEX(chars[i + 3])) << 4)
480 + JS7_UNHEX(chars[i + 4]);
481 i += 5;
482 }
483 }
484 newchars[ni++] = ch;
485 }
486 newchars[ni] = 0;
487
488 str = js_NewString(cx, newchars, ni);
489 if (!str) {
490 JS_free(cx, newchars);
491 return JS_FALSE;
492 }
493 *vp = STRING_TO_JSVAL(str);
494 return JS_TRUE;
495 }
496
#if JS_HAS_UNEVAL
/*
 * uneval(value): store in *vp the result of js_ValueToSource applied to the
 * first argument, or to JSVAL_VOID when no argument was passed.
 */
static JSBool
str_uneval(JSContext *cx, uintN argc, jsval *vp)
{
    jsval subject = (argc == 0) ? JSVAL_VOID : vp[2];
    JSString *source = js_ValueToSource(cx, subject);

    if (!source)
        return JS_FALSE;
    *vp = STRING_TO_JSVAL(source);
    return JS_TRUE;
}
#endif
510
/* Names of the functions listed in string_functions below. */
511 const char js_escape_str[] = "escape";
512 const char js_unescape_str[] = "unescape";
513 #if JS_HAS_UNEVAL
514 const char js_uneval_str[] = "uneval";
515 #endif
516 const char js_decodeURI_str[] = "decodeURI";
517 const char js_encodeURI_str[] = "encodeURI";
518 const char js_decodeURIComponent_str[] = "decodeURIComponent";
519 const char js_encodeURIComponent_str[] = "encodeURIComponent";
520
/*
 * Function table pairing each name above with its native implementation.
 * Per the comment earlier in this file, these are the String class's
 * contributions to the methods of the global object.  The table is
 * terminated by JS_FS_END.
 */
521 static JSFunctionSpec string_functions[] = {
522 JS_FN(js_escape_str, str_escape, 1,0),
523 JS_FN(js_unescape_str, str_unescape, 1,0),
524 #if JS_HAS_UNEVAL
525 JS_FN(js_uneval_str, str_uneval, 1,0),
526 #endif
527 JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
528 JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
529 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
530 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
531
532 JS_FS_END
533 };
534
/* A zero-length jschar string, and a JSSubString initialized from it. */
535 jschar js_empty_ucstr[] = {0};
536 JSSubString js_EmptySubString = {0, js_empty_ucstr};
537
/* Tiny id that str_getProperty recognizes as the 'length' property. */
538 enum string_tinyid {
539 STRING_LENGTH = -1
540 };
541
/*
 * Property table declaring 'length' (tiny id STRING_LENGTH) as read-only,
 * permanent and shared; the all-zero entry terminates the table.
 */
542 static JSPropertySpec string_props[] = {
543 {js_length_str, STRING_LENGTH,
544 JSPROP_READONLY|JSPROP_PERMANENT|JSPROP_SHARED, 0,0},
545 {0,0,0,0,0}
546 };
547
548 static JSBool
549 str_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
550 {
551 jsval v;
552 JSString *str;
553 jsint slot;
554
555 if (!JSVAL_IS_INT(id))
556 return JS_TRUE;
557
558 slot = JSVAL_TO_INT(id);
559 if (slot == STRING_LENGTH) {
560 if (OBJ_GET_CLASS(cx, obj) == &js_StringClass) {
561 /* Follow ECMA-262 by fetching intrinsic length of our string. */
562 v = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);
563 JS_ASSERT(JSVAL_IS_STRING(v));
564 str = JSVAL_TO_STRING(v);
565 } else {
566 /* Preserve compatibility: convert obj to a string primitive. */
567 str = js_ValueToString(cx, OBJECT_TO_JSVAL(obj));
568 if (!str)
569 return JS_FALSE;
570 }
571
572 *vp = INT_TO_JSVAL((jsint) JSSTRING_LENGTH(str));
573 }
574 return JS_TRUE;
575 }
576
577 #define STRING_ELEMENT_ATTRS (JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_PERMANENT)
578
579 static JSBool
580 str_enumerate(JSContext *cx, JSObject *obj)
581 {
582 jsval v;
583 JSString *str, *str1;
584 size_t i, length;
585
586 v = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);
587 JS_ASSERT(JSVAL_IS_STRING(v));
588 str = JSVAL_TO_STRING(v);
589
590 length = JSSTRING_LENGTH(str);
591 for (i = 0; i < length; i++) {
592 str1 = js_NewDependentString(cx, str, i, 1);
593 if (!str1)
594 return JS_FALSE;
595 if (!OBJ_DEFINE_PROPERTY(cx, obj, INT_TO_JSID(i),
596 STRING_TO_JSVAL(str1), NULL, NULL,
597 STRING_ELEMENT_ATTRS, NULL)) {
598 return JS_FALSE;
599 }
600 }
601 return JS_TRUE;
602 }
603
604 static JSBool
605 str_resolve(JSContext *cx, JSObject *obj, jsval id, uintN flags,
606 JSObject **objp)
607 {
608 jsval v;
609 JSString *str, *str1;
610 jsint slot;
611
612 if (!JSVAL_IS_INT(id) || (flags & JSRESOLVE_ASSIGNING))
613 return JS_TRUE;
614
615 v = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);
616 JS_ASSERT(JSVAL_IS_STRING(v));
617 str = JSVAL_TO_STRING(v);
618
619 slot = JSVAL_TO_INT(id);
620 if ((size_t)slot < JSSTRING_LENGTH(str)) {
621 str1 = js_GetUnitString(cx, str, (size_t)slot);
622 if (!str1)
623 return JS_FALSE;
624 if (!OBJ_DEFINE_PROPERTY(cx, obj, INT_TO_JSID(slot),
625 STRING_TO_JSVAL(str1), NULL, NULL,
626 STRING_ELEMENT_ATTRS, NULL)) {
627 return JS_FALSE;
628 }
629 *objp = obj;
630 }
631 return JS_TRUE;
632 }
633
/*
 * Class descriptor for String objects.  The wrapped string is kept in the
 * private slot (JSCLASS_HAS_PRIVATE; read via JSSLOT_PRIVATE by the hooks).
 * str_getProperty, str_enumerate and str_resolve are the only non-stub
 * hooks; str_resolve takes the flags/objp parameters, matching
 * JSCLASS_NEW_RESOLVE, and is therefore cast to JSResolveOp.
 */
634 JSClass js_StringClass = {
635 js_String_str,
636 JSCLASS_HAS_PRIVATE | JSCLASS_NEW_RESOLVE |
637 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
638 JS_PropertyStub, JS_PropertyStub, str_getProperty, JS_PropertyStub,
639 str_enumerate, (JSResolveOp)str_resolve, JS_ConvertStub, JS_FinalizeStub,
640 JSCLASS_NO_OPTIONAL_MEMBERS
641 };
642
/*
 * Set str to the 'this' value of a native call as a string.  If vp[1] already
 * holds a string it is used directly; otherwise NormalizeThis converts it.
 * NOTE: on conversion failure this macro executes 'return JS_FALSE' in the
 * enclosing function, so it may only be used in functions returning JSBool.
 */
643 #define NORMALIZE_THIS(cx,vp,str) \
644 JS_BEGIN_MACRO \
645 if (JSVAL_IS_STRING(vp[1])) { \
646 str = JSVAL_TO_STRING(vp[1]); \
647 } else { \
648 str = NormalizeThis(cx, vp); \
649 if (!str) \
650 return JS_FALSE; \
651 } \
652 JS_END_MACRO
653
654 static JSString *
655 NormalizeThis(JSContext *cx, jsval *vp)
656 {
657 JSString *str;
658
659 if (JSVAL_IS_NULL(vp[1]) && JSVAL_IS_NULL(JS_THIS(cx, vp)))
660 return NULL;
661 str = js_ValueToString(cx, vp[1]);
662 if (!str)
663 return NULL;
664 vp[1] = STRING_TO_JSVAL(str);
665 return str;
666 }
667
668 #if JS_HAS_TOSOURCE
669
670 /*
671 * String.prototype.quote is generic (as are most string methods), unlike
672 * toSource, toString, and valueOf.
673 */
674 static JSBool
675 str_quote(JSContext *cx, uintN argc, jsval *vp)
676 {
677 JSString *str;
678
679 NORMALIZE_THIS(cx, vp, str);
680 str = js_QuoteString(cx, str, '"');
681 if (!str)
682 return JS_FALSE;
683 *vp = STRING_TO_JSVAL(str);
684 return JS_TRUE;
685 }
686
687 static JSBool
688 str_toSource(JSContext *cx, uintN argc, jsval *vp)
689 {
690 jsval v;
691 JSString *str;
692 size_t i, j, k, n;
693 char buf[16];
694 jschar *s, *t;
695
696 if (!js_GetPrimitiveThis(cx, vp, &js_StringClass, &v))
697 return JS_FALSE;
698 JS_ASSERT(JSVAL_IS_STRING(v));
699 str = js_QuoteString(cx, JSVAL_TO_STRING(v), '"');
700 if (!str)
701 return JS_FALSE;
702 j = JS_snprintf(buf, sizeof buf, "(new %s(", js_StringClass.name);
703 JSSTRING_CHARS_AND_LENGTH(str, s, k);
704 n = j + k + 2;
705 t = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
706 if (!t)
707 return JS_FALSE;
708 for (i = 0; i < j; i++)
709 t[i] = buf[i];
710 for (j = 0; j < k; i++, j++)
711 t[i] = s[j];
712 t[i++] = ')';
713 t[i++] = ')';
714 t[i] = 0;
715 str = js_NewString(cx, t, n);
716 if (!str) {
717 JS_free(cx, t);
718 return JS_FALSE;
719 }
720 *vp = STRING_TO_JSVAL(str);
721 return JS_TRUE;
722 }
723
724 #endif /* JS_HAS_TOSOURCE */
725
726 static JSBool
727 str_toString(JSContext *cx, uintN argc, jsval *vp)
728 {
729 return js_GetPrimitiveThis(cx, vp, &js_StringClass, vp);
730 }
731
732 /*
733 * Java-like string native methods.
734 */
735 JSBool
736 js_str_substring(JSContext *cx, uintN argc, jsval *vp)
737 {
738 JSString *str;
739 jsdouble d;
740 jsdouble length, begin, end;
741
742 NORMALIZE_THIS(cx, vp, str);
743 if (argc != 0) {
744 d = js_ValueToNumber(cx, &vp[2]);
745 if (JSVAL_IS_NULL(vp[2]))
746 return JS_FALSE;
747 length = JSSTRING_LENGTH(str);
748 begin = js_DoubleToInteger(d);
749 if (begin < 0)
750 begin = 0;
751 else if (begin > length)
752 begin = length;
753
754 if (argc == 1) {
755 end = length;
756 } else {
757 d = js_ValueToNumber(cx, &vp[3]);
758 if (JSVAL_IS_NULL(vp[3]))
759 return JS_FALSE;
760 end = js_DoubleToInteger(d);
761 if (end < 0)
762 end = 0;
763 else if (end > length)
764 end = length;
765 if (end < begin) {
766 /* ECMA emulates old JDK1.0 java.lang.String.substring. */
767 jsdouble tmp = begin;
768 begin = end;
769 end = tmp;
770 }
771 }
772
773 str = js_NewDependentString(cx, str, (size_t)begin,
774 (size_t)(end - begin));
775 if (!str)
776 return JS_FALSE;
777 }
778 *vp = STRING_TO_JSVAL(str);
779 return JS_TRUE;
780 }
781
782 JSString* JS_FASTCALL
783 js_toLowerCase(JSContext *cx, JSString *str)
784 {
785 size_t i, n;
786 jschar *s, *news;
787
788 JSSTRING_CHARS_AND_LENGTH(str, s, n);
789 news = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
790 if (!news)
791 return NULL;
792 for (i = 0; i < n; i++)
793 news[i] = JS_TOLOWER(s[i]);
794 news[n] = 0;
795 str = js_NewString(cx, news, n);
796 if (!str) {
797 JS_free(cx, news);
798 return NULL;
799 }
800 return str;
801 }
802
803 JSBool
804 js_str_toLowerCase(JSContext *cx, uintN argc, jsval *vp)
805 {
806 JSString *str;
807
808 NORMALIZE_THIS(cx, vp, str);
809 str = js_toLowerCase(cx, str);
810 if (!str)
811 return JS_FALSE;
812 *vp = STRING_TO_JSVAL(str);
813 return JS_TRUE;
814 }
815
816 static JSBool
817 str_toLocaleLowerCase(JSContext *cx, uintN argc, jsval *vp)
818 {
819 JSString *str;
820
821 /*
822 * Forcefully ignore the first (or any) argument and return toLowerCase(),
823 * ECMA has reserved that argument, presumably for defining the locale.
824 */
825 if (cx->localeCallbacks && cx->localeCallbacks->localeToLowerCase) {
826 NORMALIZE_THIS(cx, vp, str);
827 return cx->localeCallbacks->localeToLowerCase(cx, str, vp);
828 }
829 return js_str_toLowerCase(cx, 0, vp);
830 }
831
832 JSString* JS_FASTCALL
833 js_toUpperCase(JSContext *cx, JSString *str)
834 {
835 size_t i, n;
836 jschar *s, *news;
837
838 JSSTRING_CHARS_AND_LENGTH(str, s, n);
839 news = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
840 if (!news)
841 return NULL;
842 for (i = 0; i < n; i++)
843 news[i] = JS_TOUPPER(s[i]);
844 news[n] = 0;
845 str = js_NewString(cx, news, n);
846 if (!str) {
847 JS_free(cx, news);
848 return NULL;
849 }
850 return str;
851 }
852
853 JSBool
854 js_str_toUpperCase(JSContext *cx, uintN argc, jsval *vp)
855 {
856 JSString *str;
857
858 NORMALIZE_THIS(cx, vp, str);
859 str = js_toUpperCase(cx, str);
860 if (!str)
861 return JS_FALSE;
862 *vp = STRING_TO_JSVAL(str);
863 return JS_TRUE;
864 }
865
866 static JSBool
867 str_toLocaleUpperCase(JSContext *cx, uintN argc, jsval *vp)
868 {
869 JSString *str;
870
871 /*
872 * Forcefully ignore the first (or any) argument and return toUpperCase(),
873 * ECMA has reserved that argument, presumably for defining the locale.
874 */
875 if (cx->localeCallbacks && cx->localeCallbacks->localeToUpperCase) {
876 NORMALIZE_THIS(cx, vp, str);
877 return cx->localeCallbacks->localeToUpperCase(cx, str, vp);
878 }
879 return js_str_toUpperCase(cx, 0, vp);
880 }
881
882 static JSBool
883 str_localeCompare(JSContext *cx, uintN argc, jsval *vp)
884 {
885 JSString *str, *thatStr;
886
887 NORMALIZE_THIS(cx, vp, str);
888 if (argc == 0) {
889 *vp = JSVAL_ZERO;
890 } else {
891 thatStr = js_ValueToString(cx, vp[2]);
892 if (!thatStr)
893 return JS_FALSE;
894 if (cx->localeCallbacks && cx->localeCallbacks->localeCompare) {
895 vp[2] = STRING_TO_JSVAL(thatStr);
896 return cx->localeCallbacks->localeCompare(cx, str, thatStr, vp);
897 }
898 *vp = INT_TO_JSVAL(js_CompareStrings(str, thatStr));
899 }
900 return JS_TRUE;
901 }
902
903 JSBool
904 js_str_charAt(JSContext *cx, uintN argc, jsval *vp)
905 {
906 jsval t;
907 JSString *str;
908 jsint i;
909 jsdouble d;
910
911 t = vp[1];
912 if (JSVAL_IS_STRING(t) && argc != 0 && JSVAL_IS_INT(vp[2])) {
913 str = JSVAL_TO_STRING(t);
914 i = JSVAL_TO_INT(vp[2]);
915 if ((size_t)i >= JSSTRING_LENGTH(str))
916 goto out_of_range;
917 } else {
918 str = NormalizeThis(cx, vp);
919 if (!str)
920 return JS_FALSE;
921
922 if (argc == 0) {
923 d = 0.0;
924 } else {
925 d = js_ValueToNumber(cx, &vp[2]);
926 if (JSVAL_IS_NULL(vp[2]))
927 return JS_FALSE;
928 d = js_DoubleToInteger(d);
929 }
930
931 if (d < 0 || JSSTRING_LENGTH(str) <= d)
932 goto out_of_range;
933 i = (jsint) d;
934 }
935
936 str = js_GetUnitString(cx, str, (size_t)i);
937 if (!str)
938 return JS_FALSE;
939 *vp = STRING_TO_JSVAL(str);
940 return JS_TRUE;
941
942 out_of_range:
943 *vp = JS_GetEmptyStringValue(cx);
944 return JS_TRUE;
945 }
946
947 JSBool
948 js_str_charCodeAt(JSContext *cx, uintN argc, jsval *vp)
949 {
950 jsval t;
951 JSString *str;
952 jsint i;
953 jsdouble d;
954
955 t = vp[1];
956 if (JSVAL_IS_STRING(t) && argc != 0 && JSVAL_IS_INT(vp[2])) {
957 str = JSVAL_TO_STRING(t);
958 i = JSVAL_TO_INT(vp[2]);
959 if ((size_t)i >= JSSTRING_LENGTH(str))
960 goto out_of_range;
961 } else {
962 str = NormalizeThis(cx, vp);
963 if (!str)
964 return JS_FALSE;
965
966 if (argc == 0) {
967 d = 0.0;
968 } else {
969 d = js_ValueToNumber(cx, &vp[2]);
970 if (JSVAL_IS_NULL(vp[2]))
971 return JS_FALSE;
972 d = js_DoubleToInteger(d);
973 }
974
975 if (d < 0 || JSSTRING_LENGTH(str) <= d)
976 goto out_of_range;
977 i = (jsint) d;
978 }
979
980 *vp = INT_TO_JSVAL(JSSTRING_CHARS(str)[i]);
981 return JS_TRUE;
982
983 out_of_range:
984 *vp = JS_GetNaNValue(cx);
985 return JS_TRUE;
986 }
987
988 jsint
989 js_BoyerMooreHorspool(const jschar *text, jsint textlen,
990 const jschar *pat, jsint patlen,
991 jsint start)
992 {
993 jsint i, j, k, m;
994 uint8 skip[BMH_CHARSET_SIZE];
995 jschar c;
996
997 JS_ASSERT(0 < patlen && patlen <= BMH_PATLEN_MAX);
998 for (i = 0; i < BMH_CHARSET_SIZE; i++)
999 skip[i] = (uint8)patlen;
1000 m = patlen - 1;
1001 for (i = 0; i < m; i++) {
1002 c = pat[i];
1003 if (c >= BMH_CHARSET_SIZE)
1004 return BMH_BAD_PATTERN;
1005 skip[c] = (uint8)(m - i);
1006 }
1007 for (k = start + m;
1008 k < textlen;
1009 k += ((c = text[k]) >= BMH_CHARSET_SIZE) ? patlen : skip[c]) {
1010 for (i = k, j = m; ; i--, j--) {
1011 if (j < 0)
1012 return i + 1;
1013 if (text[i] != pat[j])
1014 break;
1015 }
1016 }
1017 return -1;
1018 }
1019
1020 static JSBool
1021 str_indexOf(JSContext *cx, uintN argc, jsval *vp)
1022 {
1023 jsval t;
1024 JSString *str, *str2;
1025 const jschar *text, *pat;
1026 jsint i, j, index, textlen, patlen;
1027 jsdouble d;
1028
1029 t = vp[1];
1030 if (JSVAL_IS_STRING(t) && argc != 0 && JSVAL_IS_STRING(vp[2])) {
1031 str = JSVAL_TO_STRING(t);
1032 str2 = JSVAL_TO_STRING(vp[2]);
1033 } else {
1034 str = NormalizeThis(cx, vp);
1035 if (!str)
1036 return JS_FALSE;
1037
1038 str2 = ArgToRootedString(cx, argc, vp, 0);
1039 if (!str2)
1040 return JS_FALSE;
1041 }
1042
1043 text = JSSTRING_CHARS(str);
1044 textlen = (jsint) JSSTRING_LENGTH(str);
1045 pat = JSSTRING_CHARS(str2);
1046 patlen = (jsint) JSSTRING_LENGTH(str2);
1047
1048 if (argc > 1) {
1049 d = js_ValueToNumber(cx, &vp[3]);
1050 if (JSVAL_IS_NULL(vp[3]))
1051 return JS_FALSE;
1052 d = js_DoubleToInteger(d);
1053 if (d < 0)
1054 i = 0;
1055 else if (d > textlen)
1056 i = textlen;
1057 else
1058 i = (jsint)d;
1059 } else {
1060 i = 0;
1061 }
1062 if (patlen == 0) {
1063 *vp = INT_TO_JSVAL(i);
1064 return JS_TRUE;
1065 }
1066
1067 /* XXX tune the BMH threshold (512) */
1068 if (textlen - i >= 512 && (jsuint)(patlen - 2) <= BMH_PATLEN_MAX - 2) {
1069 index = js_BoyerMooreHorspool(text, textlen, pat, patlen, i);
1070 if (index != BMH_BAD_PATTERN)
1071 goto out;
1072 }
1073
1074 index = -1;
1075 j = 0;
1076 while (i + j < textlen) {
1077 if (text[i + j] == pat[j]) {
1078 if (++j == patlen) {
1079 index = i;
1080 break;
1081 }
1082 } else {
1083 i++;
1084 j = 0;
1085 }
1086 }
1087
1088 out:
1089 *vp = INT_TO_JSVAL(index);
1090 return JS_TRUE;
1091 }
1092
1093 static JSBool
1094 str_lastIndexOf(JSContext *cx, uintN argc, jsval *vp)
1095 {
1096 JSString *str, *str2;
1097 const jschar *text, *pat;
1098 jsint i, j, textlen, patlen;
1099 jsdouble d;
1100
1101 NORMALIZE_THIS(cx, vp, str);
1102 text = JSSTRING_CHARS(str);
1103 textlen = (jsint) JSSTRING_LENGTH(str);
1104
1105 str2 = ArgToRootedString(cx, argc, vp, 0);
1106 if (!str2)
1107 return JS_FALSE;
1108 pat = JSSTRING_CHARS(str2);
1109 patlen = (jsint) JSSTRING_LENGTH(str2);
1110
1111 if (argc > 1) {
1112 d = js_ValueToNumber(cx, &vp[3]);
1113 if (JSVAL_IS_NULL(vp[3]))
1114 return JS_FALSE;
1115 if (JSDOUBLE_IS_NaN(d)) {
1116 i = textlen;
1117 } else {
1118 d = js_DoubleToInteger(d);
1119 if (d < 0)
1120 i = 0;
1121 else if (d > textlen)
1122 i = textlen;
1123 else
1124 i = (jsint)d;
1125 }
1126 } else {
1127 i = textlen;
1128 }
1129
1130 if (patlen == 0) {
1131 *vp = INT_TO_JSVAL(i);
1132 return JS_TRUE;
1133 }
1134
1135 j = 0;
1136 while (i >= 0) {
1137 /* Don't assume that text is NUL-terminated: it could be dependent. */
1138 if (i + j < textlen && text[i + j] == pat[j]) {
1139 if (++j == patlen)
1140 break;
1141 } else {
1142 i--;
1143 j = 0;
1144 }
1145 }
1146 *vp = INT_TO_JSVAL(i);
1147 return JS_TRUE;
1148 }
1149
1150 static JSBool
1151 js_TrimString(JSContext *cx, jsval *vp, JSBool trimLeft, JSBool trimRight)
1152 {
1153 JSString *str;
1154 const jschar *chars;
1155 size_t length, begin, end;
1156
1157 NORMALIZE_THIS(cx, vp, str);
1158 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
1159 begin = 0;
1160 end = length;
1161
1162 if (trimLeft) {
1163 while (begin < length && JS_ISSPACE(chars[begin]))
1164 ++begin;
1165 }
1166
1167 if (trimRight) {
1168 while (end > begin && JS_ISSPACE(chars[end-1]))
1169 --end;
1170 }
1171
1172 str = js_NewDependentString(cx, str, begin, end - begin);
1173 if (!str)
1174 return JS_FALSE;
1175
1176 *vp = STRING_TO_JSVAL(str);
1177 return JS_TRUE;
1178 }
1179
1180 static JSBool
1181 str_trim(JSContext *cx, uintN argc, jsval *vp)
1182 {
1183 return js_TrimString(cx, vp, JS_TRUE, JS_TRUE);
1184 }
1185
1186 static JSBool
1187 str_trimLeft(JSContext *cx, uintN argc, jsval *vp)
1188 {
1189 return js_TrimString(cx, vp, JS_TRUE, JS_FALSE);
1190 }
1191
1192 static JSBool
1193 str_trimRight(JSContext *cx, uintN argc, jsval *vp)
1194 {
1195 return js_TrimString(cx, vp, JS_FALSE, JS_TRUE);
1196 }
1197
1198 /*
1199 * Perl-inspired string functions.
1200 */
1201 typedef struct GlobData {
1202 jsbytecode *pc; /* in: program counter resulting in us matching */
1203 uintN flags; /* inout: mode and flag bits, see below */
1204 uintN optarg; /* in: index of optional flags argument */
1205 JSString *str; /* out: 'this' parameter object as string */
1206 JSRegExp *regexp; /* out: regexp parameter object private data */
1207 } GlobData;
1208
1209 /*
1210 * Mode and flag bit definitions for match_or_replace's GlobData.flags field.
1211 */
/* in: return match array on success */
#define MODE_MATCH      0x00

/* in: match and replace */
#define MODE_REPLACE    0x01

/* in: search only, return match index or -1 */
#define MODE_SEARCH     0x02

/* Extract the mode (low two bits) from a flags word. */
#define GET_MODE(f)     ((f) & 0x03)

/* in: force flat (non-regexp) string match */
#define FORCE_FLAT      0x04

/*
 * inout: keep GlobData.regexp alive for caller of match_or_replace; if set
 * on input but clear on output, regexp ownership does not pass to caller
 */
#define KEEP_REGEXP     0x08

/* out: regexp had the 'g' flag */
#define GLOBAL_REGEXP   0x10
1222
1223 static JSBool
1224 match_or_replace(JSContext *cx,
1225 JSBool (*glob)(JSContext *cx, jsint count, GlobData *data),
1226 void (*destroy)(JSContext *cx, GlobData *data),
1227 GlobData *data, uintN argc, jsval *vp)
1228 {
1229 JSString *str, *src, *opt;
1230 JSObject *reobj;
1231 JSRegExp *re;
1232 size_t index, length;
1233 JSBool ok, test;
1234 jsint count;
1235
1236 NORMALIZE_THIS(cx, vp, str);
1237 data->str = str;
1238
1239 if (argc != 0 && VALUE_IS_REGEXP(cx, vp[2])) {
1240 reobj = JSVAL_TO_OBJECT(vp[2]);
1241 re = (JSRegExp *) JS_GetPrivate(cx, reobj);
1242 } else {
1243 src = ArgToRootedString(cx, argc, vp, 0);
1244 if (!src)
1245 return JS_FALSE;
1246 if (data->optarg < argc) {
1247 opt = js_ValueToString(cx, vp[2 + data->optarg]);
1248 if (!opt)
1249 return JS_FALSE;
1250 } else {
1251 opt = NULL;
1252 }
1253 re = js_NewRegExpOpt(cx, src, opt, (data->flags & FORCE_FLAT) != 0);
1254 if (!re)
1255 return JS_FALSE;
1256 reobj = NULL;
1257 }
1258 /* From here on, all control flow must reach the matching DROP. */
1259 data->regexp = re;
1260 HOLD_REGEXP(cx, re);
1261
1262 if (re->flags & JSREG_GLOB)
1263 data->flags |= GLOBAL_REGEXP;
1264 index = 0;
1265 if (GET_MODE(data->flags) == MODE_SEARCH) {
1266 ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, vp);
1267 if (ok) {
1268 *vp = (*vp == JSVAL_TRUE)
1269 ? INT_TO_JSVAL(cx->regExpStatics.leftContext.length)
1270 : INT_TO_JSVAL(-1);
1271 }
1272 } else if (data->flags & GLOBAL_REGEXP) {
1273 if (reobj) {
1274 /* Set the lastIndex property's reserved slot to 0. */
1275 ok = js_SetLastIndex(cx, reobj, 0);
1276 } else {
1277 ok = JS_TRUE;
1278 }
1279 if (ok) {
1280 length = JSSTRING_LENGTH(str);
1281 for (count = 0; index <= length; count++) {
1282 ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, vp);
1283 if (!ok || *vp != JSVAL_TRUE)
1284 break;
1285 ok = glob(cx, count, data);
1286 if (!ok)
1287 break;
1288 if (cx->regExpStatics.lastMatch.length == 0) {
1289 if (index == length)
1290 break;
1291 index++;
1292 }
1293 }
1294 if (!ok && destroy)
1295 destroy(cx, data);
1296 }
1297 } else {
1298 if (GET_MODE(data->flags) == MODE_REPLACE) {
1299 test = JS_TRUE;
1300 } else {
1301 /*
1302 * MODE_MATCH implies js_str_match is being called from a script or
1303 * a scripted function. If the caller cares only about testing null
1304 * vs. non-null return value, optimize away the array object that
1305 * would normally be returned in *vp.
1306 *
1307 * Assume a full array result is required, then prove otherwise.
1308 */
1309 test = JS_FALSE;
1310 if (data->pc && (*data->pc == JSOP_CALL || *data->pc == JSOP_NEW)) {
1311 JS_ASSERT(js_CodeSpec[*data->pc].length == 3);
1312 switch (data->pc[3]) {
1313 case JSOP_POP:
1314 case JSOP_IFEQ:
1315 case JSOP_IFNE:
1316 case JSOP_IFEQX:
1317 case JSOP_IFNEX:
1318 test = JS_TRUE;
1319 break;
1320 default:;
1321 }
1322 }
1323 }
1324 ok = js_ExecuteRegExp(cx, re, str, &index, test, vp);
1325 }
1326
1327 DROP_REGEXP(cx, re);
1328 if (reobj) {
1329 /* Tell our caller that it doesn't need to destroy data->regexp. */
1330 data->flags &= ~KEEP_REGEXP;
1331 } else if (!ok || !(data->flags & KEEP_REGEXP)) {
1332 /* Caller didn't want to keep data->regexp, so null and destroy it. */
1333 data->regexp = NULL;
1334 js_DestroyRegExp(cx, re);
1335 }
1336
1337 return ok;
1338 }
1339
1340 typedef struct MatchData {
1341 GlobData base;
1342 jsval *arrayval; /* NB: local root pointer */
1343 } MatchData;
1344
1345 static JSBool
1346 match_glob(JSContext *cx, jsint count, GlobData *data)
1347 {
1348 MatchData *mdata;
1349 JSObject *arrayobj;
1350 JSSubString *matchsub;
1351 JSString *matchstr;
1352 jsval v;
1353
1354 mdata = (MatchData *)data;
1355 arrayobj = JSVAL_TO_OBJECT(*mdata->arrayval);
1356 if (!arrayobj) {
1357 arrayobj = js_ConstructObject(cx, &js_ArrayClass, NULL, NULL, 0, NULL);
1358 if (!arrayobj)
1359 return JS_FALSE;
1360 *mdata->arrayval = OBJECT_TO_JSVAL(arrayobj);
1361 }
1362 matchsub = &cx->regExpStatics.lastMatch;
1363 matchstr = js_NewStringCopyN(cx, matchsub->chars, matchsub->length);
1364 if (!matchstr)
1365 return JS_FALSE;
1366 v = STRING_TO_JSVAL(matchstr);
1367 JS_ASSERT(count <= JSVAL_INT_MAX);
1368 return OBJ_SET_PROPERTY(cx, arrayobj, INT_TO_JSID(count), &v);
1369 }
1370
1371 JSBool
1372 js_StringMatchHelper(JSContext *cx, uintN argc, jsval *vp, jsbytecode *pc)
1373 {
1374 JSTempValueRooter tvr;
1375 MatchData mdata;
1376 JSBool ok;
1377
1378 JS_PUSH_SINGLE_TEMP_ROOT(cx, JSVAL_NULL, &tvr);
1379 mdata.base.pc = pc;
1380 mdata.base.flags = MODE_MATCH;
1381 mdata.base.optarg = 1;
1382 mdata.arrayval = &tvr.u.value;
1383 ok = match_or_replace(cx, match_glob, NULL, &mdata.base, argc, vp);
1384 if (ok && !JSVAL_IS_NULL(*mdata.arrayval))
1385 *vp = *mdata.arrayval;
1386 JS_POP_TEMP_ROOT(cx, &tvr);
1387 return ok;
1388 }
1389
1390 JSBool
1391 js_str_match(JSContext *cx, uintN argc, jsval *vp)
1392 {
1393 JSStackFrame *fp;
1394
1395 for (fp = cx->fp; fp && !fp->regs; fp = fp->down)
1396 JS_ASSERT(!fp->script);
1397 return js_StringMatchHelper(cx, argc, vp, fp ? fp->regs->pc : NULL);
1398 }
1399
1400 static JSBool
1401 str_search(JSContext *cx, uintN argc, jsval *vp)
1402 {
1403 GlobData data;
1404
1405 data.flags = MODE_SEARCH;
1406 data.optarg = 1;
1407 return match_or_replace(cx, NULL, NULL, &data, argc, vp);
1408 }
1409
1410 typedef struct ReplaceData {
1411 GlobData base; /* base struct state */
1412 JSObject *lambda; /* replacement function object or null */
1413 JSString *repstr; /* replacement string */
1414 jschar *dollar; /* null or pointer to first $ in repstr */
1415 jschar *dollarEnd; /* limit pointer for js_strchr_limit */
1416 jschar *chars; /* result chars, null initially */
1417 size_t length; /* result length, 0 initially */
1418 jsint index; /* index in result of next replacement */
1419 jsint leftIndex; /* left context index in base.str->chars */
1420 JSSubString dollarStr; /* for "$$" interpret_dollar result */
1421 } ReplaceData;
1422
1423 static JSSubString *
1424 interpret_dollar(JSContext *cx, jschar *dp, jschar *ep, ReplaceData *rdata,
1425 size_t *skip)
1426 {
1427 JSRegExpStatics *res;
1428 jschar dc, *cp;
1429 uintN num, tmp;
1430
1431 JS_ASSERT(*dp == '$');
1432
1433 /* If there is only a dollar, bail now */
1434 if (dp + 1 >= ep)
1435 return NULL;
1436
1437 /* Interpret all Perl match-induced dollar variables. */
1438 res = &cx->regExpStatics;
1439 dc = dp[1];
1440 if (JS7_ISDEC(dc)) {
1441 /* ECMA-262 Edition 3: 1-9 or 01-99 */
1442 num = JS7_UNDEC(dc);
1443 if (num > res->parenCount)
1444 return NULL;
1445
1446 cp = dp + 2;
1447 if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
1448 tmp = 10 * num + JS7_UNDEC(dc);
1449 if (tmp <= res->parenCount) {
1450 cp++;
1451 num = tmp;
1452 }
1453 }
1454 if (num == 0)
1455 return NULL;
1456
1457 /* Adjust num from 1 $n-origin to 0 array-index-origin. */
1458 num--;
1459 *skip = cp - dp;
1460 return REGEXP_PAREN_SUBSTRING(res, num);
1461 }
1462
1463 *skip = 2;
1464 switch (dc) {
1465 case '$':
1466 rdata->dollarStr.chars = dp;
1467 rdata->dollarStr.length = 1;
1468 return &rdata->dollarStr;
1469 case '&':
1470 return &res->lastMatch;
1471 case '+':
1472 return &res->lastParen;
1473 case '`':
1474 return &res->leftContext;
1475 case '\'':
1476 return &res->rightContext;
1477 }
1478 return NULL;
1479 }
1480
1481 static JSBool
1482 find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
1483 {
1484 JSString *repstr;
1485 size_t replen, skip;
1486 jschar *dp, *ep;
1487 JSSubString *sub;
1488 JSObject *lambda;
1489
1490 lambda = rdata->lambda;
1491 if (lambda) {
1492 uintN argc, i, j, m, n, p;
1493 jsval *invokevp, *sp;
1494 void *mark;
1495 JSBool ok;
1496
1497 /*
1498 * Save the regExpStatics from the current regexp, since they may be
1499 * clobbered by a RegExp usage in the lambda function. Note that all
1500 * members of JSRegExpStatics are JSSubStrings, so not GC roots, save
1501 * input, which is rooted otherwise via vp[1] in js_str_replace.
1502 */
1503 JSRegExpStatics save = cx->regExpStatics;
1504 JSBool freeMoreParens = JS_FALSE;
1505
1506 /*
1507 * In the lambda case, not only do we find the replacement string's
1508 * length, we compute repstr and return it via rdata for use within
1509 * do_replace. The lambda is called with arguments ($&, $1, $2, ...,
1510 * index, input), i.e., all the properties of a regexp match array.
1511 * For $&, etc., we must create string jsvals from cx->regExpStatics.
1512 * We grab up stack space to keep the newborn strings GC-rooted.
1513 */
1514 p = rdata->base.regexp->parenCount;
1515 argc = 1 + p + 2;
1516 invokevp = js_AllocStack(cx, 2 + argc, &mark);
1517 if (!invokevp)
1518 return JS_FALSE;
1519
1520 /* Push lambda and its 'this' parameter. */
1521 sp = invokevp;
1522 *sp++ = OBJECT_TO_JSVAL(lambda);
1523 *sp++ = OBJECT_TO_JSVAL(OBJ_GET_PARENT(cx, lambda));
1524
1525 #define PUSH_REGEXP_STATIC(sub) \
1526 JS_BEGIN_MACRO \
1527 JSString *str = js_NewStringCopyN(cx, \
1528 cx->regExpStatics.sub.chars, \
1529 cx->regExpStatics.sub.length); \
1530 if (!str) { \
1531 ok = JS_FALSE; \
1532 goto lambda_out; \
1533 } \
1534 *sp++ = STRING_TO_JSVAL(str); \
1535 JS_END_MACRO
1536
1537 /* Push $&, $1, $2, ... */
1538 PUSH_REGEXP_STATIC(lastMatch);
1539 i = 0;
1540 m = cx->regExpStatics.parenCount;
1541 n = JS_MIN(m, 9);
1542 for (j = 0; i < n; i++, j++)
1543 PUSH_REGEXP_STATIC(parens[j]);
1544 for (j = 0; i < m; i++, j++)
1545 PUSH_REGEXP_STATIC(moreParens[j]);
1546
1547 /*
1548 * We need to clear moreParens in the top-of-stack cx->regExpStatics
1549 * to it won't be possibly realloc'ed, leaving the bottom-of-stack
1550 * moreParens pointing to freed memory.
1551 */
1552 cx->regExpStatics.moreParens = NULL;
1553 freeMoreParens = JS_TRUE;
1554
1555 #undef PUSH_REGEXP_STATIC
1556
1557 /* Make sure to push undefined for any unmatched parens. */
1558 for (; i < p; i++)
1559 *sp++ = JSVAL_VOID;
1560
1561 /* Push match index and input string. */
1562 *sp++ = INT_TO_JSVAL((jsint)cx->regExpStatics.leftContext.length);
1563 *sp++ = STRING_TO_JSVAL(rdata->base.str);
1564
1565 ok = js_Invoke(cx, argc, invokevp, 0);
1566 if (ok) {
1567 /*
1568 * NB: we count on the newborn string root to hold any string
1569 * created by this js_ValueToString that would otherwise be GC-
1570 * able, until we use rdata->repstr in do_replace.
1571 */
1572 repstr = js_ValueToString(cx, *invokevp);
1573 if (!repstr) {
1574 ok = JS_FALSE;
1575 } else {
1576 rdata->repstr = repstr;
1577 *sizep = JSSTRING_LENGTH(repstr);
1578 }
1579 }
1580
1581 lambda_out:
1582 js_FreeStack(cx, mark);
1583 if (freeMoreParens)
1584 JS_free(cx, cx->regExpStatics.moreParens);
1585 cx->regExpStatics = save;
1586 return ok;
1587 }
1588
1589 repstr = rdata->repstr;
1590 replen = JSSTRING_LENGTH(repstr);
1591 for (dp = rdata->dollar, ep = rdata->dollarEnd; dp;
1592 dp = js_strchr_limit(dp, '$', ep)) {
1593 sub = interpret_dollar(cx, dp, ep, rdata, &skip);
1594 if (sub) {
1595 replen += sub->length - skip;
1596 dp += skip;
1597 }
1598 else
1599 dp++;
1600 }
1601 *sizep = replen;
1602 return JS_TRUE;
1603 }
1604
1605 static void
1606 do_replace(JSContext *cx, ReplaceData *rdata, jschar *chars)
1607 {
1608 JSString *repstr;
1609 jschar *bp, *cp, *dp, *ep;
1610 size_t len, skip;
1611 JSSubString *sub;
1612
1613 repstr = rdata->repstr;
1614 bp = cp = JSSTRING_CHARS(repstr);
1615 for (dp = rdata->dollar, ep = rdata->dollarEnd; dp;
1616 dp = js_strchr_limit(dp, '$', ep)) {
1617 len = dp - cp;
1618 js_strncpy(chars, cp, len);
1619 chars += len;
1620 cp = dp;
1621 sub = interpret_dollar(cx, dp, ep, rdata, &skip);
1622 if (sub) {
1623 len = sub->length;
1624 js_strncpy(chars, sub->chars, len);
1625 chars += len;
1626 cp += skip;
1627 dp += skip;
1628 } else {
1629 dp++;
1630 }
1631 }
1632 js_strncpy(chars, cp, JSSTRING_LENGTH(repstr) - (cp - bp));
1633 }
1634
1635 static void
1636 replace_destroy(JSContext *cx, GlobData *data)
1637 {
1638 ReplaceData *rdata;
1639
1640 rdata = (ReplaceData *)data;
1641 JS_free(cx, rdata->chars);
1642 rdata->chars = NULL;
1643 }
1644
1645 static JSBool
1646 replace_glob(JSContext *cx, jsint count, GlobData *data)
1647 {
1648 ReplaceData *rdata;
1649 JSString *str;
1650 size_t leftoff, leftlen, replen, growth;
1651 const jschar *left;
1652 jschar *chars;
1653
1654 rdata = (ReplaceData *)data;
1655 str = data->str;
1656 leftoff = rdata->leftIndex;
1657 left = JSSTRING_CHARS(str) + leftoff;
1658 leftlen = cx->regExpStatics.lastMatch.chars - left;
1659 rdata->leftIndex = cx->regExpStatics.lastMatch.chars - JSSTRING_CHARS(str);
1660 rdata->leftIndex += cx->regExpStatics.lastMatch.length;
1661 if (!find_replen(cx, rdata, &replen))
1662 return JS_FALSE;
1663 growth = leftlen + replen;
1664 chars = (jschar *)
1665 (rdata->chars
1666 ? JS_realloc(cx, rdata->chars, (rdata->length + growth + 1)
1667 * sizeof(jschar))
1668 : JS_malloc(cx, (growth + 1) * sizeof(jschar)));
1669 if (!chars)
1670 return JS_FALSE;
1671 rdata->chars = chars;
1672 rdata->length += growth;
1673 chars += rdata->index;
1674 rdata->index += growth;
1675 js_strncpy(chars, left, leftlen);
1676 chars += leftlen;
1677 do_replace(cx, rdata, chars);
1678 return JS_TRUE;
1679 }
1680
1681 JSBool
1682 js_str_replace(JSContext *cx, uintN argc, jsval *vp)
1683 {
1684 JSObject *lambda;
1685 JSString *repstr;
1686
1687 if (argc >= 2 && JS_TypeOfValue(cx, vp[3]) == JSTYPE_FUNCTION) {
1688 lambda = JSVAL_TO_OBJECT(vp[3]);
1689 repstr = NULL;
1690 } else {
1691 lambda = NULL;
1692 repstr = ArgToRootedString(cx, argc, vp, 1);
1693 if (!repstr)
1694 return JS_FALSE;
1695 }
1696
1697 return js_StringReplaceHelper(cx, argc, lambda, repstr, vp);
1698 }
1699
1700 JSBool
1701 js_StringReplaceHelper(JSContext *cx, uintN argc, JSObject *lambda,
1702 JSString *repstr, jsval *vp)
1703 {
1704 ReplaceData rdata;
1705 JSBool ok;
1706 size_t leftlen, rightlen, length;
1707 jschar *chars;
1708 JSString *str;
1709
1710 /*
1711 * For ECMA Edition 3, the first argument is to be converted to a string
1712 * to match in a "flat" sense (without regular expression metachars having
1713 * special meanings) UNLESS the first arg is a RegExp object.
1714 */
1715 rdata.base.flags = MODE_REPLACE | KEEP_REGEXP | FORCE_FLAT;
1716 rdata.base.optarg = 2;
1717
1718 rdata.lambda = lambda;
1719 rdata.repstr = repstr;
1720 if (repstr) {
1721 rdata.dollarEnd = JSSTRING_CHARS(repstr) + JSSTRING_LENGTH(repstr);
1722 rdata.dollar = js_strchr_limit(JSSTRING_CHARS(repstr), '$',
1723 rdata.dollarEnd);
1724 } else {
1725 rdata.dollar = rdata.dollarEnd = NULL;
1726 }
1727 rdata.chars = NULL;
1728 rdata.length = 0;
1729 rdata.index = 0;
1730 rdata.leftIndex = 0;
1731
1732 ok = match_or_replace(cx, replace_glob, replace_destroy, &rdata.base,
1733 argc, vp);
1734 if (!ok)
1735 return JS_FALSE;
1736
1737 if (!rdata.chars) {
1738 if ((rdata.base.flags & GLOBAL_REGEXP) || *vp != JSVAL_TRUE) {
1739 /* Didn't match even once. */
1740 *vp = STRING_TO_JSVAL(rdata.base.str);
1741 goto out;
1742 }
1743 leftlen = cx->regExpStatics.leftContext.length;
1744 ok = find_replen(cx, &rdata, &length);
1745 if (!ok)
1746 goto out;
1747 length += leftlen;
1748 chars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
1749 if (!chars) {
1750 ok = JS_FALSE;
1751 goto out;
1752 }
1753 js_strncpy(chars, cx->regExpStatics.leftContext.chars, leftlen);
1754 do_replace(cx, &rdata, chars + leftlen);
1755 rdata.chars = chars;
1756 rdata.length = length;
1757 }
1758
1759 rightlen = cx->regExpStatics.rightContext.length;
1760 length = rdata.length + rightlen;
1761 chars = (jschar *)
1762 JS_realloc(cx, rdata.chars, (length + 1) * sizeof(jschar));
1763 if (!chars) {
1764 JS_free(cx, rdata.chars);
1765 ok = JS_FALSE;
1766 goto out;
1767 }
1768 js_strncpy(chars + rdata.length, cx->regExpStatics.rightContext.chars,
1769 rightlen);
1770 chars[length] = 0;
1771
1772 str = js_NewString(cx, chars, length);
1773 if (!str) {
1774 JS_free(cx, chars);
1775 ok = JS_FALSE;
1776 goto out;
1777 }
1778 *vp = STRING_TO_JSVAL(str);
1779
1780 out:
1781 /* If KEEP_REGEXP is still set, it's our job to destroy regexp now. */
1782 if (rdata.base.flags & KEEP_REGEXP)
1783 js_DestroyRegExp(cx, rdata.base.regexp);
1784 return ok;
1785 }
1786
1787 /*
1788 * Subroutine used by js_str_split to find the next split point in str, starting
1789 * at offset *ip and looking either for the separator substring given by sep, or
1790 * for the next re match. In the re case, return the matched separator in *sep,
1791 * and the possibly updated offset in *ip.
1792 *
1793 * Return -2 on error, -1 on end of string, >= 0 for a valid index of the next
1794 * separator occurrence if found, or str->length if no separator is found.
1795 */
1796 static jsint
1797 find_split(JSContext *cx, JSString *str, JSRegExp *re, jsint *ip,
1798 JSSubString *sep)
1799 {
1800 jsint i, j, k;
1801 size_t length;
1802 jschar *chars;
1803
1804 /*
1805 * Stop if past end of string. If at end of string, we will compare the
1806 * null char stored there (by js_NewString*) to sep->chars[j] in the while
1807 * loop at the end of this function, so that
1808 *
1809 * "ab,".split(',') => ["ab", ""]
1810 *
1811 * and the resulting array converts back to the string "ab," for symmetry.
1812 * However, we ape Perl and do this only if there is a sufficiently large
1813 * limit argument (see js_str_split).
1814 */
1815 i = *ip;
1816 length = JSSTRING_LENGTH(str);
1817 if ((size_t)i > length)
1818 return -1;
1819
1820 chars = JSSTRING_CHARS(str);
1821
1822 /*
1823 * Match a regular expression against the separator at or above index i.
1824 * Call js_ExecuteRegExp with true for the test argument. On successful
1825 * match, get the separator from cx->regExpStatics.lastMatch.
1826 */
1827 if (re) {
1828 size_t index;
1829 jsval rval;
1830
1831 again:
1832 /* JS1.2 deviated from Perl by never matching at end of string. */
1833 index = (size_t)i;
1834 if (!js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, &rval))
1835 return -2;
1836 if (rval != JSVAL_TRUE) {
1837 /* Mismatch: ensure our caller advances i past end of string. */
1838 sep->length = 1;
1839 return length;
1840 }
1841 i = (jsint)index;
1842 *sep = cx->regExpStatics.lastMatch;
1843 if (sep->length == 0) {
1844 /*
1845 * Empty string match: never split on an empty match at the start
1846 * of a find_split cycle. Same rule as for an empty global match
1847 * in match_or_replace.
1848 */
1849 if (i == *ip) {
1850 /*
1851 * "Bump-along" to avoid sticking at an empty match, but don't
1852 * bump past end of string -- our caller must do that by adding
1853 * sep->length to our return value.
1854 */
1855 if ((size_t)i == length)
1856 return -1;
1857 i++;
1858 goto again;
1859 }
1860 if ((size_t)i == length) {
1861 /*
1862 * If there was a trivial zero-length match at the end of the
1863 * split, then we shouldn't output the matched string at the end
1864 * of the split array. See ECMA-262 Ed. 3, 15.5.4.14, Step 15.
1865 */
1866 sep->chars = NULL;
1867 }
1868 }
1869 JS_ASSERT((size_t)i >= sep->length);
1870 return i - sep->length;
1871 }
1872
1873 /*
1874 * Special case: if sep is the empty string, split str into one character
1875 * substrings. Let our caller worry about whether to split once at end of
1876 * string into an empty substring.
1877 */
1878 if (sep->length == 0)
1879 return ((size_t)i == length) ? -1 : i + 1;
1880
1881 /*
1882 * Now that we know sep is non-empty, search starting at i in str for an
1883 * occurrence of all of sep's chars. If we find them, return the index of
1884 * the first separator char. Otherwise, return length.
1885 */
1886 j = 0;
1887 while ((size_t)(k = i + j) < length) {
1888 if (chars[k] == sep->chars[j]) {
1889 if ((size_t)++j == sep->length)
1890 return i;
1891 } else {
1892 i++;
1893 j = 0;
1894 }
1895 }
1896 return k;
1897 }
1898
1899 JSBool
1900 js_str_split(JSContext *cx, uintN argc, jsval *vp)
1901 {
1902 JSString *str, *sub;
1903 JSObject *arrayobj;
1904 jsval v;
1905 JSBool ok, limited;
1906 JSRegExp *re;
1907 JSSubString *sep, tmp;
1908 jsdouble d;
1909 jsint i, j;
1910 uint32 len, limit;
1911
1912 NORMALIZE_THIS(cx, vp, str);
1913
1914 arrayobj = js_ConstructObject(cx, &js_ArrayClass, NULL, NULL, 0, NULL);
1915 if (!arrayobj)
1916 return JS_FALSE;
1917 *vp = OBJECT_TO_JSVAL(arrayobj);
1918
1919 if (argc == 0) {
1920 v = STRING_TO_JSVAL(str);
1921 ok = OBJ_SET_PROPERTY(cx, arrayobj, INT_TO_JSID(0), &v);
1922 } else {
1923 if (VALUE_IS_REGEXP(cx, vp[2])) {
1924 re = (JSRegExp *) JS_GetPrivate(cx, JSVAL_TO_OBJECT(vp[2]));
1925 sep = &tmp;
1926
1927 /* Set a magic value so we can detect a successful re match. */
1928 sep->chars = NULL;
1929 sep->length = 0;
1930 } else {
1931 JSString *str2 = js_ValueToString(cx, vp[2]);
1932 if (!str2)
1933 return JS_FALSE;
1934 vp[2] = STRING_TO_JSVAL(str2);
1935
1936 /*
1937 * Point sep at a local copy of str2's header because find_split
1938 * will modify sep->length.
1939 */
1940 JSSTRING_CHARS_AND_LENGTH(str2, tmp.chars, tmp.length);
1941 sep = &tmp;
1942 re = NULL;
1943 }
1944
1945 /* Use the second argument as the split limit, if given. */
1946 limited = (argc > 1) && !JSVAL_IS_VOID(vp[3]);
1947 limit = 0; /* Avoid warning. */
1948 if (limited) {
1949 d = js_ValueToNumber(cx, &vp[3]);
1950 if (JSVAL_IS_NULL(vp[3]))
1951 return JS_FALSE;
1952
1953 /* Clamp limit between 0 and 1 + string length. */
1954 limit = js_DoubleToECMAUint32(d);
1955 if (limit > JSSTRING_LENGTH(str))
1956 limit = 1 + JSSTRING_LENGTH(str);
1957 }
1958
1959 len = i = 0;
1960 while ((j = find_split(cx, str, re, &i, sep)) >= 0) {
1961 if (limited && len >= limit)
1962 break;
1963 sub = js_NewDependentString(cx, str, i, (size_t)(j - i));
1964 if (!sub)
1965 return JS_FALSE;
1966 v = STRING_TO_JSVAL(sub);
1967 if (!JS_SetElement(cx, arrayobj, len, &v))
1968 return JS_FALSE;
1969 len++;
1970
1971 /*
1972 * Imitate perl's feature of including parenthesized substrings
1973 * that matched part of the delimiter in the new array, after the
1974 * split substring that was delimited.
1975 */
1976 if (re && sep->chars) {
1977 uintN num;
1978 JSSubString *parsub;
1979
1980 for (num = 0; num < cx->regExpStatics.parenCount; num++) {
1981 if (limited && len >= limit)
1982 break;
1983 parsub = REGEXP_PAREN_SUBSTRING(&cx->regExpStatics, num);
1984 sub = js_NewStringCopyN(cx, parsub->chars, parsub->length);
1985 if (!sub)
1986 return JS_FALSE;
1987 v = STRING_TO_JSVAL(sub);
1988 if (!JS_SetElement(cx, arrayobj, len, &v))
1989 return JS_FALSE;
1990 len++;
1991 }
1992 sep->chars = NULL;
1993 }
1994 i = j + sep->length;
1995 }
1996 ok = (j != -2);
1997 }
1998 return ok;
1999 }
2000
2001 #if JS_HAS_PERL_SUBSTR
2002 static JSBool
2003 str_substr(JSContext *cx, uintN argc, jsval *vp)
2004 {
2005 JSString *str;
2006 jsdouble d;
2007 jsdouble length, begin, end;
2008
2009 NORMALIZE_THIS(cx, vp, str);
2010 if (argc != 0) {
2011 d = js_ValueToNumber(cx, &vp[2]);
2012 if (JSVAL_IS_NULL(vp[2]))
2013 return JS_FALSE;
2014 length = JSSTRING_LENGTH(str);
2015 begin = js_DoubleToInteger(d);
2016 if (begin < 0) {
2017 begin += length;
2018 if (begin < 0)
2019 begin = 0;
2020 } else if (begin > length) {
2021 begin = length;
2022 }
2023
2024 if (argc == 1) {
2025 end = length;
2026 } else {
2027 d = js_ValueToNumber(cx, &vp[3]);
2028 if (JSVAL_IS_NULL(vp[3]))
2029 return JS_FALSE;
2030 end = js_DoubleToInteger(d);
2031 if (end < 0)
2032 end = 0;
2033 end += begin;
2034 if (end > length)
2035 end = length;
2036 }
2037
2038 str = js_NewDependentString(cx, str,
2039 (size_t)begin,
2040 (size_t)(end - begin));
2041 if (!str)
2042 return JS_FALSE;
2043 }
2044 *vp = STRING_TO_JSVAL(str);
2045 return JS_TRUE;
2046 }
2047 #endif /* JS_HAS_PERL_SUBSTR */
2048
2049 /*
2050 * Python-esque sequence operations.
2051 */
2052 JSBool
2053 js_str_concat(JSContext *cx, uintN argc, jsval *vp)
2054 {
2055 JSString *str, *str2;
2056 jsval *argv;
2057 uintN i;
2058
2059 NORMALIZE_THIS(cx, vp, str);
2060
2061 for (i = 0, argv = vp + 2; i < argc; i++) {
2062 str2 = js_ValueToString(cx, argv[i]);
2063 if (!str2)
2064 return JS_FALSE;
2065 argv[i] = STRING_TO_JSVAL(str2);
2066
2067 str = js_ConcatStrings(cx, str, str2);
2068 if (!str)
2069 return JS_FALSE;
2070 }
2071
2072 *vp = STRING_TO_JSVAL(str);
2073 return JS_TRUE;
2074 }
2075
2076 static JSBool
2077 str_slice(JSContext *cx, uintN argc, jsval *vp)
2078 {
2079 jsval t, v;
2080 JSString *str;
2081
2082 t = vp[1];
2083 v = vp[2];
2084 if (argc == 1 && JSVAL_IS_STRING(t) && JSVAL_IS_INT(v)) {
2085 size_t begin, end, length;
2086
2087 str = JSVAL_TO_STRING(t);
2088 begin = JSVAL_TO_INT(v);
2089 end = JSSTRING_LENGTH(str);
2090 if (begin <= end) {
2091 length = end - begin;
2092 if (length == 0) {
2093 str = cx->runtime->emptyString;
2094 } else {
2095 str = (length == 1)
2096 ? js_GetUnitString(cx, str, begin)
2097 : js_NewDependentString(cx, str, begin, length);
2098 if (!str)
2099 return JS_FALSE;
2100 }
2101 *vp = STRING_TO_JSVAL(str);
2102 return JS_TRUE;
2103 }
2104 }
2105
2106 NORMALIZE_THIS(cx, vp, str);
2107
2108 if (argc != 0) {
2109 double begin, end, length;
2110
2111 begin = js_ValueToNumber(cx, &vp[2]);
2112 if (JSVAL_IS_NULL(vp[2]))
2113 return JS_FALSE;
2114 begin = js_DoubleToInteger(begin);
2115 length = JSSTRING_LENGTH(str);
2116 if (begin < 0) {
2117 begin += length;
2118 if (begin < 0)
2119 begin = 0;
2120 } else if (begin > length) {
2121 begin = length;
2122 }
2123
2124 if (argc == 1) {
2125 end = length;
2126 } else {
2127 end = js_ValueToNumber(cx, &vp[3]);
2128 if (JSVAL_IS_NULL(vp[3]))
2129 return JS_FALSE;
2130 end = js_DoubleToInteger(end);
2131 if (end < 0) {
2132 end += length;
2133 if (end < 0)
2134 end = 0;
2135 } else if (end > length) {
2136 end = length;
2137 }
2138 if (end < begin)
2139 end = begin;
2140 }
2141
2142 str = js_NewDependentString(cx, str,
2143 (size_t)begin,
2144 (size_t)(end - begin));
2145 if (!str)
2146 return JS_FALSE;
2147 }
2148 *vp = STRING_TO_JSVAL(str);
2149 return JS_TRUE;
2150 }
2151
2152 #if JS_HAS_STR_HTML_HELPERS
2153 /*
2154 * HTML composition aids.
2155 */
2156 static JSBool
2157 tagify(JSContext *cx, const char *begin, JSString *param, const char *end,
2158 jsval *vp)
2159 {
2160 JSString *str;
2161 jschar *tagbuf;
2162 size_t beglen, endlen, parlen, taglen;
2163 size_t i, j;
2164
2165 NORMALIZE_THIS(cx, vp, str);
2166
2167 if (!end)
2168 end = begin;
2169
2170 beglen = strlen(begin);
2171 taglen = 1 + beglen + 1; /* '<begin' + '>' */
2172 parlen = 0; /* Avoid warning. */
2173 if (param) {
2174 parlen = JSSTRING_LENGTH(param);
2175 taglen += 2 + parlen + 1; /* '="param"' */
2176 }
2177 endlen = strlen(end);
2178 taglen += JSSTRING_LENGTH(str) + 2 + endlen + 1; /* 'str</end>' */
2179
2180 if (taglen >= ~(size_t)0 / sizeof(jschar)) {
2181 js_ReportAllocationOverflow(cx);
2182 return JS_FALSE;
2183 }
2184
2185 tagbuf = (jschar *) JS_malloc(cx, (taglen + 1) * sizeof(jschar));
2186 if (!tagbuf)
2187 return JS_FALSE;
2188
2189 j = 0;
2190 tagbuf[j++] = '<';
2191 for (i = 0; i < beglen; i++)
2192 tagbuf[j++] = (jschar)begin[i];
2193 if (param) {
2194 tagbuf[j++] = '=';
2195 tagbuf[j++] = '"';
2196 js_strncpy(&tagbuf[j], JSSTRING_CHARS(param), parlen);
2197 j += parlen;
2198 tagbuf[j++] = '"';
2199 }
2200 tagbuf[j++] = '>';
2201 js_strncpy(&tagbuf[j], JSSTRING_CHARS(str), JSSTRING_LENGTH(str));
2202 j += JSSTRING_LENGTH(str);
2203 tagbuf[j++] = '<';
2204 tagbuf[j++] = '/';
2205 for (i = 0; i < endlen; i++)
2206 tagbuf[j++] = (jschar)end[i];
2207 tagbuf[j++] = '>';
2208 JS_ASSERT(j == taglen);
2209 tagbuf[j] = 0;
2210
2211 str = js_NewString(cx, tagbuf, taglen);
2212 if (!str) {
2213 free((char *)tagbuf);
2214 return JS_FALSE;
2215 }
2216 *vp = STRING_TO_JSVAL(str);
2217 return JS_TRUE;
2218 }
2219
2220 static JSBool
2221 tagify_value(JSContext *cx, uintN argc, jsval *vp,
2222 const char *begin, const char *end)
2223 {
2224 JSString *param;
2225
2226 param = ArgToRootedString(cx, argc, vp, 0);
2227 if (!param)
2228 return JS_FALSE;
2229 return tagify(cx, begin, param, end, vp);
2230 }
2231
2232 static JSBool
2233 str_bold(JSContext *cx, uintN argc, jsval *vp)
2234 {
2235 return tagify(cx, "b", NULL, NULL, vp);
2236 }
2237
2238 static JSBool
2239 str_italics(JSContext *cx, uintN argc, jsval *vp)
2240 {
2241 return tagify(cx, "i", NULL, NULL, vp);
2242 }
2243
2244 static JSBool
2245 str_fixed(JSContext *cx, uintN argc, jsval *vp)
2246 {
2247 return tagify(cx, "tt", NULL, NULL, vp);
2248 }
2249
2250 static JSBool
2251 str_fontsize(JSContext *cx, uintN argc, jsval *vp)
2252 {
2253 return tagify_value(cx, argc, vp, "font size", "font");
2254 }
2255
2256 static JSBool
2257 str_fontcolor(JSContext *cx, uintN argc, jsval *vp)
2258 {
2259 return tagify_value(cx, argc, vp, "font color", "font");
2260 }
2261
2262 static JSBool
2263 str_link(JSContext *cx, uintN argc, jsval *vp)
2264 {
2265 return tagify_value(cx, argc, vp, "a href", "a");
2266 }
2267
2268 static JSBool
2269 str_anchor(JSContext *cx, uintN argc, jsval *vp)
2270 {
2271 return tagify_value(cx, argc, vp, "a name", "a");
2272 }
2273
2274 static JSBool
2275 str_strike(JSContext *cx, uintN argc, jsval *vp)
2276 {
2277 return tagify(cx, "strike", NULL, NULL, vp);
2278 }
2279
2280 static JSBool
2281 str_small(JSContext *cx, uintN argc, jsval *vp)
2282 {
2283 return tagify(cx, "small", NULL, NULL, vp);
2284 }
2285
2286 static JSBool
2287 str_big(JSContext *cx, uintN argc, jsval *vp)
2288 {
2289 return tagify(cx, "big", NULL, NULL, vp);
2290 }
2291
2292 static JSBool
2293 str_blink(JSContext *cx, uintN argc, jsval *vp)
2294 {
2295 return tagify(cx, "blink", NULL, NULL, vp);
2296 }
2297
2298 static JSBool
2299 str_sup(JSContext *cx, uintN argc, jsval *vp)
2300 {
2301 return tagify(cx, "sup", NULL, NULL, vp);
2302 }
2303
2304 static JSBool
2305 str_sub(JSContext *cx, uintN argc, jsval *vp)
2306 {
2307 return tagify(cx, "sub", NULL, NULL, vp);
2308 }
2309 #endif /* JS_HAS_STR_HTML_HELPERS */
2310
/* Short names for the function flags used in the string_methods table. */
#define GENERIC                 JSFUN_GENERIC_NATIVE
#define PRIMITIVE               JSFUN_THISP_PRIMITIVE

/* Both of the above combined. */
#define GENERIC_PRIMITIVE       (GENERIC | PRIMITIVE)
2314
2315 static JSFunctionSpec string_methods[] = {
2316 #if JS_HAS_TOSOURCE
2317 JS_FN("quote", str_quote, 0,GENERIC_PRIMITIVE),
2318 JS_FN(js_toSource_str, str_toSource, 0,JSFUN_THISP_STRING),
2319 #endif
2320
2321 /* Java-like methods. */
2322 JS_FN(js_toString_str, str_toString, 0,JSFUN_THISP_STRING),
2323 JS_FN(js_valueOf_str, str_toString, 0,JSFUN_THISP_STRING),
2324 JS_FN("substring", js_str_substring, 2,GENERIC_PRIMITIVE),
2325 JS_FN("toLowerCase", js_str_toLowerCase, 0,GENERIC_PRIMITIVE),
2326 JS_FN("toUpperCase", js_str_toUpperCase, 0,GENERIC_PRIMITIVE),
2327 JS_FN("charAt", js_str_charAt, 1,GENERIC_PRIMITIVE),
2328 JS_FN("charCodeAt", js_str_charCodeAt, 1,GENERIC_PRIMITIVE),
2329 JS_FN("indexOf", str_indexOf, 1,GENERIC_PRIMITIVE),
2330 JS_FN("lastIndexOf", str_lastIndexOf, 1,GENERIC_PRIMITIVE),
2331 JS_FN("trim", str_trim, 0,GENERIC_PRIMITIVE),
2332 JS_FN("trimLeft", str_trimLeft, 0,GENERIC_PRIMITIVE),
2333 JS_FN("trimRight", str_trimRight, 0,GENERIC_PRIMITIVE),
2334 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,GENERIC_PRIMITIVE),
2335 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,GENERIC_PRIMITIVE),
2336 JS_FN("localeCompare", str_localeCompare, 1,GENERIC_PRIMITIVE),
2337
2338 /* Perl-ish methods (search is actually Python-esque). */
2339 JS_FN("match", js_str_match, 1,GENERIC_PRIMITIVE),
2340 JS_FN("search", str_search, 1,GENERIC_PRIMITIVE),
2341 JS_FN("replace", js_str_replace, 2,GENERIC_PRIMITIVE),
2342 JS_FN("split", js_str_split, 2,GENERIC_PRIMITIVE),
2343 #if JS_HAS_PERL_SUBSTR
2344 JS_FN("substr", str_substr, 2,GENERIC_PRIMITIVE),
2345 #endif
2346
2347 /* Python-esque sequence methods. */
2348 JS_FN("concat", js_str_concat, 1,GENERIC_PRIMITIVE),
2349 JS_FN("slice", str_slice, 2,GENERIC_PRIMITIVE),
2350
2351 /* HTML string methods. */
2352 #if JS_HAS_STR_HTML_HELPERS
2353 JS_FN("bold", str_bold, 0,PRIMITIVE),
2354 JS_FN("italics", str_italics, 0,PRIMITIVE),
2355 JS_FN("fixed", str_fixed, 0,PRIMITIVE),
2356 JS_FN("fontsize", str_fontsize, 1,PRIMITIVE),
2357 JS_FN("fontcolor", str_fontcolor, 1,PRIMITIVE),
2358 JS_FN("link", str_link, 1,PRIMITIVE),
2359 JS_FN("anchor", str_anchor, 1,PRIMITIVE),
2360 JS_FN("strike", str_strike, 0,PRIMITIVE),
2361 JS_FN("small", str_small, 0,PRIMITIVE),
2362 JS_FN("big", str_big, 0,PRIMITIVE),
2363 JS_FN("blink", str_blink, 0,PRIMITIVE),
2364 JS_FN("sup", str_sup, 0,PRIMITIVE),
2365 JS_FN("sub", str_sub, 0,PRIMITIVE),
2366 #endif
2367
2368 JS_FS_END
2369 };
2370
2371 static JSBool
2372 String(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
2373 {
2374 JSString *str;
2375
2376 if (argc > 0) {
2377 str = js_ValueToString(cx, argv[0]);
2378 if (!str)
2379 return JS_FALSE;
2380 argv[0] = STRING_TO_JSVAL(str);
2381 } else {
2382 str = cx->runtime->emptyString;
2383 }
2384 if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
2385 *rval = STRING_TO_JSVAL(str);
2386 return JS_TRUE;
2387 }
2388 STOBJ_SET_SLOT(obj, JSSLOT_PRIVATE, STRING_TO_JSVAL(str));
2389 return JS_TRUE;
2390 }
2391
2392 JSBool
2393 js_str_fromCharCode(JSContext *cx, uintN argc, jsval *vp)
2394 {
2395 jsval *argv;
2396 uintN i;
2397 uint16 code;
2398 jschar *chars;
2399 JSString *str;
2400
2401 argv = vp + 2;
2402 JS_ASSERT(argc < ARRAY_INIT_LIMIT);
2403 if (argc == 1 &&
2404 (code = js_ValueToUint16(cx, &argv[0])) < UNIT_STRING_LIMIT) {
2405 str = js_GetUnitStringForChar(cx, code);
2406 if (!str)
2407 return JS_FALSE;
2408 *vp = STRING_TO_JSVAL(str);
2409 return JS_TRUE;
2410 }
2411 chars = (jschar *) JS_malloc(cx, (argc + 1) * sizeof(jschar));
2412 if (!chars)
2413 return JS_FALSE;
2414 for (i = 0; i < argc; i++) {
2415 code = js_ValueToUint16(cx, &argv[i]);
2416 if (JSVAL_IS_NULL(argv[i])) {
2417 JS_free(cx, chars);
2418 return JS_FALSE;
2419 }
2420 chars[i] = (jschar)code;
2421 }
2422 chars[i] = 0;
2423 str = js_NewString(cx, chars, argc);
2424 if (!str) {
2425 JS_free(cx, chars);
2426 return JS_FALSE;
2427 }
2428 *vp = STRING_TO_JSVAL(str);
2429 return JS_TRUE;
2430 }
2431
2432 static JSFunctionSpec string_static_methods[] = {
2433 JS_FN("fromCharCode", js_str_fromCharCode, 1,0),
2434 JS_FS_END
2435 };
2436
2437 static JSHashNumber
2438 js_hash_string_pointer(const void *key)
2439 {
2440 return (JSHashNumber)JS_PTR_TO_UINT32(key) >> JSVAL_TAGBITS;
2441 }
2442
2443 JSBool
2444 js_InitRuntimeStringState(JSContext *cx)
2445 {
2446 JSRuntime *rt;
2447
2448 rt = cx->runtime;
2449 rt->emptyString = ATOM_TO_STRING(rt->atomState.emptyAtom);
2450 return JS_TRUE;
2451 }
2452
2453 JSBool
2454 js_InitDeflatedStringCache(JSRuntime *rt)
2455 {
2456 JSHashTable *cache;
2457
2458 /* Initialize string cache */
2459 JS_ASSERT(!rt->deflatedStringCache);
2460 cache = JS_NewHashTable(8, js_hash_string_pointer,
2461 JS_CompareValues, JS_CompareValues,
2462 NULL, NULL);
2463 if (!cache)
2464 return JS_FALSE;
2465 rt->deflatedStringCache = cache;
2466
2467 #ifdef JS_THREADSAFE
2468 JS_ASSERT(!rt->deflatedStringCacheLock);
2469 rt->deflatedStringCacheLock = JS_NEW_LOCK();
2470 if (!rt->deflatedStringCacheLock)
2471 return JS_FALSE;
2472 #endif
2473 return JS_TRUE;
2474 }
2475
/*
 * The unit string table is one allocation: UNIT_STRING_LIMIT JSString
 * pointers followed by character storage. UNIT_STRING_SPACE yields the
 * address just past the pointer array, viewed as jschar storage.
 */
#define UNIT_STRING_SPACE(sp)       ((jschar *) ((sp) + UNIT_STRING_LIMIT))
#define UNIT_STRING_SPACE_RT(rt)    UNIT_STRING_SPACE((rt)->unitStrings)

/*
 * True when cp points into the first 2 * UNIT_STRING_LIMIT jschars of that
 * storage. The unsigned cast makes a pointer below the storage compare as a
 * huge offset, so one comparison covers both bounds.
 */
#define IN_UNIT_STRING_SPACE(sp,cp)                                           \
    ((size_t)((cp) - UNIT_STRING_SPACE(sp)) < 2 * UNIT_STRING_LIMIT)
#define IN_UNIT_STRING_SPACE_RT(rt,cp)                                        \
    IN_UNIT_STRING_SPACE((rt)->unitStrings, cp)
2483
2484 JSString *
2485 js_GetUnitStringForChar(JSContext *cx, jschar c)
2486 {
2487 jschar *cp, i;
2488 JSRuntime *rt;
2489 JSString **sp;
2490
2491 JS_ASSERT(c < UNIT_STRING_LIMIT);
2492 rt = cx->runtime;
2493 if (!rt->unitStrings) {
2494 sp = (JSString **) calloc(UNIT_STRING_LIMIT * sizeof(JSString *) +
2495 UNIT_STRING_LIMIT * 2 * sizeof(jschar),
2496 1);
2497 if (!sp) {
2498 JS_ReportOutOfMemory(cx);
2499 return NULL;
2500 }
2501 cp = UNIT_STRING_SPACE(sp);
2502 for (i = 0; i < UNIT_STRING_LIMIT; i++) {
2503 *cp = i;
2504 cp += 2;
2505 }
2506 JS_LOCK_GC(rt);
2507 if (!rt->unitStrings) {
2508 rt->unitStrings = sp;
2509 JS_UNLOCK_GC(rt);
2510 } else {
2511 JS_UNLOCK_GC(rt);
2512 free(sp);
2513 }
2514 }
2515 if (!rt->unitStrings[c]) {
2516 JSString *str;
2517
2518 cp = UNIT_STRING_SPACE_RT(rt);
2519 str = js_NewString(cx, cp + 2 * c, 1);
2520 if (!str)
2521 return NULL;
2522 JS_LOCK_GC(rt);
2523 if (!rt->unitStrings[c])
2524 rt->unitStrings[c] = str;
2525 JS_UNLOCK_GC(rt);
2526 }
2527 return rt->unitStrings[c];
2528 }
2529
2530 JSString *
2531 js_GetUnitString(JSContext *cx, JSString *str, size_t index)
2532 {
2533 jschar c;
2534
2535 JS_ASSERT(index < JSSTRING_LENGTH(str));
2536 c = JSSTRING_CHARS(str)[index];
2537 if (c >= UNIT_STRING_LIMIT)
2538 return js_NewDependentString(cx, str, index, 1);
2539 return js_GetUnitStringForChar(cx, c);
2540 }
2541
2542 void
2543 js_FinishUnitStrings(JSRuntime *rt)
2544 {
2545 free(rt->unitStrings);
2546 rt->unitStrings = NULL;
2547 }
2548
2549 void
2550 js_FinishRuntimeStringState(JSContext *cx)
2551 {
2552 cx->runtime->emptyString = NULL;
2553 }
2554
2555 void
2556 js_FinishDeflatedStringCache(JSRuntime *rt)
2557 {
2558 if (rt->deflatedStringCache) {
2559 JS_HashTableDestroy(rt->deflatedStringCache);
2560 rt->deflatedStringCache = NULL;
2561 }
2562 #ifdef JS_THREADSAFE
2563 if (rt->deflatedStringCacheLock) {
2564 JS_DESTROY_LOCK(rt->deflatedStringCacheLock);
2565 rt->deflatedStringCacheLock = NULL;
2566 }
2567 #endif
2568 }
2569
2570 JSObject *
2571 js_InitStringClass(JSContext *cx, JSObject *obj)
2572 {
2573 JSObject *proto;
2574
2575 /* Define the escape, unescape functions in the global object. */
2576 if (!JS_DefineFunctions(cx, obj, string_functions))
2577 return NULL;
2578
2579 proto = JS_InitClass(cx, obj, NULL, &js_StringClass, String, 1,
2580 string_props, string_methods,
2581 NULL, string_static_methods);
2582 if (!proto)
2583 return NULL;
2584 STOBJ_SET_SLOT(proto, JSSLOT_PRIVATE,
2585 STRING_TO_JSVAL(cx->runtime->emptyString));
2586 return proto;
2587 }
2588
2589 JSString *
2590 js_NewString(JSContext *cx, jschar *chars, size_t length)
2591 {
2592 JSString *str;
2593
2594 if (length > JSSTRING_LENGTH_MASK) {
2595 js_ReportAllocationOverflow(cx);
2596 return NULL;
2597 }
2598
2599 str = (JSString *) js_NewGCThing(cx, GCX_STRING, sizeof(JSString));
2600 if (!str)
2601 return NULL;
2602 JSFLATSTR_INIT(str, chars, length);
2603 #ifdef DEBUG
2604 {
2605 JSRuntime *rt = cx->runtime;
2606 JS_RUNTIME_METER(rt, liveStrings);
2607 JS_RUNTIME_METER(rt, totalStrings);
2608 JS_LOCK_RUNTIME_VOID(rt,
2609 (rt->lengthSum += (double)length,
2610 rt->lengthSquaredSum += (double)length * (double)length));
2611 }
2612 #endif
2613 return str;
2614 }
2615
2616 JSString *
2617 js_NewDependentString(JSContext *cx, JSString *base, size_t start,
2618 size_t length)
2619 {
2620 JSString *ds;
2621
2622 if (length == 0)
2623 return cx->runtime->emptyString;
2624
2625 if (start == 0 && length == JSSTRING_LENGTH(base))
2626 return base;
2627
2628 if (start > JSSTRDEP_START_MASK ||
2629 (start != 0 && length > JSSTRDEP_LENGTH_MASK)) {
2630 return js_NewStringCopyN(cx, JSSTRING_CHARS(base) + start, length);
2631 }
2632
2633 ds = (JSString *)js_NewGCThing(cx, GCX_STRING, sizeof(JSString));
2634 if (!ds)
2635 return NULL;
2636 if (start == 0)
2637 JSPREFIX_INIT(ds, base, length);
2638 else
2639 JSSTRDEP_INIT(ds, base, start, length);
2640 #ifdef DEBUG
2641 {
2642 JSRuntime *rt = cx->runtime;
2643 JS_RUNTIME_METER(rt, liveDependentStrings);
2644 JS_RUNTIME_METER(rt, totalDependentStrings);
2645 JS_RUNTIME_METER(rt, liveStrings);
2646 JS_RUNTIME_METER(rt, totalStrings);
2647 JS_LOCK_RUNTIME_VOID(rt,
2648 (rt->strdepLengthSum += (double)length,
2649 rt->strdepLengthSquaredSum += (double)length * (double)length));
2650 JS_LOCK_RUNTIME_VOID(rt,
2651 (rt->lengthSum += (double)length,
2652 rt->lengthSquaredSum += (double)length * (double)length));
2653 }
2654 #endif
2655 return ds;
2656 }
2657
#ifdef DEBUG
#include <math.h>

/*
 * DEBUG-only: writes two lines to stderr, giving the count, mean length and
 * standard deviation first for all strings and then for dependent strings,
 * computed by JS_MeanAndStdDev() from the runtime's accumulated sums.
 */
void printJSStringStats(JSRuntime *rt)
{
    double allMean, allSigma;
    double depMean, depSigma;

    allMean = JS_MeanAndStdDev(rt->totalStrings, rt->lengthSum,
                               rt->lengthSquaredSum, &allSigma);
    fprintf(stderr, "%lu total strings, mean length %g (sigma %g)\n",
            (unsigned long)rt->totalStrings, allMean, allSigma);

    depMean = JS_MeanAndStdDev(rt->totalDependentStrings,
                               rt->strdepLengthSum,
                               rt->strdepLengthSquaredSum, &depSigma);
    fprintf(stderr, "%lu total dependent strings, mean length %g (sigma %g)\n",
            (unsigned long)rt->totalDependentStrings, depMean, depSigma);
}
#endif
2678
2679 JSString *
2680 js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n)
2681 {
2682 jschar *news;
2683 JSString *str;
2684
2685 news = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
2686 if (!news)
2687 return NULL;
2688 js_strncpy(news, s, n);
2689 news[n] = 0;
2690 str = js_NewString(cx, news, n);
2691 if (!str)
2692 JS_free(cx, news);
2693 return str;
2694 }
2695
2696 JSString *
2697 js_NewStringCopyZ(JSContext *cx, const jschar *s)
2698 {
2699 size_t n, m;
2700 jschar *news;
2701 JSString *str;
2702
2703 n = js_strlen(s);
2704 m = (n + 1) * sizeof(jschar);
2705 news = (jschar *) JS_malloc(cx, m);
2706 if (!news)
2707 return NULL;
2708 memcpy(news, s, m);
2709 str = js_NewString(cx, news, n);
2710 if (!str)
2711 JS_free(cx, news);
2712 return str;
2713 }
2714
2715 void
2716 js_PurgeDeflatedStringCache(JSRuntime *rt, JSString *str)
2717 {
2718 JSHashNumber hash;
2719 JSHashEntry *he, **hep;
2720
2721 hash = js_hash_string_pointer(str);
2722 JS_ACQUIRE_LOCK(rt->deflatedStringCacheLock);
2723 hep = JS_HashTableRawLookup(rt->deflatedStringCache, hash, str);
2724 he = *hep;
2725 if (he) {
2726 #ifdef DEBUG
2727 rt->deflatedStringCacheBytes -= JSSTRING_LENGTH(str);
2728 #endif
2729 free(he->value);
2730 JS_HashTableRawRemove(rt->deflatedStringCache, hep, he);
2731 }
2732 JS_RELEASE_LOCK(rt->deflatedStringCacheLock);
2733 }
2734
2735 static JSStringFinalizeOp str_finalizers[GCX_NTYPES - GCX_EXTERNAL_STRING] = {
2736 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
2737 };
2738
2739 intN
2740 js_ChangeExternalStringFinalizer(JSStringFinalizeOp oldop,
2741 JSStringFinalizeOp newop)
2742 {
2743 uintN i;
2744
2745 for (i = 0; i != JS_ARRAY_LENGTH(str_finalizers); i++) {
2746 if (str_finalizers[i] == oldop) {
2747 str_finalizers[i] = newop;
2748 return (intN) i;
2749 }
2750 }
2751 return -1;
2752 }
2753
2754 /*
2755 * cx is NULL when we are called from js_FinishAtomState to force the
2756 * finalization of the permanently interned strings.
2757 */
2758 void
2759 js_FinalizeStringRT(JSRuntime *rt, JSString *str, intN type, JSContext *cx)
2760 {
2761 jschar *chars;
2762 JSBool valid;
2763 JSStringFinalizeOp finalizer;
2764
2765 JS_RUNTIME_UNMETER(rt, liveStrings);
2766 if (JSSTRING_IS_DEPENDENT(str)) {
2767 /* A dependent string can not be external and must be valid. */
2768 JS_ASSERT(type < 0);
2769 JS_ASSERT(JSSTRDEP_BASE(str));
2770 JS_RUNTIME_UNMETER(rt, liveDependentStrings);
2771 valid = JS_TRUE;
2772 } else {
2773 /* A stillborn string has null chars, so is not valid. */
2774 chars = JSFLATSTR_CHARS(str);
2775 valid = (chars != NULL);
2776 if (valid) {
2777 if (IN_UNIT_STRING_SPACE_RT(rt, chars)) {
2778 JS_ASSERT(rt->unitStrings[*chars] == str);
2779 JS_ASSERT(type < 0);
2780 rt->unitStrings[*chars] = NULL;
2781 } else if (type < 0) {
2782 free(chars);
2783 } else {
2784 JS_ASSERT((uintN) type < JS_ARRAY_LENGTH(str_finalizers));
2785 finalizer = str_finalizers[type];
2786 if (finalizer) {
2787 /*
2788 * Assume that the finalizer for the permanently interned
2789 * string knows how to deal with null context.
2790 */
2791 finalizer(cx, str);
2792 }
2793 }
2794 }
2795 }
2796 if (valid)
2797 js_PurgeDeflatedStringCache(rt, str);
2798 }
2799
2800 JS_FRIEND_API(const char *)
2801 js_ValueToPrintable(JSContext *cx, jsval v, JSValueToStringFun v2sfun)
2802 {
2803 JSString *str;
2804
2805 str = v2sfun(cx, v);
2806 if (!str)
2807 return NULL;
2808 str = js_QuoteString(cx, str, 0);
2809 if (!str)
2810 return NULL;
2811 return js_GetStringBytes(cx, str);
2812 }
2813
2814 JS_FRIEND_API(JSString *)
2815 js_ValueToString(JSContext *cx, jsval v)
2816 {
2817 JSObject *obj;
2818 JSString *str;
2819
2820 if (JSVAL_IS_OBJECT(v)) {
2821 obj = JSVAL_TO_OBJECT(v);
2822 if (!obj)
2823 return ATOM_TO_STRING(cx->runtime->atomState.nullAtom);
2824 if (!OBJ_DEFAULT_VALUE(cx, obj, JSTYPE_STRING, &v))
2825 return NULL;
2826 }
2827 if (JSVAL_IS_STRING(v)) {
2828 str = JSVAL_TO_STRING(v);
2829 } else if (JSVAL_IS_INT(v)) {
2830 str = js_NumberToString(cx, JSVAL_TO_INT(v));
2831 } else if (JSVAL_IS_DOUBLE(v)) {
2832 str = js_NumberToString(cx, *JSVAL_TO_DOUBLE(v));
2833 } else if (JSVAL_IS_BOOLEAN(v)) {
2834 str = js_BooleanToString(cx, JSVAL_TO_BOOLEAN(v));
2835 } else {
2836 str = ATOM_TO_STRING(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
2837 }
2838 return str;
2839 }
2840
2841 JS_FRIEND_API(JSString *)
2842 js_ValueToSource(JSContext *cx, jsval v)
2843 {
2844 JSTempValueRooter tvr;
2845 JSString *str;
2846
2847 if (JSVAL_IS_VOID(v))
2848 return ATOM_TO_STRING(cx->runtime->atomState.void0Atom);
2849 if (JSVAL_IS_STRING(v))
2850 return js_QuoteString(cx, JSVAL_TO_STRING(v), '"');
2851 if (JSVAL_IS_PRIMITIVE(v)) {
2852 /* Special case to preserve negative zero, _contra_ toString. */
2853 if (JSVAL_IS_DOUBLE(v) && JSDOUBLE_IS_NEGZERO(*JSVAL_TO_DOUBLE(v))) {
2854 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
2855 static const jschar js_negzero_ucNstr[] = {'-', '0'};
2856
2857 return js_NewStringCopyN(cx, js_negzero_ucNstr, 2);
2858 }
2859 return js_ValueToString(cx, v);
2860 }
2861
2862 JS_PUSH_SINGLE_TEMP_ROOT(cx, JSVAL_NULL, &tvr);
2863 if (!js_TryMethod(cx, JSVAL_TO_OBJECT(v),
2864 cx->runtime->atomState.toSourceAtom,
2865 0, NULL, &tvr.u.value)) {
2866 str = NULL;
2867 } else {
2868 str = js_ValueToString(cx, tvr.u.value);
2869 }
2870 JS_POP_TEMP_ROOT(cx, &tvr);
2871 return str;
2872 }
2873
2874 /*
2875 * str is not necessarily a GC thing here.
2876 */
2877 uint32
2878 js_HashString(JSString *str)
2879 {
2880 const jschar *s;
2881 size_t n;
2882 uint32 h;
2883
2884 JSSTRING_CHARS_AND_LENGTH(str, s, n);
2885 for (h = 0; n; s++, n--)
2886 h = JS_ROTATE_LEFT32(h, 4) ^ *s;
2887 return h;
2888 }
2889
2890 /*
2891 * str is not necessarily a GC thing here.
2892 */
2893 JSBool JS_FASTCALL
2894 js_EqualStrings(JSString *str1, JSString *str2)
2895 {
2896 size_t n;
2897 const jschar *s1, *s2;
2898
2899 JS_ASSERT(str1);
2900 JS_ASSERT(str2);
2901
2902 /* Fast case: pointer equality could be a quick win. */
2903 if (str1 == str2)
2904 return JS_TRUE;
2905
2906 n = JSSTRING_LENGTH(str1);
2907 if (n != JSSTRING_LENGTH(str2))
2908 return JS_FALSE;
2909
2910 if (n == 0)
2911 return JS_TRUE;
2912
2913 s1 = JSSTRING_CHARS(str1), s2 = JSSTRING_CHARS(str2);
2914 do {
2915 if (*s1 != *s2)
2916 return JS_FALSE;
2917 ++s1, ++s2;
2918 } while (--n != 0);
2919
2920 return JS_TRUE;
2921 }
2922
2923 jsint JS_FASTCALL
2924 js_CompareStrings(JSString *str1, JSString *str2)
2925 {
2926 size_t l1, l2, n, i;
2927 const jschar *s1, *s2;
2928 intN cmp;
2929
2930 JS_ASSERT(str1);
2931 JS_ASSERT(str2);
2932
2933 /* Fast case: pointer equality could be a quick win. */
2934 if (str1 == str2)
2935 return 0;
2936
2937 JSSTRING_CHARS_AND_LENGTH(str1, s1, l1);
2938 JSSTRING_CHARS_AND_LENGTH(str2, s2, l2);
2939 n = JS_MIN(l1, l2);
2940 for (i = 0; i < n; i++) {
2941 cmp = s1[i] - s2[i];
2942 if (cmp != 0)
2943 return cmp;
2944 }
2945 return (intN)(l1 - l2);
2946 }
2947
2948 size_t
2949 js_strlen(const jschar *s)
2950 {
2951 const jschar *t;
2952
2953 for (t = s; *t != 0; t++)
2954 continue;
2955 return (size_t)(t - s);
2956 }
2957
2958 jschar *
2959 js_strchr(const jschar *s, jschar c)
2960 {
2961 while (*s != 0) {
2962 if (*s == c)
2963 return (jschar *)s;
2964 s++;
2965 }
2966 return NULL;
2967 }
2968
2969 jschar *
2970 js_strchr_limit(const jschar *s, jschar c, const jschar *limit)
2971 {
2972 while (s < limit) {
2973 if (*s == c)
2974 return (jschar *)s;
2975 s++;
2976 }
2977 return NULL;
2978 }
2979
2980 const jschar *
2981 js_SkipWhiteSpace(const jschar *s, const jschar *end)
2982 {
2983 JS_ASSERT(s <= end);
2984 while (s != end && JS_ISSPACE(*s))
2985 s++;
2986 return s;
2987 }
2988
2989 jschar *
2990 js_InflateString(JSContext *cx, const char *bytes, size_t *lengthp)
2991 {
2992 size_t nbytes, nchars, i;
2993 jschar *chars;
2994 #ifdef DEBUG
2995 JSBool ok;
2996 #endif
2997
2998 nbytes = *lengthp;
2999 if (js_CStringsAreUTF8) {
3000 if (!js_InflateStringToBuffer(cx, bytes, nbytes, NULL, &nchars))
3001 goto bad;
3002 chars = (jschar *) JS_malloc(cx, (nchars + 1) * sizeof (jschar));
3003 if (!chars)
3004 goto bad;
3005 #ifdef DEBUG
3006 ok =
3007 #endif
3008 js_InflateStringToBuffer(cx, bytes, nbytes, chars, &nchars);
3009 JS_ASSERT(ok);
3010 } else {
3011 nchars = nbytes;
3012 chars = (jschar *) JS_malloc(cx, (nchars + 1) * sizeof(jschar));
3013 if (!chars)
3014 goto bad;
3015 for (i = 0; i < nchars; i++)
3016 chars[i] = (unsigned char) bytes[i];
3017 }
3018 *lengthp = nchars;
3019 chars[nchars] = 0;
3020 return chars;
3021
3022 bad:
3023 /*
3024 * For compatibility with callers of JS_DecodeBytes we must zero lengthp
3025 * on errors.
3026 */
3027 *lengthp = 0;
3028 return NULL;
3029 }
3030
3031 /*
3032 * May be called with null cx by js_GetStringBytes, see below.
3033 */
3034 char *
3035 js_DeflateString(JSContext *cx, const jschar *chars, size_t nchars)
3036 {
3037 size_t nbytes, i;
3038 char *bytes;
3039 #ifdef DEBUG
3040 JSBool ok;
3041 #endif
3042
3043 if (js_CStringsAreUTF8) {
3044 nbytes = js_GetDeflatedStringLength(cx, chars, nchars);
3045 if (nbytes == (size_t) -1)
3046 return NULL;
3047 bytes = (char *) (cx ? JS_malloc(cx, nbytes + 1) : malloc(nbytes + 1));
3048 if (!bytes)
3049 return NULL;
3050 #ifdef DEBUG
3051 ok =
3052 #endif
3053 js_DeflateStringToBuffer(cx, chars, nchars, bytes, &nbytes);
3054 JS_ASSERT(ok);
3055 } else {
3056 nbytes = nchars;
3057 bytes = (char *) (cx ? JS_malloc(cx, nbytes + 1) : malloc(nbytes + 1));
3058 if (!bytes)
3059 return NULL;
3060 for (i = 0; i < nbytes; i++)
3061 bytes[i] = (char) chars[i];
3062 }
3063 bytes[nbytes] = 0;
3064 return bytes;
3065 }
3066
3067 /*
3068 * May be called with null cx through js_GetStringBytes, see below.
3069 */
3070 size_t
3071 js_GetDeflatedStringLength(JSContext *cx, const jschar *chars, size_t nchars)
3072 {
3073 size_t nbytes;
3074 const jschar *end;
3075 uintN c, c2;
3076 char buffer[10];
3077
3078 if (!js_CStringsAreUTF8)
3079 return nchars;
3080
3081 nbytes = nchars;
3082 for (end = chars + nchars; chars != end; chars++) {
3083 c = *chars;
3084 if (c < 0x80)
3085 continue;
3086 if (0xD800 <= c && c <= 0xDFFF) {
3087 /* Surrogate pair. */
3088 chars++;
3089 if (c >= 0xDC00 || chars == end)
3090 goto bad_surrogate;
3091 c2 = *chars;
3092 if (c2 < 0xDC00 || c2 > 0xDFFF)
3093 goto bad_surrogate;
3094 c = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
3095 }
3096 c >>= 11;
3097 nbytes++;
3098 while (c) {
3099 c >>= 5;
3100 nbytes++;
3101 }
3102 }
3103 return nbytes;
3104
3105 bad_surrogate:
3106 if (cx) {
3107 JS_snprintf(buffer, 10, "0x%x", c);
3108 JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, js_GetErrorMessage,
3109 NULL, JSMSG_BAD_SURROGATE_CHAR, buffer);
3110 }
3111 return (size_t) -1;
3112 }
3113
3114 JSBool
3115 js_DeflateStringToBuffer(JSContext *cx, const jschar *src, size_t srclen,
3116 char *dst, size_t *dstlenp)
3117 {
3118 size_t dstlen, i, origDstlen, utf8Len;
3119 jschar c, c2;
3120 uint32 v;
3121 uint8 utf8buf[6];
3122
3123 dstlen = *dstlenp;
3124 if (!js_CStringsAreUTF8) {
3125 if (srclen > dstlen) {
3126 for (i = 0; i < dstlen; i++)
3127 dst[i] = (char) src[i];
3128 if (cx) {
3129 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3130 JSMSG_BUFFER_TOO_SMALL);
3131 }
3132 return JS_FALSE;
3133 }
3134 for (i = 0; i < srclen; i++)
3135 dst[i] = (char) src[i];
3136 *dstlenp = srclen;
3137 return JS_TRUE;
3138 }
3139
3140 origDstlen = dstlen;
3141 while (srclen) {
3142 c = *src++;
3143 srclen--;
3144 if ((c >= 0xDC00) && (c <= 0xDFFF))
3145 goto badSurrogate;
3146 if (c < 0xD800 || c > 0xDBFF) {
3147 v = c;
3148 } else {
3149 if (srclen < 1)
3150 goto badSurrogate;
3151 c2 = *src;
3152 if ((c2 < 0xDC00) || (c2 > 0xDFFF))
3153 goto badSurrogate;
3154 src++;
3155 srclen--;
3156 v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
3157 }
3158 if (v < 0x0080) {
3159 /* no encoding necessary - performance hack */
3160 if (dstlen == 0)
3161 goto bufferTooSmall;
3162 *dst++ = (char) v;
3163 utf8Len = 1;
3164 } else {
3165 utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
3166 if (utf8Len > dstlen)
3167 goto bufferTooSmall;
3168 for (i = 0; i < utf8Len; i++)
3169 *dst++ = (char) utf8buf[i];
3170 }
3171 dstlen -= utf8Len;
3172 }
3173 *dstlenp = (origDstlen - dstlen);
3174 return JS_TRUE;
3175
3176 badSurrogate:
3177 *dstlenp = (origDstlen - dstlen);
3178 /* Delegate error reporting to the measurement function. */
3179 if (cx)
3180 js_GetDeflatedStringLength(cx, src - 1, srclen + 1);
3181 return JS_FALSE;