/[jscoverage]/trunk/js/jsstr.cpp
ViewVC logotype

Contents of /trunk/js/jsstr.cpp

Parent Directory | Revision Log


Revision 460 - (show annotations)
Sat Sep 26 23:15:22 2009 UTC (10 years, 2 months ago) by siliconforks
File size: 182056 byte(s)
Upgrade to SpiderMonkey from Firefox 3.5.3.

1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sw=4 et tw=99:
3 *
4 * ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * The Original Code is Mozilla Communicator client code, released
18 * March 31, 1998.
19 *
20 * The Initial Developer of the Original Code is
21 * Netscape Communications Corporation.
22 * Portions created by the Initial Developer are Copyright (C) 1998
23 * the Initial Developer. All Rights Reserved.
24 *
25 * Contributor(s):
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either of the GNU General Public License Version 2 or later (the "GPL"),
29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 /*
42 * JS string type implementation.
43 *
44 * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
45 * native methods store strings (possibly newborn) converted from their 'this'
46 * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
47 * conversions at their index (argv[0], argv[1]). This is a legitimate method
48 * of rooting things that might lose their newborn root due to subsequent GC
49 * allocations in the same native method.
50 */
51 #include "jsstddef.h"
52 #include <stdlib.h>
53 #include <string.h>
54 #include "jstypes.h"
55 #include "jsutil.h" /* Added by JSIFY */
56 #include "jshash.h" /* Added by JSIFY */
57 #include "jsprf.h"
58 #include "jsapi.h"
59 #include "jsarray.h"
60 #include "jsatom.h"
61 #include "jsbool.h"
62 #include "jsbuiltins.h"
63 #include "jscntxt.h"
64 #include "jsversion.h"
65 #include "jsgc.h"
66 #include "jsinterp.h"
67 #include "jslock.h"
68 #include "jsnum.h"
69 #include "jsobj.h"
70 #include "jsopcode.h"
71 #include "jsregexp.h"
72 #include "jsscope.h"
73 #include "jsstaticcheck.h"
74 #include "jsstr.h"
75 #include "jsbit.h"
76
77 #define JSSTRDEP_RECURSION_LIMIT 100
78
79 size_t
80 js_MinimizeDependentStrings(JSString *str, int level, JSString **basep)
81 {
82 JSString *base;
83 size_t start, length;
84
85 JS_ASSERT(JSSTRING_IS_DEPENDENT(str));
86 base = JSSTRDEP_BASE(str);
87 start = JSSTRDEP_START(str);
88 if (JSSTRING_IS_DEPENDENT(base)) {
89 if (level < JSSTRDEP_RECURSION_LIMIT) {
90 start += js_MinimizeDependentStrings(base, level + 1, &base);
91 } else {
92 do {
93 start += JSSTRDEP_START(base);
94 base = JSSTRDEP_BASE(base);
95 } while (JSSTRING_IS_DEPENDENT(base));
96 }
97 if (start == 0) {
98 JS_ASSERT(JSSTRDEP_IS_PREFIX(str));
99 JSPREFIX_SET_BASE(str, base);
100 } else if (start <= JSSTRDEP_START_MASK) {
101 length = JSSTRDEP_LENGTH(str);
102 JSSTRDEP_REINIT(str, base, start, length);
103 }
104 }
105 *basep = base;
106 return start;
107 }
108
109 jschar *
110 js_GetDependentStringChars(JSString *str)
111 {
112 size_t start;
113 JSString *base;
114
115 start = js_MinimizeDependentStrings(str, 0, &base);
116 JS_ASSERT(start < JSFLATSTR_LENGTH(base));
117 return JSFLATSTR_CHARS(base) + start;
118 }
119
120 const jschar *
121 js_GetStringChars(JSContext *cx, JSString *str)
122 {
123 if (!js_MakeStringImmutable(cx, str))
124 return NULL;
125 return JSFLATSTR_CHARS(str);
126 }
127
128 JSString * JS_FASTCALL
129 js_ConcatStrings(JSContext *cx, JSString *left, JSString *right)
130 {
131 size_t rn, ln, lrdist, n;
132 jschar *rs, *ls, *s;
133 JSString *ldep; /* non-null if left should become dependent */
134 JSString *str;
135
136 JSSTRING_CHARS_AND_LENGTH(right, rs, rn);
137 if (rn == 0)
138 return left;
139
140 JSSTRING_CHARS_AND_LENGTH(left, ls, ln);
141 if (ln == 0)
142 return right;
143
144 if (!JSSTRING_IS_MUTABLE(left)) {
145 /* We must copy if left does not own a buffer to realloc. */
146 s = (jschar *) JS_malloc(cx, (ln + rn + 1) * sizeof(jschar));
147 if (!s)
148 return NULL;
149 js_strncpy(s, ls, ln);
150 ldep = NULL;
151 } else {
152 /* We can realloc left's space and make it depend on our result. */
153 JS_ASSERT(JSSTRING_IS_FLAT(left));
154 s = (jschar *) JS_realloc(cx, ls, (ln + rn + 1) * sizeof(jschar));
155 if (!s)
156 return NULL;
157
158 /* Take care: right could depend on left! */
159 lrdist = (size_t)(rs - ls);
160 if (lrdist < ln)
161 rs = s + lrdist;
162 left->u.chars = ls = s;
163 ldep = left;
164 }
165
166 js_strncpy(s + ln, rs, rn);
167 n = ln + rn;
168 s[n] = 0;
169
170 str = js_NewString(cx, s, n);
171 if (!str) {
172 /* Out of memory: clean up any space we (re-)allocated. */
173 if (!ldep) {
174 JS_free(cx, s);
175 } else {
176 s = (jschar *) JS_realloc(cx, ls, (ln + 1) * sizeof(jschar));
177 if (s)
178 left->u.chars = s;
179 }
180 } else {
181 JSFLATSTR_SET_MUTABLE(str);
182
183 /* Morph left into a dependent prefix if we realloc'd its buffer. */
184 if (ldep) {
185 JSPREFIX_REINIT(ldep, str, ln);
186 #ifdef DEBUG
187 {
188 JSRuntime *rt = cx->runtime;
189 JS_RUNTIME_METER(rt, liveDependentStrings);
190 JS_RUNTIME_METER(rt, totalDependentStrings);
191 JS_LOCK_RUNTIME_VOID(rt,
192 (rt->strdepLengthSum += (double)ln,
193 rt->strdepLengthSquaredSum += (double)ln * (double)ln));
194 }
195 #endif
196 }
197 }
198
199 return str;
200 }
201
202 const jschar *
203 js_UndependString(JSContext *cx, JSString *str)
204 {
205 size_t n, size;
206 jschar *s;
207
208 if (JSSTRING_IS_DEPENDENT(str)) {
209 n = JSSTRDEP_LENGTH(str);
210 size = (n + 1) * sizeof(jschar);
211 s = (jschar *) JS_malloc(cx, size);
212 if (!s)
213 return NULL;
214
215 js_strncpy(s, JSSTRDEP_CHARS(str), n);
216 s[n] = 0;
217 JSFLATSTR_REINIT(str, s, n);
218
219 #ifdef DEBUG
220 {
221 JSRuntime *rt = cx->runtime;
222 JS_RUNTIME_UNMETER(rt, liveDependentStrings);
223 JS_RUNTIME_UNMETER(rt, totalDependentStrings);
224 JS_LOCK_RUNTIME_VOID(rt,
225 (rt->strdepLengthSum -= (double)n,
226 rt->strdepLengthSquaredSum -= (double)n * (double)n));
227 }
228 #endif
229 }
230
231 return JSFLATSTR_CHARS(str);
232 }
233
234 JSBool
235 js_MakeStringImmutable(JSContext *cx, JSString *str)
236 {
237 if (JSSTRING_IS_DEPENDENT(str) && !js_UndependString(cx, str)) {
238 JS_RUNTIME_METER(cx->runtime, badUndependStrings);
239 return JS_FALSE;
240 }
241 JSFLATSTR_CLEAR_MUTABLE(str);
242 return JS_TRUE;
243 }
244
245 static JSString *
246 ArgToRootedString(JSContext *cx, uintN argc, jsval *vp, uintN arg)
247 {
248 JSObject *obj;
249 JSString *str;
250
251 if (arg >= argc)
252 return ATOM_TO_STRING(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
253 vp += 2 + arg;
254
255 if (JSVAL_IS_OBJECT(*vp)) {
256 obj = JSVAL_TO_OBJECT(*vp);
257 if (!obj)
258 return ATOM_TO_STRING(cx->runtime->atomState.nullAtom);
259 if (!OBJ_DEFAULT_VALUE(cx, obj, JSTYPE_STRING, vp))
260 return NULL;
261 }
262 if (JSVAL_IS_STRING(*vp))
263 return JSVAL_TO_STRING(*vp);
264 if (JSVAL_IS_INT(*vp)) {
265 str = js_NumberToString(cx, JSVAL_TO_INT(*vp));
266 } else if (JSVAL_IS_DOUBLE(*vp)) {
267 str = js_NumberToString(cx, *JSVAL_TO_DOUBLE(*vp));
268 } else if (JSVAL_IS_BOOLEAN(*vp)) {
269 return ATOM_TO_STRING(cx->runtime->atomState.booleanAtoms[
270 JSVAL_TO_BOOLEAN(*vp)? 1 : 0]);
271 } else {
272 JS_ASSERT(JSVAL_IS_VOID(*vp));
273 return ATOM_TO_STRING(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
274 }
275 if (str)
276 *vp = STRING_TO_JSVAL(str);
277 return str;
278 }
279
280 /*
281 * Forward declarations for URI encode/decode and helper routines
282 */
283 static JSBool
284 str_decodeURI(JSContext *cx, uintN argc, jsval *vp);
285
286 static JSBool
287 str_decodeURI_Component(JSContext *cx, uintN argc, jsval *vp);
288
289 static JSBool
290 str_encodeURI(JSContext *cx, uintN argc, jsval *vp);
291
292 static JSBool
293 str_encodeURI_Component(JSContext *cx, uintN argc, jsval *vp);
294
295 static uint32
296 Utf8ToOneUcs4Char(const uint8 *utf8Buffer, int utf8Length);
297
298 /*
299 * Contributions from the String class to the set of methods defined for the
300 * global object. escape and unescape used to be defined in the Mocha library,
301 * but as ECMA decided to spec them, they've been moved to the core engine
302 * and made ECMA-compliant. (Incomplete escapes are interpreted as literal
303 * characters by unescape.)
304 */
305
306 /*
307 * Stuff to emulate the old libmocha escape, which took a second argument
308 * giving the type of escape to perform. Retained for compatibility, and
309 * copied here to avoid reliance on net.h, mkparse.c/NET_EscapeBytes.
310 */
311
312 #define URL_XALPHAS ((uint8) 1)
313 #define URL_XPALPHAS ((uint8) 2)
314 #define URL_PATH ((uint8) 4)
315
316 static const uint8 urlCharType[256] =
317 /* Bit 0 xalpha -- the alphas
318 * Bit 1 xpalpha -- as xalpha but
319 * converts spaces to plus and plus to %20
320 * Bit 2 ... path -- as xalphas but doesn't escape '/'
321 */
322 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
323 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */
324 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */
325 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
326 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
327 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
328 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
329 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
330 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{\}~ DEL */
331 0, };
332
333 /* This matches the ECMA escape set when mask is 7 (default.) */
334
335 #define IS_OK(C, mask) (urlCharType[((uint8) (C))] & (mask))
336
337 /* See ECMA-262 Edition 3 B.2.1 */
338 JSBool
339 js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
340 {
341 JSString *str;
342 size_t i, ni, length, newlength;
343 const jschar *chars;
344 jschar *newchars;
345 jschar ch;
346 jsint mask;
347 jsdouble d;
348 const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
349 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
350
351 mask = URL_XALPHAS | URL_XPALPHAS | URL_PATH;
352 if (argc > 1) {
353 d = js_ValueToNumber(cx, &argv[1]);
354 if (JSVAL_IS_NULL(argv[1]))
355 return JS_FALSE;
356 if (!JSDOUBLE_IS_FINITE(d) ||
357 (mask = (jsint)d) != d ||
358 mask & ~(URL_XALPHAS | URL_XPALPHAS | URL_PATH))
359 {
360 char numBuf[12];
361 JS_snprintf(numBuf, sizeof numBuf, "%lx", (unsigned long) mask);
362 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
363 JSMSG_BAD_STRING_MASK, numBuf);
364 return JS_FALSE;
365 }
366 }
367
368 str = ArgToRootedString(cx, argc, argv - 2, 0);
369 if (!str)
370 return JS_FALSE;
371
372 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
373 newlength = length;
374
375 /* Take a first pass and see how big the result string will need to be. */
376 for (i = 0; i < length; i++) {
377 if ((ch = chars[i]) < 128 && IS_OK(ch, mask))
378 continue;
379 if (ch < 256) {
380 if (mask == URL_XPALPHAS && ch == ' ')
381 continue; /* The character will be encoded as '+' */
382 newlength += 2; /* The character will be encoded as %XX */
383 } else {
384 newlength += 5; /* The character will be encoded as %uXXXX */
385 }
386
387 /*
388 * This overflow test works because newlength is incremented by at
389 * most 5 on each iteration.
390 */
391 if (newlength < length) {
392 js_ReportAllocationOverflow(cx);
393 return JS_FALSE;
394 }
395 }
396
397 if (newlength >= ~(size_t)0 / sizeof(jschar)) {
398 js_ReportAllocationOverflow(cx);
399 return JS_FALSE;
400 }
401
402 newchars = (jschar *) JS_malloc(cx, (newlength + 1) * sizeof(jschar));
403 if (!newchars)
404 return JS_FALSE;
405 for (i = 0, ni = 0; i < length; i++) {
406 if ((ch = chars[i]) < 128 && IS_OK(ch, mask)) {
407 newchars[ni++] = ch;
408 } else if (ch < 256) {
409 if (mask == URL_XPALPHAS && ch == ' ') {
410 newchars[ni++] = '+'; /* convert spaces to pluses */
411 } else {
412 newchars[ni++] = '%';
413 newchars[ni++] = digits[ch >> 4];
414 newchars[ni++] = digits[ch & 0xF];
415 }
416 } else {
417 newchars[ni++] = '%';
418 newchars[ni++] = 'u';
419 newchars[ni++] = digits[ch >> 12];
420 newchars[ni++] = digits[(ch & 0xF00) >> 8];
421 newchars[ni++] = digits[(ch & 0xF0) >> 4];
422 newchars[ni++] = digits[ch & 0xF];
423 }
424 }
425 JS_ASSERT(ni == newlength);
426 newchars[newlength] = 0;
427
428 str = js_NewString(cx, newchars, newlength);
429 if (!str) {
430 JS_free(cx, newchars);
431 return JS_FALSE;
432 }
433 *rval = STRING_TO_JSVAL(str);
434 return JS_TRUE;
435 }
436 #undef IS_OK
437
438 static JSBool
439 str_escape(JSContext *cx, uintN argc, jsval *vp)
440 {
441 JSObject *obj;
442
443 obj = JS_THIS_OBJECT(cx, vp);
444 return obj && js_str_escape(cx, obj, argc, vp + 2, vp);
445 }
446
447 /* See ECMA-262 Edition 3 B.2.2 */
448 static JSBool
449 str_unescape(JSContext *cx, uintN argc, jsval *vp)
450 {
451 JSString *str;
452 size_t i, ni, length;
453 const jschar *chars;
454 jschar *newchars;
455 jschar ch;
456
457 str = ArgToRootedString(cx, argc, vp, 0);
458 if (!str)
459 return JS_FALSE;
460
461 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
462
463 /* Don't bother allocating less space for the new string. */
464 newchars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
465 if (!newchars)
466 return JS_FALSE;
467 ni = i = 0;
468 while (i < length) {
469 ch = chars[i++];
470 if (ch == '%') {
471 if (i + 1 < length &&
472 JS7_ISHEX(chars[i]) && JS7_ISHEX(chars[i + 1]))
473 {
474 ch = JS7_UNHEX(chars[i]) * 16 + JS7_UNHEX(chars[i + 1]);
475 i += 2;
476 } else if (i + 4 < length && chars[i] == 'u' &&
477 JS7_ISHEX(chars[i + 1]) && JS7_ISHEX(chars[i + 2]) &&
478 JS7_ISHEX(chars[i + 3]) && JS7_ISHEX(chars[i + 4]))
479 {
480 ch = (((((JS7_UNHEX(chars[i + 1]) << 4)
481 + JS7_UNHEX(chars[i + 2])) << 4)
482 + JS7_UNHEX(chars[i + 3])) << 4)
483 + JS7_UNHEX(chars[i + 4]);
484 i += 5;
485 }
486 }
487 newchars[ni++] = ch;
488 }
489 newchars[ni] = 0;
490
491 str = js_NewString(cx, newchars, ni);
492 if (!str) {
493 JS_free(cx, newchars);
494 return JS_FALSE;
495 }
496 *vp = STRING_TO_JSVAL(str);
497 return JS_TRUE;
498 }
499
#if JS_HAS_UNEVAL
/*
 * uneval(value): store the js_ValueToSource form of the first argument
 * (JSVAL_VOID when no argument was passed) in *vp.
 */
static JSBool
str_uneval(JSContext *cx, uintN argc, jsval *vp)
{
    jsval arg = (argc == 0) ? JSVAL_VOID : vp[2];
    JSString *source = js_ValueToSource(cx, arg);

    if (!source)
        return JS_FALSE;
    *vp = STRING_TO_JSVAL(source);
    return JS_TRUE;
}
#endif
513
514 const char js_escape_str[] = "escape";
515 const char js_unescape_str[] = "unescape";
516 #if JS_HAS_UNEVAL
517 const char js_uneval_str[] = "uneval";
518 #endif
519 const char js_decodeURI_str[] = "decodeURI";
520 const char js_encodeURI_str[] = "encodeURI";
521 const char js_decodeURIComponent_str[] = "decodeURIComponent";
522 const char js_encodeURIComponent_str[] = "encodeURIComponent";
523
524 static JSFunctionSpec string_functions[] = {
525 JS_FN(js_escape_str, str_escape, 1,0),
526 JS_FN(js_unescape_str, str_unescape, 1,0),
527 #if JS_HAS_UNEVAL
528 JS_FN(js_uneval_str, str_uneval, 1,0),
529 #endif
530 JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
531 JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
532 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
533 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
534
535 JS_FS_END
536 };
537
538 jschar js_empty_ucstr[] = {0};
539 JSSubString js_EmptySubString = {0, js_empty_ucstr};
540
541 static JSBool
542 str_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
543 {
544 jsval v;
545 JSString *str;
546
547 if (id == ATOM_KEY(cx->runtime->atomState.lengthAtom)) {
548 if (OBJ_GET_CLASS(cx, obj) == &js_StringClass) {
549 /* Follow ECMA-262 by fetching intrinsic length of our string. */
550 v = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);
551 JS_ASSERT(JSVAL_IS_STRING(v));
552 str = JSVAL_TO_STRING(v);
553 } else {
554 /* Preserve compatibility: convert obj to a string primitive. */
555 str = js_ValueToString(cx, OBJECT_TO_JSVAL(obj));
556 if (!str)
557 return JS_FALSE;
558 }
559
560 *vp = INT_TO_JSVAL((jsint) JSSTRING_LENGTH(str));
561 }
562
563 return JS_TRUE;
564 }
565
566 #define STRING_ELEMENT_ATTRS (JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_PERMANENT)
567
568 static JSBool
569 str_enumerate(JSContext *cx, JSObject *obj)
570 {
571 jsval v;
572 JSString *str, *str1;
573 size_t i, length;
574
575 v = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);
576 JS_ASSERT(JSVAL_IS_STRING(v));
577 str = JSVAL_TO_STRING(v);
578
579 length = JSSTRING_LENGTH(str);
580 for (i = 0; i < length; i++) {
581 str1 = js_NewDependentString(cx, str, i, 1);
582 if (!str1)
583 return JS_FALSE;
584 if (!OBJ_DEFINE_PROPERTY(cx, obj, INT_TO_JSID(i),
585 STRING_TO_JSVAL(str1), NULL, NULL,
586 STRING_ELEMENT_ATTRS, NULL)) {
587 return JS_FALSE;
588 }
589 }
590 return JS_TRUE;
591 }
592
593 static JSBool
594 str_resolve(JSContext *cx, JSObject *obj, jsval id, uintN flags,
595 JSObject **objp)
596 {
597 jsval v;
598 JSString *str, *str1;
599 jsint slot;
600
601 if (!JSVAL_IS_INT(id) || (flags & JSRESOLVE_ASSIGNING))
602 return JS_TRUE;
603
604 v = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);
605 JS_ASSERT(JSVAL_IS_STRING(v));
606 str = JSVAL_TO_STRING(v);
607
608 slot = JSVAL_TO_INT(id);
609 if ((size_t)slot < JSSTRING_LENGTH(str)) {
610 str1 = js_GetUnitString(cx, str, (size_t)slot);
611 if (!str1)
612 return JS_FALSE;
613 if (!OBJ_DEFINE_PROPERTY(cx, obj, INT_TO_JSID(slot),
614 STRING_TO_JSVAL(str1), NULL, NULL,
615 STRING_ELEMENT_ATTRS, NULL)) {
616 return JS_FALSE;
617 }
618 *objp = obj;
619 }
620 return JS_TRUE;
621 }
622
623 JSClass js_StringClass = {
624 js_String_str,
625 JSCLASS_HAS_PRIVATE | JSCLASS_NEW_RESOLVE |
626 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
627 JS_PropertyStub, JS_PropertyStub, str_getProperty, JS_PropertyStub,
628 str_enumerate, (JSResolveOp)str_resolve, JS_ConvertStub, JS_FinalizeStub,
629 JSCLASS_NO_OPTIONAL_MEMBERS
630 };
631
/*
 * Set str to the string form of the native's 'this' value (vp[1]).  When
 * vp[1] is not already a string it is converted through NormalizeThis; if
 * that fails, the macro makes the enclosing function return JS_FALSE.
 */
#define NORMALIZE_THIS(cx,vp,str)                                             \
    JS_BEGIN_MACRO                                                            \
        if (!JSVAL_IS_STRING(vp[1])) {                                        \
            str = NormalizeThis(cx, vp);                                      \
            if (!str)                                                         \
                return JS_FALSE;                                              \
        } else {                                                              \
            str = JSVAL_TO_STRING(vp[1]);                                     \
        }                                                                     \
    JS_END_MACRO
642
643 static JSString *
644 NormalizeThis(JSContext *cx, jsval *vp)
645 {
646 JSString *str;
647
648 if (JSVAL_IS_NULL(vp[1]) && JSVAL_IS_NULL(JS_THIS(cx, vp)))
649 return NULL;
650 str = js_ValueToString(cx, vp[1]);
651 if (!str)
652 return NULL;
653 vp[1] = STRING_TO_JSVAL(str);
654 return str;
655 }
656
657 #if JS_HAS_TOSOURCE
658
659 /*
660 * String.prototype.quote is generic (as are most string methods), unlike
661 * toSource, toString, and valueOf.
662 */
663 static JSBool
664 str_quote(JSContext *cx, uintN argc, jsval *vp)
665 {
666 JSString *str;
667
668 NORMALIZE_THIS(cx, vp, str);
669 str = js_QuoteString(cx, str, '"');
670 if (!str)
671 return JS_FALSE;
672 *vp = STRING_TO_JSVAL(str);
673 return JS_TRUE;
674 }
675
676 static JSBool
677 str_toSource(JSContext *cx, uintN argc, jsval *vp)
678 {
679 jsval v;
680 JSString *str;
681 size_t i, j, k, n;
682 char buf[16];
683 jschar *s, *t;
684
685 if (!js_GetPrimitiveThis(cx, vp, &js_StringClass, &v))
686 return JS_FALSE;
687 JS_ASSERT(JSVAL_IS_STRING(v));
688 str = js_QuoteString(cx, JSVAL_TO_STRING(v), '"');
689 if (!str)
690 return JS_FALSE;
691 j = JS_snprintf(buf, sizeof buf, "(new %s(", js_StringClass.name);
692 JSSTRING_CHARS_AND_LENGTH(str, s, k);
693 n = j + k + 2;
694 t = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
695 if (!t)
696 return JS_FALSE;
697 for (i = 0; i < j; i++)
698 t[i] = buf[i];
699 for (j = 0; j < k; i++, j++)
700 t[i] = s[j];
701 t[i++] = ')';
702 t[i++] = ')';
703 t[i] = 0;
704 str = js_NewString(cx, t, n);
705 if (!str) {
706 JS_free(cx, t);
707 return JS_FALSE;
708 }
709 *vp = STRING_TO_JSVAL(str);
710 return JS_TRUE;
711 }
712
713 #endif /* JS_HAS_TOSOURCE */
714
715 static JSBool
716 str_toString(JSContext *cx, uintN argc, jsval *vp)
717 {
718 return js_GetPrimitiveThis(cx, vp, &js_StringClass, vp);
719 }
720
721 /*
722 * Java-like string native methods.
723 */
724
725 static JSString *
726 SubstringTail(JSContext *cx, JSString *str, jsdouble length, jsdouble begin, jsdouble end)
727 {
728 if (begin < 0)
729 begin = 0;
730 else if (begin > length)
731 begin = length;
732
733 if (end < 0)
734 end = 0;
735 else if (end > length)
736 end = length;
737 if (end < begin) {
738 /* ECMA emulates old JDK1.0 java.lang.String.substring. */
739 jsdouble tmp = begin;
740 begin = end;
741 end = tmp;
742 }
743
744 return js_NewDependentString(cx, str, (size_t)begin, (size_t)(end - begin));
745 }
746
747 static JSBool
748 str_substring(JSContext *cx, uintN argc, jsval *vp)
749 {
750 JSString *str;
751 jsdouble d;
752 jsdouble length, begin, end;
753
754 NORMALIZE_THIS(cx, vp, str);
755 if (argc != 0) {
756 d = js_ValueToNumber(cx, &vp[2]);
757 if (JSVAL_IS_NULL(vp[2]))
758 return JS_FALSE;
759 length = JSSTRING_LENGTH(str);
760 begin = js_DoubleToInteger(d);
761 if (argc == 1) {
762 end = length;
763 } else {
764 d = js_ValueToNumber(cx, &vp[3]);
765 if (JSVAL_IS_NULL(vp[3]))
766 return JS_FALSE;
767 end = js_DoubleToInteger(d);
768 }
769
770 str = SubstringTail(cx, str, length, begin, end);
771 if (!str)
772 return JS_FALSE;
773 }
774 *vp = STRING_TO_JSVAL(str);
775 return JS_TRUE;
776 }
777
#ifdef JS_TRACER
/*
 * Tracer helper: return the string held in obj's private slot, or NULL if
 * obj is not an instance of js_StringClass.
 */
static JSString* FASTCALL
String_p_toString(JSContext* cx, JSObject* obj)
{
    if (JS_InstanceOf(cx, obj, &js_StringClass, NULL)) {
        jsval priv = OBJ_GET_SLOT(cx, obj, JSSLOT_PRIVATE);

        JS_ASSERT(JSVAL_IS_STRING(priv));
        return JSVAL_TO_STRING(priv);
    }
    return NULL;
}
#endif
789
790 JSString* JS_FASTCALL
791 js_toLowerCase(JSContext *cx, JSString *str)
792 {
793 size_t i, n;
794 jschar *s, *news;
795
796 JSSTRING_CHARS_AND_LENGTH(str, s, n);
797 news = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
798 if (!news)
799 return NULL;
800 for (i = 0; i < n; i++)
801 news[i] = JS_TOLOWER(s[i]);
802 news[n] = 0;
803 str = js_NewString(cx, news, n);
804 if (!str) {
805 JS_free(cx, news);
806 return NULL;
807 }
808 return str;
809 }
810
811 static JSBool
812 str_toLowerCase(JSContext *cx, uintN argc, jsval *vp)
813 {
814 JSString *str;
815
816 NORMALIZE_THIS(cx, vp, str);
817 str = js_toLowerCase(cx, str);
818 if (!str)
819 return JS_FALSE;
820 *vp = STRING_TO_JSVAL(str);
821 return JS_TRUE;
822 }
823
824 static JSBool
825 str_toLocaleLowerCase(JSContext *cx, uintN argc, jsval *vp)
826 {
827 JSString *str;
828
829 /*
830 * Forcefully ignore the first (or any) argument and return toLowerCase(),
831 * ECMA has reserved that argument, presumably for defining the locale.
832 */
833 if (cx->localeCallbacks && cx->localeCallbacks->localeToLowerCase) {
834 NORMALIZE_THIS(cx, vp, str);
835 return cx->localeCallbacks->localeToLowerCase(cx, str, vp);
836 }
837 return str_toLowerCase(cx, 0, vp);
838 }
839
840 JSString* JS_FASTCALL
841 js_toUpperCase(JSContext *cx, JSString *str)
842 {
843 size_t i, n;
844 jschar *s, *news;
845
846 JSSTRING_CHARS_AND_LENGTH(str, s, n);
847 news = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
848 if (!news)
849 return NULL;
850 for (i = 0; i < n; i++)
851 news[i] = JS_TOUPPER(s[i]);
852 news[n] = 0;
853 str = js_NewString(cx, news, n);
854 if (!str) {
855 JS_free(cx, news);
856 return NULL;
857 }
858 return str;
859 }
860
861 static JSBool
862 str_toUpperCase(JSContext *cx, uintN argc, jsval *vp)
863 {
864 JSString *str;
865
866 NORMALIZE_THIS(cx, vp, str);
867 str = js_toUpperCase(cx, str);
868 if (!str)
869 return JS_FALSE;
870 *vp = STRING_TO_JSVAL(str);
871 return JS_TRUE;
872 }
873
874 static JSBool
875 str_toLocaleUpperCase(JSContext *cx, uintN argc, jsval *vp)
876 {
877 JSString *str;
878
879 /*
880 * Forcefully ignore the first (or any) argument and return toUpperCase(),
881 * ECMA has reserved that argument, presumably for defining the locale.
882 */
883 if (cx->localeCallbacks && cx->localeCallbacks->localeToUpperCase) {
884 NORMALIZE_THIS(cx, vp, str);
885 return cx->localeCallbacks->localeToUpperCase(cx, str, vp);
886 }
887 return str_toUpperCase(cx, 0, vp);
888 }
889
890 static JSBool
891 str_localeCompare(JSContext *cx, uintN argc, jsval *vp)
892 {
893 JSString *str, *thatStr;
894
895 NORMALIZE_THIS(cx, vp, str);
896 if (argc == 0) {
897 *vp = JSVAL_ZERO;
898 } else {
899 thatStr = js_ValueToString(cx, vp[2]);
900 if (!thatStr)
901 return JS_FALSE;
902 if (cx->localeCallbacks && cx->localeCallbacks->localeCompare) {
903 vp[2] = STRING_TO_JSVAL(thatStr);
904 return cx->localeCallbacks->localeCompare(cx, str, thatStr, vp);
905 }
906 *vp = INT_TO_JSVAL(js_CompareStrings(str, thatStr));
907 }
908 return JS_TRUE;
909 }
910
911 static JSBool
912 str_charAt(JSContext *cx, uintN argc, jsval *vp)
913 {
914 jsval t;
915 JSString *str;
916 jsint i;
917 jsdouble d;
918
919 t = vp[1];
920 if (JSVAL_IS_STRING(t) && argc != 0 && JSVAL_IS_INT(vp[2])) {
921 str = JSVAL_TO_STRING(t);
922 i = JSVAL_TO_INT(vp[2]);
923 if ((size_t)i >= JSSTRING_LENGTH(str))
924 goto out_of_range;
925 } else {
926 str = NormalizeThis(cx, vp);
927 if (!str)
928 return JS_FALSE;
929
930 if (argc == 0) {
931 d = 0.0;
932 } else {
933 d = js_ValueToNumber(cx, &vp[2]);
934 if (JSVAL_IS_NULL(vp[2]))
935 return JS_FALSE;
936 d = js_DoubleToInteger(d);
937 }
938
939 if (d < 0 || JSSTRING_LENGTH(str) <= d)
940 goto out_of_range;
941 i = (jsint) d;
942 }
943
944 str = js_GetUnitString(cx, str, (size_t)i);
945 if (!str)
946 return JS_FALSE;
947 *vp = STRING_TO_JSVAL(str);
948 return JS_TRUE;
949
950 out_of_range:
951 *vp = JS_GetEmptyStringValue(cx);
952 return JS_TRUE;
953 }
954
955 static JSBool
956 str_charCodeAt(JSContext *cx, uintN argc, jsval *vp)
957 {
958 jsval t;
959 JSString *str;
960 jsint i;
961 jsdouble d;
962
963 t = vp[1];
964 if (JSVAL_IS_STRING(t) && argc != 0 && JSVAL_IS_INT(vp[2])) {
965 str = JSVAL_TO_STRING(t);
966 i = JSVAL_TO_INT(vp[2]);
967 if ((size_t)i >= JSSTRING_LENGTH(str))
968 goto out_of_range;
969 } else {
970 str = NormalizeThis(cx, vp);
971 if (!str)
972 return JS_FALSE;
973
974 if (argc == 0) {
975 d = 0.0;
976 } else {
977 d = js_ValueToNumber(cx, &vp[2]);
978 if (JSVAL_IS_NULL(vp[2]))
979 return JS_FALSE;
980 d = js_DoubleToInteger(d);
981 }
982
983 if (d < 0 || JSSTRING_LENGTH(str) <= d)
984 goto out_of_range;
985 i = (jsint) d;
986 }
987
988 *vp = INT_TO_JSVAL(JSSTRING_CHARS(str)[i]);
989 return JS_TRUE;
990
991 out_of_range:
992 *vp = JS_GetNaNValue(cx);
993 return JS_TRUE;
994 }
995
#ifdef JS_TRACER
extern jsdouble js_NaN;

/* charCodeAt with a double index: js_NaN when the index is out of range. */
jsdouble FASTCALL
js_String_p_charCodeAt(JSString* str, jsdouble d)
{
    jsdouble pos = js_DoubleToInteger(d);

    if (pos < 0 || (int32)JSSTRING_LENGTH(str) <= pos)
        return js_NaN;
    return jsdouble(JSSTRING_CHARS(str)[jsuint(pos)]);
}

/* charCodeAt with an int index: 0 when the index is out of range. */
int32 FASTCALL
js_String_p_charCodeAt_int(JSString* str, jsint i)
{
    if (i < 0 || (int32)JSSTRING_LENGTH(str) <= i)
        return 0;
    return JSSTRING_CHARS(str)[i];
}

/* charCodeAt for index 0: js_NaN when the string is empty. */
jsdouble FASTCALL
js_String_p_charCodeAt0(JSString* str)
{
    if ((int32)JSSTRING_LENGTH(str) == 0)
        return js_NaN;
    return jsdouble(JSSTRING_CHARS(str)[0]);
}

/* charCodeAt for index 0, int result: 0 when the string is empty. */
int32 FASTCALL
js_String_p_charCodeAt0_int(JSString* str)
{
    if ((int32)JSSTRING_LENGTH(str) == 0)
        return 0;
    return JSSTRING_CHARS(str)[0];
}

/*
 * The FuncFilter replaces the generic double version of charCodeAt with the
 * integer fast path if appropriate.
 */
JS_DEFINE_CALLINFO_1(extern, INT32, js_String_p_charCodeAt0_int, STRING, 1, 1)
JS_DEFINE_CALLINFO_2(extern, INT32, js_String_p_charCodeAt_int, STRING, INT32, 1, 1)
#endif
1039
1040 jsint
1041 js_BoyerMooreHorspool(const jschar *text, jsint textlen,
1042 const jschar *pat, jsint patlen,
1043 jsint start)
1044 {
1045 jsint i, j, k, m;
1046 uint8 skip[BMH_CHARSET_SIZE];
1047 jschar c;
1048
1049 JS_ASSERT(0 < patlen && patlen <= BMH_PATLEN_MAX);
1050 for (i = 0; i < BMH_CHARSET_SIZE; i++)
1051 skip[i] = (uint8)patlen;
1052 m = patlen - 1;
1053 for (i = 0; i < m; i++) {
1054 c = pat[i];
1055 if (c >= BMH_CHARSET_SIZE)
1056 return BMH_BAD_PATTERN;
1057 skip[c] = (uint8)(m - i);
1058 }
1059 for (k = start + m;
1060 k < textlen;
1061 k += ((c = text[k]) >= BMH_CHARSET_SIZE) ? patlen : skip[c]) {
1062 for (i = k, j = m; ; i--, j--) {
1063 if (j < 0)
1064 return i + 1;
1065 if (text[i] != pat[j])
1066 break;
1067 }
1068 }
1069 return -1;
1070 }
1071
1072 static JSBool
1073 str_indexOf(JSContext *cx, uintN argc, jsval *vp)
1074 {
1075 jsval t;
1076 JSString *str, *str2;
1077 const jschar *text, *pat;
1078 jsint i, j, index, textlen, patlen;
1079 jsdouble d;
1080
1081 t = vp[1];
1082 if (JSVAL_IS_STRING(t) && argc != 0 && JSVAL_IS_STRING(vp[2])) {
1083 str = JSVAL_TO_STRING(t);
1084 str2 = JSVAL_TO_STRING(vp[2]);
1085 } else {
1086 str = NormalizeThis(cx, vp);
1087 if (!str)
1088 return JS_FALSE;
1089
1090 str2 = ArgToRootedString(cx, argc, vp, 0);
1091 if (!str2)
1092 return JS_FALSE;
1093 }
1094
1095 text = JSSTRING_CHARS(str);
1096 textlen = (jsint) JSSTRING_LENGTH(str);
1097 pat = JSSTRING_CHARS(str2);
1098 patlen = (jsint) JSSTRING_LENGTH(str2);
1099
1100 if (argc > 1) {
1101 d = js_ValueToNumber(cx, &vp[3]);
1102 if (JSVAL_IS_NULL(vp[3]))
1103 return JS_FALSE;
1104 d = js_DoubleToInteger(d);
1105 if (d < 0)
1106 i = 0;
1107 else if (d > textlen)
1108 i = textlen;
1109 else
1110 i = (jsint)d;
1111 } else {
1112 i = 0;
1113 }
1114 if (patlen == 0) {
1115 *vp = INT_TO_JSVAL(i);
1116 return JS_TRUE;
1117 }
1118
1119 /* XXX tune the BMH threshold (512) */
1120 if (textlen - i >= 512 && (jsuint)(patlen - 2) <= BMH_PATLEN_MAX - 2) {
1121 index = js_BoyerMooreHorspool(text, textlen, pat, patlen, i);
1122 if (index != BMH_BAD_PATTERN)
1123 goto out;
1124 }
1125
1126 index = -1;
1127 j = 0;
1128 while (i + j < textlen) {
1129 if (text[i + j] == pat[j]) {
1130 if (++j == patlen) {
1131 index = i;
1132 break;
1133 }
1134 } else {
1135 i++;
1136 j = 0;
1137 }
1138 }
1139
1140 out:
1141 *vp = INT_TO_JSVAL(index);
1142 return JS_TRUE;
1143 }
1144
1145 static JSBool
1146 str_lastIndexOf(JSContext *cx, uintN argc, jsval *vp)
1147 {
1148 JSString *str, *str2;
1149 const jschar *text, *pat;
1150 jsint i, j, textlen, patlen;
1151 jsdouble d;
1152
1153 NORMALIZE_THIS(cx, vp, str);
1154 text = JSSTRING_CHARS(str);
1155 textlen = (jsint) JSSTRING_LENGTH(str);
1156
1157 if (argc != 0 && JSVAL_IS_STRING(vp[2])) {
1158 str2 = JSVAL_TO_STRING(vp[2]);
1159 } else {
1160 str2 = ArgToRootedString(cx, argc, vp, 0);
1161 if (!str2)
1162 return JS_FALSE;
1163 }
1164 pat = JSSTRING_CHARS(str2);
1165 patlen = (jsint) JSSTRING_LENGTH(str2);
1166
1167 i = textlen - patlen; // Start searching here
1168 if (i < 0) {
1169 *vp = INT_TO_JSVAL(-1);
1170 return JS_TRUE;
1171 }
1172
1173 if (argc > 1) {
1174 if (JSVAL_IS_INT(vp[3])) {
1175 j = JSVAL_TO_INT(vp[3]);
1176 if (j <= 0)
1177 i = 0;
1178 else if (j < i)
1179 i = j;
1180 } else {
1181 d = js_ValueToNumber(cx, &vp[3]);
1182 if (JSVAL_IS_NULL(vp[3]))
1183 return JS_FALSE;
1184 if (!JSDOUBLE_IS_NaN(d)) {
1185 d = js_DoubleToInteger(d);
1186 if (d <= 0)
1187 i = 0;
1188 else if (d < i)
1189 i = (jsint)d;
1190 }
1191 }
1192 }
1193
1194 if (patlen == 0) {
1195 *vp = INT_TO_JSVAL(i);
1196 return JS_TRUE;
1197 }
1198
1199 j = 0;
1200 while (i >= 0) {
1201 /* This is always safe because i <= textlen - patlen and j < patlen */
1202 if (text[i + j] == pat[j]) {
1203 if (++j == patlen)
1204 break;
1205 } else {
1206 i--;
1207 j = 0;
1208 }
1209 }
1210 *vp = INT_TO_JSVAL(i);
1211 return JS_TRUE;
1212 }
1213
1214 static JSBool
1215 js_TrimString(JSContext *cx, jsval *vp, JSBool trimLeft, JSBool trimRight)
1216 {
1217 JSString *str;
1218 const jschar *chars;
1219 size_t length, begin, end;
1220
1221 NORMALIZE_THIS(cx, vp, str);
1222 JSSTRING_CHARS_AND_LENGTH(str, chars, length);
1223 begin = 0;
1224 end = length;
1225
1226 if (trimLeft) {
1227 while (begin < length && JS_ISSPACE(chars[begin]))
1228 ++begin;
1229 }
1230
1231 if (trimRight) {
1232 while (end > begin && JS_ISSPACE(chars[end-1]))
1233 --end;
1234 }
1235
1236 str = js_NewDependentString(cx, str, begin, end - begin);
1237 if (!str)
1238 return JS_FALSE;
1239
1240 *vp = STRING_TO_JSVAL(str);
1241 return JS_TRUE;
1242 }
1243
1244 static JSBool
1245 str_trim(JSContext *cx, uintN argc, jsval *vp)
1246 {
1247 return js_TrimString(cx, vp, JS_TRUE, JS_TRUE);
1248 }
1249
1250 static JSBool
1251 str_trimLeft(JSContext *cx, uintN argc, jsval *vp)
1252 {
1253 return js_TrimString(cx, vp, JS_TRUE, JS_FALSE);
1254 }
1255
1256 static JSBool
1257 str_trimRight(JSContext *cx, uintN argc, jsval *vp)
1258 {
1259 return js_TrimString(cx, vp, JS_FALSE, JS_TRUE);
1260 }
1261
1262 /*
1263 * Perl-inspired string functions.
1264 */
1265 typedef struct GlobData {
1266 jsbytecode *pc; /* in: program counter resulting in us matching */
1267 uintN flags; /* inout: mode and flag bits, see below */
1268 uintN optarg; /* in: index of optional flags argument */
1269 JSString *str; /* out: 'this' parameter object as string */
1270 JSRegExp *regexp; /* out: regexp parameter object private data */
1271 } GlobData;
1272
/*
 * Bits stored in GlobData.flags for match_or_replace.  The low two bits
 * select the mode (extract them with GET_MODE); the rest are independent
 * flags.
 */
#define MODE_MATCH      0x00    /* in: return match array on success */
#define MODE_REPLACE    0x01    /* in: match and replace */
#define MODE_SEARCH     0x02    /* in: search only, return match index or -1 */
#define GET_MODE(f)     ((f) & 0x03)
#define FORCE_FLAT      0x04    /* in: force flat (non-regexp) string match */
#define KEEP_REGEXP     0x08    /* inout: keep GlobData.regexp alive for caller
                                   of match_or_replace; if set on input
                                   but clear on output, regexp ownership
                                   does not pass to caller */
#define GLOBAL_REGEXP   0x10    /* out: regexp had the 'g' flag */
1286
1287 static JSBool
1288 match_or_replace(JSContext *cx,
1289 JSBool (*glob)(JSContext *cx, jsint count, GlobData *data),
1290 void (*destroy)(JSContext *cx, GlobData *data),
1291 GlobData *data, uintN argc, jsval *vp)
1292 {
1293 JSString *str, *src, *opt;
1294 JSObject *reobj;
1295 JSRegExp *re;
1296 size_t index, length;
1297 JSBool ok, test;
1298 jsint count;
1299
1300 NORMALIZE_THIS(cx, vp, str);
1301 data->str = str;
1302
1303 if (argc != 0 && VALUE_IS_REGEXP(cx, vp[2])) {
1304 reobj = JSVAL_TO_OBJECT(vp[2]);
1305 re = (JSRegExp *) JS_GetPrivate(cx, reobj);
1306 } else {
1307 src = ArgToRootedString(cx, argc, vp, 0);
1308 if (!src)
1309 return JS_FALSE;
1310 if (data->optarg < argc) {
1311 opt = js_ValueToString(cx, vp[2 + data->optarg]);
1312 if (!opt)
1313 return JS_FALSE;
1314 } else {
1315 opt = NULL;
1316 }
1317 re = js_NewRegExpOpt(cx, src, opt, (data->flags & FORCE_FLAT) != 0);
1318 if (!re)
1319 return JS_FALSE;
1320 reobj = NULL;
1321 }
1322 /* From here on, all control flow must reach the matching DROP. */
1323 data->regexp = re;
1324 HOLD_REGEXP(cx, re);
1325
1326 if (re->flags & JSREG_GLOB)
1327 data->flags |= GLOBAL_REGEXP;
1328 index = 0;
1329 if (GET_MODE(data->flags) == MODE_SEARCH) {
1330 ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, vp);
1331 if (ok) {
1332 *vp = (*vp == JSVAL_TRUE)
1333 ? INT_TO_JSVAL(cx->regExpStatics.leftContext.length)
1334 : INT_TO_JSVAL(-1);
1335 }
1336 } else if (data->flags & GLOBAL_REGEXP) {
1337 if (reobj) {
1338 /* Set the lastIndex property's reserved slot to 0. */
1339 ok = js_SetLastIndex(cx, reobj, 0);
1340 } else {
1341 ok = JS_TRUE;
1342 }
1343 if (ok) {
1344 length = JSSTRING_LENGTH(str);
1345 for (count = 0; index <= length; count++) {
1346 ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, vp);
1347 if (!ok || *vp != JSVAL_TRUE)
1348 break;
1349 ok = glob(cx, count, data);
1350 if (!ok)
1351 break;
1352 if (cx->regExpStatics.lastMatch.length == 0) {
1353 if (index == length)
1354 break;
1355 index++;
1356 }
1357 }
1358 if (!ok && destroy)
1359 destroy(cx, data);
1360 }
1361 } else {
1362 if (GET_MODE(data->flags) == MODE_REPLACE) {
1363 test = JS_TRUE;
1364 } else {
1365 /*
1366 * MODE_MATCH implies str_match is being called from a script or a
1367 * scripted function. If the caller cares only about testing null
1368 * vs. non-null return value, optimize away the array object that
1369 * would normally be returned in *vp.
1370 *
1371 * Assume a full array result is required, then prove otherwise.
1372 */
1373 test = JS_FALSE;
1374 if (data->pc && (*data->pc == JSOP_CALL || *data->pc == JSOP_NEW)) {
1375 JS_ASSERT(js_CodeSpec[*data->pc].length == 3);
1376 switch (data->pc[3]) {
1377 case JSOP_POP:
1378 case JSOP_IFEQ:
1379 case JSOP_IFNE:
1380 case JSOP_IFEQX:
1381 case JSOP_IFNEX:
1382 test = JS_TRUE;
1383 break;
1384 default:;
1385 }
1386 }
1387 }
1388 ok = js_ExecuteRegExp(cx, re, str, &index, test, vp);
1389 }
1390
1391 DROP_REGEXP(cx, re);
1392 if (reobj) {
1393 /* Tell our caller that it doesn't need to destroy data->regexp. */
1394 data->flags &= ~KEEP_REGEXP;
1395 } else if (!ok || !(data->flags & KEEP_REGEXP)) {
1396 /* Caller didn't want to keep data->regexp, so null and destroy it. */
1397 data->regexp = NULL;
1398 js_DestroyRegExp(cx, re);
1399 }
1400
1401 return ok;
1402 }
1403
1404 typedef struct MatchData {
1405 GlobData base;
1406 jsval *arrayval; /* NB: local root pointer */
1407 } MatchData;
1408
1409 static JSBool
1410 match_glob(JSContext *cx, jsint count, GlobData *data)
1411 {
1412 MatchData *mdata;
1413 JSObject *arrayobj;
1414 JSSubString *matchsub;
1415 JSString *matchstr;
1416 jsval v;
1417
1418 mdata = (MatchData *)data;
1419 arrayobj = JSVAL_TO_OBJECT(*mdata->arrayval);
1420 if (!arrayobj) {
1421 arrayobj = js_NewArrayObject(cx, 0, NULL);
1422 if (!arrayobj)
1423 return JS_FALSE;
1424 *mdata->arrayval = OBJECT_TO_JSVAL(arrayobj);
1425 }
1426 matchsub = &cx->regExpStatics.lastMatch;
1427 matchstr = js_NewStringCopyN(cx, matchsub->chars, matchsub->length);
1428 if (!matchstr)
1429 return JS_FALSE;
1430 v = STRING_TO_JSVAL(matchstr);
1431 JS_ASSERT(count <= JSVAL_INT_MAX);
1432
1433 JSAutoResolveFlags rf(cx, JSRESOLVE_QUALIFIED | JSRESOLVE_ASSIGNING);
1434 return OBJ_SET_PROPERTY(cx, arrayobj, INT_TO_JSID(count), &v);
1435 }
1436
1437 static JSBool
1438 StringMatchHelper(JSContext *cx, uintN argc, jsval *vp, jsbytecode *pc)
1439 {
1440 JSTempValueRooter tvr;
1441 MatchData mdata;
1442 JSBool ok;
1443
1444 JS_PUSH_SINGLE_TEMP_ROOT(cx, JSVAL_NULL, &tvr);
1445 mdata.base.pc = pc;
1446 mdata.base.flags = MODE_MATCH;
1447 mdata.base.optarg = 1;
1448 mdata.arrayval = &tvr.u.value;
1449 ok = match_or_replace(cx, match_glob, NULL, &mdata.base, argc, vp);
1450 if (ok && !JSVAL_IS_NULL(*mdata.arrayval))
1451 *vp = *mdata.arrayval;
1452 JS_POP_TEMP_ROOT(cx, &tvr);
1453 return ok;
1454 }
1455
1456 static JSBool
1457 str_match(JSContext *cx, uintN argc, jsval *vp)
1458 {
1459 return StringMatchHelper(cx, argc, vp, js_GetCurrentBytecodePC(cx));
1460 }
1461
1462 static JSBool
1463 str_search(JSContext *cx, uintN argc, jsval *vp)
1464 {
1465 GlobData data;
1466
1467 data.flags = MODE_SEARCH;
1468 data.optarg = 1;
1469 return match_or_replace(cx, NULL, NULL, &data, argc, vp);
1470 }
1471
1472 typedef struct ReplaceData {
1473 GlobData base; /* base struct state */
1474 JSObject *lambda; /* replacement function object or null */
1475 JSString *repstr; /* replacement string */
1476 jschar *dollar; /* null or pointer to first $ in repstr */
1477 jschar *dollarEnd; /* limit pointer for js_strchr_limit */
1478 jschar *chars; /* result chars, null initially */
1479 size_t length; /* result length, 0 initially */
1480 jsint index; /* index in result of next replacement */
1481 jsint leftIndex; /* left context index in base.str->chars */
1482 JSSubString dollarStr; /* for "$$" interpret_dollar result */
1483 } ReplaceData;
1484
1485 static JSSubString *
1486 interpret_dollar(JSContext *cx, jschar *dp, jschar *ep, ReplaceData *rdata,
1487 size_t *skip)
1488 {
1489 JSRegExpStatics *res;
1490 jschar dc, *cp;
1491 uintN num, tmp;
1492
1493 JS_ASSERT(*dp == '$');
1494
1495 /* If there is only a dollar, bail now */
1496 if (dp + 1 >= ep)
1497 return NULL;
1498
1499 /* Interpret all Perl match-induced dollar variables. */
1500 res = &cx->regExpStatics;
1501 dc = dp[1];
1502 if (JS7_ISDEC(dc)) {
1503 /* ECMA-262 Edition 3: 1-9 or 01-99 */
1504 num = JS7_UNDEC(dc);
1505 if (num > res->parenCount)
1506 return NULL;
1507
1508 cp = dp + 2;
1509 if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
1510 tmp = 10 * num + JS7_UNDEC(dc);
1511 if (tmp <= res->parenCount) {
1512 cp++;
1513 num = tmp;
1514 }
1515 }
1516 if (num == 0)
1517 return NULL;
1518
1519 /* Adjust num from 1 $n-origin to 0 array-index-origin. */
1520 num--;
1521 *skip = cp - dp;
1522 return REGEXP_PAREN_SUBSTRING(res, num);
1523 }
1524
1525 *skip = 2;
1526 switch (dc) {
1527 case '$':
1528 rdata->dollarStr.chars = dp;
1529 rdata->dollarStr.length = 1;
1530 return &rdata->dollarStr;
1531 case '&':
1532 return &res->lastMatch;
1533 case '+':
1534 return &res->lastParen;
1535 case '`':
1536 return &res->leftContext;
1537 case '\'':
1538 return &res->rightContext;
1539 }
1540 return NULL;
1541 }
1542
1543 static JS_REQUIRES_STACK JSBool
1544 find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
1545 {
1546 JSString *repstr;
1547 size_t replen, skip;
1548 jschar *dp, *ep;
1549 JSSubString *sub;
1550 JSObject *lambda;
1551
1552 lambda = rdata->lambda;
1553 if (lambda) {
1554 uintN argc, i, j, m, n, p;
1555 jsval *invokevp, *sp;
1556 void *mark;
1557 JSBool ok;
1558
1559 /*
1560 * Save the regExpStatics from the current regexp, since they may be
1561 * clobbered by a RegExp usage in the lambda function. Note that all
1562 * members of JSRegExpStatics are JSSubStrings, so not GC roots, save
1563 * input, which is rooted otherwise via vp[1] in str_replace.
1564 */
1565 JSRegExpStatics save = cx->regExpStatics;
1566 JSBool freeMoreParens = JS_FALSE;
1567
1568 /*
1569 * In the lambda case, not only do we find the replacement string's
1570 * length, we compute repstr and return it via rdata for use within
1571 * do_replace. The lambda is called with arguments ($&, $1, $2, ...,
1572 * index, input), i.e., all the properties of a regexp match array.
1573 * For $&, etc., we must create string jsvals from cx->regExpStatics.
1574 * We grab up stack space to keep the newborn strings GC-rooted.
1575 */
1576 p = rdata->base.regexp->parenCount;
1577 argc = 1 + p + 2;
1578 invokevp = js_AllocStack(cx, 2 + argc, &mark);
1579 if (!invokevp)
1580 return JS_FALSE;
1581
1582 /* Push lambda and its 'this' parameter. */
1583 sp = invokevp;
1584 *sp++ = OBJECT_TO_JSVAL(lambda);
1585 *sp++ = OBJECT_TO_JSVAL(OBJ_GET_PARENT(cx, lambda));
1586
1587 #define PUSH_REGEXP_STATIC(sub) \
1588 JS_BEGIN_MACRO \
1589 JSString *str = js_NewStringCopyN(cx, \
1590 cx->regExpStatics.sub.chars, \
1591 cx->regExpStatics.sub.length); \
1592 if (!str) { \
1593 ok = JS_FALSE; \
1594 goto lambda_out; \
1595 } \
1596 *sp++ = STRING_TO_JSVAL(str); \
1597 JS_END_MACRO
1598
1599 /* Push $&, $1, $2, ... */
1600 PUSH_REGEXP_STATIC(lastMatch);
1601 i = 0;
1602 m = cx->regExpStatics.parenCount;
1603 n = JS_MIN(m, 9);
1604 for (j = 0; i < n; i++, j++)
1605 PUSH_REGEXP_STATIC(parens[j]);
1606 for (j = 0; i < m; i++, j++)
1607 PUSH_REGEXP_STATIC(moreParens[j]);
1608
1609 /*
1610 * We need to clear moreParens in the top-of-stack cx->regExpStatics
1611 * to it won't be possibly realloc'ed, leaving the bottom-of-stack
1612 * moreParens pointing to freed memory.
1613 */
1614 cx->regExpStatics.moreParens = NULL;
1615 freeMoreParens = JS_TRUE;
1616
1617 #undef PUSH_REGEXP_STATIC
1618
1619 /* Make sure to push undefined for any unmatched parens. */
1620 for (; i < p; i++)
1621 *sp++ = JSVAL_VOID;
1622
1623 /* Push match index and input string. */
1624 *sp++ = INT_TO_JSVAL((jsint)cx->regExpStatics.leftContext.length);
1625 *sp++ = STRING_TO_JSVAL(rdata->base.str);
1626
1627 ok = js_Invoke(cx, argc, invokevp, 0);
1628 if (ok) {
1629 /*
1630 * NB: we count on the newborn string root to hold any string
1631 * created by this js_ValueToString that would otherwise be GC-
1632 * able, until we use rdata->repstr in do_replace.
1633 */
1634 repstr = js_ValueToString(cx, *invokevp);
1635 if (!repstr) {
1636 ok = JS_FALSE;
1637 } else {
1638 rdata->repstr = repstr;
1639 *sizep = JSSTRING_LENGTH(repstr);
1640 }
1641 }
1642
1643 lambda_out:
1644 js_FreeStack(cx, mark);
1645 if (freeMoreParens)
1646 JS_free(cx, cx->regExpStatics.moreParens);
1647 cx->regExpStatics = save;
1648 return ok;
1649 }
1650
1651 repstr = rdata->repstr;
1652 replen = JSSTRING_LENGTH(repstr);
1653 for (dp = rdata->dollar, ep = rdata->dollarEnd; dp;
1654 dp = js_strchr_limit(dp, '$', ep)) {
1655 sub = interpret_dollar(cx, dp, ep, rdata, &skip);
1656 if (sub) {
1657 replen += sub->length - skip;
1658 dp += skip;
1659 }
1660 else
1661 dp++;
1662 }
1663 *sizep = replen;
1664 return JS_TRUE;
1665 }
1666
1667 static void
1668 do_replace(JSContext *cx, ReplaceData *rdata, jschar *chars)
1669 {
1670 JSString *repstr;
1671 jschar *bp, *cp, *dp, *ep;
1672 size_t len, skip;
1673 JSSubString *sub;
1674
1675 repstr = rdata->repstr;
1676 bp = cp = JSSTRING_CHARS(repstr);
1677 for (dp = rdata->dollar, ep = rdata->dollarEnd; dp;
1678 dp = js_strchr_limit(dp, '$', ep)) {
1679 len = dp - cp;
1680 js_strncpy(chars, cp, len);
1681 chars += len;
1682 cp = dp;
1683 sub = interpret_dollar(cx, dp, ep, rdata, &skip);
1684 if (sub) {
1685 len = sub->length;
1686 js_strncpy(chars, sub->chars, len);
1687 chars += len;
1688 cp += skip;
1689 dp += skip;
1690 } else {
1691 dp++;
1692 }
1693 }
1694 js_strncpy(chars, cp, JSSTRING_LENGTH(repstr) - (cp - bp));
1695 }
1696
1697 static void
1698 replace_destroy(JSContext *cx, GlobData *data)
1699 {
1700 ReplaceData *rdata;
1701
1702 rdata = (ReplaceData *)data;
1703 JS_free(cx, rdata->chars);
1704 rdata->chars = NULL;
1705 }
1706
1707 static JS_REQUIRES_STACK JSBool
1708 replace_glob(JSContext *cx, jsint count, GlobData *data)
1709 {
1710 ReplaceData *rdata;
1711 JSString *str;
1712 size_t leftoff, leftlen, replen, growth;
1713 const jschar *left;
1714 jschar *chars;
1715
1716 rdata = (ReplaceData *)data;
1717 str = data->str;
1718 leftoff = rdata->leftIndex;
1719 left = JSSTRING_CHARS(str) + leftoff;
1720 leftlen = cx->regExpStatics.lastMatch.chars - left;
1721 rdata->leftIndex = cx->regExpStatics.lastMatch.chars - JSSTRING_CHARS(str);
1722 rdata->leftIndex += cx->regExpStatics.lastMatch.length;
1723 if (!find_replen(cx, rdata, &replen))
1724 return JS_FALSE;
1725 growth = leftlen + replen;
1726 chars = (jschar *)
1727 (rdata->chars
1728 ? JS_realloc(cx, rdata->chars, (rdata->length + growth + 1)
1729 * sizeof(jschar))
1730 : JS_malloc(cx, (growth + 1) * sizeof(jschar)));
1731 if (!chars)
1732 return JS_FALSE;
1733 rdata->chars = chars;
1734 rdata->length += growth;
1735 chars += rdata->index;
1736 rdata->index += growth;
1737 js_strncpy(chars, left, leftlen);
1738 chars += leftlen;
1739 do_replace(cx, rdata, chars);
1740 return JS_TRUE;
1741 }
1742
1743 static JS_REQUIRES_STACK JSBool
1744 str_replace(JSContext *cx, uintN argc, jsval *vp)
1745 {
1746 JSObject *lambda;
1747 JSString *repstr;
1748
1749 if (argc >= 2 && JS_TypeOfValue(cx, vp[3]) == JSTYPE_FUNCTION) {
1750 lambda = JSVAL_TO_OBJECT(vp[3]);
1751 repstr = NULL;
1752 } else {
1753 lambda = NULL;
1754 repstr = ArgToRootedString(cx, argc, vp, 1);
1755 if (!repstr)
1756 return JS_FALSE;
1757 }
1758
1759 return js_StringReplaceHelper(cx, argc, lambda, repstr, vp);
1760 }
1761
1762 JSBool JS_REQUIRES_STACK
1763 js_StringReplaceHelper(JSContext *cx, uintN argc, JSObject *lambda,
1764 JSString *repstr, jsval *vp)
1765 {
1766 ReplaceData rdata;
1767 JSBool ok;
1768 size_t leftlen, rightlen, length;
1769 jschar *chars;
1770 JSString *str;
1771
1772 /*
1773 * For ECMA Edition 3, the first argument is to be converted to a string
1774 * to match in a "flat" sense (without regular expression metachars having
1775 * special meanings) UNLESS the first arg is a RegExp object.
1776 */
1777 rdata.base.flags = MODE_REPLACE | KEEP_REGEXP | FORCE_FLAT;
1778 rdata.base.optarg = 2;
1779
1780 rdata.lambda = lambda;
1781 rdata.repstr = repstr;
1782 if (repstr) {
1783 if (!js_MakeStringImmutable(cx, repstr))
1784 return JS_FALSE;
1785 rdata.dollarEnd = JSSTRING_CHARS(repstr) + JSSTRING_LENGTH(repstr);
1786 rdata.dollar = js_strchr_limit(JSSTRING_CHARS(repstr), '$',
1787 rdata.dollarEnd);
1788 } else {
1789 rdata.dollar = rdata.dollarEnd = NULL;
1790 }
1791 rdata.chars = NULL;
1792 rdata.length = 0;
1793 rdata.index = 0;
1794 rdata.leftIndex = 0;
1795
1796 ok = match_or_replace(cx, replace_glob, replace_destroy, &rdata.base,
1797 argc, vp);
1798 if (!ok)
1799 return JS_FALSE;
1800
1801 if (!rdata.chars) {
1802 if ((rdata.base.flags & GLOBAL_REGEXP) || *vp != JSVAL_TRUE) {
1803 /* Didn't match even once. */
1804 *vp = STRING_TO_JSVAL(rdata.base.str);
1805 goto out;
1806 }
1807 leftlen = cx->regExpStatics.leftContext.length;
1808 ok = find_replen(cx, &rdata, &length);
1809 if (!ok)
1810 goto out;
1811 length += leftlen;
1812 chars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
1813 if (!chars) {
1814 ok = JS_FALSE;
1815 goto out;
1816 }
1817 js_strncpy(chars, cx->regExpStatics.leftContext.chars, leftlen);
1818 do_replace(cx, &rdata, chars + leftlen);
1819 rdata.chars = chars;
1820 rdata.length = length;
1821 }
1822
1823 rightlen = cx->regExpStatics.rightContext.length;
1824 length = rdata.length + rightlen;
1825 chars = (jschar *)
1826 JS_realloc(cx, rdata.chars, (length + 1) * sizeof(jschar));
1827 if (!chars) {
1828 JS_free(cx, rdata.chars);
1829 ok = JS_FALSE;
1830 goto out;
1831 }
1832 js_strncpy(chars + rdata.length, cx->regExpStatics.rightContext.chars,
1833 rightlen);
1834 chars[length] = 0;
1835
1836 str = js_NewString(cx, chars, length);
1837 if (!str) {
1838 JS_free(cx, chars);
1839 ok = JS_FALSE;
1840 goto out;
1841 }
1842 *vp = STRING_TO_JSVAL(str);
1843
1844 out:
1845 /* If KEEP_REGEXP is still set, it's our job to destroy regexp now. */
1846 if (rdata.base.flags & KEEP_REGEXP)
1847 js_DestroyRegExp(cx, rdata.base.regexp);
1848 return ok;
1849 }
1850
1851 /*
1852 * Subroutine used by str_split to find the next split point in str, starting
1853 * at offset *ip and looking either for the separator substring given by sep, or
1854 * for the next re match. In the re case, return the matched separator in *sep,
1855 * and the possibly updated offset in *ip.
1856 *
1857 * Return -2 on error, -1 on end of string, >= 0 for a valid index of the next
1858 * separator occurrence if found, or str->length if no separator is found.
1859 */
1860 static jsint
1861 find_split(JSContext *cx, JSString *str, JSRegExp *re, jsint *ip,
1862 JSSubString *sep)
1863 {
1864 jsint i, j, k;
1865 size_t length;
1866 jschar *chars;
1867
1868 /*
1869 * Stop if past end of string. If at end of string, we will compare the
1870 * null char stored there (by js_NewString*) to sep->chars[j] in the while
1871 * loop at the end of this function, so that
1872 *
1873 * "ab,".split(',') => ["ab", ""]
1874 *
1875 * and the resulting array converts back to the string "ab," for symmetry.
1876 * However, we ape Perl and do this only if there is a sufficiently large
1877 * limit argument (see str_split).
1878 */
1879 i = *ip;
1880 length = JSSTRING_LENGTH(str);
1881 if ((size_t)i > length)
1882 return -1;
1883
1884 chars = JSSTRING_CHARS(str);
1885
1886 /*
1887 * Match a regular expression against the separator at or above index i.
1888 * Call js_ExecuteRegExp with true for the test argument. On successful
1889 * match, get the separator from cx->regExpStatics.lastMatch.
1890 */
1891 if (re) {
1892 size_t index;
1893 jsval rval;
1894
1895 again:
1896 /* JS1.2 deviated from Perl by never matching at end of string. */
1897 index = (size_t)i;
1898 if (!js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, &rval))
1899 return -2;
1900 if (rval != JSVAL_TRUE) {
1901 /* Mismatch: ensure our caller advances i past end of string. */
1902 sep->length = 1;
1903 return length;
1904 }
1905 i = (jsint)index;
1906 *sep = cx->regExpStatics.lastMatch;
1907 if (sep->length == 0) {
1908 /*
1909 * Empty string match: never split on an empty match at the start
1910 * of a find_split cycle. Same rule as for an empty global match
1911 * in match_or_replace.
1912 */
1913 if (i == *ip) {
1914 /*
1915 * "Bump-along" to avoid sticking at an empty match, but don't
1916 * bump past end of string -- our caller must do that by adding
1917 * sep->length to our return value.
1918 */
1919 if ((size_t)i == length)
1920 return -1;
1921 i++;
1922 goto again;
1923 }
1924 if ((size_t)i == length) {
1925 /*
1926 * If there was a trivial zero-length match at the end of the
1927 * split, then we shouldn't output the matched string at the end
1928 * of the split array. See ECMA-262 Ed. 3, 15.5.4.14, Step 15.
1929 */
1930 sep->chars = NULL;
1931 }
1932 }
1933 JS_ASSERT((size_t)i >= sep->length);
1934 return i - sep->length;
1935 }
1936
1937 /*
1938 * Special case: if sep is the empty string, split str into one character
1939 * substrings. Let our caller worry about whether to split once at end of
1940 * string into an empty substring.
1941 */
1942 if (sep->length == 0)
1943 return ((size_t)i == length) ? -1 : i + 1;
1944
1945 /*
1946 * Now that we know sep is non-empty, search starting at i in str for an
1947 * occurrence of all of sep's chars. If we find them, return the index of
1948 * the first separator char. Otherwise, return length.
1949 */
1950 j = 0;
1951 while ((size_t)(k = i + j) < length) {
1952 if (chars[k] == sep->chars[j]) {
1953 if ((size_t)++j == sep->length)
1954 return i;
1955 } else {
1956 i++;
1957 j = 0;
1958 }
1959 }
1960 return k;
1961 }
1962
1963 static JSBool
1964 str_split(JSContext *cx, uintN argc, jsval *vp)
1965 {
1966 JSString *str, *sub;
1967 JSObject *arrayobj;
1968 jsval v;
1969 JSBool ok, limited;
1970 JSRegExp *re;
1971 JSSubString *sep, tmp;
1972 jsdouble d;
1973 jsint i, j;
1974 uint32 len, limit;
1975
1976 NORMALIZE_THIS(cx, vp, str);
1977
1978 arrayobj = js_NewArrayObject(cx, 0, NULL);
1979 if (!arrayobj)
1980 return JS_FALSE;
1981 *vp = OBJECT_TO_JSVAL(arrayobj);
1982
1983 if (argc == 0) {
1984 v = STRING_TO_JSVAL(str);
1985 ok = OBJ_SET_PROPERTY(cx, arrayobj, INT_TO_JSID(0), &v);
1986 } else {
1987 if (VALUE_IS_REGEXP(cx, vp[2])) {
1988 re = (JSRegExp *) JS_GetPrivate(cx, JSVAL_TO_OBJECT(vp[2]));
1989 sep = &tmp;
1990
1991 /* Set a magic value so we can detect a successful re match. */
1992 sep->chars = NULL;
1993 sep->length = 0;
1994 } else {
1995 JSString *str2 = js_ValueToString(cx, vp[2]);
1996 if (!str2)
1997 return JS_FALSE;
1998 vp[2] = STRING_TO_JSVAL(str2);
1999
2000 /*
2001 * Point sep at a local copy of str2's header because find_split
2002 * will modify sep->length.
2003 */
2004 JSSTRING_CHARS_AND_LENGTH(str2, tmp.chars, tmp.length);
2005 sep = &tmp;
2006 re = NULL;
2007 }
2008
2009 /* Use the second argument as the split limit, if given. */
2010 limited = (argc > 1) && !JSVAL_IS_VOID(vp[3]);
2011 limit = 0; /* Avoid warning. */
2012 if (limited) {
2013 d = js_ValueToNumber(cx, &vp[3]);
2014 if (JSVAL_IS_NULL(vp[3]))
2015 return JS_FALSE;
2016
2017 /* Clamp limit between 0 and 1 + string length. */
2018 limit = js_DoubleToECMAUint32(d);
2019 if (limit > JSSTRING_LENGTH(str))
2020 limit = 1 + JSSTRING_LENGTH(str);
2021 }
2022
2023 len = i = 0;
2024 while ((j = find_split(cx, str, re, &i, sep)) >= 0) {
2025 if (limited && len >= limit)
2026 break;
2027 sub = js_NewDependentString(cx, str, i, (size_t)(j - i));
2028 if (!sub)
2029 return JS_FALSE;
2030 v = STRING_TO_JSVAL(sub);
2031 if (!JS_SetElement(cx, arrayobj, len, &v))
2032 return JS_FALSE;
2033 len++;
2034
2035 /*
2036 * Imitate perl's feature of including parenthesized substrings
2037 * that matched part of the delimiter in the new array, after the
2038 * split substring that was delimited.
2039 */
2040 if (re && sep->chars) {
2041 uintN num;
2042 JSSubString *parsub;
2043
2044 for (num = 0; num < cx->regExpStatics.parenCount; num++) {
2045 if (limited && len >= limit)
2046 break;
2047 parsub = REGEXP_PAREN_SUBSTRING(&cx->regExpStatics, num);
2048 sub = js_NewStringCopyN(cx, parsub->chars, parsub->length);
2049 if (!sub)
2050 return JS_FALSE;
2051 v = STRING_TO_JSVAL(sub);
2052 if (!JS_SetElement(cx, arrayobj, len, &v))
2053 return JS_FALSE;
2054 len++;
2055 }
2056 sep->chars = NULL;
2057 }
2058 i = j + sep->length;
2059 }
2060 ok = (j != -2);
2061 }
2062 return ok;
2063 }
2064
#if JS_HAS_PERL_SUBSTR
/*
 * Native: substr(start[, count]).  A negative start counts back from the end
 * of the string; both values are clamped to the string's bounds.  With no
 * arguments the |this| string itself is stored in *vp.
 *
 * Returns JS_FALSE if a number conversion or the string creation fails.
 */
static JSBool
str_substr(JSContext *cx, uintN argc, jsval *vp)
{
    JSString *str;
    jsdouble d;
    jsdouble length, begin, end;

    NORMALIZE_THIS(cx, vp, str);
    if (argc == 0) {
        *vp = STRING_TO_JSVAL(str);
        return JS_TRUE;
    }

    d = js_ValueToNumber(cx, &vp[2]);
    if (JSVAL_IS_NULL(vp[2]))
        return JS_FALSE;
    length = JSSTRING_LENGTH(str);
    begin = js_DoubleToInteger(d);
    if (begin < 0) {
        /* Negative start is relative to the end of the string. */
        begin += length;
        if (begin < 0)
            begin = 0;
    } else if (begin > length) {
        begin = length;
    }

    if (argc == 1) {
        end = length;
    } else {
        /* The second argument is a count, not an end index. */
        d = js_ValueToNumber(cx, &vp[3]);
        if (JSVAL_IS_NULL(vp[3]))
            return JS_FALSE;
        end = js_DoubleToInteger(d);
        if (end < 0)
            end = 0;
        end += begin;
        if (end > length)
            end = length;
    }

    str = js_NewDependentString(cx, str,
                                (size_t)begin,
                                (size_t)(end - begin));
    if (!str)
        return JS_FALSE;

    *vp = STRING_TO_JSVAL(str);
    return JS_TRUE;
}
#endif /* JS_HAS_PERL_SUBSTR */
2112
2113 /*
2114 * Python-esque sequence operations.
2115 */
2116 static JSBool
2117 str_concat(JSContext *cx, uintN argc, jsval *vp)
2118 {
2119 JSString *str, *str2;
2120 jsval *argv;
2121 uintN i;
2122
2123 NORMALIZE_THIS(cx, vp, str);
2124
2125 /* Set vp (aka rval) early to handle the argc == 0 case. */
2126 *vp = STRING_TO_JSVAL(str);
2127
2128 for (i = 0, argv = vp + 2; i < argc; i++) {
2129 str2 = js_ValueToString(cx, argv[i]);
2130 if (!str2)
2131 return JS_FALSE;
2132 argv[i] = STRING_TO_JSVAL(str2);
2133
2134 str = js_ConcatStrings(cx, str, str2);
2135 if (!str)
2136 return JS_FALSE;
2137 *vp = STRING_TO_JSVAL(str);
2138 }
2139
2140 return JS_TRUE;
2141 }
2142
2143 static JSBool
2144 str_slice(JSContext *cx, uintN argc, jsval *vp)
2145 {
2146 jsval t, v;
2147 JSString *str;
2148
2149 t = vp[1];
2150 v = vp[2];
2151 if (argc == 1 && JSVAL_IS_STRING(t) && JSVAL_IS_INT(v)) {
2152 size_t begin, end, length;
2153
2154 str = JSVAL_TO_STRING(t);
2155 begin = JSVAL_TO_INT(v);
2156 end = JSSTRING_LENGTH(str);
2157 if (begin <= end) {
2158 length = end - begin;
2159 if (length == 0) {
2160 str = cx->runtime->emptyString;
2161 } else {
2162 str = (length == 1)
2163 ? js_GetUnitString(cx, str, begin)
2164 : js_NewDependentString(cx, str, begin, length);
2165 if (!str)
2166 return JS_FALSE;
2167 }
2168 *vp = STRING_TO_JSVAL(str);
2169 return JS_TRUE;
2170 }
2171 }
2172
2173 NORMALIZE_THIS(cx, vp, str);
2174
2175 if (argc != 0) {
2176 double begin, end, length;
2177
2178 begin = js_ValueToNumber(cx, &vp[2]);
2179 if (JSVAL_IS_NULL(vp[2]))
2180 return JS_FALSE;
2181 begin = js_DoubleToInteger(begin);
2182 length = JSSTRING_LENGTH(str);
2183 if (begin < 0) {
2184 begin += length;
2185 if (begin < 0)
2186 begin = 0;
2187 } else if (begin > length) {
2188 begin = length;
2189 }
2190
2191 if (argc == 1) {
2192 end = length;
2193 } else {
2194 end = js_ValueToNumber(cx, &vp[3]);
2195 if (JSVAL_IS_NULL(vp[3]))
2196 return JS_FALSE;
2197 end = js_DoubleToInteger(end);
2198 if (end < 0) {
2199 end += length;
2200 if (end < 0)
2201 end = 0;
2202 } else if (end > length) {
2203 end = length;
2204 }
2205 if (end < begin)
2206 end = begin;
2207 }
2208
2209 str = js_NewDependentString(cx, str,
2210 (size_t)begin,
2211 (size_t)(end - begin));
2212 if (!str)
2213 return JS_FALSE;
2214 }
2215 *vp = STRING_TO_JSVAL(str);
2216 return JS_TRUE;
2217 }
2218
2219 #if JS_HAS_STR_HTML_HELPERS
2220 /*
2221 * HTML composition aids.
2222 */
2223 static JSBool
2224 tagify(JSContext *cx, const char *begin, JSString *param, const char *end,
2225 jsval *vp)
2226 {
2227 JSString *str;
2228 jschar *tagbuf;
2229 size_t beglen, endlen, parlen, taglen;
2230 size_t i, j;
2231
2232 NORMALIZE_THIS(cx, vp, str);
2233
2234 if (!end)
2235 end = begin;
2236
2237 beglen = strlen(begin);
2238 taglen = 1 + beglen + 1; /* '<begin' + '>' */
2239 parlen = 0; /* Avoid warning. */
2240 if (param) {
2241 parlen = JSSTRING_LENGTH(param);
2242 taglen += 2 + parlen + 1; /* '="param"' */
2243 }
2244 endlen = strlen(end);
2245 taglen += JSSTRING_LENGTH(str) + 2 + endlen + 1; /* 'str</end>' */
2246
2247 if (taglen >= ~(size_t)0 / sizeof(jschar)) {
2248 js_ReportAllocationOverflow(cx);
2249 return JS_FALSE;
2250 }
2251
2252 tagbuf = (jschar *) JS_malloc(cx, (taglen + 1) * sizeof(jschar));
2253 if (!tagbuf)
2254 return JS_FALSE;
2255
2256 j = 0;
2257 tagbuf[j++] = '<';
2258 for (i = 0; i < beglen; i++)
2259 tagbuf[j++] = (jschar)begin[i];
2260 if (param) {
2261 tagbuf[j++] = '=';
2262 tagbuf[j++] = '"';
2263 js_strncpy(&tagbuf[j], JSSTRING_CHARS(param), parlen);
2264 j += parlen;
2265 tagbuf[j++] = '"';
2266 }
2267 tagbuf[j++] = '>';
2268 js_strncpy(&tagbuf[j], JSSTRING_CHARS(str), JSSTRING_LENGTH(str));
2269 j += JSSTRING_LENGTH(str);
2270 tagbuf[j++] = '<';
2271 tagbuf[j++] = '/';
2272 for (i = 0; i < endlen; i++)
2273 tagbuf[j++] = (jschar)end[i];
2274 tagbuf[j++] = '>';
2275 JS_ASSERT(j == taglen);
2276 tagbuf[j] = 0;
2277
2278 str = js_NewString(cx, tagbuf, taglen);
2279 if (!str) {
2280 free((char *)tagbuf);
2281 return JS_FALSE;
2282 }
2283 *vp = STRING_TO_JSVAL(str);
2284 return JS_TRUE;
2285 }
2286
2287 static JSBool
2288 tagify_value(JSContext *cx, uintN argc, jsval *vp,
2289 const char *begin, const char *end)
2290 {
2291 JSString *param;
2292
2293 param = ArgToRootedString(cx, argc, vp, 0);
2294 if (!param)
2295 return JS_FALSE;
2296 return tagify(cx, begin, param, end, vp);
2297 }
2298
2299 static JSBool
2300 str_bold(JSContext *cx, uintN argc, jsval *vp)
2301 {
2302 return tagify(cx, "b", NULL, NULL, vp);
2303 }
2304
2305 static JSBool
2306 str_italics(JSContext *cx, uintN argc, jsval *vp)
2307 {
2308 return tagify(cx, "i", NULL, NULL, vp);
2309 }
2310
2311 static JSBool
2312 str_fixed(JSContext *cx, uintN argc, jsval *vp)
2313 {
2314 return tagify(cx, "tt", NULL, NULL, vp);
2315 }
2316
2317 static JSBool
2318 str_fontsize(JSContext *cx, uintN argc, jsval *vp)
2319 {
2320 return tagify_value(cx, argc, vp, "font size", "font");
2321 }
2322
2323 static JSBool
2324 str_fontcolor(JSContext *cx, uintN argc, jsval *vp)
2325 {
2326 return tagify_value(cx, argc, vp, "font color", "font");
2327 }
2328
2329 static JSBool
2330 str_link(JSContext *cx, uintN argc, jsval *vp)
2331 {
2332 return tagify_value(cx, argc, vp, "a href", "a");
2333 }
2334
2335 static JSBool
2336 str_anchor(JSContext *cx, uintN argc, jsval *vp)
2337 {
2338 return tagify_value(cx, argc, vp, "a name", "a");
2339 }
2340
2341 static JSBool
2342 str_strike(JSContext *cx, uintN argc, jsval *vp)
2343 {
2344 return tagify(cx, "strike", NULL, NULL, vp);
2345 }
2346
2347 static JSBool
2348 str_small(JSContext *cx, uintN argc, jsval *vp)
2349 {
2350 return tagify(cx, "small", NULL, NULL, vp);
2351 }
2352
2353 static JSBool
2354 str_big(JSContext *cx, uintN argc, jsval *vp)
2355 {
2356 return tagify(cx, "big", NULL, NULL, vp);
2357 }
2358
2359 static JSBool
2360 str_blink(JSContext *cx, uintN argc, jsval *vp)
2361 {
2362 return tagify(cx, "blink", NULL, NULL, vp);
2363 }
2364
2365 static JSBool
2366 str_sup(JSContext *cx, uintN argc, jsval *vp)
2367 {
2368 return tagify(cx, "sup", NULL, NULL, vp);
2369 }
2370
2371 static JSBool
2372 str_sub(JSContext *cx, uintN argc, jsval *vp)
2373 {
2374 return tagify(cx, "sub", NULL, NULL, vp);
2375 }
2376 #endif /* JS_HAS_STR_HTML_HELPERS */
2377
#ifdef JS_TRACER
/*
 * Trace-callable element fetch: return the one-character string at position
 * i of str, or NULL when i is not a valid index. A negative i becomes a huge
 * size_t after the cast and is therefore rejected by the same bounds test.
 */
JSString* FASTCALL
js_String_getelem(JSContext* cx, JSString* str, int32 i)
{
    size_t index = (size_t)i;

    if (index < JSSTRING_LENGTH(str))
        return js_GetUnitString(cx, str, index);
    return NULL;
}
#endif
2387
/*
 * Call-info records for the string comparison helpers, followed by the
 * trace-info records for the string natives that string_methods registers
 * through JS_TN (toString, charAt, charCodeAt, concat).
 *
 * NOTE(review): the JS_DEFINE_CALLINFO_* / JS_DEFINE_TRCINFO_* macros are
 * defined outside this file section; presumably JS_DEFINE_TRCINFO_n(name, ...)
 * produces the name##_trcinfo object that the method table refers to --
 * confirm against the macro definitions.
 */
2388 JS_DEFINE_CALLINFO_2(extern, BOOL, js_EqualStrings, STRING, STRING, 1, 1)
2389 JS_DEFINE_CALLINFO_2(extern, INT32, js_CompareStrings, STRING, STRING, 1, 1)
2390
2391 JS_DEFINE_TRCINFO_1(str_toString,
2392 (2, (extern, STRING_RETRY, String_p_toString, CONTEXT, THIS, 1, 1)))
2393 JS_DEFINE_TRCINFO_1(str_charAt,
2394 (3, (extern, STRING_RETRY, js_String_getelem, CONTEXT, THIS_STRING, INT32, 1, 1)))
/* charCodeAt has two specializations: without and with an index argument. */
2395 JS_DEFINE_TRCINFO_2(str_charCodeAt,
2396 (1, (extern, DOUBLE, js_String_p_charCodeAt0, THIS_STRING, 1, 1)),
2397 (2, (extern, DOUBLE, js_String_p_charCodeAt, THIS_STRING, DOUBLE, 1, 1)))
2398 JS_DEFINE_TRCINFO_1(str_concat,
2399 (3, (extern, STRING_RETRY, js_ConcatStrings, CONTEXT, THIS_STRING, STRING, 1, 1)))
2400
/*
 * Short aliases for the JSFUN_* flags used in the string_methods table:
 * GENERIC and PRIMITIVE name the individual flags, GENERIC_PRIMITIVE is
 * their bitwise OR.
 */
2401 #define GENERIC JSFUN_GENERIC_NATIVE
2402 #define PRIMITIVE JSFUN_THISP_PRIMITIVE
2403 #define GENERIC_PRIMITIVE (GENERIC | PRIMITIVE)
2404
/*
 * Method table passed to JS_InitClass by js_InitStringClass as the String
 * class's method list.
 *
 * Each entry lists the method name, the native implementing it, a small
 * integer (presumably the declared argument count -- confirm against
 * JS_FN/JS_TN) and function flags. JS_TN entries additionally name the
 * *_trcinfo record for that native. The table ends with JS_FS_END.
 */
2405 static JSFunctionSpec string_methods[] = {
2406 #if JS_HAS_TOSOURCE
2407 JS_FN("quote", str_quote, 0,GENERIC_PRIMITIVE),
2408 JS_FN(js_toSource_str, str_toSource, 0,JSFUN_THISP_STRING),
2409 #endif
2410
2411 /* Java-like methods. */
/* toString, valueOf and toJSON all share the str_toString native. */
2412 JS_TN(js_toString_str, str_toString, 0,JSFUN_THISP_STRING, str_toString_trcinfo),
2413 JS_FN(js_valueOf_str, str_toString, 0,JSFUN_THISP_STRING),
2414 JS_FN(js_toJSON_str, str_toString, 0,JSFUN_THISP_STRING),
2415 JS_FN("substring", str_substring, 2,GENERIC_PRIMITIVE),
2416 JS_FN("toLowerCase", str_toLowerCase, 0,GENERIC_PRIMITIVE),
2417 JS_FN("toUpperCase", str_toUpperCase, 0,GENERIC_PRIMITIVE),
2418 JS_TN("charAt", str_charAt, 1,GENERIC_PRIMITIVE, str_charAt_trcinfo),
2419 JS_TN("charCodeAt", str_charCodeAt, 1,GENERIC_PRIMITIVE, str_charCodeAt_trcinfo),
2420 JS_FN("indexOf", str_indexOf, 1,GENERIC_PRIMITIVE),
2421 JS_FN("lastIndexOf", str_lastIndexOf, 1,GENERIC_PRIMITIVE),
2422 JS_FN("trim", str_trim, 0,GENERIC_PRIMITIVE),
2423 JS_FN("trimLeft", str_trimLeft, 0,GENERIC_PRIMITIVE),
2424 JS_FN("trimRight", str_trimRight, 0,GENERIC_PRIMITIVE),
2425 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,GENERIC_PRIMITIVE),
2426 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,GENERIC_PRIMITIVE),
2427 JS_FN("localeCompare", str_localeCompare, 1,GENERIC_PRIMITIVE),
2428
2429 /* Perl-ish methods (search is actually Python-esque). */
2430 JS_FN("match", str_match, 1,GENERIC_PRIMITIVE),
2431 JS_FN("search", str_search, 1,GENERIC_PRIMITIVE),
2432 JS_FN("replace", str_replace, 2,GENERIC_PRIMITIVE),
2433 JS_FN("split", str_split, 2,GENERIC_PRIMITIVE),
2434 #if JS_HAS_PERL_SUBSTR
2435 JS_FN("substr", str_substr, 2,GENERIC_PRIMITIVE),
2436 #endif
2437
2438 /* Python-esque sequence methods. */
2439 JS_TN("concat", str_concat, 1,GENERIC_PRIMITIVE, str_concat_trcinfo),
2440 JS_FN("slice", str_slice, 2,GENERIC_PRIMITIVE),
2441
2442 /* HTML string methods. */
/* Unlike the entries above, these carry PRIMITIVE only, without GENERIC. */
2443 #if JS_HAS_STR_HTML_HELPERS
2444 JS_FN("bold", str_bold, 0,PRIMITIVE),
2445 JS_FN("italics", str_italics, 0,PRIMITIVE),
2446 JS_FN("fixed", str_fixed, 0,PRIMITIVE),
2447 JS_FN("fontsize", str_fontsize, 1,PRIMITIVE),
2448 JS_FN("fontcolor", str_fontcolor, 1,PRIMITIVE),
2449 JS_FN("link", str_link, 1,PRIMITIVE),
2450 JS_FN("anchor", str_anchor, 1,PRIMITIVE),
2451 JS_FN("strike", str_strike, 0,PRIMITIVE),
2452 JS_FN("small", str_small, 0,PRIMITIVE),
2453 JS_FN("big", str_big, 0,PRIMITIVE),
2454 JS_FN("blink", str_blink, 0,PRIMITIVE),
2455 JS_FN("sup", str_sup, 0,PRIMITIVE),
2456 JS_FN("sub", str_sub, 0,PRIMITIVE),
2457 #endif
2458
2459 JS_FS_END
2460 };
2461
2462 JSBool
2463 js_String(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
2464 {
2465 JSString *str;
2466
2467 if (argc > 0) {
2468 str = js_ValueToString(cx, argv[0]);
2469 if (!str)
2470 return JS_FALSE;
2471 argv[0] = STRING_TO_JSVAL(str);
2472 } else {
2473 str = cx->runtime->emptyString;
2474 }
2475 if (!JS_IsConstructing(cx)) {
2476 *rval = STRING_TO_JSVAL(str);
2477 return JS_TRUE;
2478 }
2479 obj->fslots[JSSLOT_PRIVATE] = STRING_TO_JSVAL(str);
2480 return JS_TRUE;
2481 }
2482
#ifdef JS_TRACER

/*
 * Trace-callable String construction: allocate a js_StringClass object with
 * the given prototype and store str in its private slot. Must only be called
 * on trace. Returns NULL if the object cannot be created.
 */
JSObject* FASTCALL
js_String_tn(JSContext* cx, JSObject* proto, JSString* str)
{
    JS_ASSERT(JS_ON_TRACE(cx));

    JSObject* wrapper = js_NewNativeObject(cx, &js_StringClass, proto, JSSLOT_PRIVATE + 1);
    if (wrapper)
        wrapper->fslots[JSSLOT_PRIVATE] = STRING_TO_JSVAL(str);
    return wrapper;
}

JS_DEFINE_CALLINFO_3(extern, OBJECT, js_String_tn, CONTEXT, CALLEE_PROTOTYPE, STRING, 0, 0)

#endif /* JS_TRACER */
2500
2501 static JSBool
2502 str_fromCharCode(JSContext *cx, uintN argc, jsval *vp)
2503 {
2504 jsval *argv;
2505 uintN i;
2506 uint16 code;
2507 jschar *chars;
2508 JSString *str;
2509
2510 argv = vp + 2;
2511 JS_ASSERT(argc < ARRAY_INIT_LIMIT);
2512 if (argc == 1 &&
2513 (code = js_ValueToUint16(cx, &argv[0])) < UNIT_STRING_LIMIT) {
2514 str = js_GetUnitStringForChar(cx, code);
2515 if (!str)
2516 return JS_FALSE;
2517 *vp = STRING_TO_JSVAL(str);
2518 return JS_TRUE;
2519 }
2520 chars = (jschar *) JS_malloc(cx, (argc + 1) * sizeof(jschar));
2521 if (!chars)
2522 return JS_FALSE;
2523 for (i = 0; i < argc; i++) {
2524 code = js_ValueToUint16(cx, &argv[i]);
2525 if (JSVAL_IS_NULL(argv[i])) {
2526 JS_free(cx, chars);
2527 return JS_FALSE;
2528 }
2529 chars[i] = (jschar)code;
2530 }
2531 chars[i] = 0;
2532 str = js_NewString(cx, chars, argc);
2533 if (!str) {
2534 JS_free(cx, chars);
2535 return JS_FALSE;
2536 }
2537 *vp = STRING_TO_JSVAL(str);
2538 return JS_TRUE;
2539 }
2540
#ifdef JS_TRACER
/*
 * Trace-callable single-argument fromCharCode: truncate i to a jschar and
 * return either the shared unit string (codes below UNIT_STRING_LIMIT) or a
 * freshly copied one-character string. Must only be called on trace.
 */
static JSString* FASTCALL
String_fromCharCode(JSContext* cx, int32 i)
{
    JS_ASSERT(JS_ON_TRACE(cx));

    jschar unit = (jschar)i;
    if (unit >= UNIT_STRING_LIMIT)
        return js_NewStringCopyN(cx, &unit, 1);
    return js_GetUnitStringForChar(cx, unit);
}
#endif
2552
/*
 * Trace-info record for str_fromCharCode, naming String_fromCharCode as its
 * specialization.
 * NOTE(review): String_fromCharCode is only defined under JS_TRACER while
 * this record is unconditional; presumably the macro expands to nothing
 * without the tracer -- confirm.
 */
2553 JS_DEFINE_TRCINFO_1(str_fromCharCode,
2554 (2, (static, STRING_RETRY, String_fromCharCode, CONTEXT, INT32, 1, 1)))
2555
/*
 * Static method table passed to JS_InitClass by js_InitStringClass; its only
 * entry is "fromCharCode".
 */
2556 static JSFunctionSpec string_static_methods[] = {
2557 JS_TN("fromCharCode", str_fromCharCode, 1, 0, str_fromCharCode_trcinfo),
2558 JS_FS_END
2559 };
2560
2561 static JSHashNumber
2562 js_hash_string_pointer(const void *key)
2563 {
2564 return (JSHashNumber)JS_PTR_TO_UINT32(key) >> JSVAL_TAGBITS;
2565 }
2566
2567 JSBool
2568 js_InitRuntimeStringState(JSContext *cx)
2569 {
2570 JSRuntime *rt;
2571
2572 rt = cx->runtime;
2573 rt->emptyString = ATOM_TO_STRING(rt->atomState.emptyAtom);
2574 return JS_TRUE;
2575 }
2576
2577 JSBool
2578 js_InitDeflatedStringCache(JSRuntime *rt)
2579 {
2580 JSHashTable *cache;
2581
2582 /* Initialize string cache */
2583 JS_ASSERT(!rt->deflatedStringCache);
2584 cache = JS_NewHashTable(8, js_hash_string_pointer,
2585 JS_CompareValues, JS_CompareValues,
2586 NULL, NULL);
2587 if (!cache)
2588 return JS_FALSE;
2589 rt->deflatedStringCache = cache;
2590
2591 #ifdef JS_THREADSAFE
2592 JS_ASSERT(!rt->deflatedStringCacheLock);
2593 rt->deflatedStringCacheLock = JS_NEW_LOCK();
2594 if (!rt->deflatedStringCacheLock)
2595 return JS_FALSE;
2596 #endif
2597 return JS_TRUE;
2598 }
2599
/*
 * The unit string table (sp) starts with UNIT_STRING_LIMIT JSString pointers;
 * UNIT_STRING_SPACE yields the jschar storage that begins right after them.
 */
2600 #define UNIT_STRING_SPACE(sp) ((jschar *) ((sp) + UNIT_STRING_LIMIT))
2601 #define UNIT_STRING_SPACE_RT(rt) UNIT_STRING_SPACE((rt)->unitStrings)
2602
/*
 * True when cp points inside that jschar storage, i.e. within the
 * 2 * UNIT_STRING_LIMIT jschars that follow its start. The difference is cast
 * to size_t, so a pointer below the start yields a huge value and fails the
 * test as well.
 */
2603 #define IN_UNIT_STRING_SPACE(sp,cp) \
2604 ((size_t)((cp) - UNIT_STRING_SPACE(sp)) < 2 * UNIT_STRING_LIMIT)
2605 #define IN_UNIT_STRING_SPACE_RT(rt,cp) \
2606 IN_UNIT_STRING_SPACE((rt)->unitStrings, cp)
2607
2608 JSString *
2609 js_GetUnitStringForChar(JSContext *cx, jschar c)
2610 {
2611 jschar *cp, i;
2612 JSRuntime *rt;
2613 JSString **sp;
2614
2615 JS_ASSERT(c < UNIT_STRING_LIMIT);
2616 rt = cx->runtime;
2617 if (!rt->unitStrings) {
2618 sp = (JSString **) calloc(UNIT_STRING_LIMIT * sizeof(JSString *) +
2619 UNIT_STRING_LIMIT * 2 * sizeof(jschar),
2620 1);
2621 if (!sp) {
2622 JS_ReportOutOfMemory(cx);
2623 return NULL;
2624 }
2625 cp = UNIT_STRING_SPACE(sp);
2626 for (i = 0; i < UNIT_STRING_LIMIT; i++) {
2627 *cp = i;
2628 cp += 2;
2629 }
2630 JS_LOCK_GC(rt);
2631 if (!rt->unitStrings) {
2632 rt->unitStrings = sp;
2633 JS_UNLOCK_GC(rt);
2634 } else {
2635 JS_UNLOCK_GC(rt);
2636 free(sp);
2637 }
2638 }
2639 if (!rt->unitStrings[c]) {
2640 JSString *str;
2641
2642 cp = UNIT_STRING_SPACE_RT(rt);
2643 str = js_NewString(cx, cp + 2 * c, 1);
2644 if (!str)
2645 return NULL;
2646 JS_LOCK_GC(rt);
2647 if (!rt->unitStrings[c])
2648 rt->unitStrings[c] = str;
2649 #ifdef DEBUG
2650 else
2651 JSFLATSTR_INIT(str, NULL, 0); /* avoid later assertion (bug 479381) */
2652 #endif
2653 JS_UNLOCK_GC(rt);
2654 }
2655 return rt->unitStrings[c];
2656 }
2657
2658 JSString *
2659 js_GetUnitString(JSContext *cx, JSString *str, size_t index)
2660 {
2661 jschar c;
2662
2663 JS_ASSERT(index < JSSTRING_LENGTH(str));
2664 c = JSSTRING_CHARS(str)[index];
2665 if (c >= UNIT_STRING_LIMIT)
2666 return js_NewDependentString(cx, str, index, 1);
2667 return js_GetUnitStringForChar(cx, c);
2668 }
2669
2670 void
2671 js_FinishUnitStrings(JSRuntime *rt)
2672 {
2673 free(rt->unitStrings);
2674 rt->unitStrings = NULL;
2675 }
2676
2677 void
2678 js_FinishRuntimeStringState(JSContext *cx)
2679 {
2680 cx->runtime->emptyString = NULL;
2681 }
2682
2683 void
2684 js_FinishDeflatedStringCache(JSRuntime *rt)
2685 {
2686 if (rt->deflatedStringCache) {
2687 JS_HashTableDestroy(rt->deflatedStringCache);
2688 rt->deflatedStringCache = NULL;
2689 }
2690 #ifdef JS_THREADSAFE
2691 if (rt->deflatedStringCacheLock) {
2692 JS_DESTROY_LOCK(rt->deflatedStringCacheLock);
2693 rt->deflatedStringCacheLock = NULL;
2694 }
2695 #endif
2696 }
2697
2698 JSObject *
2699 js_InitStringClass(JSContext *cx, JSObject *obj)
2700 {
2701 JSObject *proto;
2702
2703 /* Define the escape, unescape functions in the global object. */
2704 if (!JS_DefineFunctions(cx, obj, string_functions))
2705 return NULL;
2706
2707 proto = JS_InitClass(cx, obj, NULL, &js_StringClass, js_String, 1,
2708 NULL, string_methods,
2709 NULL, string_static_methods);
2710 if (!proto)
2711 return NULL;
2712 proto->fslots[JSSLOT_PRIVATE] = STRING_TO_JSVAL(cx->runtime->emptyString);
2713 if (!js_DefineNativeProperty(cx, proto, ATOM_TO_JSID(cx->runtime->atomState.lengthAtom),
2714 JSVAL_VOID, NULL, NULL,
2715 JSPROP_READONLY | JSPROP_PERMANENT | JSPROP_SHARED, 0, 0,
2716 NULL)) {
2717 return JS_FALSE;
2718 }
2719
2720 return proto;
2721 }
2722
2723 JSString *
2724 js_NewString(JSContext *cx, jschar *chars, size_t length)
2725 {
2726 JSString *str;
2727
2728 if (length > JSSTRING_LENGTH_MASK) {
2729 if (JS_ON_TRACE(cx)) {
2730 /*
2731 * If we can't leave the trace, signal OOM condition, otherwise
2732 * exit from trace and proceed with GC.
2733 */
2734 if (!js_CanLeaveTrace(cx))
2735 return NULL;
2736
2737 js_LeaveTrace(cx);
2738 }
2739 js_ReportAllocationOverflow(cx);
2740 return NULL;
2741 }
2742
2743 str = (JSString *) js_NewGCThing(cx, GCX_STRING, sizeof(JSString));
2744 if (!str)
2745 return NULL;
2746 JSFLATSTR_INIT(str, chars, length);
2747 #ifdef DEBUG
2748 {
2749 JSRuntime *rt = cx->runtime;
2750 JS_RUNTIME_METER(rt, liveStrings);
2751 JS_RUNTIME_METER(rt, totalStrings);
2752 JS_LOCK_RUNTIME_VOID(rt,
2753 (rt->lengthSum += (double)length,
2754 rt->lengthSquaredSum += (double)length * (double)length));
2755 }
2756 #endif
2757 return str;
2758 }
2759
2760 JSString *
2761 js_NewDependentString(JSContext *cx, JSString *base, size_t start,
2762 size_t length)
2763 {
2764 JSString *ds;
2765
2766 if (length == 0)
2767 return cx->runtime->emptyString;
2768
2769 if (start == 0 && length == JSSTRING_LENGTH(base))
2770 return base;
2771
2772 if (start > JSSTRDEP_START_MASK ||
2773 (start != 0 && length > JSSTRDEP_LENGTH_MASK)) {
2774 return js_NewStringCopyN(cx, JSSTRING_CHARS(base) + start, length);
2775 }
2776
2777 ds = (JSString *)js_NewGCThing(cx, GCX_STRING, sizeof(JSString));
2778 if (!ds)
2779 return NULL;
2780 if (start == 0)
2781 JSPREFIX_INIT(ds, base, length);
2782 else
2783 JSSTRDEP_INIT(ds, base, start, length);
2784 #ifdef DEBUG
2785 {
2786 JSRuntime *rt = cx->runtime;
2787 JS_RUNTIME_METER(rt, liveDependentStrings);
2788 JS_RUNTIME_METER(rt, totalDependentStrings);
2789 JS_RUNTIME_METER(rt, liveStrings);
2790 JS_RUNTIME_METER(rt, totalStrings);
2791 JS_LOCK_RUNTIME_VOID(rt,
2792 (rt->strdepLengthSum += (double)length,
2793 rt->strdepLengthSquaredSum += (double)length * (double)length));
2794 JS_LOCK_RUNTIME_VOID(rt,
2795 (rt->lengthSum += (double)length,
2796 rt->lengthSquaredSum += (double)length * (double)length));
2797 }
2798 #endif
2799 return ds;
2800 }
2801
#ifdef DEBUG
#include <math.h>

/*
 * Debug-only: write to stderr the total count, mean length and standard
 * deviation for all strings and then for dependent strings, using the sums
 * accumulated in rt.
 */
void
printJSStringStats(JSRuntime *rt)
{
    double sigma;

    double allMean = JS_MeanAndStdDev(rt->totalStrings, rt->lengthSum,
                                      rt->lengthSquaredSum, &sigma);
    fprintf(stderr, "%lu total strings, mean length %g (sigma %g)\n",
            (unsigned long)rt->totalStrings, allMean, sigma);

    double depMean = JS_MeanAndStdDev(rt->totalDependentStrings, rt->strdepLengthSum,
                                      rt->strdepLengthSquaredSum, &sigma);
    fprintf(stderr, "%lu total dependent strings, mean length %g (sigma %g)\n",
            (unsigned long)rt->totalDependentStrings, depMean, sigma);
}
#endif
2822
2823 JSString *
2824 js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n)
2825 {
2826 jschar *news;
2827 JSString *str;
2828
2829 news = (jschar *) JS_malloc(cx, (n + 1) * sizeof(jschar));
2830 if (!news)
2831 return NULL;
2832 js_strncpy(news, s, n);
2833 news[n] = 0;
2834 str = js_NewString(cx, news, n);
2835 if (!str)
2836 JS_free(cx, news);
2837 return str;
2838 }
2839
2840 JSString *
2841 js_NewStringCopyZ(JSContext *cx, const jschar *s)
2842 {
2843 size_t n, m;
2844 jschar *news;
2845 JSString *str;
2846
2847 n = js_strlen(s);
2848 m = (n + 1) * sizeof(jschar);
2849 news = (jschar *) JS_malloc(cx, m);
2850 if (!news)
2851 return NULL;
2852 memcpy(news, s, m);
2853 str = js_NewString(cx, news, n);
2854 if (!str)
2855 JS_free(cx, news);
2856 return str;
2857 }
2858
2859 void
2860 js_PurgeDeflatedStringCache(JSRuntime *rt, JSString *str)
2861 {
2862 JSHashNumber hash;
2863 JSHashEntry *he, **hep;
2864
2865 hash = js_hash_string_pointer(str);
2866 JS_ACQUIRE_LOCK(rt->deflatedStringCacheLock);
2867 hep = JS_HashTableRawLookup(rt->deflatedStringCache, hash, str);
2868 he = *hep;
2869 if (he) {
2870 #ifdef DEBUG
2871 rt->deflatedStringCacheBytes -= JSSTRING_LENGTH(str);
2872 #endif
2873 free(he->value);
2874 JS_HashTableRawRemove(rt->deflatedStringCache, hep, he);
2875 }
2876 JS_RELEASE_LOCK(rt->deflatedStringCacheLock);
2877 }
2878
/*
 * Finalizer slots for external string types, one per GC type from
 * GCX_EXTERNAL_STRING up to GCX_NTYPES. All slots start out NULL;
 * js_ChangeExternalStringFinalizer fills or replaces them, and
 * js_FinalizeStringRT indexes the table by a string's non-negative type.
 * NOTE(review): the initializer spells out eight NULLs -- presumably
 * GCX_NTYPES - GCX_EXTERNAL_STRING is 8; confirm.
 */
2879 static JSStringFinalizeOp str_finalizers[GCX_NTYPES - GCX_EXTERNAL_STRING] = {
2880 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
2881 };
2882
2883 intN
2884 js_ChangeExternalStringFinalizer(JSStringFinalizeOp oldop,
2885 JSStringFinalizeOp newop)
2886 {
2887 uintN i;
2888
2889 for (i = 0; i != JS_ARRAY_LENGTH(str_finalizers); i++) {
2890 if (str_finalizers[i] == oldop) {
2891 str_finalizers[i] = newop;
2892 return (intN) i;
2893 }
2894 }
2895 return -1;
2896 }
2897
2898 /*
2899 * cx is NULL when we are called from js_FinishAtomState to force the
2900 * finalization of the permanently interned strings.
2901 */
2902 void
2903 js_FinalizeStringRT(JSRuntime *rt, JSString *str, intN type, JSContext *cx)
2904 {
2905 jschar *chars;
2906 JSBool valid;
2907 JSStringFinalizeOp finalizer;
2908
2909 JS_RUNTIME_UNMETER(rt, liveStrings);
2910 if (JSSTRING_IS_DEPENDENT(str)) {
2911 /* A dependent string can not be external and must be valid. */
2912 JS_ASSERT(type < 0);
2913 JS_ASSERT(JSSTRDEP_BASE(str));
2914 JS_RUNTIME_UNMETER(rt, liveDependentStrings);
2915 valid = JS_TRUE;
2916 } else {
2917 /* A stillborn string has null chars, so is not valid. */
2918 chars = JSFLATSTR_CHARS(str);
2919 valid = (chars != NULL);
2920 if (valid) {
2921 if (IN_UNIT_STRING_SPACE_RT(rt, chars)) {
2922 JS_ASSERT(rt->unitStrings[*chars] == str);
2923 JS_ASSERT(type < 0);
2924 rt->unitStrings[*chars] = NULL;
2925 } else if (type < 0) {
2926 free(chars);
2927 } else {
2928 JS_ASSERT((uintN) type < JS_ARRAY_LENGTH(str_finalizers));
2929 finalizer = str_finalizers[type];
2930 if (finalizer) {
2931 /*
2932 * Assume that the finalizer for the permanently interned
2933 * string knows how to deal with null context.
2934 */
2935 finalizer(cx, str);
2936 }
2937 }
2938 }
2939 }
2940 if (valid && JSSTRING_IS_DEFLATED(str))
2941 js_PurgeDeflatedStringCache(rt, str);
2942 }
2943
2944 JS_FRIEND_API(const char *)
2945 js_ValueToPrintable(JSContext *cx, jsval v, JSValueToStringFun v2sfun)
2946 {
2947 JSString *str;
2948
2949 str = v2sfun(cx, v);
2950 if (!str)
2951 return NULL;
2952 str = js_QuoteString(cx, str, 0);
2953 if (!str)
2954 return NULL;
2955 return js_GetStringBytes(cx, str);
2956 }
2957
2958 JS_FRIEND_API(JSString *)
2959 js_ValueToString(JSContext *cx, jsval v)
2960 {
2961 JSObject *obj;
2962 JSString *str;
2963
2964 if (JSVAL_IS_OBJECT(v)) {
2965 obj = JSVAL_TO_OBJECT(v);
2966 if (!obj)
2967 return ATOM_TO_STRING(cx->runtime->atomState.nullAtom);
2968 if (!OBJ_DEFAULT_VALUE(cx, obj, JSTYPE_STRING, &v))
2969 return NULL;
2970 }
2971 if (JSVAL_IS_STRING(v)) {
2972 str = JSVAL_TO_STRING(v);
2973 } else if (JSVAL_IS_INT(v)) {
2974 str = js_NumberToString(cx, JSVAL_TO_INT(v));
2975 } else if (JSVAL_IS_DOUBLE(v)) {
2976 str = js_NumberToString(cx, *JSVAL_TO_DOUBLE(v));
2977 } else if (JSVAL_IS_BOOLEAN(v)) {
2978 str = js_BooleanToString(cx, JSVAL_TO_BOOLEAN(v));
2979 } else {
2980 str = ATOM_TO_STRING(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
2981 }
2982 return str;
2983 }
2984
2985 JS_FRIEND_API(JSString *)
2986 js_ValueToSource(JSContext *cx, jsval v)
2987 {
2988 JSTempValueRooter tvr;
2989 JSString *str;
2990
2991 if (JSVAL_IS_VOID(v))
2992 return ATOM_TO_STRING(cx->runtime->atomState.void0Atom);
2993 if (JSVAL_IS_STRING(v))
2994 return js_QuoteString(cx, JSVAL_TO_STRING(v), '"');
2995 if (JSVAL_IS_PRIMITIVE(v)) {
2996 /* Special case to preserve negative zero, _contra_ toString. */
2997 if (JSVAL_IS_DOUBLE(v) && JSDOUBLE_IS_NEGZERO(*JSVAL_TO_DOUBLE(v))) {
2998 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
2999 static const jschar js_negzero_ucNstr[] = {'-', '0'};
3000
3001 return js_NewStringCopyN(cx, js_negzero_ucNstr, 2);
3002 }
3003 return js_ValueToString(cx, v);
3004 }
3005
3006 JS_PUSH_SINGLE_TEMP_ROOT(cx, JSVAL_NULL, &tvr);
3007 if (!js_TryMethod(cx, JSVAL_TO_OBJECT(v),
3008 cx->runtime->atomState.toSourceAtom,
3009 0, NULL, &tvr.u.value)) {
3010 str = NULL;
3011 } else {
3012 str = js_ValueToString(cx, tvr.u.value);
3013 }
3014 JS_POP_TEMP_ROOT(cx, &tvr);
3015 return str;
3016 }
3017
3018 /*
3019 * str is not necessarily a GC thing here.
3020 */
3021 uint32
3022 js_HashString(JSString *str)
3023 {
3024 const jschar *s;
3025 size_t n;
3026 uint32 h;
3027
3028 JSSTRING_CHARS_AND_LENGTH(str, s, n);
3029 for (h = 0; n; s++, n--)
3030 h = JS_ROTATE_LEFT32(h, 4) ^ *s;
3031 return h;
3032 }
3033
3034 /*
3035 * str is not necessarily a GC thing here.
3036 */
3037 JSBool JS_FASTCALL
3038 js_EqualStrings(JSString *str1, JSString *str2)
3039 {
3040 size_t n;
3041 const jschar *s1, *s2;
3042
3043 JS_ASSERT(str1);
3044 JS_ASSERT(str2);
3045
3046 /* Fast case: pointer equality could be a quick win. */
3047 if (str1 == str2)
3048 return JS_TRUE;
3049
3050 n = JSSTRING_LENGTH(str1);
3051 if (n != JSSTRING_LENGTH(str2))
3052 return JS_FALSE;
3053
3054 if (n == 0)
3055 return JS_TRUE;
3056
3057 s1 = JSSTRING_CHARS(str1), s2 = JSSTRING_CHARS(str2);
3058 do {
3059 if (*s1 != *s2)
3060 return JS_FALSE;
3061 ++s1, ++s2;
3062 } while (--n != 0);
3063
3064 return JS_TRUE;
3065 }
3066
3067 int32 JS_FASTCALL
3068 js_CompareStrings(JSString *str1, JSString *str2)
3069 {
3070 size_t l1, l2, n, i;
3071 const jschar *s1, *s2;
3072 intN cmp;
3073
3074 JS_ASSERT(str1);
3075 JS_ASSERT(str2);
3076
3077 /* Fast case: pointer equality could be a quick win. */
3078 if (str1 == str2)
3079 return 0;
3080
3081 JSSTRING_CHARS_AND_LENGTH(str1, s1, l1);
3082 JSSTRING_CHARS_AND_LENGTH(str2, s2, l2);
3083 n = JS_MIN(l1, l2);
3084 for (i = 0; i < n; i++) {
3085 cmp = s1[i] - s2[i];
3086 if (cmp != 0)
3087 return cmp;
3088 }
3089 return (intN)(l1 - l2);
3090 }
3091
3092 size_t
3093 js_strlen(const jschar *s)
3094 {
3095 const jschar *t;
3096
3097 for (t = s; *t != 0; t++)
3098 continue;
3099 return (size_t)(t - s);
3100 }
3101
3102 jschar *
3103 js_strchr(const jschar *s, jschar c)
3104 {
3105 while (*s != 0) {
3106 if (*s == c)
3107 return (jschar *)s;
3108 s++;
3109 }
3110 return NULL;
3111 }
3112
3113 jschar *
3114 js_strchr_limit(const jschar *s, jschar c, const jschar *limit)
3115 {
3116 while (s < limit) {
3117 if (*s == c)
3118 return (jschar *)s;
3119 s++;
3120 }
3121 return NULL;
3122 }
3123
3124 const jschar *
3125 js_SkipWhiteSpace(const jschar *s, const jschar *end)
3126 {
3127 JS_ASSERT(s <= end);
3128 while (s != end && JS_ISSPACE(*s))
3129 s++;
3130 return s;
3131 }
3132
3133 jschar *
3134 js_InflateString(JSContext *cx, const char *bytes, size_t *lengthp)
3135 {
3136 size_t nbytes, nchars, i;
3137 jschar *chars;
3138 #ifdef DEBUG
3139 JSBool ok;
3140 #endif
3141
3142 nbytes = *lengthp;
3143 if (js_CStringsAreUTF8) {
3144 if (!js_InflateStringToBuffer(cx, bytes, nbytes, NULL, &nchars))
3145 goto bad;
3146 chars = (jschar *) JS_malloc(cx, (nchars + 1) * sizeof (jschar));
3147 if (!chars)
3148 goto bad;
3149 #ifdef DEBUG
3150 ok =
3151 #endif
3152 js_InflateStringToBuffer(cx, bytes, nbytes, chars, &nchars);
3153 JS_ASSERT(ok);
3154 } else {
3155 nchars = nbytes;
3156 chars = (jschar *) JS_malloc(cx, (nchars + 1) * sizeof(jschar));
3157 if (!chars)
3158 goto bad;
3159 for (i = 0; i < nchars; i++)
3160 chars[i] = (unsigned char) bytes[i];
3161 }
3162 *lengthp = nchars;
3163 chars[nchars] = 0;
3164 return chars;
3165
3166 bad:
3167 /*
3168 * For compatibility with callers of JS_DecodeBytes we must zero lengthp
3169 * on errors.
3170 */
3171 *lengthp = 0;
3172 return NULL;
3173 }
3174
3175 /*
3176 * May be called with null cx by js_GetStringBytes, see below.
3177 */
3178 char *
3179 js_DeflateString(JSContext *cx, const jschar *chars, size_t nchars)
3180 {