/[jscoverage]/trunk/js/jsstr.h
ViewVC logotype

Contents of /trunk/js/jsstr.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 507 - (show annotations)
Sun Jan 10 07:23:34 2010 UTC (9 years, 11 months ago) by siliconforks
File MIME type: text/plain
File size: 39579 byte(s)
Update SpiderMonkey from Firefox 3.6rc1.

1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 *
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * The Original Code is Mozilla Communicator client code, released
17 * March 31, 1998.
18 *
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
23 *
24 * Contributor(s):
25 *
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
37 *
38 * ***** END LICENSE BLOCK ***** */
39
40 #ifndef jsstr_h___
41 #define jsstr_h___
42 /*
43 * JS string type implementation.
44 *
45 * A JS string is a counted array of unicode characters. To support handoff
46 * of API client memory, the chars are allocated separately from the length,
47 * necessitating a pointer after the count, to form a separately allocated
48 * string descriptor. String descriptors are GC'ed, while their chars are
49 * allocated from the malloc heap.
50 */
51 #include <ctype.h>
52 #include "jspubtd.h"
53 #include "jsprvtd.h"
54 #include "jslock.h"
55
56 JS_BEGIN_EXTERN_C
57
/* A size_t value with only bit |n| set. */
#define JSSTRING_BIT(n)             ((size_t)1 << (n))

/* A size_t mask covering the |n| low-order bits (bits 0 .. n-1). */
#define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)
60
#ifdef __cplusplus
/*
 * Forward declaration, visible to C++ translation units only so that this
 * header can still be included from LiveConnect C files.
 */
class TraceRecorder;
#endif /* __cplusplus */
64
/*
 * Limits used when range-checking pointers into the static unit-string and
 * int-string tables.
 */
enum {
    UNIT_STRING_LIMIT   = 256U,
    INT_STRING_LIMIT    = 256U
};
69
70 extern jschar *
71 js_GetDependentStringChars(JSString *str);
72
73 /*
74 * The GC-thing "string" type.
75 *
76 * When the DEPENDENT bit of the mLength field is unset, the mChars field
77 * points to a flat character array owned by its GC-thing descriptor. The
78 * array is terminated at index length by a zero character and the size of the
79 * array in bytes is (length + 1) * sizeof(jschar). The terminator is purely a
80 * backstop, in case the chars pointer flows out to native code that requires
81 * \u0000 termination.
82 *
83 * A flat string with the MUTABLE flag means that the string is accessible only
84 * from one thread and it is possible to turn it into a dependent string of the
85 * same length to optimize js_ConcatStrings. It is also possible to grow such a
86 * string, but extreme care must be taken to ensure that no other code relies
87 * on the original length of the string.
88 *
89 * A flat string with the ATOMIZED flag means that the string is hashed as
90 * an atom. This flag is used to avoid re-hashing the already-atomized string.
91 *
92 * Any string with the DEFLATED flag means that the string has an entry in the
93 * deflated string cache. The GC uses this flag to optimize string finalization
94 * and avoid an expensive cache lookup for strings that were never deflated.
95 *
96 * When the DEPENDENT flag is set, the string depends on characters of another
97 * string strongly referenced by the mBase field. The base member may point to
98 * another dependent string if chars() has not been called yet.
99 *
100 * The PREFIX flag determines the kind of the dependent string. When the flag
101 * is unset, the mLength field encodes both starting position relative to the
102 * base string and the number of characters in the dependent string, see
103 * DEPENDENT_START_MASK and DEPENDENT_LENGTH_MASK below for details.
104 *
 * When the PREFIX flag is set, the dependent string is a prefix of the base
 * string. The number of characters in the prefix is encoded in the low
 * LENGTH_BITS bits of the mLength field and spans the same 0 .. MAX_LENGTH
 * range as the length of a flat string.
109 *
110 * NB: Always use the length() and chars() accessor methods.
111 */
112 #ifdef __cplusplus /* Allow inclusion from LiveConnect C files. */
113 struct JSString {
114 friend class TraceRecorder;
115
116 friend JSAtom *
117 js_AtomizeString(JSContext *cx, JSString *str, uintN flags);
118
119 friend JSString * JS_FASTCALL
120 js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
121
122 size_t mLength;
123 union {
124 jschar *mChars;
125 JSString *mBase;
126 };
127
128 /*
129 * Definitions for flags stored in the high order bits of mLength.
130 *
131 * PREFIX and MUTABLE are two aliases for the same bit. PREFIX should be
132 * used only if DEPENDENT is set and MUTABLE should be used only if the
133 * string is flat.
134 *
135 * ATOMIZED is used only with flat, immutable strings.
136 */
137 enum
138 #if defined(_MSC_VER) && defined(_WIN64)
139 : size_t /* VC++ 64-bit incorrectly defaults this enum's size to int. */
140 #endif
141 {
142 DEPENDENT = JSSTRING_BIT(JS_BITS_PER_WORD - 1),
143 PREFIX = JSSTRING_BIT(JS_BITS_PER_WORD - 2),
144 MUTABLE = PREFIX,
145 ATOMIZED = JSSTRING_BIT(JS_BITS_PER_WORD - 3),
146 DEFLATED = JSSTRING_BIT(JS_BITS_PER_WORD - 4),
147
148 #if JS_BITS_PER_WORD > 32
149 LENGTH_BITS = 28,
150 #else
151 LENGTH_BITS = JS_BITS_PER_WORD - 4,
152 #endif
153 LENGTH_MASK = JSSTRING_BITMASK(LENGTH_BITS),
154
155 /*
156 * VC++ 64-bit incorrectly produces the compiler error "Conversion to
157 * enumeration type requires an explicit cast" unless we cast to size_t
158 * here.
159 */
160 DEPENDENT_LENGTH_BITS = size_t(LENGTH_BITS) / 2,
161 DEPENDENT_LENGTH_MASK = JSSTRING_BITMASK(DEPENDENT_LENGTH_BITS),
162 DEPENDENT_START_BITS = LENGTH_BITS - DEPENDENT_LENGTH_BITS,
163 DEPENDENT_START_SHIFT = DEPENDENT_LENGTH_BITS,
164 DEPENDENT_START_MASK = JSSTRING_BITMASK(DEPENDENT_START_BITS)
165 };
166
167 bool hasFlag(size_t flag) const {
168 return (mLength & flag) != 0;
169 }
170
171 public:
172 enum
173 #if defined(_MSC_VER) && defined(_WIN64)
174 : size_t /* VC++ 64-bit incorrectly defaults this enum's size to int. */
175 #endif
176 {
177 MAX_LENGTH = LENGTH_MASK,
178 MAX_DEPENDENT_START = DEPENDENT_START_MASK,
179 MAX_DEPENDENT_LENGTH = DEPENDENT_LENGTH_MASK
180 };
181
182 bool isDependent() const {
183 return hasFlag(DEPENDENT);
184 }
185
186 bool isFlat() const {
187 return !isDependent();
188 }
189
190 bool isDeflated() const {
191 return hasFlag(DEFLATED);
192 }
193
194 void setDeflated() {
195 JS_ATOMIC_SET_MASK((jsword *) &mLength, DEFLATED);
196 }
197
198 bool isMutable() const {
199 return !isDependent() && hasFlag(MUTABLE);
200 }
201
202 bool isAtomized() const {
203 return !isDependent() && hasFlag(ATOMIZED);
204 }
205
206 JS_ALWAYS_INLINE jschar *chars() {
207 return isDependent() ? dependentChars() : flatChars();
208 }
209
210 JS_ALWAYS_INLINE size_t length() const {
211 return isDependent() ? dependentLength() : flatLength();
212 }
213
214 JS_ALWAYS_INLINE bool empty() const {
215 return length() == 0;
216 }
217
218 JS_ALWAYS_INLINE void getCharsAndLength(const jschar *&chars, size_t &length) {
219 if (isDependent()) {
220 length = dependentLength();
221 chars = dependentChars();
222 } else {
223 length = flatLength();
224 chars = flatChars();
225 }
226 }
227
228 JS_ALWAYS_INLINE void getCharsAndEnd(const jschar *&chars, const jschar *&end) {
229 end = isDependent()
230 ? dependentLength() + (chars = dependentChars())
231 : flatLength() + (chars = flatChars());
232 }
233
234 /* Specific flat string initializer and accessor methods. */
235 void initFlat(jschar *chars, size_t length) {
236 JS_ASSERT(length <= MAX_LENGTH);
237 mLength = length;
238 mChars = chars;
239 }
240
241 jschar *flatChars() const {
242 JS_ASSERT(isFlat());
243 return mChars;
244 }
245
246 size_t flatLength() const {
247 JS_ASSERT(isFlat());
248 return mLength & LENGTH_MASK;
249 }
250
251 /*
252 * Special flat string initializer that preserves the JSSTR_DEFLATED flag.
253 * Use this method when reinitializing an existing string which may be
254 * hashed to its deflated bytes. Newborn strings must use initFlat.
255 */
256 void reinitFlat(jschar *chars, size_t length) {
257 JS_ASSERT(length <= MAX_LENGTH);
258 mLength = (mLength & DEFLATED) | (length & ~DEFLATED);
259 mChars = chars;
260 }
261
262 /*
263 * Methods to manipulate atomized and mutable flags of flat strings. It is
264 * safe to use these without extra locking due to the following properties:
265 *
266 * * We do not have a flatClearAtomized method, as a string remains
267 * atomized until the GC collects it.
268 *
269 * * A thread may call flatSetMutable only when it is the only
270 * thread accessing the string until a later call to
271 * flatClearMutable.
272 *
273 * * Multiple threads can call flatClearMutable but the function actually
274 * clears the mutable flag only when the flag is set -- in which case
275 * only one thread can access the string (see previous property).
276 *
277 * Thus, when multiple threads access the string, JSString::flatSetAtomized
278 * is the only function that can update the mLength field of the string by
279 * changing the mutable bit from 0 to 1. We call the method only after the
280 * string has been hashed. When some threads in js_ValueToStringId see that
281 * the flag is set, it knows that the string was atomized.
282 *
283 * On the other hand, if the thread sees that the flag is unset, it could
284 * be seeing a stale value when another thread has just atomized the string
285 * and set the flag. But this can lead only to an extra call to
286 * js_AtomizeString. This function would find that the string was already
287 * hashed and return it with the atomized bit set.
288 */
289 void flatSetAtomized() {
290 JS_ASSERT(isFlat() && !isMutable());
291 JS_STATIC_ASSERT(sizeof(mLength) == sizeof(jsword));
292 JS_ATOMIC_SET_MASK((jsword *) &mLength, ATOMIZED);
293 }
294
295 void flatSetMutable() {
296 JS_ASSERT(isFlat() && !isAtomized());
297 mLength |= MUTABLE;
298 }
299
300 void flatClearMutable() {
301 JS_ASSERT(isFlat());
302 if (hasFlag(MUTABLE))
303 mLength &= ~MUTABLE;
304 }
305
306 void initDependent(JSString *bstr, size_t off, size_t len) {
307 JS_ASSERT(off <= MAX_DEPENDENT_START);
308 JS_ASSERT(len <= MAX_DEPENDENT_LENGTH);
309 mLength = DEPENDENT | (off << DEPENDENT_START_SHIFT) | len;
310 mBase = bstr;
311 }
312
313 /* See JSString::reinitFlat. */
314 void reinitDependent(JSString *bstr, size_t off, size_t len) {
315 JS_ASSERT(off <= MAX_DEPENDENT_START);
316 JS_ASSERT(len <= MAX_DEPENDENT_LENGTH);
317 mLength = DEPENDENT | (mLength & DEFLATED) | (off << DEPENDENT_START_SHIFT) | len;
318 mBase = bstr;
319 }
320
321 JSString *dependentBase() const {
322 JS_ASSERT(isDependent());
323 return mBase;
324 }
325
326 bool dependentIsPrefix() const {
327 JS_ASSERT(isDependent());
328 return hasFlag(PREFIX);
329 }
330
331 JS_ALWAYS_INLINE jschar *dependentChars() {
332 return dependentBase()->isDependent()
333 ? js_GetDependentStringChars(this)
334 : dependentBase()->flatChars() + dependentStart();
335 }
336
337 JS_ALWAYS_INLINE size_t dependentStart() const {
338 return dependentIsPrefix()
339 ? 0
340 : ((mLength >> DEPENDENT_START_SHIFT) & DEPENDENT_START_MASK);
341 }
342
343 JS_ALWAYS_INLINE size_t dependentLength() const {
344 JS_ASSERT(isDependent());
345 return mLength & (dependentIsPrefix() ? LENGTH_MASK : DEPENDENT_LENGTH_MASK);
346 }
347
348 void initPrefix(JSString *bstr, size_t len) {
349 JS_ASSERT(len <= MAX_LENGTH);
350 mLength = DEPENDENT | PREFIX | len;
351 mBase = bstr;
352 }
353
354 /* See JSString::reinitFlat. */
355 void reinitPrefix(JSString *bstr, size_t len) {
356 JS_ASSERT(len <= MAX_LENGTH);
357 mLength = DEPENDENT | PREFIX | (mLength & DEFLATED) | len;
358 mBase = bstr;
359 }
360
361 JSString *prefixBase() const {
362 JS_ASSERT(isDependent() && dependentIsPrefix());
363 return dependentBase();
364 }
365
366 void prefixSetBase(JSString *bstr) {
367 JS_ASSERT(isDependent() && dependentIsPrefix());
368 mBase = bstr;
369 }
370
371 static inline bool isUnitString(void *ptr) {
372 jsuword delta = reinterpret_cast<jsuword>(ptr) -
373 reinterpret_cast<jsuword>(unitStringTable);
374 if (delta >= UNIT_STRING_LIMIT * sizeof(JSString))
375 return false;
376
377 /* If ptr points inside the static array, it must be well-aligned. */
378 JS_ASSERT(delta % sizeof(JSString) == 0);
379 return true;
380 }
381
382 static inline bool isIntString(void *ptr) {
383 jsuword delta = reinterpret_cast<jsuword>(ptr) -
384 reinterpret_cast<jsuword>(intStringTable);
385 if (delta >= INT_STRING_LIMIT * sizeof(JSString))
386 return false;
387
388 /* If ptr points inside the static array, it must be well-aligned. */
389 JS_ASSERT(delta % sizeof(JSString) == 0);
390 return true;
391 }
392
393 static inline bool isStatic(void *ptr) {
394 return isUnitString(ptr) || isIntString(ptr);
395 }
396
397 #ifdef __SUNPRO_CC
398 #pragma align 8 (__1cIJSStringPunitStringTable_, __1cIJSStringOintStringTable_)
399 #endif
400
401 static JSString unitStringTable[];
402 static JSString intStringTable[];
403 static const char *deflatedIntStringTable[];
404
405 static JSString *unitString(jschar c);
406 static JSString *getUnitString(JSContext *cx, JSString *str, size_t index);
407 static JSString *intString(jsint i);
408 };
409 #else /* __cplusplus */
410
411 struct JSString {
412 size_t length;
413 union {
414 jschar *chars;
415 JSString *base;
416 } u;
417 };
418
/*
 * Definitions for flags stored in the high order bits of JSString.length.
 * JSSTRFLAG_PREFIX and JSSTRFLAG_MUTABLE are two aliases for the same value.
 * JSSTRFLAG_PREFIX should be used only if JSSTRFLAG_DEPENDENT is set and
 * JSSTRFLAG_MUTABLE should be used only if the string is flat.
 * JSSTRFLAG_ATOMIZED is used only with the flat immutable strings.
 */
#define JSSTRFLAG_DEPENDENT         JSSTRING_BIT(JS_BITS_PER_WORD - 1)
#define JSSTRFLAG_PREFIX            JSSTRING_BIT(JS_BITS_PER_WORD - 2)
#define JSSTRFLAG_MUTABLE           JSSTRFLAG_PREFIX
#define JSSTRFLAG_ATOMIZED          JSSTRING_BIT(JS_BITS_PER_WORD - 3)
#define JSSTRFLAG_DEFLATED          JSSTRING_BIT(JS_BITS_PER_WORD - 4)

/*
 * Number of low-order bits that hold the length. This must match the C++
 * JSString::LENGTH_BITS value, which is capped at 28 when words are wider
 * than 32 bits; otherwise C and C++ code would decode lengths and dependent
 * start offsets of the same string differently.
 */
#if JS_BITS_PER_WORD > 32
# define JSSTRING_LENGTH_BITS       28
#else
# define JSSTRING_LENGTH_BITS       (JS_BITS_PER_WORD - 4)
#endif
#define JSSTRING_LENGTH_MASK        JSSTRING_BITMASK(JSSTRING_LENGTH_BITS)
434
/*
 * Universal JSString type inquiry and accessor macros.
 *
 * JSSTRING_HAS_FLAG yields the masked bits (nonzero when set), not 0/1.
 */
#define JSSTRING_BIT(n)             ((size_t)1 << (n))
#define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)
#define JSSTRING_HAS_FLAG(str,flg)  ((str)->length & (flg))
#define JSSTRING_IS_DEPENDENT(str)  JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT)
#define JSSTRING_IS_FLAT(str)       (!JSSTRING_IS_DEPENDENT(str))

/* Mutable: the MUTABLE bit is set and the DEPENDENT bit is clear. */
#define JSSTRING_IS_MUTABLE(str)                                              \
    (((str)->length & (JSSTRFLAG_DEPENDENT | JSSTRFLAG_MUTABLE)) ==           \
     JSSTRFLAG_MUTABLE)

/* Atomized: the ATOMIZED bit is set and the DEPENDENT bit is clear. */
#define JSSTRING_IS_ATOMIZED(str)                                             \
    (((str)->length & (JSSTRFLAG_DEPENDENT | JSSTRFLAG_ATOMIZED)) ==          \
     JSSTRFLAG_ATOMIZED)
447
/* Character pointer of any string, dispatching on the DEPENDENT flag. */
#define JSSTRING_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(str) ? JSSTRDEP_CHARS(str) : JSFLATSTR_CHARS(str))

/* Length of any string, dispatching on the DEPENDENT flag. */
#define JSSTRING_LENGTH(str)                                                  \
    (JSSTRING_IS_DEPENDENT(str) ? JSSTRDEP_LENGTH(str) : JSFLATSTR_LENGTH(str))
454
455 JS_STATIC_ASSERT(sizeof(size_t) == sizeof(jsword));
456
457 #define JSSTRING_IS_DEFLATED(str) ((str)->length & JSSTRFLAG_DEFLATED)
458
459 #define JSSTRING_SET_DEFLATED(str) \
460 JS_ATOMIC_SET_MASK((jsword*)&(str)->length, JSSTRFLAG_DEFLATED)
461
/* Stores the length of |str| in |length_| and its characters in |chars_|. */
#define JSSTRING_CHARS_AND_LENGTH(str, chars_, length_)                       \
    ((void)(JSSTRING_IS_DEPENDENT(str)                                        \
            ? ((length_) = JSSTRDEP_LENGTH(str),                              \
               (chars_) = JSSTRDEP_CHARS(str))                                \
            : ((length_) = JSFLATSTR_LENGTH(str),                             \
               (chars_) = JSFLATSTR_CHARS(str))))

/* Stores the characters of |str| in |chars_| and the end pointer in |end_|. */
#define JSSTRING_CHARS_AND_END(str, chars_, end_)                             \
    ((void)((end_) = JSSTRING_IS_DEPENDENT(str)                               \
                     ? JSSTRDEP_LENGTH(str) + ((chars_) = JSSTRDEP_CHARS(str)) \
                     : JSFLATSTR_LENGTH(str) + ((chars_) = JSFLATSTR_CHARS(str))))
473
/*
 * Specific flat string initializer and accessor macros.
 *
 * JSFLATSTR_INIT asserts that |length_| has no bits outside
 * JSSTRING_LENGTH_MASK and stores it as-is, leaving every flag clear.
 */
#define JSFLATSTR_INIT(str, chars_, length_)                                  \
    ((void)(JS_ASSERT(((length_) & ~JSSTRING_LENGTH_MASK) == 0),              \
            (str)->length = (length_),                                        \
            (str)->u.chars = (chars_)))

/* Length of a flat string; asserts that |str| is flat. */
#define JSFLATSTR_LENGTH(str)                                                 \
    (JS_ASSERT(JSSTRING_IS_FLAT(str)), (str)->length & JSSTRING_LENGTH_MASK)

/* Character pointer of a flat string; asserts that |str| is flat. */
#define JSFLATSTR_CHARS(str)                                                  \
    (JS_ASSERT(JSSTRING_IS_FLAT(str)), (str)->u.chars)
484
/*
 * Special flat string initializer that preserves the JSSTRFLAG_DEFLATED flag.
 * Use this macro when reinitializing an existing string (which may be
 * hashed to its deflated bytes). Newborn strings must use JSFLATSTR_INIT.
 *
 * Every use of a macro argument is parenthesized so that compound argument
 * expressions keep their intended grouping.
 */
#define JSFLATSTR_REINIT(str, chars_, length_)                                \
    ((void)(JS_ASSERT(((length_) & ~JSSTRING_LENGTH_MASK) == 0),              \
            (str)->length = ((str)->length & JSSTRFLAG_DEFLATED) |            \
                            ((length_) & ~JSSTRFLAG_DEFLATED),                \
            (str)->u.chars = (chars_)))
495
/*
 * Macros to manipulate the atomized and mutable flags of flat strings. They
 * need no extra locking, for these reasons:
 *
 *   * There is no JSFLATSTR_CLEAR_ATOMIZED macro: a string stays atomized
 *     until the GC collects it.
 *
 *   * A thread may use JSFLATSTR_SET_MUTABLE only while it is the sole
 *     thread accessing the string, up to a later JSFLATSTR_CLEAR_MUTABLE.
 *
 *   * Several threads may use JSFLATSTR_CLEAR_MUTABLE, but the macro writes
 *     the length field only when the mutable flag is set -- and in that
 *     case only one thread can be accessing the string (previous point).
 *
 * Consequently, when several threads access the string,
 * JSFLATSTR_SET_ATOMIZED is the only macro that can update the length field,
 * by changing the atomized bit from 0 to 1. It is used only after the string
 * has been hashed, so a thread in js_ValueToStringId that sees the flag set
 * knows the string was atomized.
 *
 * A thread that sees the flag unset may be reading a stale value while
 * another thread has just atomized the string. The only cost is an extra
 * call to js_AtomizeString, which finds the string already hashed and
 * returns it with the atomized bit set.
 */
#define JSFLATSTR_SET_ATOMIZED(str)                                           \
    JS_BEGIN_MACRO                                                            \
        JS_ASSERT(JSSTRING_IS_FLAT(str) && !JSSTRING_IS_MUTABLE(str));        \
        JS_ATOMIC_SET_MASK((jsword*) &(str)->length, JSSTRFLAG_ATOMIZED);     \
    JS_END_MACRO

#define JSFLATSTR_SET_MUTABLE(str)                                            \
    ((void)(JS_ASSERT(JSSTRING_IS_FLAT(str) && !JSSTRING_IS_ATOMIZED(str)),   \
            (str)->length |= JSSTRFLAG_MUTABLE))

/* The && keeps the store from happening unless the flag is actually set. */
#define JSFLATSTR_CLEAR_MUTABLE(str)                                          \
    ((void)(JS_ASSERT(JSSTRING_IS_FLAT(str)),                                 \
            JSSTRING_HAS_FLAG(str, JSSTRFLAG_MUTABLE) &&                      \
            ((str)->length &= ~JSSTRFLAG_MUTABLE)))
536
/*
 * Specific dependent string shift/mask accessor and mutator macros.
 *
 * A non-prefix dependent string splits the length bits in two: the low
 * JSSTRDEP_LENGTH_BITS hold the length and the bits above them hold the
 * start offset within the base string.
 */
#define JSSTRDEP_LENGTH_BITS    (JSSTRING_LENGTH_BITS / 2)
#define JSSTRDEP_LENGTH_MASK    JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_BITS     (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_SHIFT    JSSTRDEP_LENGTH_BITS
#define JSSTRDEP_START_MASK     JSSTRING_BITMASK(JSSTRDEP_START_BITS)

/* Nonzero when the dependent string is a prefix of its base. */
#define JSSTRDEP_IS_PREFIX(str) JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX)

/* Start offset within the base; a prefix always starts at 0. */
#define JSSTRDEP_START(str)                                                   \
    (JSSTRDEP_IS_PREFIX(str)                                                  \
     ? 0                                                                      \
     : (((str)->length >> JSSTRDEP_START_SHIFT) & JSSTRDEP_START_MASK))

/* A prefix uses the full length mask, other dependents the narrow one. */
#define JSSTRDEP_LENGTH(str)                                                  \
    ((str)->length & (JSSTRDEP_IS_PREFIX(str)                                 \
                      ? JSSTRING_LENGTH_MASK                                  \
                      : JSSTRDEP_LENGTH_MASK))
554
/*
 * Initialize |str| as a dependent string covering |len| characters of |bstr|
 * starting at offset |off|. All flags other than JSSTRFLAG_DEPENDENT are
 * cleared.
 */
#define JSSTRDEP_INIT(str,bstr,off,len)                                       \
    ((str)->length = JSSTRFLAG_DEPENDENT                                      \
                   | ((off) << JSSTRDEP_START_SHIFT)                          \
                   | (len),                                                   \
     (str)->u.base = (bstr))

/*
 * Like JSSTRDEP_INIT, but preserves the JSSTRFLAG_DEFLATED flag already set
 * on |str|. See JSFLATSTR_REINIT.
 *
 * |str| is parenthesized at every use so that arguments such as *pp or &s
 * expand correctly.
 */
#define JSSTRDEP_REINIT(str,bstr,off,len)                                     \
    ((str)->length = JSSTRFLAG_DEPENDENT                                      \
                   | ((str)->length & JSSTRFLAG_DEFLATED)                     \
                   | ((off) << JSSTRDEP_START_SHIFT)                          \
                   | (len),                                                   \
     (str)->u.base = (bstr))
568
/*
 * Initialize |str| as a prefix of |bstr| that is |len| characters long. All
 * flags other than JSSTRFLAG_DEPENDENT and JSSTRFLAG_PREFIX are cleared.
 */
#define JSPREFIX_INIT(str,bstr,len)                                           \
    ((str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len),          \
     (str)->u.base = (bstr))

/*
 * Like JSPREFIX_INIT, but preserves the JSSTRFLAG_DEFLATED flag already set
 * on |str|. See JSFLATSTR_REINIT.
 *
 * |str| is parenthesized at every use so that arguments such as *pp or &s
 * expand correctly.
 */
#define JSPREFIX_REINIT(str,bstr,len)                                         \
    ((str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX |                 \
                     ((str)->length & JSSTRFLAG_DEFLATED) | (len),            \
     (str)->u.base = (bstr))
578
/* Base-string accessors for dependent and prefix strings. */
#define JSSTRDEP_BASE(str)              ((str)->u.base)
#define JSPREFIX_BASE(str)              JSSTRDEP_BASE(str)
#define JSPREFIX_SET_BASE(str,bstr)     ((str)->u.base = (bstr))

/*
 * Characters of a dependent string. A flat base is indexed directly by the
 * start offset; a dependent base is resolved by js_GetDependentStringChars.
 */
#define JSSTRDEP_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str))                                \
     ? js_GetDependentStringChars(str)                                        \
     : JSFLATSTR_CHARS(JSSTRDEP_BASE(str)) + JSSTRDEP_START(str))
587
588 #endif /* __cplusplus */
589
590 extern const jschar *
591 js_GetStringChars(JSContext *cx, JSString *str);
592
593 extern JSString * JS_FASTCALL
594 js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
595
596 extern const jschar *
597 js_UndependString(JSContext *cx, JSString *str);
598
599 extern JSBool
600 js_MakeStringImmutable(JSContext *cx, JSString *str);
601
602 extern JSString * JS_FASTCALL
603 js_toLowerCase(JSContext *cx, JSString *str);
604
605 extern JSString * JS_FASTCALL
606 js_toUpperCase(JSContext *cx, JSString *str);
607
608 struct JSSubString {
609 size_t length;
610 const jschar *chars;
611 };
612
613 extern jschar js_empty_ucstr[];
614 extern JSSubString js_EmptySubString;
615
616 /* Unicode character attribute lookup tables. */
617 extern const uint8 js_X[];
618 extern const uint8 js_Y[];
619 extern const uint32 js_A[];
620
/*
 * Enumerated Unicode general category types. Values are explicit because
 * the classification macros use them as bit positions; note that 17 is not
 * assigned.
 */
typedef enum JSCharType {
    JSCT_UNASSIGNED             = 0,

    /* Letters. */
    JSCT_UPPERCASE_LETTER       = 1,
    JSCT_LOWERCASE_LETTER       = 2,
    JSCT_TITLECASE_LETTER       = 3,
    JSCT_MODIFIER_LETTER        = 4,
    JSCT_OTHER_LETTER           = 5,

    /* Marks. */
    JSCT_NON_SPACING_MARK       = 6,
    JSCT_ENCLOSING_MARK         = 7,
    JSCT_COMBINING_SPACING_MARK = 8,

    /* Numbers. */
    JSCT_DECIMAL_DIGIT_NUMBER   = 9,
    JSCT_LETTER_NUMBER          = 10,
    JSCT_OTHER_NUMBER           = 11,

    /* Separators. */
    JSCT_SPACE_SEPARATOR        = 12,
    JSCT_LINE_SEPARATOR         = 13,
    JSCT_PARAGRAPH_SEPARATOR    = 14,

    /* Other. */
    JSCT_CONTROL                = 15,
    JSCT_FORMAT                 = 16,
    JSCT_PRIVATE_USE            = 18,
    JSCT_SURROGATE              = 19,

    /* Punctuation. */
    JSCT_DASH_PUNCTUATION       = 20,
    JSCT_START_PUNCTUATION      = 21,
    JSCT_END_PUNCTUATION        = 22,
    JSCT_CONNECTOR_PUNCTUATION  = 23,
    JSCT_OTHER_PUNCTUATION      = 24,

    /* Symbols. */
    JSCT_MATH_SYMBOL            = 25,
    JSCT_CURRENCY_SYMBOL        = 26,
    JSCT_MODIFIER_SYMBOL        = 27,
    JSCT_OTHER_SYMBOL           = 28
} JSCharType;
652
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 *
 * JS_CCODE looks up the 32-bit attribute word for |c|; JS_CTYPE extracts its
 * low five bits, which the macros below compare against JSCharType values.
 */
#define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)
656
/*
 * Each macro below builds a bit set of accepted JSCharType values and tests
 * whether the bit selected by JS_CTYPE(c) is a member; the result is 0 or 1.
 */

/* Any of the five letter categories. */
#define JS_ISALPHA(c)                                                         \
    ((((1 << JSCT_UPPERCASE_LETTER) |                                         \
       (1 << JSCT_LOWERCASE_LETTER) |                                         \
       (1 << JSCT_TITLECASE_LETTER) |                                         \
       (1 << JSCT_MODIFIER_LETTER) |                                          \
       (1 << JSCT_OTHER_LETTER))                                              \
      >> JS_CTYPE(c)) & 1)

/* A letter category or a decimal digit. */
#define JS_ISALNUM(c)                                                         \
    ((((1 << JSCT_UPPERCASE_LETTER) |                                         \
       (1 << JSCT_LOWERCASE_LETTER) |                                         \
       (1 << JSCT_TITLECASE_LETTER) |                                         \
       (1 << JSCT_MODIFIER_LETTER) |                                          \
       (1 << JSCT_OTHER_LETTER) |                                             \
       (1 << JSCT_DECIMAL_DIGIT_NUMBER))                                      \
      >> JS_CTYPE(c)) & 1)

/* A unicode letter, suitable for use in an identifier. */
#define JS_ISLETTER(c)                                                        \
    ((((1 << JSCT_UPPERCASE_LETTER) |                                         \
       (1 << JSCT_LOWERCASE_LETTER) |                                         \
       (1 << JSCT_TITLECASE_LETTER) |                                         \
       (1 << JSCT_MODIFIER_LETTER) |                                          \
       (1 << JSCT_OTHER_LETTER) |                                             \
       (1 << JSCT_LETTER_NUMBER))                                             \
      >> JS_CTYPE(c)) & 1)
680
/*
 * 'IdentifierPart' from the ECMA grammar: a Unicode letter, combining mark,
 * digit or connector punctuation.
 */
#define JS_ISIDPART(c)                                                        \
    ((((1 << JSCT_UPPERCASE_LETTER) |                                         \
       (1 << JSCT_LOWERCASE_LETTER) |                                         \
       (1 << JSCT_TITLECASE_LETTER) |                                         \
       (1 << JSCT_MODIFIER_LETTER) |                                          \
       (1 << JSCT_OTHER_LETTER) |                                             \
       (1 << JSCT_LETTER_NUMBER) |                                            \
       (1 << JSCT_NON_SPACING_MARK) |                                         \
       (1 << JSCT_COMBINING_SPACING_MARK) |                                   \
       (1 << JSCT_DECIMAL_DIGIT_NUMBER) |                                     \
       (1 << JSCT_CONNECTOR_PUNCTUATION))                                     \
      >> JS_CTYPE(c)) & 1)

/* Unicode control-format characters, ignored in input. */
#define JS_ISFORMAT(c)  (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
699
#ifdef __cplusplus /* Allow inclusion from LiveConnect C files. */
/*
 * Lookup table consulted by JS_ISWORD. It has external linkage so that the
 * regular expression compiler can use the raw table data as well.
 */
extern const bool js_alnum[];
#endif /* __cplusplus */
707
/*
 * This macro performs testing for the regular expression word class \w, which
 * is defined by ECMA-262 15.10.2.6 to be [0-9A-Z_a-z]. If we want a
 * Unicode-friendlier definition of "word", we should rename this macro to
 * something regexp-y.
 *
 * The range check is done on the unsigned value of |c| so that a negative
 * argument is rejected instead of indexing js_alnum out of bounds.
 */
#define JS_ISWORD(c)    ((unsigned)(c) < 128 && js_alnum[(c)])
715
/* Identifier start / continuation: the Unicode classes plus '_' and '$'. */
#define JS_ISIDSTART(c)     (JS_ISLETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c)       (JS_ISIDPART(c) || (c) == '_' || (c) == '$')

/* XML whitespace: space, tab, carriage return and line feed only. */
#define JS_ISXMLSPACE(c)                                                      \
    ((c) == ' ' || (c) == '\t' || (c) == '\r' || (c) == '\n')

/*
 * XML name tests: attribute-word bits 0x100 (name start) and 0x80 (name
 * character), extended with the punctuation listed in each macro.
 */
#define JS_ISXMLNSSTART(c)  ((JS_CCODE(c) & 0x00000100) || (c) == '_')
#define JS_ISXMLNS(c)                                                         \
    ((JS_CCODE(c) & 0x00000080) || (c) == '.' || (c) == '-' || (c) == '_')
#define JS_ISXMLNAMESTART(c) (JS_ISXMLNSSTART(c) || (c) == ':')
#define JS_ISXMLNAME(c)     (JS_ISXMLNS(c) || (c) == ':')

#define JS_ISDIGIT(c)       (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
728
729 #ifdef __cplusplus /* Allow inclusion from LiveConnect C files. */
730 static inline bool
731 #else
732 static JSBool
733 #endif
734 JS_ISSPACE(jschar c)
735 {
736 unsigned w = c;
737
738 if (w < 256)
739 return (w <= ' ' && (w == ' ' || (9 <= w && w <= 0xD))) || w == 0xA0;
740
741 return (JS_CCODE(w) & 0x00070000) == 0x00040000;
742 }
743
/*
 * ASCII-only printable test. The range check is done on the unsigned value
 * of |c| so that a negative argument is rejected before it can reach
 * isprint(), whose behaviour is undefined for such values.
 */
#define JS_ISPRINT(c)   ((unsigned)(c) < 128 && isprint(c))
745
/* Case tests compare the category from JS_CTYPE directly. */
#define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)

/*
 * Case mappings. When the attribute word has bit 0x00100000 (for upper) or
 * 0x00200000 (for lower) set, the word shifted right by 22 as a signed value
 * is subtracted from or added to |c|; otherwise |c| is returned unchanged.
 */
#define JS_TOUPPER(c)                                                         \
    ((jschar) ((JS_CCODE(c) & 0x00100000)                                     \
               ? (c) - ((int32)JS_CCODE(c) >> 22)                             \
               : (c)))
#define JS_TOLOWER(c)                                                         \
    ((jschar) ((JS_CCODE(c) & 0x00200000)                                     \
               ? (c) + ((int32)JS_CCODE(c) >> 22)                             \
               : (c)))
755
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 * Manually inline isdigit for performance; MSVC doesn't do this for us.
 *
 * JS7_ISHEX and JS7_ISLET do their range check on the unsigned value of |c|
 * so that a negative argument is rejected before it can reach the <ctype.h>
 * functions, whose behaviour is undefined for such values.
 *
 * JS7_UNDEC and JS7_UNHEX assume that |c| already passed JS7_ISDEC or
 * JS7_ISHEX respectively. The JS7_UNHEX expansion is fully parenthesized so
 * that it can be used safely inside larger expressions.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    ((unsigned)(c) < 128 && isxdigit(c))
#define JS7_UNHEX(c)    ((uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    ((unsigned)(c) < 128 && isalpha(c))
765
766 /* Initialize per-runtime string state for the first context in the runtime. */
767 extern JSBool
768 js_InitRuntimeStringState(JSContext *cx);
769
770 extern JSBool
771 js_InitDeflatedStringCache(JSRuntime *rt);
772
773 extern void
774 js_FinishRuntimeStringState(JSContext *cx);
775
776 extern void
777 js_FinishDeflatedStringCache(JSRuntime *rt);
778
779 /* Initialize the String class, returning its prototype object. */
780 extern JSClass js_StringClass;
781
782 extern JSObject *
783 js_InitStringClass(JSContext *cx, JSObject *obj);
784
/* Shared C-string constants; each is defined once outside this header. */
extern const char   js_escape_str[];
extern const char   js_unescape_str[];
extern const char   js_uneval_str[];
extern const char   js_decodeURI_str[];
extern const char   js_encodeURI_str[];
extern const char   js_decodeURIComponent_str[];
extern const char   js_encodeURIComponent_str[];
792
793 /* GC-allocate a string descriptor for the given malloc-allocated chars. */
794 extern JSString *
795 js_NewString(JSContext *cx, jschar *chars, size_t length);
796
797 /*
798 * GC-allocate a string descriptor and steal the char buffer held by |cb|.
799 * This function takes responsibility for adding the terminating '\0' required
800 * by js_NewString.
801 */
802 #ifdef __cplusplus /* Allow inclusion from LiveConnect C files. */
803 extern JSString *
804 js_NewStringFromCharBuffer(JSContext *cx, JSCharBuffer &cb);
805 #endif
806
807 extern JSString *
808 js_NewDependentString(JSContext *cx, JSString *base, size_t start,
809 size_t length);
810
811 /* Copy a counted string and GC-allocate a descriptor for it. */
812 extern JSString *
813 js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n);
814
815 /* Copy a C string and GC-allocate a descriptor for it. */
816 extern JSString *
817 js_NewStringCopyZ(JSContext *cx, const jschar *s);
818
819 /*
820 * Convert a value to a printable C string.
821 */
822 typedef JSString *(*JSValueToStringFun)(JSContext *cx, jsval v);
823
824 extern JS_FRIEND_API(const char *)
825 js_ValueToPrintable(JSContext *cx, jsval v, JSValueToStringFun v2sfun);
826
827 #define js_ValueToPrintableString(cx,v) \
828 js_ValueToPrintable(cx, v, js_ValueToString)
829
830 #define js_ValueToPrintableSource(cx,v) \
831 js_ValueToPrintable(cx, v, js_ValueToSource)
832
833 /*
834 * Convert a value to a string, returning null after reporting an error,
835 * otherwise returning a new string reference.
836 */
837 extern JS_FRIEND_API(JSString *)
838 js_ValueToString(JSContext *cx, jsval v);
839
840 /*
841 * This function implements E-262-3 section 9.8, toString. Convert the given
842 * value to a string of jschars appended to the given buffer. On error, the
843 * passed buffer may have partial results appended.
844 */
845 #ifdef __cplusplus /* Allow inclusion from LiveConnect C files. */
846 extern JS_FRIEND_API(JSBool)
847 js_ValueToCharBuffer(JSContext *cx, jsval v, JSCharBuffer &cb);
848 #endif
849
850 /*
851 * Convert a value to its source expression, returning null after reporting
852 * an error, otherwise returning a new string reference.
853 */
854 extern JS_FRIEND_API(JSString *)
855 js_ValueToSource(JSContext *cx, jsval v);
856
857 /*
858 * Compute a hash function from str. The caller can call this function even if
859 * str is not a GC-allocated thing.
860 */
861 extern uint32
862 js_HashString(JSString *str);
863
864 /*
865 * Test if strings are equal. The caller can call the function even if str1
866 * or str2 are not GC-allocated things.
867 */
868 extern JSBool JS_FASTCALL
869 js_EqualStrings(JSString *str1, JSString *str2);
870
871 /*
872 * Return less than, equal to, or greater than zero depending on whether
873 * str1 is less than, equal to, or greater than str2.
874 */
875 extern int32 JS_FASTCALL
876 js_CompareStrings(JSString *str1, JSString *str2);
877
878 /*
879 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
880 * The patlen argument must be positive and no greater than sBMHPatLenMax.
881 *
882 * Return the index of pat in text, -1 if not found, or sBMHBadPattern if pat is not ISO-Latin-1.
883 */
884 static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
885 static const jsuint sBMHPatLenMax = 255; /* skip table element is uint8 */
886 static const jsint sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
887
888 extern jsint
889 js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
890 const jschar *pat, jsuint patlen);
891
892 extern size_t
893 js_strlen(const jschar *s);
894
895 extern jschar *
896 js_strchr(const jschar *s, jschar c);
897
898 extern jschar *
899 js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
900
901 #define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar)) /* n counts jschars, not bytes; plain memcpy: always copies n chars, no NUL handling, t and s must not overlap */
902
903 /*
904 * Return s advanced past any Unicode white space characters.
905 */
906 static inline const jschar *
907 js_SkipWhiteSpace(const jschar *s, const jschar *end)
908 {
909 JS_ASSERT(s <= end); /* precondition: s must not lie past end */
910 while (s != end && JS_ISSPACE(*s)) /* s != end is tested first, so *end is never read */
911 s++;
912 return s; /* first char failing JS_ISSPACE, or end if every char in [s, end) passed */
913 }
914
915 /*
916 * Inflate bytes to JS chars and vice versa. Report out of memory via cx
917 * and return null on error, otherwise return the jschar or byte vector that
918 * was JS_malloc'ed. length is updated with the length of the new string in jschars.
919 */
920 extern jschar *
921 js_InflateString(JSContext *cx, const char *bytes, size_t *length);
922
923 extern char *
924 js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
925
926 /*
927 * Inflate bytes to JS chars into a buffer. 'chars' must be large enough for
928 * 'length' jschars. The buffer is NOT null-terminated. The destination length
929 * must be initialized with the buffer size and will contain on return the
930 * number of copied chars.
931 */
932 extern JSBool
933 js_InflateStringToBuffer(JSContext *cx, const char *bytes, size_t length,
934 jschar *chars, size_t *charsLength);
935
936 /*
937 * Get number of bytes in the deflated sequence of characters.
938 */
939 extern size_t
940 js_GetDeflatedStringLength(JSContext *cx, const jschar *chars,
941 size_t charsLength);
942
943 /*
944 * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
945 * 'length' chars. The buffer is NOT null-terminated. The destination length
946 * must be initialized with the buffer size and will contain on return the
947 * number of copied bytes.
948 */
949 extern JSBool
950 js_DeflateStringToBuffer(JSContext *cx, const jschar *chars,
951 size_t charsLength, char *bytes, size_t *length);
952
953 /*
954 * Associate bytes with str in the deflated string cache, returning true on
955 * successful association, false on out of memory.
956 */
957 extern JSBool
958 js_SetStringBytes(JSContext *cx, JSString *str, char *bytes, size_t length);
959
960 /*
961 * Find or create a deflated string cache entry for str that contains its
962 * characters chopped from Unicode code points into bytes.
963 */
964 extern const char *
965 js_GetStringBytes(JSContext *cx, JSString *str);
966
967 /* Remove a deflated string cache entry associated with str if any. */
968 extern void
969 js_PurgeDeflatedStringCache(JSRuntime *rt, JSString *str);
970
971 /* Export a few natives and a helper to other files in SpiderMonkey. */
972 extern JSBool
973 js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
974 jsval *rval);
975
976 extern JSBool
977 js_str_toString(JSContext *cx, uintN argc, jsval *vp);
978
979 /*
980 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
981 * least 6 bytes long. Return the number of UTF-8 bytes of data written.
982 */
983 extern int
984 js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
985
986 /*
987 * Write str into buffer escaping any non-printable or non-ASCII character.
988 * Guarantees that a NUL is at the end of the buffer. Returns the length of
989 * the written output, NOT including the NUL. If buffer is null, just returns
990 * the length of the output. If quote is not 0, it must be a single or double
991 * quote character that will quote the output.
992 *
993 * The function is only defined for debug builds.
994 */
995 #define js_PutEscapedString(buffer, bufferSize, str, quote) \
996 js_PutEscapedStringImpl(buffer, bufferSize, NULL, str, quote)
997
998 /*
999 * Write str into file escaping any non-printable or non-ASCII character.
1000 * Returns the number of bytes written to file. If quote is not 0, it must
1001 * be a single or double quote character that will quote the output.
1002 *
1003 * The function is only defined for debug builds.
1004 */
1005 #define js_FileEscapedString(file, str, quote) \
1006 (JS_ASSERT(file), js_PutEscapedStringImpl(NULL, 0, file, str, quote))
1007
1008 extern JS_FRIEND_API(size_t)
1009 js_PutEscapedStringImpl(char *buffer, size_t bufferSize, FILE *fp,
1010 JSString *str, uint32 quote);
1011
1012 extern JSBool
1013 js_String(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
1014
1015 JS_END_EXTERN_C
1016
1017 #endif /* jsstr_h___ */

  ViewVC Help
Powered by ViewVC 1.1.24