1 |
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
2 |
* |
3 |
* ***** BEGIN LICENSE BLOCK ***** |
4 |
* Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
5 |
* |
6 |
* The contents of this file are subject to the Mozilla Public License Version |
7 |
* 1.1 (the "License"); you may not use this file except in compliance with |
8 |
* the License. You may obtain a copy of the License at |
9 |
* http://www.mozilla.org/MPL/ |
10 |
* |
11 |
* Software distributed under the License is distributed on an "AS IS" basis, |
12 |
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
13 |
* for the specific language governing rights and limitations under the |
14 |
* License. |
15 |
* |
16 |
* The Original Code is Mozilla Communicator client code, released |
17 |
* March 31, 1998. |
18 |
* |
19 |
* The Initial Developer of the Original Code is |
20 |
* Netscape Communications Corporation. |
21 |
* Portions created by the Initial Developer are Copyright (C) 1998 |
22 |
* the Initial Developer. All Rights Reserved. |
23 |
* |
24 |
* Contributor(s): |
25 |
* |
26 |
* Alternatively, the contents of this file may be used under the terms of |
27 |
* either of the GNU General Public License Version 2 or later (the "GPL"), |
28 |
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
29 |
* in which case the provisions of the GPL or the LGPL are applicable instead |
30 |
* of those above. If you wish to allow use of your version of this file only |
31 |
* under the terms of either the GPL or the LGPL, and not to allow others to |
32 |
* use your version of this file under the terms of the MPL, indicate your |
33 |
* decision by deleting the provisions above and replace them with the notice |
34 |
* and other provisions required by the GPL or the LGPL. If you do not delete |
35 |
* the provisions above, a recipient may use your version of this file under |
36 |
* the terms of any one of the MPL, the GPL or the LGPL. |
37 |
* |
38 |
* ***** END LICENSE BLOCK ***** */ |
39 |
|
40 |
#ifndef jsstr_h___ |
41 |
#define jsstr_h___ |
42 |
/* |
43 |
* JS string type implementation. |
44 |
* |
45 |
* A JS string is a counted array of unicode characters. To support handoff |
46 |
* of API client memory, the chars are allocated separately from the length, |
47 |
* necessitating a pointer after the count, to form a separately allocated |
48 |
* string descriptor. String descriptors are GC'ed, while their chars are |
49 |
* allocated from the malloc heap. |
50 |
*/ |
51 |
#include <ctype.h> |
52 |
#include "jspubtd.h" |
53 |
#include "jsprvtd.h" |
54 |
|
55 |
JS_BEGIN_EXTERN_C |
56 |
|
57 |
/* |
58 |
* The GC-thing "string" type. |
59 |
* |
60 |
* When the JSSTRFLAG_DEPENDENT bit of the length field is unset, the u.chars |
61 |
* field points to a flat character array owned by its GC-thing descriptor. |
62 |
* The array is terminated at index length by a zero character and the size of |
63 |
* the array in bytes is (length + 1) * sizeof(jschar). The terminator is |
64 |
* purely a backstop, in case the chars pointer flows out to native code that |
65 |
* requires \u0000 termination. |
66 |
* |
67 |
* A flat string with JSSTRFLAG_MUTABLE set means that the string is accessible |
68 |
* only from one thread and it is possible to turn it into a dependent string |
69 |
* of the same length to optimize js_ConcatStrings. It is also possible to grow |
70 |
* such a string, but extreme care must be taken to ensure that no other code |
71 |
* relies on the original length of the string. |
72 |
* |
73 |
* A flat string with JSSTRFLAG_ATOMIZED set means that the string is hashed as |
74 |
* an atom. This flag is used to avoid re-hashing the already-atomized string. |
75 |
* |
76 |
* Any string with JSSTRFLAG_DEFLATED set means that the string has an entry |
77 |
* in the deflated string cache. The GC uses this flag to optimize string |
78 |
* finalization and avoid an expensive cache lookup for strings that were |
79 |
* never deflated. |
80 |
* |
81 |
* When JSSTRFLAG_DEPENDENT is set, the string depends on characters of another |
82 |
* string strongly referenced by the u.base field. The base member may point to |
83 |
* another dependent string if JSSTRING_CHARS has not been called yet. |
84 |
* |
85 |
* JSSTRFLAG_PREFIX determines the kind of the dependent string. When the flag |
86 |
* is unset, the length field encodes both starting position relative to the |
87 |
* base string and the number of characters in the dependent string, see |
88 |
* JSSTRDEP_START_MASK and JSSTRDEP_LENGTH_MASK macros below for details. |
89 |
* |
90 |
* When JSSTRFLAG_PREFIX is set, the dependent string is a prefix of the base |
91 |
* string. The number of characters in the prefix is encoded using all non-flag |
92 |
* bits of the length field and spans the same 0 .. SIZE_T_MAX/16 range as the |
93 |
* length of the flat string. |
94 |
* |
95 |
* NB: Always use the JSSTRING_LENGTH and JSSTRING_CHARS accessor macros. |
96 |
*/ |
97 |
struct JSString { |
98 |
size_t length; |
99 |
union { |
100 |
jschar *chars; |
101 |
JSString *base; |
102 |
} u; |
103 |
}; |
104 |
|
105 |
/*
 * Definitions for flags stored in the high order bits of JSString.length.
 * JSSTRFLAG_PREFIX and JSSTRFLAG_MUTABLE are two aliases for the same value.
 * JSSTRFLAG_PREFIX should be used only if JSSTRFLAG_DEPENDENT is set and
 * JSSTRFLAG_MUTABLE should be used only if the string is flat.
 * JSSTRFLAG_ATOMIZED is used only with the flat immutable strings.
 */
#define JSSTRFLAG_DEPENDENT         JSSTRING_BIT(JS_BITS_PER_WORD - 1)
#define JSSTRFLAG_PREFIX            JSSTRING_BIT(JS_BITS_PER_WORD - 2)
#define JSSTRFLAG_MUTABLE           JSSTRFLAG_PREFIX
#define JSSTRFLAG_ATOMIZED          JSSTRING_BIT(JS_BITS_PER_WORD - 3)
#define JSSTRFLAG_DEFLATED          JSSTRING_BIT(JS_BITS_PER_WORD - 4)

/* The four flag bits above leave the low-order bits for the length. */
#define JSSTRING_LENGTH_BITS        (JS_BITS_PER_WORD - 4)
#define JSSTRING_LENGTH_MASK        JSSTRING_BITMASK(JSSTRING_LENGTH_BITS)

/* Universal JSString type inquiry and accessor macros. */
#define JSSTRING_BIT(n)             ((size_t)1 << (n))
#define JSSTRING_BITMASK(n)         (JSSTRING_BIT(n) - 1)
#define JSSTRING_HAS_FLAG(str,flg)  ((str)->length & (flg))
#define JSSTRING_IS_DEPENDENT(str)  JSSTRING_HAS_FLAG(str, JSSTRFLAG_DEPENDENT)
#define JSSTRING_IS_FLAT(str)       (!JSSTRING_IS_DEPENDENT(str))

/*
 * The MUTABLE bit doubles as PREFIX on dependent strings and ATOMIZED is only
 * used with flat strings, so both tests also require DEPENDENT to be clear.
 */
#define JSSTRING_IS_MUTABLE(str)    (((str)->length & (JSSTRFLAG_DEPENDENT |  \
                                                       JSSTRFLAG_MUTABLE)) == \
                                     JSSTRFLAG_MUTABLE)
#define JSSTRING_IS_ATOMIZED(str)   (((str)->length & (JSSTRFLAG_DEPENDENT |  \
                                                       JSSTRFLAG_ATOMIZED)) ==\
                                     JSSTRFLAG_ATOMIZED)

/* Dispatch on the string kind; note that str is evaluated more than once. */
#define JSSTRING_CHARS(str)         (JSSTRING_IS_DEPENDENT(str)               \
                                     ? JSSTRDEP_CHARS(str)                    \
                                     : JSFLATSTR_CHARS(str))
#define JSSTRING_LENGTH(str)        (JSSTRING_IS_DEPENDENT(str)               \
                                     ? JSSTRDEP_LENGTH(str)                   \
                                     : JSFLATSTR_LENGTH(str))
140 |
|
141 |
/*
 * JSSTRING_SET_DEFLATED below casts the address of the size_t length field
 * to jsword *, so the two types must have the same size.
 */
JS_STATIC_ASSERT(sizeof(size_t) == sizeof(jsword));

/* Nonzero when JSSTRFLAG_DEFLATED is set in the length word of str. */
#define JSSTRING_IS_DEFLATED(str)   ((str)->length & JSSTRFLAG_DEFLATED)

/* Set JSSTRFLAG_DEFLATED in the length word through JS_ATOMIC_SET_MASK. */
#define JSSTRING_SET_DEFLATED(str)                                            \
    JS_ATOMIC_SET_MASK((jsword*)&(str)->length, JSSTRFLAG_DEFLATED)
147 |
|
148 |
/*
 * Store the character pointer of str in chars_ and its length in length_,
 * testing the dependent flag once.  chars_ and length_ must be assignable;
 * str is evaluated more than once.  The expansion has type void.
 */
#define JSSTRING_CHARS_AND_LENGTH(str, chars_, length_)                       \
    ((void)(JSSTRING_IS_DEPENDENT(str)                                        \
            ? ((length_) = JSSTRDEP_LENGTH(str),                              \
               (chars_) = JSSTRDEP_CHARS(str))                                \
            : ((length_) = JSFLATSTR_LENGTH(str),                             \
               (chars_) = JSFLATSTR_CHARS(str))))

/*
 * Store the character pointer of str in chars_ and, in end, that pointer
 * advanced by the string length.  The expansion has type void.
 */
#define JSSTRING_CHARS_AND_END(str, chars_, end)                              \
    ((void)((end) = JSSTRING_IS_DEPENDENT(str)                                \
                    ? JSSTRDEP_LENGTH(str) + ((chars_) = JSSTRDEP_CHARS(str)) \
                    : JSFLATSTR_LENGTH(str) + ((chars_) = JSFLATSTR_CHARS(str))))
159 |
|
160 |
/* Specific flat string initializer and accessor macros. */

/*
 * Initialize a flat string.  length_ must fit in JSSTRING_LENGTH_MASK
 * (asserted); every flag bit of str is cleared by the assignment.
 */
#define JSFLATSTR_INIT(str, chars_, length_)                                  \
    ((void)(JS_ASSERT(((length_) & ~JSSTRING_LENGTH_MASK) == 0),              \
            (str)->length = (length_), (str)->u.chars = (chars_)))

/* Length of a flat string with the flag bits masked off. */
#define JSFLATSTR_LENGTH(str)                                                 \
    (JS_ASSERT(JSSTRING_IS_FLAT(str)), (str)->length & JSSTRING_LENGTH_MASK)

/* Character array of a flat string. */
#define JSFLATSTR_CHARS(str)                                                  \
    (JS_ASSERT(JSSTRING_IS_FLAT(str)), (str)->u.chars)

/*
 * Special flat string initializer that preserves the JSSTRFLAG_DEFLATED flag.
 * Use this macro when reinitializing an existing string (which may be
 * hashed to its deflated bytes).  Newborn strings must use JSFLATSTR_INIT.
 *
 * length_ is parenthesized in the expansion so that an argument built from a
 * low-precedence operator (for example a conditional expression) is masked
 * as a whole.
 */
#define JSFLATSTR_REINIT(str, chars_, length_)                                \
    ((void)(JS_ASSERT(((length_) & ~JSSTRING_LENGTH_MASK) == 0),              \
            (str)->length = ((str)->length & JSSTRFLAG_DEFLATED) |            \
                            ((length_) & ~JSSTRFLAG_DEFLATED),                \
            (str)->u.chars = (chars_)))
181 |
|
182 |
/*
 * Macros to manipulate atomized and mutable flags of flat strings. It is safe
 * to use these without extra locking due to the following properties:
 *
 *   * We do not have a macro like JSFLATSTR_CLEAR_ATOMIZED as a string
 *     remains atomized until the GC collects it.
 *
 *   * A thread may call JSFLATSTR_SET_MUTABLE only when it is the only thread
 *     accessing the string until a later call to JSFLATSTR_CLEAR_MUTABLE.
 *
 *   * Multiple threads can call JSFLATSTR_CLEAR_MUTABLE but the macro
 *     actually clears the mutable flag only when the flag is set -- in which
 *     case only one thread can access the string (see previous property).
 *
 * Thus, when multiple threads access the string, JSFLATSTR_SET_ATOMIZED is
 * the only macro that can update the length field of the string by changing
 * the atomized bit from 0 to 1. We call the macro only after the string has
 * been hashed. When a thread in js_ValueToStringId sees that the flag is
 * set, it knows that the string was atomized.
 *
 * On the other hand, if the thread sees that the flag is unset, it could be
 * seeing a stale value when another thread has just atomized the string and
 * set the flag. But this can lead only to an extra call to js_AtomizeString.
 * This function would find that the string was already hashed and return it
 * with the atomized bit set.
 */
#define JSFLATSTR_SET_ATOMIZED(str)                                           \
    JS_BEGIN_MACRO                                                            \
        JS_ASSERT(JSSTRING_IS_FLAT(str) && !JSSTRING_IS_MUTABLE(str));        \
        JS_ATOMIC_SET_MASK((jsword*) &(str)->length, JSSTRFLAG_ATOMIZED);     \
    JS_END_MACRO

#define JSFLATSTR_SET_MUTABLE(str)                                            \
    ((void)(JS_ASSERT(JSSTRING_IS_FLAT(str) && !JSSTRING_IS_ATOMIZED(str)),   \
            (str)->length |= JSSTRFLAG_MUTABLE))

/* The length word is written only when the mutable flag is currently set. */
#define JSFLATSTR_CLEAR_MUTABLE(str)                                          \
    ((void)(JS_ASSERT(JSSTRING_IS_FLAT(str)),                                 \
            JSSTRING_HAS_FLAG(str, JSSTRFLAG_MUTABLE) &&                      \
            ((str)->length &= ~JSSTRFLAG_MUTABLE)))
222 |
|
223 |
/*
 * Specific dependent string shift/mask accessor and mutator macros.
 *
 * For a non-prefix dependent string the JSSTRING_LENGTH_BITS low-order bits
 * of the length word are split in two: the low JSSTRDEP_LENGTH_BITS hold the
 * character count and the JSSTRDEP_START_BITS above them hold the start
 * offset into the base string.
 */
#define JSSTRDEP_START_BITS     (JSSTRING_LENGTH_BITS-JSSTRDEP_LENGTH_BITS)
#define JSSTRDEP_START_SHIFT    JSSTRDEP_LENGTH_BITS
#define JSSTRDEP_START_MASK     JSSTRING_BITMASK(JSSTRDEP_START_BITS)
#define JSSTRDEP_LENGTH_BITS    (JSSTRING_LENGTH_BITS / 2)
#define JSSTRDEP_LENGTH_MASK    JSSTRING_BITMASK(JSSTRDEP_LENGTH_BITS)

#define JSSTRDEP_IS_PREFIX(str) JSSTRING_HAS_FLAG(str, JSSTRFLAG_PREFIX)

/* A prefix always starts at offset 0 of its base. */
#define JSSTRDEP_START(str)     (JSSTRDEP_IS_PREFIX(str) ? 0                  \
                                 : (((str)->length                            \
                                     >> JSSTRDEP_START_SHIFT)                 \
                                    & JSSTRDEP_START_MASK))

/* A prefix uses all non-flag bits for its length; otherwise only the low half. */
#define JSSTRDEP_LENGTH(str)    ((str)->length                                \
                                 & (JSSTRDEP_IS_PREFIX(str)                   \
                                    ? JSSTRING_LENGTH_MASK                    \
                                    : JSSTRDEP_LENGTH_MASK))
240 |
|
241 |
/*
 * Initialize str as a dependent string covering len characters of bstr
 * starting at offset off.  off and len are OR-ed into the length word without
 * any range check, and every other flag bit of str is cleared.
 */
#define JSSTRDEP_INIT(str,bstr,off,len)                                       \
    ((str)->length = JSSTRFLAG_DEPENDENT                                      \
                   | ((off) << JSSTRDEP_START_SHIFT)                          \
                   | (len),                                                   \
     (str)->u.base = (bstr))

/*
 * Like JSSTRDEP_INIT, but keeps the JSSTRFLAG_DEFLATED bit already present in
 * str.  See JSFLATSTR_REINIT.
 */
#define JSSTRDEP_REINIT(str,bstr,off,len)                                     \
    ((str)->length = JSSTRFLAG_DEPENDENT                                      \
                   | ((str)->length & JSSTRFLAG_DEFLATED)                     \
                   | ((off) << JSSTRDEP_START_SHIFT)                          \
                   | (len),                                                   \
     (str)->u.base = (bstr))

/* Initialize str as a prefix of bstr that is len characters long. */
#define JSPREFIX_INIT(str,bstr,len)                                           \
    ((str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX | (len),          \
     (str)->u.base = (bstr))

/*
 * Like JSPREFIX_INIT, but keeps the JSSTRFLAG_DEFLATED bit already present in
 * str.  See JSFLATSTR_REINIT.
 */
#define JSPREFIX_REINIT(str,bstr,len)                                         \
    ((str)->length = JSSTRFLAG_DEPENDENT | JSSTRFLAG_PREFIX |                 \
                     ((str)->length & JSSTRFLAG_DEFLATED) | (len),            \
     (str)->u.base = (bstr))
264 |
|
265 |
/* Base-string accessors; the prefix forms share the u.base member. */
#define JSSTRDEP_BASE(str)          ((str)->u.base)
#define JSPREFIX_BASE(str)          JSSTRDEP_BASE(str)
#define JSPREFIX_SET_BASE(str,bstr) ((str)->u.base = (bstr))

/*
 * Characters of a dependent string.  When the base is itself dependent the
 * lookup is handed to js_GetDependentStringChars; otherwise the result is the
 * flat base's character array advanced by the start offset of str.
 */
#define JSSTRDEP_CHARS(str)                                                   \
    (JSSTRING_IS_DEPENDENT(JSSTRDEP_BASE(str))                                \
     ? js_GetDependentStringChars(str)                                        \
     : JSFLATSTR_CHARS(JSSTRDEP_BASE(str)) + JSSTRDEP_START(str))
273 |
|
274 |
extern size_t |
275 |
js_MinimizeDependentStrings(JSString *str, int level, JSString **basep); |
276 |
|
277 |
extern jschar * |
278 |
js_GetDependentStringChars(JSString *str); |
279 |
|
280 |
extern const jschar * |
281 |
js_GetStringChars(JSContext *cx, JSString *str); |
282 |
|
283 |
extern JSString * JS_FASTCALL |
284 |
js_ConcatStrings(JSContext *cx, JSString *left, JSString *right); |
285 |
|
286 |
extern const jschar * |
287 |
js_UndependString(JSContext *cx, JSString *str); |
288 |
|
289 |
extern JSBool |
290 |
js_MakeStringImmutable(JSContext *cx, JSString *str); |
291 |
|
292 |
extern JSString* JS_FASTCALL |
293 |
js_toLowerCase(JSContext *cx, JSString *str); |
294 |
|
295 |
extern JSString* JS_FASTCALL |
296 |
js_toUpperCase(JSContext *cx, JSString *str); |
297 |
|
298 |
typedef struct JSCharBuffer { |
299 |
size_t length; |
300 |
jschar *chars; |
301 |
} JSCharBuffer; |
302 |
|
303 |
struct JSSubString { |
304 |
size_t length; |
305 |
const jschar *chars; |
306 |
}; |
307 |
|
308 |
extern jschar js_empty_ucstr[]; |
309 |
extern JSSubString js_EmptySubString; |
310 |
|
311 |
/* Unicode character attribute lookup tables. */ |
312 |
extern const uint8 js_X[]; |
313 |
extern const uint8 js_Y[]; |
314 |
extern const uint32 js_A[]; |
315 |
|
316 |
/*
 * Enumerated Unicode general category types.  The numeric values are used as
 * shift counts by the JS_IS* classification macros, so they must not change.
 * Note that no category is assigned the value 17.
 */
typedef enum JSCharType {
    JSCT_UNASSIGNED             = 0,
    JSCT_UPPERCASE_LETTER       = 1,
    JSCT_LOWERCASE_LETTER       = 2,
    JSCT_TITLECASE_LETTER       = 3,
    JSCT_MODIFIER_LETTER        = 4,
    JSCT_OTHER_LETTER           = 5,
    JSCT_NON_SPACING_MARK       = 6,
    JSCT_ENCLOSING_MARK         = 7,
    JSCT_COMBINING_SPACING_MARK = 8,
    JSCT_DECIMAL_DIGIT_NUMBER   = 9,
    JSCT_LETTER_NUMBER          = 10,
    JSCT_OTHER_NUMBER           = 11,
    JSCT_SPACE_SEPARATOR        = 12,
    JSCT_LINE_SEPARATOR         = 13,
    JSCT_PARAGRAPH_SEPARATOR    = 14,
    JSCT_CONTROL                = 15,
    JSCT_FORMAT                 = 16,
    JSCT_PRIVATE_USE            = 18,
    JSCT_SURROGATE              = 19,
    JSCT_DASH_PUNCTUATION       = 20,
    JSCT_START_PUNCTUATION      = 21,
    JSCT_END_PUNCTUATION        = 22,
    JSCT_CONNECTOR_PUNCTUATION  = 23,
    JSCT_OTHER_PUNCTUATION      = 24,
    JSCT_MATH_SYMBOL            = 25,
    JSCT_CURRENCY_SYMBOL        = 26,
    JSCT_MODIFIER_SYMBOL        = 27,
    JSCT_OTHER_SYMBOL           = 28
} JSCharType;
347 |
|
348 |
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 *
 * JS_CCODE looks c up through three tables: js_X is indexed by the bits of
 * (uint16)c above the low six, that entry combined with the low six bits of c
 * indexes js_Y, and the js_Y entry indexes js_A.  JS_CTYPE is the low five
 * bits of the resulting word.  c is evaluated more than once.
 */
#define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)
351 |
|
352 |
/*
 * Each classifier below builds a bit set of accepted JSCharType values and
 * tests the bit selected by JS_CTYPE(c).
 */
#define JS_ISALPHA(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER))                          \
                          >> JS_CTYPE(c)) & 1)

#define JS_ISALNUM(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_DECIMAL_DIGIT_NUMBER))                  \
                          >> JS_CTYPE(c)) & 1)

/* A unicode letter, suitable for use in an identifier. */
#define JS_ISLETTER(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                    \
                            (1 << JSCT_LOWERCASE_LETTER) |                    \
                            (1 << JSCT_TITLECASE_LETTER) |                    \
                            (1 << JSCT_MODIFIER_LETTER) |                     \
                            (1 << JSCT_OTHER_LETTER) |                        \
                            (1 << JSCT_LETTER_NUMBER))                        \
                           >> JS_CTYPE(c)) & 1)

/*
 * 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or
 * digit or connector punctuation.
 */
#define JS_ISIDPART(c)  ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_LETTER_NUMBER) |                        \
                           (1 << JSCT_NON_SPACING_MARK) |                     \
                           (1 << JSCT_COMBINING_SPACING_MARK) |               \
                           (1 << JSCT_DECIMAL_DIGIT_NUMBER) |                 \
                           (1 << JSCT_CONNECTOR_PUNCTUATION))                 \
                          >> JS_CTYPE(c)) & 1)

/* Unicode control-format characters, ignored in input */
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
394 |
|
395 |
/*
 * Per ECMA-262 15.10.2.6, these characters are the only ones that make up a
 * "word", as far as a RegExp is concerned.  If we want a Unicode-friendlier
 * definition of "word", we should rename this macro to something regexp-y.
 *
 * The (c) < 128 test runs before isalnum, so isalnum is never called for a
 * code of 128 or above.
 */
#define JS_ISWORD(c)    ((c) < 128 && (isalnum(c) || (c) == '_'))

/* Identifier start / identifier part, additionally accepting '_' and '$'. */
#define JS_ISIDSTART(c) (JS_ISLETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c)   (JS_ISIDPART(c) || (c) == '_' || (c) == '$')
404 |
|
405 |
/* XML white space: space, tab, carriage return or line feed. */
#define JS_ISXMLSPACE(c)        ((c) == ' ' || (c) == '\t' || (c) == '\r' ||  \
                                 (c) == '\n')

/*
 * XML name classification.  Bits 0x00000100 and 0x00000080 of JS_CCODE(c)
 * select the table-driven cases; the punctuation characters are added
 * explicitly.  The NAME forms accept ':' on top of the NS forms.
 */
#define JS_ISXMLNSSTART(c)      ((JS_CCODE(c) & 0x00000100) || (c) == '_')
#define JS_ISXMLNS(c)           ((JS_CCODE(c) & 0x00000080) || (c) == '.' ||  \
                                 (c) == '-' || (c) == '_')
#define JS_ISXMLNAMESTART(c)    (JS_ISXMLNSSTART(c) || (c) == ':')
#define JS_ISXMLNAME(c)         (JS_ISXMLNS(c) || (c) == ':')
412 |
|
413 |
#define JS_ISDIGIT(c)   (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)

/* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
/* XXXbe fs, etc. ? */
/* Space test: the three bits under 0x00070000 must equal 0x00040000. */
#define JS_ISSPACE(c)   ((JS_CCODE(c) & 0x00070000) == 0x00040000)
/* isprint is consulted only for codes below 128. */
#define JS_ISPRINT(c)   ((c) < 128 && isprint(c))

#define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
422 |
|
423 |
/*
 * Case mapping.  Bit 0x00100000 of JS_CCODE(c) selects the upper-case
 * mapping and bit 0x00200000 the lower-case one; the distance to the mapped
 * character is JS_CCODE(c) converted to int32 and shifted right by 22.
 * Characters without the relevant bit are returned unchanged.  c is evaluated
 * more than once.
 */
#define JS_TOUPPER(c)   ((jschar) ((JS_CCODE(c) & 0x00100000)                 \
                                   ? (c) - ((int32)JS_CCODE(c) >> 22)         \
                                   : (c)))
#define JS_TOLOWER(c)   ((jschar) ((JS_CCODE(c) & 0x00200000)                 \
                                   ? (c) + ((int32)JS_CCODE(c) >> 22)         \
                                   : (c)))
429 |
|
430 |
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 * Manually inline isdigit for performance; MSVC doesn't do this for us.
 *
 * JS7_ISDEC relies on unsigned wrap-around: any c below '0' becomes a large
 * unsigned value and fails the <= 9 test.  JS7_UNDEC and JS7_UNHEX do not
 * validate c.  JS7_UNHEX is wrapped in an outer pair of parentheses so that
 * it behaves as a single operand wherever it is expanded.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    ((c) < 128 && isxdigit(c))
#define JS7_UNHEX(c)    ((uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    ((c) < 128 && isalpha(c))
439 |
|
440 |
/* Initialize per-runtime string state for the first context in the runtime. */ |
441 |
extern JSBool |
442 |
js_InitRuntimeStringState(JSContext *cx); |
443 |
|
444 |
extern JSBool |
445 |
js_InitDeflatedStringCache(JSRuntime *rt); |
446 |
|
447 |
/*
 * Maximum character code for which we will create a pinned unit string on
 * demand -- see JSRuntime.unitStrings in jscntxt.h.
 */
#define UNIT_STRING_LIMIT 256U
452 |
|
453 |
/* |
454 |
* Get the independent string containing only character code at index in str |
455 |
* (backstopped with a zero character as usual for independent strings). |
456 |
*/ |
457 |
extern JSString * |
458 |
js_GetUnitString(JSContext *cx, JSString *str, size_t index); |
459 |
|
460 |
/* |
461 |
* Get the independent string containing only the character code c, which must |
462 |
* be less than UNIT_STRING_LIMIT. |
463 |
*/ |
464 |
extern JSString * |
465 |
js_GetUnitStringForChar(JSContext *cx, jschar c); |
466 |
|
467 |
extern void |
468 |
js_FinishUnitStrings(JSRuntime *rt); |
469 |
|
470 |
extern void |
471 |
js_FinishRuntimeStringState(JSContext *cx); |
472 |
|
473 |
extern void |
474 |
js_FinishDeflatedStringCache(JSRuntime *rt); |
475 |
|
476 |
/* Initialize the String class, returning its prototype object. */ |
477 |
extern JSClass js_StringClass; |
478 |
|
479 |
extern JSObject * |
480 |
js_InitStringClass(JSContext *cx, JSObject *obj); |
481 |
|
482 |
extern const char js_escape_str[]; |
483 |
extern const char js_unescape_str[]; |
484 |
extern const char js_uneval_str[]; |
485 |
extern const char js_decodeURI_str[]; |
486 |
extern const char js_encodeURI_str[]; |
487 |
extern const char js_decodeURIComponent_str[]; |
488 |
extern const char js_encodeURIComponent_str[]; |
489 |
|
490 |
/* GC-allocate a string descriptor for the given malloc-allocated chars. */ |
491 |
extern JSString * |
492 |
js_NewString(JSContext *cx, jschar *chars, size_t length); |
493 |
|
494 |
extern JSString * |
495 |
js_NewDependentString(JSContext *cx, JSString *base, size_t start, |
496 |
size_t length); |
497 |
|
498 |
/* Copy a counted string and GC-allocate a descriptor for it. */ |
499 |
extern JSString * |
500 |
js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n); |
501 |
|
502 |
/* Copy a C string and GC-allocate a descriptor for it. */ |
503 |
extern JSString * |
504 |
js_NewStringCopyZ(JSContext *cx, const jschar *s); |
505 |
|
506 |
/* |
507 |
* Free the chars held by str when it is finalized by the GC. When type is |
508 |
* less than zero, it denotes an internal string. Otherwise it denotes the |
509 |
* type of the external string allocated with JS_NewExternalString. |
510 |
* |
511 |
* This function always needs rt but can live with null cx. |
512 |
*/ |
513 |
extern void |
514 |
js_FinalizeStringRT(JSRuntime *rt, JSString *str, intN type, JSContext *cx); |
515 |
|
516 |
/* |
517 |
* Convert a value to a printable C string. |
518 |
*/ |
519 |
typedef JSString *(*JSValueToStringFun)(JSContext *cx, jsval v); |
520 |
|
521 |
extern JS_FRIEND_API(const char *) |
522 |
js_ValueToPrintable(JSContext *cx, jsval v, JSValueToStringFun v2sfun); |
523 |
|
524 |
/* js_ValueToPrintable with js_ValueToString as the conversion function. */
#define js_ValueToPrintableString(cx,v)                                       \
    js_ValueToPrintable(cx, v, js_ValueToString)

/* js_ValueToPrintable with js_ValueToSource as the conversion function. */
#define js_ValueToPrintableSource(cx,v)                                       \
    js_ValueToPrintable(cx, v, js_ValueToSource)
529 |
|
530 |
/* |
531 |
* Convert a value to a string, returning null after reporting an error, |
532 |
* otherwise returning a new string reference. |
533 |
*/ |
534 |
extern JS_FRIEND_API(JSString *) |
535 |
js_ValueToString(JSContext *cx, jsval v); |
536 |
|
537 |
/* |
538 |
* Convert a value to its source expression, returning null after reporting |
539 |
* an error, otherwise returning a new string reference. |
540 |
*/ |
541 |
extern JS_FRIEND_API(JSString *) |
542 |
js_ValueToSource(JSContext *cx, jsval v); |
543 |
|
544 |
/* |
545 |
* Compute a hash function from str. The caller can call this function even if |
546 |
* str is not a GC-allocated thing. |
547 |
*/ |
548 |
extern uint32 |
549 |
js_HashString(JSString *str); |
550 |
|
551 |
/* |
552 |
* Test if strings are equal. The caller can call the function even if str1 |
553 |
* or str2 are not GC-allocated things. |
554 |
*/ |
555 |
extern JSBool JS_FASTCALL |
556 |
js_EqualStrings(JSString *str1, JSString *str2); |
557 |
|
558 |
/* |
559 |
* Return less than, equal to, or greater than zero depending on whether |
560 |
* str1 is less than, equal to, or greater than str2. |
561 |
*/ |
562 |
extern int32 JS_FASTCALL |
563 |
js_CompareStrings(JSString *str1, JSString *str2); |
564 |
|
565 |
/* |
566 |
* Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen. |
567 |
* The patlen argument must be positive and no greater than BMH_PATLEN_MAX. |
568 |
* The start argument tells where in text to begin the search. |
569 |
* |
570 |
* Return the index of pat in text, or -1 if not found. |
571 |
*/ |
572 |
#define BMH_CHARSET_SIZE 256    /* ISO-Latin-1 */
#define BMH_PATLEN_MAX   255    /* skip table element is uint8 */

#define BMH_BAD_PATTERN  (-2)   /* return value if pat is not ISO-Latin-1 */
576 |
|
577 |
extern jsint |
578 |
js_BoyerMooreHorspool(const jschar *text, jsint textlen, |
579 |
const jschar *pat, jsint patlen, |
580 |
jsint start); |
581 |
|
582 |
extern size_t |
583 |
js_strlen(const jschar *s); |
584 |
|
585 |
extern jschar * |
586 |
js_strchr(const jschar *s, jschar c); |
587 |
|
588 |
extern jschar * |
589 |
js_strchr_limit(const jschar *s, jschar c, const jschar *limit); |
590 |
|
591 |
/*
 * Copy exactly n jschars from s to t with memcpy.  Unlike strncpy this
 * neither stops at a zero character nor pads or terminates t.
 */
#define js_strncpy(t, s, n)     memcpy((t), (s), (n) * sizeof(jschar))
592 |
|
593 |
/* |
594 |
* Return s advanced past any Unicode white space characters. |
595 |
*/ |
596 |
extern const jschar * |
597 |
js_SkipWhiteSpace(const jschar *s, const jschar *end); |
598 |
|
599 |
/* |
600 |
* Inflate bytes to JS chars and vice versa. Report out of memory via cx |
601 |
* and return null on error, otherwise return the jschar or byte vector that |
602 |
* was JS_malloc'ed. length is updated with the length of the new string in jschars. |
603 |
*/ |
604 |
extern jschar * |
605 |
js_InflateString(JSContext *cx, const char *bytes, size_t *length); |
606 |
|
607 |
extern char * |
608 |
js_DeflateString(JSContext *cx, const jschar *chars, size_t length); |
609 |
|
610 |
/* |
611 |
* Inflate bytes to JS chars into a buffer. 'chars' must be large enough for |
612 |
* 'length' jschars. The buffer is NOT null-terminated. The destination length |
613 |
* must be initialized with the buffer size and will contain on return the |
614 |
* number of copied chars. |
615 |
*/ |
616 |
extern JSBool |
617 |
js_InflateStringToBuffer(JSContext* cx, const char *bytes, size_t length, |
618 |
jschar *chars, size_t* charsLength); |
619 |
|
620 |
/* |
621 |
* Get number of bytes in the deflated sequence of characters. |
622 |
*/ |
623 |
extern size_t |
624 |
js_GetDeflatedStringLength(JSContext *cx, const jschar *chars, |
625 |
size_t charsLength); |
626 |
|
627 |
/* |
628 |
* Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for |
629 |
* 'length' chars. The buffer is NOT null-terminated. The destination length |
630 |
* must be initialized with the buffer size and will contain on return the |
631 |
* number of copied bytes. |
632 |
*/ |
633 |
extern JSBool |
634 |
js_DeflateStringToBuffer(JSContext* cx, const jschar *chars, |
635 |
size_t charsLength, char *bytes, size_t* length); |
636 |
|
637 |
/* |
638 |
* Associate bytes with str in the deflated string cache, returning true on |
639 |
* successful association, false on out of memory. |
640 |
*/ |
641 |
extern JSBool |
642 |
js_SetStringBytes(JSContext *cx, JSString *str, char *bytes, size_t length); |
643 |
|
644 |
/* |
645 |
* Find or create a deflated string cache entry for str that contains its |
646 |
* characters chopped from Unicode code points into bytes. |
647 |
*/ |
648 |
extern const char * |
649 |
js_GetStringBytes(JSContext *cx, JSString *str); |
650 |
|
651 |
/* Remove a deflated string cache entry associated with str if any. */ |
652 |
extern void |
653 |
js_PurgeDeflatedStringCache(JSRuntime *rt, JSString *str); |
654 |
|
655 |
/* Export a few natives and a helper to other files in SpiderMonkey. */ |
656 |
extern JSBool |
657 |
js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, |
658 |
jsval *rval); |
659 |
|
660 |
extern JSBool |
661 |
js_StringReplaceHelper(JSContext *cx, uintN argc, JSObject *lambda, |
662 |
JSString *repstr, jsval *vp); |
663 |
|
664 |
/* |
665 |
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at |
666 |
* least 6 bytes long. Return the number of UTF-8 bytes of data written. |
667 |
*/ |
668 |
extern int |
669 |
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char); |
670 |
|
671 |
/*
 * Write str into buffer escaping any non-printable or non-ASCII character.
 * Guarantees that a NUL is at the end of the buffer. Returns the length of
 * the written output, NOT including the NUL. If buffer is null, just returns
 * the length of the output. If quote is not 0, it must be a single or double
 * quote character that will quote the output.
 *
 * The function is only defined for debug builds.
 *
 * Expands to js_PutEscapedStringImpl with a NULL file argument.
 */
#define js_PutEscapedString(buffer, bufferSize, str, quote)                   \
    js_PutEscapedStringImpl(buffer, bufferSize, NULL, str, quote)

/*
 * Write str into file escaping any non-printable or non-ASCII character.
 * Returns the number of bytes written to file. If quote is not 0, it must
 * be a single or double quote character that will quote the output.
 *
 * The function is only defined for debug builds.
 *
 * Asserts that file is non-null, then expands to js_PutEscapedStringImpl
 * with a NULL buffer of size 0.
 */
#define js_FileEscapedString(file, str, quote)                                \
    (JS_ASSERT(file), js_PutEscapedStringImpl(NULL, 0, file, str, quote))
692 |
|
693 |
extern JS_FRIEND_API(size_t) |
694 |
js_PutEscapedStringImpl(char *buffer, size_t bufferSize, FILE *fp, |
695 |
JSString *str, uint32 quote); |
696 |
|
697 |
extern JSBool |
698 |
js_String(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval); |
699 |
|
700 |
JS_END_EXTERN_C |
701 |
|
702 |
#endif /* jsstr_h___ */ |