
Contents of /trunk/js/nanojit/NativeAMD64.h

Revision 399
Tue Dec 9 03:37:47 2008 UTC by siliconforks
File MIME type: text/plain
File size: 40273 byte(s)
Use SpiderMonkey from Firefox 3.1b2.

1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is [Open Source Virtual Machine].
15 *
16 * The Initial Developer of the Original Code is
17 * Adobe System Incorporated.
18 * Portions created by the Initial Developer are Copyright (C) 2004-2007
19 * the Initial Developer. All Rights Reserved.
20 *
21 * Contributor(s):
22 * Adobe AS3 Team
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38
39 #ifndef __nanojit_NativeAMD64__
40 #define __nanojit_NativeAMD64__
41
42 #include <limits.h>
43
44 namespace nanojit
45 {
46 const int NJ_LOG2_PAGE_SIZE = 12; // 4K
47 const int NJ_MAX_REGISTERS = 32; // 16 gp regs + 16 xmm regs
48 const int NJ_STACK_OFFSET = 0;
49
50 // WARNING: setting this allows nanojit to grow memory as needed, without bounds
51 const bool NJ_UNLIMITED_GROWTH = true;
52
53 #define NJ_MAX_STACK_ENTRY 256
54 #define NJ_MAX_PARAMETERS 1
55
56 /* Stack is always 16-byte aligned on x64 */
57 const int NJ_ALIGN_STACK = 16;
58
59 typedef uint8_t NIns;
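/* Note: an NIns is a single byte because x86-64 instructions are variable
 * length; _nIns always points at the first byte of the most recently emitted
 * instruction, and code is emitted backwards (by decrementing _nIns)
 * throughout this file. */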
60
61 // These are used as register numbers in various parts of the code
62 typedef enum
63 {
64 // general purpose 64-bit regs
65 RAX = 0, // return value, scratch
66 RCX = 1, // this/arg0, scratch
67 RDX = 2, // arg1, return-msw, scratch
68 RBX = 3,
69 RSP = 4, // stack pointer
70 RBP = 5, // frame pointer
71 RSI = 6,
72 RDI = 7,
73
74 SP = RSP, // alias SP to RSP for convenience
75 FP = RBP, // alias FP to RBP for convenience
76
77 R8 = 8,
78 R9 = 9,
79 R10 = 10,
80 R11 = 11,
81 R12 = 12,
82 R13 = 13,
83 R14 = 14,
84 R15 = 15,
85
86 XMM0 = 16,
87 XMM1 = 17,
88 XMM2 = 18,
89 XMM3 = 19,
90 XMM4 = 20,
91 XMM5 = 21,
92 XMM6 = 22,
93 XMM7 = 23,
94 XMM8 = 24,
95 XMM9 = 25,
96 XMM10 = 26,
97 XMM11 = 27,
98 XMM12 = 28,
99 XMM13 = 29,
100 XMM14 = 30,
101 XMM15 = 31,
102
103 FirstReg = 0,
104 LastReg = 31,
105 UnknownReg = 32
106 }
107 Register;
108
109 typedef int RegisterMask;
110
111 /* R13-R15 (RBX and R12 are commented out of the mask below) */
112 static const int NumSavedRegs = 3;
113 static const RegisterMask SavedRegs = /*(1<<RBX) |*/ /*(1<<R12) |*/ (1<<R13) | (1<<R14) | (1<<R15);
114 /* RAX, RCX, RDX, RDI, RSI, R8-R11 */
115 static const RegisterMask TempRegs = (1<<RAX) | (1<<RCX) | (1<<RDX) | (1<<R8) | (1<<R9) | (1<<R10) | (1<<R11) | (1<<RDI) | (1<<RSI);
116 static const RegisterMask GpRegs = SavedRegs | TempRegs;
117 /* XMM0-XMM15 (XMM12 is omitted from the mask) */
118 static const RegisterMask XmmRegs = (1<<XMM0) | (1<<XMM1) | (1<<XMM2) | (1<<XMM3) | (1<<XMM4) | (1<<XMM5) | (1<<XMM6) | (1<<XMM7) | (1<<XMM8) | (1<<XMM9) | (1<<XMM10) | (1<<XMM11) | (1<<XMM13) | (1<<XMM14) | (1<<XMM15);
119 static const RegisterMask FpRegs = XmmRegs;
120 static const RegisterMask ScratchRegs = TempRegs | XmmRegs;
121
122 static const RegisterMask AllowableFlagRegs = 1<<RAX |1<<RCX | 1<<RDX | 1<<RBX;
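/* Note: restricted to RAX-RBX, i.e. the registers whose low byte (al/cl/dl/bl)
 * is encodable without a REX prefix; presumably kept in step with the i386
 * backend's choice of flag-result registers for setcc. */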
123
124 #if defined WIN64
125 typedef __int64 nj_printf_ld;
126 #else
127 typedef long int nj_printf_ld;
128 #endif
129
130 #define _rmask_(r) (1<<(r))
131 #define _is_xmm_reg_(r) ((_rmask_(r)&XmmRegs)!=0)
132 #define _is_fp_reg_(r) ((_rmask_(r)&FpRegs)!=0)
133 #define _is_gp_reg_(r) ((_rmask_(r)&GpRegs)!=0)
134
135 #define nextreg(r) Register(r+1)
136 #define prevreg(r) Register(r-1)
137 #define imm2register(c) (Register)(c)
138
139 verbose_only( extern const char* regNames[]; )
140
141 #define DECLARE_PLATFORM_STATS()
142
143 #define DECLARE_PLATFORM_REGALLOC()
144
145 #if !defined WIN64
146 #define DECLARE_PLATFORM_ASSEMBLER_START() \
147 const static Register argRegs[6], retRegs[2];
148 #else
149 #define DECLARE_PLATFORM_ASSEMBLER_START() \
150 const static Register argRegs[4], retRegs[2];
151 #endif
152
153 #if !defined WIN64
154 #define DECLARE_PLATFORM_ASSEMBLER() \
155 DECLARE_PLATFORM_ASSEMBLER_START() \
156 void nativePageReset(); \
157 void nativePageSetup(); \
158 void asm_farg(LInsp); \
159 void asm_qbinop(LInsp); \
160 int32_t _pageData; \
161 NIns *_dblNegPtr; \
162 NIns *_negOnePtr; \
163 NIns *overrideProtect;
164 #endif
165
166 #define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
167
168 // enough room for n bytes
169 #define underrunProtect(n) \
170 { \
171 intptr_t u = n + (sizeof(PageHeader)/sizeof(NIns) + _pageData + 5); \
172 if ( !samepage(_nIns-u,_nIns-1) ) \
173 { \
174 NIns *tt = _nIns; \
175 _nIns = pageAlloc(_inExit); \
176 if (!_inExit) { \
177 _pageData = 0; \
178 _dblNegPtr = NULL; \
179 _negOnePtr = NULL; \
180 } \
181 intptr_t d = tt-_nIns; \
182 if (d <= INT_MAX && d >= INT_MIN) { \
183 JMP_long_nochk_offset(d); \
184 } else { \
185 /* Insert a 64-bit jump... */ \
186 _nIns -= 8; \
187 *(intptr_t *)_nIns = intptr_t(tt); \
188 JMPm_nochk(0); \
189 } \
190 } \
191 overrideProtect = _nIns; \
192 }
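/* Note on the macro above: code is emitted bottom-up, so underrunProtect(n)
 * checks that roughly n bytes (plus slack for the page header data area and a
 * jump) remain below _nIns on the current native page. If not, it switches to
 * a freshly allocated page and emits, at the bottom of the new page, a jump to
 * the previously emitted code at the old _nIns (rel32 when the distance fits,
 * otherwise a 64-bit absolute jump via JMPm_nochk). */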
193
194 #define AMD64_NEEDS_REX(x) (((x) & 8) == 8)
195 #define AMD64_ENCODE_REG(x) ((x) & 7)
196
197 #define AMD64_MODRM(mode, reg, rm) (uint8_t)(((mode & 3) << 6) | ((reg & 7) << 3) | (rm & 7))
198 #define AMD64_ALUOP(x) ((x)>>3)
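/* The ModRM byte is mode:2 | reg:3 | rm:3. AMD64_ALUOP derives the /digit
 * (ModRM reg field) for the 0x81/0x83 immediate group from the RAX-form
 * opcode, e.g. AMD64_ALUOP(AMD64_ADD_RAX) = 0x05 >> 3 = 0 (/0 = add) and
 * AMD64_ALUOP(AMD64_CMP_RAX) = 0x3D >> 3 = 7 (/7 = cmp). */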
199
200 /**
201 * Returns a REX prefix.
202 *
203 * q 1 for 64-bit, 0 for default.
204 * mdReg ModRM register field.
205 * mdRm ModRM R/M field, or opcode register field.
206 */
207 #define AMD64_REX(q,mdReg,mdRm) (uint8_t)(0x40 | (q << 3) | (((mdReg >> 3) & 1) << 2) | ((mdRm >> 3) & 1))
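/* Example: AMD64_REX(1, RAX, R8) = 0x40 | 0x08 | 0x00 | 0x01 = 0x49,
 * i.e. REX.W (64-bit operand size) plus REX.B (extends the rm/base field to R8). */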
208
209 #define AMD64_ADD_RAX 0x05 /* imm */
210 #define AMD64_ADD_REG_RM 0x03 /* rm/r */
211 #define AMD64_ADDSD 0xF20F58 /* rm/r */
212 #define AMD64_ALU8 0x83 /* rm/x imm8 */
213 #define AMD64_ALU32 0x81 /* rm/x imm32 */
214 #define AMD64_AND_RAX 0x25 /* imm */
215 #define AMD64_AND_REG_RM 0x23 /* rm/r */
216 #define AMD64_CMP_RAX 0x3D /* imm */
217 #define AMD64_CMP_REG_RM 0x3B /* rm/r */
218 #define AMD64_CVTSI2SD 0xF20F2A /* rm/r */
219 #define AMD64_DIVSD 0xF20F5E /* rm/r */
220 #define AMD64_IMUL_REG_RM_1 0x0F /* see next */
221 #define AMD64_IMUL_REG_RM_2 0xAF /* rm/r */
222 #define AMD64_INT3 0xCC /* */
223 #define AMD64_LEA 0x8D /* rm/r */
224 #define AMD64_MOV_REG_IMM(x) (0xB8 | AMD64_ENCODE_REG(x))
225 #define AMD64_MOV_REG_RM 0x8B /* rm/r */
226 #define AMD64_MOV_RM_REG 0x89 /* rm/r */
227 #define AMD64_MOV_RM_IMM 0xC7 /* rm/0 imm */
228 #define AMD64_MOVD_REG_RM 0x660F6E /* rm/r */
229 #define AMD64_MOVD_RM_REG 0x660F7E /* rm/r */
230 #define AMD64_MOVSD_REG_RM 0xF20F10 /* rm/r */
231 #define AMD64_MOVZX 0x0FB6 /* rm/r */
232 #define AMD64_MOVSZ 0x0FB7 /* rm/r (movzx r32/64, r/m16) */
233 #define AMD64_MULSD 0xF20F59 /* rm/r */
234 #define AMD64_NEG_RM 0xF7 /* rm/3 */
235 #define AMD64_NOT_RM 0xF7 /* rm/2 */
236 #define AMD64_OR_RAX 0x0D /* imm */
237 #define AMD64_OR_REG_RM 0x0B /* rm/r */
238 #define AMD64_POP_REG(x) (0x58 | AMD64_ENCODE_REG(x))
239 #define AMD64_PUSH_REG(x) (0x50 | AMD64_ENCODE_REG(x))
240 #define AMD64_PUSH_RM 0xFF /* rm/6 */
241 #define AMD64_PUSHF 0x9C /* */
242 #define AMD64_RET 0xC3 /* */
243 #define AMD64_SAR_RM_1 0xD1 /* rm/7 */
244 #define AMD64_SAR_RM 0xD3 /* rm/7 */
245 #define AMD64_SAR_RM_IMM8 0xC1 /* rm/7 */
246 #define AMD64_SHL_RM_1 0xD1 /* rm/4 */
247 #define AMD64_SHL_RM 0xD3 /* rm/4 */
248 #define AMD64_SHL_RM_IMM8 0xC1 /* rm/4 imm */
249 #define AMD64_SHR_RM_1 0xD1 /* rm/5 */
250 #define AMD64_SHR_RM 0xD3 /* rm/5 */
251 #define AMD64_SHR_RM_IMM8 0xC1 /* rm/5 */
252 #define AMD64_SUB_RAX 0x2D /* imm */
253 #define AMD64_SUB_REG_RM 0x2B /* rm/r */
254 #define AMD64_SUBSD 0xF20F5C /* rm/r */
255 #define AMD64_TEST_RM_REG 0x85 /* rm/r */
256 #define AMD64_UCOMISD 0x660F2E /* rm/r */
257 #define AMD64_XOR_RAX 0x35 /* imm */
258 #define AMD64_XOR_REG_RM 0x33 /* rm/r */
259 #define AMD64_XORPD 0x660F57 /* rm/r */
260
261
262 #define IMM32(i) \
263 _nIns -= 4; \
264 *((int32_t*)_nIns) = (int32_t)(i)
265
266
267 #define IMM64(i) \
268 _nIns -= 8; \
269 *((int64_t*)_nIns) = (int64_t)(i)
270
271
272 #define AMD64_MODRM_REG(reg, rm) AMD64_MODRM(3, reg, rm)
273
274
275 #define AMD64_MODRM_DISP(reg, rm, disp) \
276 if ((disp) == 0 && (((rm) & 0x7) != 5)) { \
277 *(--_nIns) = AMD64_MODRM(0, reg, rm); \
278 } else if (isS8(disp)) { \
279 *(--_nIns) = int8_t(disp); \
280 *(--_nIns) = AMD64_MODRM(1, reg, rm); \
281 } else { \
282 IMM32(disp); \
283 *(--_nIns) = AMD64_MODRM(2, reg, rm); \
284 }
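/* Note on the macro above: the "(rm & 7) != 5" test exists because mod=00 with
 * rm=101 does not mean [rbp]/[r13] -- in 64-bit mode it selects RIP-relative
 * addressing -- so a zero displacement off RBP/R13 must still be encoded as
 * mod=01 with an explicit disp8 of 0. Also note that rm=100 (RSP/R12 as base)
 * would require a SIB byte, which this macro does not emit. */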
285
286
287
288 #define AMD64_ALU(op, reg, imm, q) \
289 underrunProtect(7); \
290 if (isS8(imm)) { \
291 *(--_nIns) = uint8_t(imm); \
292 *(--_nIns) = AMD64_MODRM_REG(AMD64_ALUOP(op), reg); \
293 *(--_nIns) = AMD64_ALU8; \
294 } else { \
295 IMM32(imm); \
296 if (reg == RAX) { \
297 *(--_nIns) = op; \
298 } else { \
299 *(--_nIns) = AMD64_MODRM_REG(AMD64_ALUOP(op), reg); \
300 *(--_nIns) = AMD64_ALU32; \
301 } \
302 } \
303 if (q || AMD64_NEEDS_REX(reg)) { \
304 *(--_nIns) = AMD64_REX(q,0,reg); \
305 }
306
307
308 #define AMD64_ALU_MEM(op, reg, disp, imm, q) \
309 underrunProtect(11); \
310 if (isS8(imm)) { \
311 *(--_nIns) = uint8_t(imm); \
312 } else { \
313 IMM32(imm); \
314 } \
315 AMD64_MODRM_DISP(AMD64_ALUOP(op), reg, disp); \
316 if (isS8(imm)) { \
317 *(--_nIns) = AMD64_ALU8; \
318 } else { \
319 *(--_nIns) = AMD64_ALU32; \
320 } \
321 if (q || AMD64_NEEDS_REX(reg)) { \
322 *(--_nIns) = AMD64_REX(q,0,reg); \
323 }
324
325
326 #define NOT(r) do { \
327 underrunProtect(3); \
328 *(--_nIns) = AMD64_MODRM_REG(2, r); \
329 *(--_nIns) = AMD64_NOT_RM; \
330 *(--_nIns) = AMD64_REX(1,0,r); \
331 asm_output1("not %s",gpn(r)); \
332 } while(0)
333
334
335 #define MOVZX8(d,s) do { \
336 underrunProtect(4); \
337 *(--_nIns) = AMD64_MODRM_REG(d, s); \
338 *(--_nIns) = AMD64_MOVZX & 0xFF; \
339 *(--_nIns) = AMD64_MOVZX >> 8; \
340 if (AMD64_NEEDS_REX(s) || AMD64_NEEDS_REX(d)) { \
341 *(--_nIns) = AMD64_REX(0,d,s); \
342 } \
343 asm_output2("movzx %s,%s", gpn(d),gpn(s)); \
344 } while(0)
345
346
347 #define AMD64_ADDmi(d,b,i,q) do { \
348 AMD64_ALU_MEM(AMD64_ADD_RAX, b, d, i, q); \
349 asm_output3("add %d(%s), %d", d, gpn(b), i); \
350 } while(0)
351
352
353 #define ADDQmi(d,b,i) do { \
354 AMD64_ADDmi(d,b,i,1); \
355 asm_output3("add %d(%s), %d", d, gpn(b), i); \
356 } while (0)
357
358
359 #define CMPi(r,i) do { \
360 AMD64_ALU(AMD64_CMP_RAX, r, i, 0); \
361 asm_output2("cmp %s,%d",gpn(r),i); \
362 } while(0)
363
364
365 #define TEST(d,s) do { \
366 underrunProtect(3); \
367 *(--_nIns) = AMD64_MODRM_REG(d, s); \
368 *(--_nIns) = AMD64_TEST_RM_REG; \
369 if (AMD64_NEEDS_REX(d) || AMD64_NEEDS_REX(s)) { \
370 *(--_nIns) = AMD64_REX(0,d,s); \
371 } \
372 asm_output2("test %s,%s",gpn(d),gpn(s)); \
373 } while(0)
374
375
376 #define TESTQ(d,s) do { \
377 underrunProtect(3); \
378 *(--_nIns) = AMD64_MODRM_REG(d, s); \
379 *(--_nIns) = AMD64_TEST_RM_REG; \
380 *(--_nIns) = AMD64_REX(1,d,s); \
381 asm_output2("test %s,%s",gpn(d),gpn(s)); \
382 } while(0)
383
384
385 #define CMP(l,r) do { \
386 underrunProtect(3); \
387 *(--_nIns) = AMD64_MODRM_REG(l, r); \
388 *(--_nIns) = AMD64_CMP_REG_RM; \
389 if (AMD64_NEEDS_REX(l) || AMD64_NEEDS_REX(r)) { \
390 *(--_nIns) = AMD64_REX(0,l,r); \
391 } \
392 asm_output2("cmp %s,%s",gpn(l),gpn(r)); \
393 } while(0)
394
395 #define CMPQ(l,r) do { \
396 underrunProtect(3); \
397 *(--_nIns) = AMD64_MODRM_REG(l, r); \
398 *(--_nIns) = AMD64_CMP_REG_RM; \
399 *(--_nIns) = AMD64_REX(1,l,r); \
400 asm_output2("cmp %s,%s",gpn(l),gpn(r)); \
401 } while(0)
402
403 #define ADDi(r,i) do { \
404 AMD64_ALU(AMD64_ADD_RAX, r, i, 0); \
405 asm_output2("add %s,%d",gpn(r),i); \
406 } while(0)
407
408
409 #define ADDQi(r,i) do { \
410 AMD64_ALU(AMD64_ADD_RAX, r, i, 1); \
411 asm_output2("add %s,%d",gpn(r),i); \
412 } while(0)
413
414
415 #define AMD64_PRIM(op,l,r) \
416 underrunProtect(3); \
417 *(--_nIns) = AMD64_MODRM_REG(l, r); \
418 *(--_nIns) = op; \
419 if (AMD64_NEEDS_REX(l) || AMD64_NEEDS_REX(r)) \
420 *(--_nIns) = AMD64_REX(0,l,r);
421
422
423 #define AMD64_PRIMQ(op,l,r) \
424 underrunProtect(3); \
425 *(--_nIns) = AMD64_MODRM_REG(l, r); \
426 *(--_nIns) = op; \
427 *(--_nIns) = AMD64_REX(1,l,r);
428
429
430 #define SHR(r,s) do { \
431 AMD64_PRIM(AMD64_SHR_RM, 5, r); \
432 asm_output2("shr %s,%s",gpn(r),gpn(s)); \
433 } while(0)
434
435
436 #define AND(l,r) do { \
437 AMD64_PRIM(AMD64_AND_REG_RM, l, r); \
438 asm_output2("and %s,%s",gpn(l),gpn(r)); \
439 } while(0)
440
441
442 #define ANDQ(l,r) do { \
443 AMD64_PRIMQ(AMD64_AND_REG_RM, l, r); \
444 asm_output2("and %s,%s",gpn(l),gpn(r)); \
445 } while(0)
446
447
448 #define XOR(l,r) do { \
449 AMD64_PRIM(AMD64_XOR_REG_RM, l, r); \
450 asm_output2("xor %s,%s",gpn(l),gpn(r)); \
451 } while(0)
452
453
454 #define OR(l,r) do { \
455 AMD64_PRIM(AMD64_OR_REG_RM, l, r); \
456 asm_output2("or %s,%s",gpn(l),gpn(r)); \
457 } while(0)
458
459
460 #define ORQ(l,r) do { \
461 AMD64_PRIMQ(AMD64_OR_REG_RM, l, r); \
462 asm_output2("or %s,%s",gpn(l),gpn(r)); \
463 } while(0)
464
465
466 #define SHRi(r,i) do { \
467 if (i == 1) { \
468 underrunProtect(3); \
469 *(--_nIns) = AMD64_MODRM_REG(5, r); \
470 *(--_nIns) = AMD64_SHR_RM_1; \
471 } else { \
472 underrunProtect(4); \
473 *(--_nIns) = uint8_t(i); \
474 *(--_nIns) = AMD64_MODRM_REG(5, r); \
475 *(--_nIns) = AMD64_SHR_RM_IMM8; \
476 } \
477 if (AMD64_NEEDS_REX(r)) \
478 *(--_nIns) = AMD64_REX(0,0,r); \
479 asm_output2("shr %s,%d", gpn(r), i); \
480 } while (0)
481
482
483 #define ANDi(r,i) do { \
484 AMD64_ALU(AMD64_AND_RAX, r, i, 0); \
485 asm_output2("and %s,%d",gpn(r),i); \
486 } while(0)
487
488
489 #define ANDQi(r,i) do { \
490 AMD64_ALU(AMD64_AND_RAX, r, i, 1); \
491 asm_output2("and %s,%d",gpn(r),i); \
492 } while(0)
493
494
495 #define ORQi(r,i) do { \
496 AMD64_ALU(AMD64_OR_RAX, r, i, 1); \
497 asm_output2("or %s,%d",gpn(r),i); \
498 } while(0)
499
500 #define XORi(r,i) do { \
501 AMD64_ALU(AMD64_XOR_RAX, r, i, 0); \
502 asm_output2("xor %s,%d",gpn(r),i); \
503 } while(0)
504
505
506 #define ORi(r,i) do { \
507 AMD64_ALU(AMD64_OR_RAX, r, i, 0); \
508 asm_output2("or %s,%d",gpn(r),i); \
509 } while(0)
510
511
512 #define MUL(l,r) do { \
513 underrunProtect(4); \
514 *(--_nIns) = AMD64_MODRM_REG(l, r); \
515 *(--_nIns) = AMD64_IMUL_REG_RM_2; \
516 *(--_nIns) = AMD64_IMUL_REG_RM_1; \
517 if (AMD64_NEEDS_REX(l) || AMD64_NEEDS_REX(r)) \
518 *(--_nIns) = AMD64_REX(0,l,r); \
519 asm_output2("mul %s,%s", gpn(l), gpn(r)); \
520 } while (0)
521
522
523 #define NEG(r) do { \
524 AMD64_PRIM(AMD64_NEG_RM, 3, r); \
525 asm_output1("neg %s",gpn(r)); \
526 } while(0)
527
528
529 #define ADD(l,r) do { \
530 AMD64_PRIM(AMD64_ADD_REG_RM, l, r); \
531 asm_output2("add %s,%s", gpn(l), gpn(r)); \
532 } while (0)
533
534
535 #define ADDQ(l,r) do { \
536 AMD64_PRIMQ(AMD64_ADD_REG_RM, l, r); \
537 asm_output2("add %s,%s", gpn(l), gpn(r)); \
538 } while (0)
539
540
541 #define SUB(l,r) do { \
542 AMD64_PRIM(AMD64_SUB_REG_RM, l, r); \
543 asm_output2("sub %s,%s", gpn(l), gpn(r)); \
544 } while (0)
545
546
547
548 #define SAR(r,s) do { \
549 AMD64_PRIM(AMD64_SAR_RM, 7, r); \
550 asm_output2("sar %s,%s",gpn(r),gpn(s)); \
551 } while (0)
552
553
554 #define SARi(r,i) do { \
555 if (i == 1) { \
556 underrunProtect(3); \
557 *(--_nIns) = AMD64_MODRM_REG(7, r); \
558 *(--_nIns) = AMD64_SAR_RM_1; \
559 } else { \
560 underrunProtect(4); \
561 *(--_nIns) = uint8_t(i); \
562 *(--_nIns) = AMD64_MODRM_REG(7, r); \
563 *(--_nIns) = AMD64_SAR_RM_IMM8; \
564 } \
565 if (AMD64_NEEDS_REX(r)) \
566 *(--_nIns) = AMD64_REX(0,0,r); \
567 asm_output2("sar %s,%d", gpn(r), i); \
568 } while (0)
569
570
571 #define SHLi(r,i) do { \
572 if (i == 1) { \
573 underrunProtect(3); \
574 *(--_nIns) = AMD64_MODRM_REG(4, r); \
575 *(--_nIns) = AMD64_SHL_RM_1; \
576 } else { \
577 underrunProtect(4); \
578 *(--_nIns) = uint8_t(i); \
579 *(--_nIns) = AMD64_MODRM_REG(4, r); \
580 *(--_nIns) = AMD64_SHL_RM_IMM8; \
581 } \
582 if (AMD64_NEEDS_REX(r)) \
583 *(--_nIns) = AMD64_REX(0,0,r); \
584 asm_output2("shl %s,%d", gpn(r), i); \
585 } while (0)
586
587
588 #define SHLQi(r,i) do { \
589 if (i == 1) { \
590 underrunProtect(3); \
591 *(--_nIns) = AMD64_MODRM_REG(4, r); \
592 *(--_nIns) = AMD64_SHL_RM_1; \
593 } else { \
594 underrunProtect(4); \
595 *(--_nIns) = uint8_t(i); \
596 *(--_nIns) = AMD64_MODRM_REG(4, r); \
597 *(--_nIns) = AMD64_SHL_RM_IMM8; \
598 } \
599 *(--_nIns) = AMD64_REX(1,0,r); \
600 asm_output2("shl %s,%d", gpn(r), i); \
601 } while (0)
602
603
604 #define SHL(r,s) do { \
605 AMD64_PRIM(AMD64_SHL_RM, 4, r); \
606 asm_output2("shl %s,%s",gpn(r),gpn(s)); \
607 } while (0)
608
609
610 #define AMD64_SUBi(r,i,q) do { \
611 AMD64_ALU(AMD64_SUB_RAX, r, i, q); \
612 asm_output2("sub %s,%d",gpn(r),i); \
613 } while (0)
614
615
616 #define SUBi(r,i) AMD64_SUBi(r, i, 0)
617
618
619 #define SUBQi(r,i) AMD64_SUBi(r, i, 1)
620
621
622
623 #define MR(d,s) do { \
624 underrunProtect(3); \
625 *(--_nIns) = AMD64_MODRM_REG(d, s); \
626 *(--_nIns) = AMD64_MOV_REG_RM; \
627 *(--_nIns) = AMD64_REX(1,d,s); \
628 asm_output2("mov %s,%s",gpn(d),gpn(s)); \
629 } while (0)
630
631
632
633 #define LEA(r,d,b) do { \
634 underrunProtect(8); \
635 AMD64_MODRM_DISP(r, b, d); \
636 *(--_nIns) = AMD64_LEA; \
637 if (AMD64_NEEDS_REX(r) || AMD64_NEEDS_REX(b)) { \
638 *(--_nIns) = AMD64_REX(0,r,b); \
639 } \
640 asm_output3("lea %s,%d(%s)",gpn(r),d,gpn(b)); \
641 } while(0)
642
643
644 #define LEAQ(r,d,b) do { \
645 underrunProtect(8); \
646 AMD64_MODRM_DISP(r, b, d); \
647 *(--_nIns) = AMD64_LEA; \
648 *(--_nIns) = AMD64_REX(1,r,b); \
649 asm_output3("lea %s,%d(%s)",gpn(r),d,gpn(b)); \
650 } while(0)
651
652
653 #define AMD64_SETCC(op, r) \
654 underrunProtect(4); \
655 *(--_nIns) = AMD64_MODRM_REG(0,r); \
656 *(--_nIns) = op & 0xFF; \
657 *(--_nIns) = op >> 8; \
658 if (AMD64_NEEDS_REX(r)) { \
659 *(--_nIns) = AMD64_REX(0,0,r); \
660 }
661
662
663
664 #define SETE(r) do { AMD64_SETCC(0x0f94,(r)); asm_output1("sete %s",gpn(r)); } while(0)
665 #define SETNP(r) do { AMD64_SETCC(0x0f9B,(r)); asm_output1("setnp %s",gpn(r)); } while(0)
666 #define SETL(r) do { AMD64_SETCC(0x0f9C,(r)); asm_output1("setl %s",gpn(r)); } while(0)
667 #define SETLE(r) do { AMD64_SETCC(0x0f9E,(r)); asm_output1("setle %s",gpn(r)); } while(0)
668 #define SETG(r) do { AMD64_SETCC(0x0f9F,(r)); asm_output1("setg %s",gpn(r)); } while(0)
669 #define SETGE(r) do { AMD64_SETCC(0x0f9D,(r)); asm_output1("setge %s",gpn(r)); } while(0)
670 #define SETB(r) do { AMD64_SETCC(0x0f92,(r)); asm_output1("setb %s",gpn(r)); } while(0)
671 #define SETBE(r) do { AMD64_SETCC(0x0f96,(r)); asm_output1("setbe %s",gpn(r)); } while(0)
672 #define SETA(r) do { AMD64_SETCC(0x0f97,(r)); asm_output1("seta %s",gpn(r)); } while(0)
673 #define SETAE(r) do { AMD64_SETCC(0x0f93,(r)); asm_output1("setae %s",gpn(r)); } while(0)
674 #define SETC(r) do { AMD64_SETCC(0x0f92,(r)); asm_output1("setc %s",gpn(r)); } while(0)
675 #define SETO(r) do { AMD64_SETCC(0x0f90,(r)); asm_output1("seto %s",gpn(r)); } while(0)
676
677
678 #define AMD64_CMOV(op, dr, sr) \
679 underrunProtect(4); \
680 *(--_nIns) = AMD64_MODRM_REG(dr, sr); \
681 *(--_nIns) = op & 0xFF; \
682 *(--_nIns) = op >> 8; \
683 if (AMD64_NEEDS_REX(dr) || AMD64_NEEDS_REX(sr)) { \
684 *(--_nIns) = AMD64_REX(0,dr,sr); \
685 }
686
687 #define AMD64_CMOVQ(op, dr, sr) \
688 underrunProtect(4); \
689 *(--_nIns) = AMD64_MODRM_REG(dr, sr); \
690 *(--_nIns) = op & 0xFF; \
691 *(--_nIns) = op >> 8; \
692 *(--_nIns) = AMD64_REX(1,dr,sr);
693
694
695 #define MREQ(dr,sr) do { AMD64_CMOV(0x0f44,dr,sr); asm_output2("cmove %s,%s", gpn(dr),gpn(sr)); } while(0)
696 #define MRNE(dr,sr) do { AMD64_CMOV(0x0f45,dr,sr); asm_output2("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0)
697 #define MRL(dr,sr) do { AMD64_CMOV(0x0f4C,dr,sr); asm_output2("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0)
698 #define MRLE(dr,sr) do { AMD64_CMOV(0x0f4E,dr,sr); asm_output2("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0)
699 #define MRG(dr,sr) do { AMD64_CMOV(0x0f4F,dr,sr); asm_output2("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0)
700 #define MRGE(dr,sr) do { AMD64_CMOV(0x0f4D,dr,sr); asm_output2("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0)
701 #define MRB(dr,sr) do { AMD64_CMOV(0x0f42,dr,sr); asm_output2("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0)
702 #define MRBE(dr,sr) do { AMD64_CMOV(0x0f46,dr,sr); asm_output2("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0)
703 #define MRA(dr,sr) do { AMD64_CMOV(0x0f47,dr,sr); asm_output2("cmova %s,%s", gpn(dr),gpn(sr)); } while(0)
704 #define MRAE(dr,sr) do { AMD64_CMOV(0x0f43,dr,sr); asm_output2("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0)
705 #define MRNC(dr,sr) do { AMD64_CMOV(0x0f43,dr,sr); asm_output2("cmovnc %s,%s", gpn(dr),gpn(sr)); } while(0)
706 #define MRNO(dr,sr) do { AMD64_CMOV(0x0f41,dr,sr); asm_output2("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0)
707
708 #define MRQEQ(dr,sr) do { AMD64_CMOVQ(0x0f44,dr,sr); asm_output2("cmove %s,%s", gpn(dr),gpn(sr)); } while(0)
709 #define MRQNE(dr,sr) do { AMD64_CMOVQ(0x0f45,dr,sr); asm_output2("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0)
710 #define MRQL(dr,sr) do { AMD64_CMOVQ(0x0f4C,dr,sr); asm_output2("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0)
711 #define MRQLE(dr,sr) do { AMD64_CMOVQ(0x0f4E,dr,sr); asm_output2("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0)
712 #define MRQG(dr,sr) do { AMD64_CMOVQ(0x0f4F,dr,sr); asm_output2("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0)
713 #define MRQGE(dr,sr) do { AMD64_CMOVQ(0x0f4D,dr,sr); asm_output2("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0)
714 #define MRQB(dr,sr) do { AMD64_CMOVQ(0x0f42,dr,sr); asm_output2("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0)
715 #define MRQBE(dr,sr) do { AMD64_CMOVQ(0x0f46,dr,sr); asm_output2("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0)
716 #define MRQA(dr,sr) do { AMD64_CMOVQ(0x0f47,dr,sr); asm_output2("cmova %s,%s", gpn(dr),gpn(sr)); } while(0)
717 #define MRQAE(dr,sr) do { AMD64_CMOVQ(0x0f43,dr,sr); asm_output2("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0)
718 #define MRQNC(dr,sr) do { AMD64_CMOVQ(0x0f43,dr,sr); asm_output2("cmovnc %s,%s", gpn(dr),gpn(sr)); } while(0)
719 #define MRQNO(dr,sr) do { AMD64_CMOVQ(0x0f41,dr,sr); asm_output2("cmovno %s,%s", gpn(dr),gpn(sr)); } while(0)
720
721 #define AMD64_LD(reg,disp,base,q) \
722 underrunProtect(7); \
723 AMD64_MODRM_DISP(reg,base,disp); \
724 *(--_nIns) = AMD64_MOV_REG_RM; \
725 if (q || AMD64_NEEDS_REX(reg) || AMD64_NEEDS_REX(base)) \
726 *(--_nIns) = AMD64_REX(q,reg,base);
727
728
729 #define LD(reg,disp,base) do { \
730 AMD64_LD(reg,disp,base,0); \
731 asm_output3("mov dword %s,%d(%s)",gpn(reg),disp,gpn(base)); \
732 } while (0)
733
734
735 #define LDQ(reg,disp,base) do { \
736 AMD64_LD(reg,disp,base,1); \
737 asm_output3("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); \
738 } while (0)
739
740 // load 8-bit, zero extend
741 // (displacements go through AMD64_MODRM_DISP, so 8-bit and 32-bit offsets
742 // are both supported; the old 5-bit/Thumb-mode note was ARM-specific)
743
744 #define LD8Z(r,d,b) do { \
745 underrunProtect(5); \
746 AMD64_MODRM_DISP(r, b, d); \
747 *(--_nIns) = AMD64_MOVZX & 0xFF; \
748 *(--_nIns) = AMD64_MOVZX >> 8; \
749 if (AMD64_NEEDS_REX(r) || AMD64_NEEDS_REX(b)) { \
750 *(--_nIns) = AMD64_REX(0,r,b); \
751 } \
752 asm_output3("movzx %s,%d(%s)", gpn(r),d,gpn(b)); \
753 } while(0)
754
755
756 #define LD16Z(r,d,b) do { \
757 underrunProtect(5); \
758 AMD64_MODRM_DISP(r, b, d); \
759 *(--_nIns) = AMD64_MOVSZ & 0xFF; \
760 *(--_nIns) = AMD64_MOVSZ >> 8; \
761 if (AMD64_NEEDS_REX(r) || AMD64_NEEDS_REX(b)) { \
762 *(--_nIns) = AMD64_REX(0,r,b); \
763 } \
764 asm_output3("movsx %s,%d(%s)", gpn(r),d,gpn(b)); \
765 } while(0)
766
767
768 #define LDi(r,i) do { \
769 underrunProtect(6); \
770 IMM32(i); \
771 *(--_nIns) = AMD64_MODRM_REG(0, r); \
772 *(--_nIns) = AMD64_MOV_RM_IMM; \
773 *(--_nIns) = AMD64_REX(1,0,r); \
774 asm_output2("mov %s,%d",gpn(r),i); \
775 } while (0)
776
777
778 #define LDQi(r,i) do { \
779 underrunProtect(10); \
780 IMM64(i); \
781 *(--_nIns) = AMD64_MOV_REG_IMM(r); \
782 *(--_nIns) = AMD64_REX(1,0,r); \
783 asm_output2("mov %s,%ld",gpn(r),(nj_printf_ld)i); \
784 } while(0)
785
786
787 #define AMD64_ST(base,disp,reg,q) \
788 underrunProtect(7); \
789 AMD64_MODRM_DISP(reg, base, disp); \
790 *(--_nIns) = AMD64_MOV_RM_REG; \
791 if (q || AMD64_NEEDS_REX(reg) || AMD64_NEEDS_REX(base)) \
792 *(--_nIns) = AMD64_REX(q,reg,base); \
793
794
795 #define ST(base,disp,reg) do { \
796 AMD64_ST(base,disp,reg,0); \
797 asm_output3("mov dword %d(%s),%s",disp,gpn(base),gpn(reg)); \
798 } while(0);
799
800
801 #define STQ(base,disp,reg) do { \
802 AMD64_ST(base,disp,reg,1); \
803 asm_output3("mov %d(%s),%s",disp,gpn(base),gpn(reg)); \
804 } while(0);
805
806
807 #define STi(base,disp,imm) do { \
808 underrunProtect(11); \
809 IMM32(imm); \
810 AMD64_MODRM_DISP(0, base, disp); \
811 *(--_nIns) = AMD64_MOV_RM_IMM; \
812 if (AMD64_NEEDS_REX(base)) \
813 *(--_nIns) = AMD64_REX(0,0,base); \
814 asm_output3("mov %d(%s),%d",disp,gpn(base),imm); \
815 } while(0);
816
817
818 #define RET() do { \
819 underrunProtect(1); \
820 *(--_nIns) = AMD64_RET; \
821 asm_output("ret"); \
822 } while (0)
823
824
825 #define INT3() do { \
826 underrunProtect(1); \
827 *(--_nIns) = AMD64_INT3; \
828 asm_output("int3"); \
829 } while (0)
830
831 #define PUSHi(i) do { \
832 if (isS8(i)) { \
833 underrunProtect(2); \
834 _nIns-=2; _nIns[0] = 0x6a; _nIns[1] = (uint8_t)(i); \
835 asm_output1("push %d",i); \
836 } else \
837 { PUSHi32(i); } } while(0)
838
839 #define PUSHi32(i) do { \
840 underrunProtect(5); \
841 IMM32(i); \
842 *(--_nIns) = 0x68; \
843 asm_output1("push %d",i); } while(0)
844
845 /**
846 * Note: PUSH/POP default to 64-bit operand size in long mode, so REX.W is never needed; only REX.B is emitted for R8-R15.
847 */
848
849 #define PUSHr(r) do { \
850 underrunProtect(2); \
851 *(--_nIns) = (uint8_t)AMD64_PUSH_REG(r); \
852 if (AMD64_NEEDS_REX(r)) \
853 *(--_nIns) = AMD64_REX(0,0,r); \
854 asm_output1("push %s",gpn(r)); \
855 } while(0)
856
857 #define PUSHm(d,b) do { \
858 underrunProtect(7); \
859 AMD64_MODRM_DISP(6, b, d); \
860 *(--_nIns) = AMD64_PUSH_RM; \
861 if (AMD64_NEEDS_REX(b)) { \
862 *(--_nIns) = AMD64_REX(0,6,b); \
863 } \
864 asm_output2("push %d(%s)",d,gpn(b)); \
865 } while(0)
866
867
868 #define POPr(r) do { \
869 underrunProtect(2); \
870 *(--_nIns) = (uint8_t)(AMD64_POP_REG(r)); \
871 if (AMD64_NEEDS_REX(r)) \
872 *(--_nIns) = AMD64_REX(0,0,r); \
873 asm_output1("pop %s",gpn(r)); \
874 } while(0)
875
876 #define JCC(o,t,n) do { \
877 underrunProtect(6); \
878 intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
879 if (isS8(tt)) { \
880 verbose_only( NIns* next = _nIns; (void)next; ) \
881 _nIns -= 2; \
882 _nIns[0] = (uint8_t) ( 0x70 | (o) ); \
883 _nIns[1] = (uint8_t) (tt); \
884 asm_output2("%s %lX",(n),(ptrdiff_t)(next+tt)); \
885 } else if (tt <= INT_MAX && tt >= INT_MIN) { \
886 verbose_only( NIns* next = _nIns; ) \
887 IMM32(tt); \
888 _nIns -= 2; \
889 _nIns[0] = 0x0f; \
890 _nIns[1] = (uint8_t) ( 0x80 | (o) ); \
891 asm_output2("%s %lX",(n),(ptrdiff_t)(next+tt)); \
892 } else { \
893 underrunProtect(20); \
894 NanoAssert(!_inExit); \
895 /* We could now be in range, but assume we're not. */ \
896 /* Note we generate the thunk forwards, and the */ \
897 /* jcc to the thunk backwards. */ \
898 uint8_t* base; \
899 intptr_t offs; \
900 base = (uint8_t *)((uintptr_t)_nIns & ~((uintptr_t)NJ_PAGE_SIZE-1)); \
901 base += sizeof(PageHeader) + _pageData; \
902 _pageData += 14; \
903 *(base++) = 0xFF; \
904 *(base++) = 0x25; \
905 *(int *)base = 0; \
906 base += 4; \
907 *(intptr_t *)base = intptr_t(t); \
908 offs = intptr_t(base-6) - intptr_t(_nIns); \
909 NanoAssert(offs >= INT_MIN && offs <= INT_MAX); \
910 if (isS8(offs)) { \
911 _nIns -= 2; \
912 _nIns[0] = uint8_t( 0x70 | (o) ); \
913 _nIns[1] = uint8_t( (offs) ); \
914 } else { \
915 IMM32(offs); \
916 _nIns -= 2; \
917 _nIns[0] = 0x0f; \
918 _nIns[1] = uint8_t( 0x80 | (o) ); \
919 } \
920 asm_output3("%s %d(rip) #%lX",n,offs,intptr_t(t)); \
921 } \
922 } while(0)
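/* Note on the far-branch case above: the 14-byte thunk placed in the page
 * header area is "FF 25 00 00 00 00" (jmp qword [rip+0]) immediately followed
 * by the 8-byte absolute target; the conditional branch then reaches that
 * thunk with an ordinary rel8/rel32 jcc. */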
923
924 #define JMPm_nochk(rip) do { \
925 IMM32(rip); \
926 *(--_nIns) = 0x25; \
927 *(--_nIns) = 0xFF; \
928 } while (0)
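/* JMPm_nochk(disp) emits "FF 25 <disp32>", i.e. jmp qword [rip+disp32]. Callers
 * that pass 0 first store the 8-byte absolute target directly below _nIns, so
 * the qword loaded is the one that immediately follows the jump instruction. */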
929
930 #define JMP_long(t) do { \
931 underrunProtect(5); \
932 intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
933 JMP_long_nochk_offset(tt); \
934 } while(0)
935
936 #define JMP(t) do { \
937 underrunProtect(5); \
938 intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
939 if (isS8(tt)) { \
940 verbose_only( NIns* next = _nIns; (void)next; ) \
941 _nIns -= 2; \
942 _nIns[0] = 0xeb; \
943 _nIns[1] = (uint8_t) ( (tt)&0xff ); \
944 asm_output1("jmp %lX",(ptrdiff_t)(next+tt)); \
945 } else { \
946 if (tt >= INT_MIN && tt <= INT_MAX) { \
947 JMP_long_nochk_offset(tt); \
948 } else { \
949 underrunProtect(14); \
950 _nIns -= 8; \
951 *(intptr_t *)_nIns = intptr_t(t); \
952 JMPm_nochk(0); \
953 } \
954 } } while(0)
955
956 #define JMP_long_nochk(t) do { \
957 intptr_t tt = (intptr_t)t - (intptr_t)_nIns; \
958 JMP_long_nochk_offset(tt); \
959 } while(0)
960
961 #define JMPc 0xe9
962
963 #define JMP_long_placeholder() do { \
964 underrunProtect(14); \
965 IMM64(-1); \
966 JMPm_nochk(0); \
967 } while (0)
968
969 // this should only be used when you can guarantee there is enough room on the page
970 #define JMP_long_nochk_offset(o) do {\
971 verbose_only( NIns* next = _nIns; (void)next; ) \
972 NanoAssert(o <= INT_MAX && o >= INT_MIN); \
973 IMM32((o)); \
974 *(--_nIns) = JMPc; \
975 asm_output1("jmp %lX",(ptrdiff_t)(next+(o))); } while(0)
976
977 #define JMPr(r) do { \
978 underrunProtect(2); \
979 *(--_nIns) = AMD64_MODRM_REG(4, r); \
980 *(--_nIns) = 0xFF; \
981 } while (0)
982
983 #define JE(t) JCC(0x04, t, "je")
984 #define JNE(t) JCC(0x05, t, "jne")
985 #define JP(t) JCC(0x0A, t, "jp")
986 #define JNP(t) JCC(0x0B, t, "jnp")
987
988 #define JB(t) JCC(0x02, t, "jb")
989 #define JNB(t) JCC(0x03, t, "jnb")
990 #define JBE(t) JCC(0x06, t, "jbe")
991 #define JNBE(t) JCC(0x07, t, "jnbe")
992
993 #define JA(t) JCC(0x07, t, "ja")
994 #define JNA(t) JCC(0x06, t, "jna")
995 #define JAE(t) JCC(0x03, t, "jae")
996 #define JNAE(t) JCC(0x02, t, "jnae")
997
998 #define JL(t) JCC(0x0C, t, "jl")
999 #define JNL(t) JCC(0x0D, t, "jnl")
1000 #define JLE(t) JCC(0x0E, t, "jle")
1001 #define JNLE(t) JCC(0x0F, t, "jnle")
1002
1003 #define JG(t) JCC(0x0F, t, "jg")
1004 #define JNG(t) JCC(0x0E, t, "jng")
1005 #define JGE(t) JCC(0x0D, t, "jge")
1006 #define JNGE(t) JCC(0x0C, t, "jnge")
1007
1008 #define JC(t) JCC(0x02, t, "jc")
1009 #define JNC(t) JCC(0x03, t, "jnc")
1010 #define JO(t) JCC(0x00, t, "jo")
1011 #define JNO(t) JCC(0x01, t, "jno")
1012
1013
1014 #define AMD64_OP3(c,q,r,b) \
1015 *(--_nIns) = (uint8_t)((c)&0xff); \
1016 *(--_nIns) = (uint8_t)(((c)>>8)&0xff); \
1017 if (q \
1018 || AMD64_NEEDS_REX(r) \
1019 || AMD64_NEEDS_REX(b)) { \
1020 *(--_nIns) = AMD64_REX(q,r,b); \
1021 } \
1022 *(--_nIns) = (uint8_t)(((c)>>16)&0xff);
1023
1024
1025 #define SSE_LDQ(r,d,b) do { \
1026 underrunProtect(7); \
1027 AMD64_MODRM_DISP(r,b,d); \
1028 AMD64_OP3(AMD64_MOVD_REG_RM,1,r,b); \
1029 asm_output3("movd %s,%d(%s)",gpn(r),(d),gpn(b)); \
1030 } while (0)
1031
1032
1033 #define SSE_STQ(d,r,b) do { \
1034 underrunProtect(7); \
1035 AMD64_MODRM_DISP(b,r,d); \
1036 AMD64_OP3(AMD64_MOVD_RM_REG,1,b,r); \
1037 asm_output3("movd %d(%s),%s",(d),gpn(r),gpn(b)); \
1038 } while (0)
1039
1040
1041 #define SSE_CVTSI2SD(xr,gr) do{ \
1042 underrunProtect(5); \
1043 *(--_nIns) = AMD64_MODRM_REG(xr, gr); \
1044 AMD64_OP3(AMD64_CVTSI2SD,0,xr,gr); \
1045 asm_output2("cvtsi2sd %s,%s",gpn(xr),gpn(gr)); \
1046 } while(0)
1047
1048 // move 64 bits between a gp reg and an xmm reg (movq), in either direction
1049
1050 #define SSE_MOVD(d,s) do{ \
1051 underrunProtect(7); \
1052 if (_is_xmm_reg_(s)) { \
1053 NanoAssert(_is_gp_reg_(d)); \
1054 *(--_nIns) = AMD64_MODRM_REG(s, d); \
1055 AMD64_OP3(AMD64_MOVD_RM_REG, 1, s, d); \
1056 } else { \
1057 NanoAssert(_is_gp_reg_(s)); \
1058 NanoAssert(_is_xmm_reg_(d)); \
1059 *(--_nIns) = AMD64_MODRM_REG(d, s); \
1060 AMD64_OP3(AMD64_MOVD_REG_RM, 1, d, s); \
1061 } \
1062 asm_output2("movd %s,%s",gpn(d),gpn(s)); \
1063 } while(0)
1064
1065
1066 #define SSE_MOVSD(rd,rs) do{ \
1067 underrunProtect(7); \
1068 *(--_nIns) = AMD64_MODRM_REG(rd, rs); \
1069 AMD64_OP3(AMD64_MOVSD_REG_RM,0,rd,rs); \
1070 asm_output2("movsd %s,%s",gpn(rd),gpn(rs)); \
1071 } while(0)
1072
1073
1074 #define SSE_MOVDm(d,b,xrs) do { \
1075 AMD64_MODRM_DISP(xrs, b, d); \
1076 AMD64_OP3(AMD64_MOVD_RM_REG, 1, xrs, b); \
1077 asm_output3("movd %d(%s),%s", d, gpn(b), gpn(xrs)); \
1078 } while(0)
1079
1080
1081 #define SSE_ADDSD(rd,rs) do{ \
1082 underrunProtect(5); \
1083 *(--_nIns) = AMD64_MODRM_REG(rd, rs); \
1084 AMD64_OP3(AMD64_ADDSD, 0, rd, rs); \
1085 asm_output2("addsd %s,%s",gpn(rd),gpn(rs)); \
1086 } while(0)
1087
1088
1089 #define SSE_ADDSDm(r,addr)do { \
1090 underrunProtect(10); \
1091 ptrdiff_t d = (NIns *)addr - _nIns; \
1092 NanoAssert(d >= INT_MIN && d <= INT_MAX); \
1093 IMM32((int32_t)d); \
1094 *(--_nIns) = AMD64_MODRM(0, r, 5); \
1095 AMD64_OP3(AMD64_ADDSD, 0, r, 0); \
1096 asm_output3("addsd %s,%p // =%f",gpn(r),addr,*(double*)addr); \
1097 } while(0)
1098
1099
1100 #define SSE_SUBSD(rd,rs) do{ \
1101 underrunProtect(5); \
1102 *(--_nIns) = AMD64_MODRM_REG(rd, rs); \
1103 AMD64_OP3(AMD64_SUBSD, 0, rd, rs); \
1104 asm_output2("subsd %s,%s",gpn(rd),gpn(rs)); \
1105 } while(0)
1106
1107
1108 #define SSE_MULSD(rd,rs) do{ \
1109 underrunProtect(5); \
1110 *(--_nIns) = AMD64_MODRM_REG(rd, rs); \
1111 AMD64_OP3(AMD64_MULSD, 0, rd, rs); \
1112 asm_output2("mulsd %s,%s",gpn(rd),gpn(rs)); \
1113 } while(0)
1114
1115
1116 #define SSE_DIVSD(rd,rs) do{ \
1117 underrunProtect(5); \
1118 *(--_nIns) = AMD64_MODRM_REG(rd, rs); \
1119 AMD64_OP3(AMD64_DIVSD, 0, rd, rs); \
1120 asm_output2("divsd %s,%s",gpn(rd),gpn(rs)); \
1121 } while(0)
1122
1123
1124 #define SSE_UCOMISD(rl,rr) do{ \
1125 *(--_nIns) = AMD64_MODRM_REG(rl, rr); \
1126 AMD64_OP3(AMD64_UCOMISD, 0, rl, rr); \
1127 asm_output2("ucomisd %s,%s",gpn(rl),gpn(rr)); \
1128 } while(0)
1129
1130 #define EMIT_XORPD_MASK(mask) \
1131 do { \
1132 uint8_t *base, *begin; \
1133 uint32_t *addr; \
1134 base = (uint8_t *)((uintptr_t)_nIns & ~((uintptr_t)NJ_PAGE_SIZE-1)); \
1135 base += sizeof(PageHeader) + _pageData; \
1136 begin = base; \
1137 /* Make sure we align */ \
1138 if ((uintptr_t)base & 0xF) { \
1139 base = (NIns *)((uintptr_t)base & ~(0xF)); \
1140 base += 16; \
1141 } \
1142 _pageData += (int32_t)(base - begin) + 16; \
1143 _dblNegPtr = (NIns *)base; \
1144 addr = (uint32_t *)base; \
1145 addr[0] = mask[0]; \
1146 addr[1] = mask[1]; \
1147 addr[2] = mask[2]; \
1148 addr[3] = mask[3]; \
1149 } while (0)
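/* Note on the macro above: the 16-byte constant (e.g. the sign-bit mask used
 * to negate a double) is written, 16-byte aligned, into the current page's
 * header data area tracked by _pageData; _dblNegPtr records its address so
 * SSE_XORPD below can reference it RIP-relative without re-emitting it. */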
1150
1151 /**
1152 * Note: high underrun protect is for:
1153 * 12 bytes of alignment max
1154 * 16 bytes for the data
1155 * 10 bytes for the instruction
1156 */
1157 #define SSE_XORPD(rr, maskaddr) do { \
1158 if (_dblNegPtr != NULL) { \
1159 underrunProtect(10); \
1160 } \
1161 if (_dblNegPtr == NULL) { \
1162 underrunProtect(38); \
1163 EMIT_XORPD_MASK(maskaddr); \
1164 } \
1165 ptrdiff_t d = _dblNegPtr - _nIns; \
1166 IMM32((int32_t)d); \
1167 *(--_nIns) = AMD64_MODRM(0, rr, 5); \
1168 AMD64_OP3(AMD64_XORPD, 0, rr, 0); \
1169 asm_output2("xorpd %s,[0x%X]",gpn(rr),(int32_t)d); \
1170 } while(0)
1171
1172
1173 #define SSE_XORPDr(rd,rs) do{ \
1174 underrunProtect(5); \
1175 *(--_nIns) = AMD64_MODRM_REG(rd, rs); \
1176 AMD64_OP3(AMD64_XORPD, 0, rd, rs); \
1177 asm_output2("xorpd %s,%s",gpn(rd),gpn(rs)); \
1178 } while(0)
1179
1180
1181 #define TEST_AL(i) do { \
1182 underrunProtect(2); \
1183 *(--_nIns) = uint8_t(i); \
1184 *(--_nIns) = 0xA8; \
1185 asm_output1("test al, %d",i); \
1186 } while(0)
1187
1188
1189 #define PUSHFQ() do { \
1190 underrunProtect(1); \
1191 *(--_nIns) = AMD64_PUSHF; \
1192 asm_output("pushf"); \
1193 } while (0)
1194
1195 #define CALL(c) do { \
1196 underrunProtect(5); \
1197 intptr_t offset = (c->_address) - ((intptr_t)_nIns); \
1198 if (offset <= INT_MAX && offset >= INT_MIN) { \
1199 IMM32( (uint32_t)offset ); \
1200 *(--_nIns) = 0xE8; \
1201 } else { \
1202 *(--_nIns) = 0xD0; \
1203 *(--_nIns) = 0xFF; \
1204 LDQi(RAX, c->_address); \
1205 } \
1206 verbose_only(asm_output1("call %s",(c->_name));) \
1207 } while (0)
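/* Note on CALL: when the target is within +/-2GB of the call site a direct
 * rel32 call (E8) is used; otherwise the address is loaded into RAX via LDQi
 * and an indirect "FF D0" (call rax) is emitted. Since emission is backwards,
 * the LDQi ends up before the call in execution order. */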
1208
1209 }
1210 #endif // __nanojit_NativeAMD64__
