Contents of /trunk/js/nanojit/Assembler.cpp

Revision 399 - Tue Dec 9 03:37:47 2008 UTC by siliconforks
File size: 53028 byte(s)
Use SpiderMonkey from Firefox 3.1b2.

1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 *
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
9 *
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
14 *
15 * The Original Code is [Open Source Virtual Machine].
16 *
17 * The Initial Developer of the Original Code is
18 * Adobe System Incorporated.
19 * Portions created by the Initial Developer are Copyright (C) 2004-2007
20 * the Initial Developer. All Rights Reserved.
21 *
22 * Contributor(s):
23 * Adobe AS3 Team
24 *
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
36 *
37 * ***** END LICENSE BLOCK ***** */
38
39 #include "nanojit.h"
40
41 #ifdef FEATURE_NANOJIT
42
43 #ifdef AVMPLUS_PORTING_API
44 #include "portapi_nanojit.h"
45 #endif
46
47 #if defined(AVMPLUS_UNIX) && defined(AVMPLUS_ARM)
48 #include <asm/unistd.h>
49 extern "C" void __clear_cache(char *BEG, char *END);
50 #endif
51
52 namespace nanojit
53 {
54
55
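/**
 * Drops instructions whose results are never used.  An instruction is kept
 * if it has side effects (stores, LIR_loop, LIR_label, LIR_live, returns),
 * is a guard or branch, is a call that cannot be CSE'd, or still has a live
 * reservation (resv() != 0); everything else is filtered out of the stream
 * before it reaches the assembler.
 */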
56 class DeadCodeFilter: public LirFilter
57 {
58 const CallInfo *functions;
59
60 bool ignoreInstruction(LInsp ins)
61 {
62 LOpcode op = ins->opcode();
63 if (ins->isStore() ||
64 op == LIR_loop ||
65 op == LIR_label ||
66 op == LIR_live ||
67 isRet(op)) {
68 return false;
69 }
70 return ins->resv() == 0;
71 }
72
73 public:
74 DeadCodeFilter(LirFilter *in, const CallInfo *f) : LirFilter(in), functions(f) {}
75 LInsp read() {
76 for (;;) {
77 LInsp i = in->read();
78 if (!i || i->isGuard() || i->isBranch()
79 || i->isCall() && !i->isCse(functions)
80 || !ignoreInstruction(i))
81 return i;
82 }
83 }
84 };
85
86 #ifdef NJ_VERBOSE
87 class VerboseBlockReader: public LirFilter
88 {
89 Assembler *assm;
90 LirNameMap *names;
91 avmplus::List<LInsp, avmplus::LIST_NonGCObjects> block;
92 bool flushnext;
93 public:
94 VerboseBlockReader(LirFilter *in, Assembler *a, LirNameMap *n)
95 : LirFilter(in), assm(a), names(n), block(a->_gc), flushnext(false)
96 {}
97
98 void flush() {
99 flushnext = false;
100 if (!block.isEmpty()) {
101 for (int j=0,n=block.size(); j < n; j++) {
102 LIns *i = block[j];
103 assm->outputf(" %s", names->formatIns(block[j]));
104 if (i->isop(LIR_label)) {
105 assm->outputf(" %p:", assm->_nIns);
106 assm->output("");
107 }
108 }
109 block.clear();
110 }
111 }
112
113 void flush_add(LInsp i) {
114 flush();
115 block.add(i);
116 }
117
118 LInsp read() {
119 LInsp i = in->read();
120 if (!i) {
121 flush();
122 return i;
123 }
124 if (i->isGuard()) {
125 flush_add(i);
126 if (i->oprnd1())
127 block.add(i->oprnd1());
128 }
129 else if (isRet(i->opcode()) || i->isBranch()) {
130 flush_add(i);
131 }
132 else {
133 if (flushnext)
134 flush();
135 flush_add(i);//block.add(i);
136 if (i->isop(LIR_label))
137 flushnext = true;
138 }
139 return i;
140 }
141 };
142 #endif
143
144 /**
145 * Need the following:
146 *
147 * - merging paths ( build a graph? ), possibly use external rep to drive codegen
148 */
149 Assembler::Assembler(Fragmento* frago)
150 : hasLoop(0)
151 , _frago(frago)
152 , _gc(frago->core()->gc)
153 , _labels(_gc)
154 , _patches(_gc)
155 , pending_lives(_gc)
156 {
157 AvmCore *core = frago->core();
158 nInit(core);
159 verbose_only( _verbose = !core->quiet_opt() && core->verbose() );
160 verbose_only( _outputCache = 0);
161
162 internalReset();
163 pageReset();
164 }
165
166 void Assembler::arReset()
167 {
168 _activation.highwatermark = 0;
169 _activation.lowwatermark = 0;
170 _activation.tos = 0;
171
172 for(uint32_t i=0; i<NJ_MAX_STACK_ENTRY; i++)
173 _activation.entry[i] = 0;
174 }
175
176 void Assembler::registerResetAll()
177 {
178 nRegisterResetAll(_allocator);
179
180 // keep a tally of the registers to check that our allocator works correctly
181 debug_only(_allocator.count = _allocator.countFree(); )
182 debug_only(_allocator.checkCount(); )
183 debug_only(_fpuStkDepth = 0; )
184 }
185
186 Register Assembler::registerAlloc(RegisterMask allow)
187 {
188 RegAlloc &regs = _allocator;
189 // RegisterMask prefer = livePastCall(_ins) ? saved : scratch;
190 RegisterMask prefer = SavedRegs & allow;
191 RegisterMask free = regs.free & allow;
192
193 RegisterMask set = prefer;
194 if (set == 0) set = allow;
195
196 if (free)
197 {
198 // at least one is free
199 set &= free;
200
201 // ok we have at least 1 free register so let's try to pick
202 // the best one given the profile of the instruction
203 if (!set)
204 {
205 // desired register class is not free so pick first of any class
206 set = free;
207 }
208 NanoAssert((set & allow) != 0);
209 Register r = nRegisterAllocFromSet(set);
210 regs.used |= rmask(r);
211 return r;
212 }
213 counter_increment(steals);
214
215 // nothing free, steal one
216 // LSRA says pick the one with the furthest use
217 LIns* vic = findVictim(regs, allow);
218 NanoAssert(vic != NULL);
219
220 Reservation* resv = getresv(vic);
221
222 // restore vic
223 Register r = resv->reg;
224 regs.removeActive(r);
225 resv->reg = UnknownReg;
226
227 asm_restore(vic, resv, r);
228 return r;
229 }
230
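/**
 * The Reservation table is managed as a free list threaded through the
 * arIndex fields of unused entries: reserveReset() links entries
 * 1..NJ_MAX_STACK_ENTRY-1 together, reserveAlloc() pops the entry that
 * _resvFree points at, and reserveFree() pushes an entry back.  Entry 0 is
 * a sentinel; when an allocation returns it, the table is exhausted and
 * ResvFull is set.
 */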
231 void Assembler::reserveReset()
232 {
233 _resvTable[0].arIndex = 0;
234 int i;
235 for(i=1; i<NJ_MAX_STACK_ENTRY; i++) {
236 _resvTable[i].arIndex = i-1;
237 _resvTable[i].used = 0;
238 }
239 _resvFree= i-1;
240 }
241
242 /**
243 * these instructions don't have to be saved & reloaded to spill,
244 * they can just be recalculated w/out any inputs.
245 */
246 bool Assembler::canRemat(LIns *i) {
247 return i->isconst() || i->isconstq() || i->isop(LIR_alloc);
248 }
249
250 Reservation* Assembler::reserveAlloc(LInsp i)
251 {
252 uint32_t item = _resvFree;
253 Reservation *r = &_resvTable[item];
254 _resvFree = r->arIndex;
255 r->reg = UnknownReg;
256 r->arIndex = 0;
257 r->used = 1;
258 if (!item)
259 setError(ResvFull);
260 i->setresv(item);
261 return r;
262 }
263
264 void Assembler::reserveFree(LInsp i)
265 {
266 Reservation *rs = getresv(i);
267 NanoAssert(rs == &_resvTable[i->resv()]);
268 rs->arIndex = _resvFree;
269 rs->used = 0;
270 _resvFree = i->resv();
271 i->setresv(0);
272 }
273
274 void Assembler::internalReset()
275 {
276 // readies for a brand spanking new code generation pass.
277 registerResetAll();
278 reserveReset();
279 arReset();
280 pending_lives.clear();
281 }
282
283 NIns* Assembler::pageAlloc(bool exitPage)
284 {
285 Page*& list = (exitPage) ? _nativeExitPages : _nativePages;
286 Page* page = _frago->pageAlloc();
287 if (page)
288 {
289 page->next = list;
290 list = page;
291 nMarkExecute(page);
292 _stats.pages++;
293 }
294 else
295 {
296 // return prior page (to allow overwrites) and mark out of mem
297 page = list;
298 setError(OutOMem);
299 if (!list)
300 return NULL;
301 }
302 return &page->code[sizeof(page->code)/sizeof(NIns)]; // just past the end
303 }
304
305 void Assembler::pageReset()
306 {
307 pagesFree(_nativePages);
308 pagesFree(_nativeExitPages);
309
310 _nIns = 0;
311 _nExitIns = 0;
312 _stats.pages = 0;
313
314 nativePageReset();
315 }
316
317 void Assembler::pagesFree(Page*& page)
318 {
319 while(page)
320 {
321 Page *next = page->next; // pull next ptr prior to free
322 _frago->pageFree(page);
323 page = next;
324 }
325 }
326
327 #define bytesFromTop(x) ( (size_t)(x) - (size_t)pageTop(x) )
328 #define bytesToBottom(x) ( (size_t)pageBottom(x) - (size_t)(x) )
329 #define bytesBetween(x,y) ( (size_t)(x) - (size_t)(y) )
330
331 int32_t Assembler::codeBytes()
332 {
333 // start and end on same page?
334 size_t exit = 0;
335 int32_t pages = _stats.pages;
336 if (_nExitIns-1 == _stats.codeExitStart)
337 ;
338 else if (samepage(_nExitIns,_stats.codeExitStart))
339 exit = bytesBetween(_stats.codeExitStart, _nExitIns);
340 else
341 {
342 pages--;
343 exit = ((intptr_t)_stats.codeExitStart & (NJ_PAGE_SIZE-1)) ? bytesFromTop(_stats.codeExitStart)+1 : 0;
344 exit += bytesToBottom(_nExitIns)+1;
345 }
346
347 size_t main = 0;
348 if (_nIns-1 == _stats.codeStart)
349 ;
350 else if (samepage(_nIns,_stats.codeStart))
351 main = bytesBetween(_stats.codeStart, _nIns);
352 else
353 {
354 pages--;
355 main = ((intptr_t)_stats.codeStart & (NJ_PAGE_SIZE-1)) ? bytesFromTop(_stats.codeStart)+1 : 0;
356 main += bytesToBottom(_nIns)+1;
357 }
358 //fprintf(stderr,"size %d, exit is %d, main is %d, page count %d, sizeof %d\n", (int)((pages) * NJ_PAGE_SIZE + main + exit),(int)exit, (int)main, (int)_stats.pages, (int)sizeof(Page));
359 return (pages) * NJ_PAGE_SIZE + main + exit;
360 }
361
362 #undef bytesFromTop
363 #undef bytesToBottom
364 #undef bytesBetween
365
366 Page* Assembler::handoverPages(bool exitPages)
367 {
368 Page*& list = (exitPages) ? _nativeExitPages : _nativePages;
369 NIns*& ins = (exitPages) ? _nExitIns : _nIns;
370 Page* start = list;
371 list = 0;
372 ins = 0;
373 return start;
374 }
375
376 #ifdef _DEBUG
377 bool Assembler::onPage(NIns* where, bool exitPages)
378 {
379 Page* page = (exitPages) ? _nativeExitPages : _nativePages;
380 bool on = false;
381 while(page)
382 {
383 if (samepage(where-1,page))
384 on = true;
385 page = page->next;
386 }
387 return on;
388 }
389
390 void Assembler::pageValidate()
391 {
392 if (error()) return;
393 // _nIns and _nExitIns need to be on at least
394 // one of these pages
395 NanoAssertMsg( onPage(_nIns)&& onPage(_nExitIns,true), "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
396 }
397 #endif
398
399 #ifdef _DEBUG
400
401 void Assembler::resourceConsistencyCheck()
402 {
403 if (error()) return;
404
405 #ifdef NANOJIT_IA32
406 NanoAssert(_allocator.active[FST0] && _fpuStkDepth == -1 ||
407 !_allocator.active[FST0] && _fpuStkDepth == 0);
408 #endif
409
410 AR &ar = _activation;
411 // check AR entries
412 NanoAssert(ar.highwatermark < NJ_MAX_STACK_ENTRY);
413 LIns* ins = 0;
414 RegAlloc* regs = &_allocator;
415 for(uint32_t i = ar.lowwatermark; i < ar.tos; i++)
416 {
417 ins = ar.entry[i];
418 if ( !ins )
419 continue;
420 Reservation *r = getresv(ins);
421 NanoAssert(r != 0);
422 int32_t idx = r - _resvTable;
423 NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a stack location assigned to it");
424 if (r->arIndex) {
425 if (ins->isop(LIR_alloc)) {
426 int j=i+1;
427 for (int n = i + (ins->size()>>2); j < n; j++) {
428 NanoAssert(ar.entry[j]==ins);
429 }
430 NanoAssert(r->arIndex == (uint32_t)j-1);
431 i = j-1;
432 }
433 else if (ins->isQuad()) {
434 NanoAssert(ar.entry[i - stack_direction(1)]==ins);
435 i += 1; // skip high word
436 }
437 else {
438 NanoAssertMsg(r->arIndex == i, "Stack record index mismatch");
439 }
440 }
441 NanoAssertMsg( r->reg==UnknownReg || regs->isConsistent(r->reg,ins), "Register record mismatch");
442 }
443
444 registerConsistencyCheck();
445
446 // check resv table
447 int32_t inuseCount = 0;
448 int32_t notInuseCount = 0;
449 for(uint32_t i=1; i < sizeof(_resvTable)/sizeof(_resvTable[0]); i++) {
450 _resvTable[i].used ? inuseCount++ : notInuseCount++;
451 }
452
453 int32_t freeCount = 0;
454 uint32_t free = _resvFree;
455 while(free) {
456 free = _resvTable[free].arIndex;
457 freeCount++;
458 }
459 NanoAssert( ( freeCount==notInuseCount && inuseCount+notInuseCount==(NJ_MAX_STACK_ENTRY-1) ) );
460 }
461
462 void Assembler::registerConsistencyCheck()
463 {
464 // check registers
465 RegAlloc *regs = &_allocator;
466 uint32_t managed = regs->managed;
467 Register r = FirstReg;
468 while(managed)
469 {
470 if (managed&1)
471 {
472 if (regs->isFree(r))
473 {
474 NanoAssert(regs->getActive(r)==0);
475 }
476 else
477 {
478 LIns* ins = regs->getActive(r);
479 // @todo we should be able to check across RegAlloc's somehow (to include savedGP...)
480 Reservation *v = getresv(ins);
481 NanoAssert(v != 0);
482 int32_t idx = v - _resvTable;
483 NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY);
484 NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a register assigned to it");
485 NanoAssertMsg( regs->getActive(v->reg)==ins, "Register record mismatch");
486 }
487 }
488
489 // next register in bitfield
490 r = nextreg(r);
491 managed >>= 1;
492 }
493 }
494 #endif /* _DEBUG */
495
496 void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Reservation* &resva, LIns* ib, Reservation* &resvb)
497 {
498 if (ia == ib)
499 {
500 findRegFor(ia, allow);
501 resva = resvb = getresv(ia);
502 }
503 else
504 {
505 Register rb = UnknownReg;
506 resvb = getresv(ib);
507 if (resvb && (rb = resvb->reg) != UnknownReg) {
508 if (allow & rmask(rb)) {
509 // ib already assigned to an allowable reg, keep that one
510 allow &= ~rmask(rb);
511 } else {
512 // ib assigned to unusable reg, pick a different one below.
513 rb = UnknownReg;
514 }
515 }
516 Register ra = findRegFor(ia, allow);
517 resva = getresv(ia);
518 NanoAssert(error() || (resva != 0 && ra != UnknownReg));
519 if (rb == UnknownReg)
520 {
521 allow &= ~rmask(ra);
522 findRegFor(ib, allow);
523 resvb = getresv(ib);
524 }
525 }
526 }
527
528 Register Assembler::findSpecificRegFor(LIns* i, Register w)
529 {
530 return findRegFor(i, rmask(w));
531 }
532
533 Register Assembler::getBaseReg(LIns *i, int &d, RegisterMask allow)
534 {
535 if (i->isop(LIR_alloc)) {
536 d += findMemFor(i);
537 return FP;
538 } else {
539 return findRegFor(i, allow);
540 }
541 }
542
543 Register Assembler::findRegFor(LIns* i, RegisterMask allow)
544 {
545 if (i->isop(LIR_alloc)) {
546 // never allocate a reg for this w/out stack space too
547 findMemFor(i);
548 }
549
550 Reservation* resv = getresv(i);
551 Register r;
552
553 // if we have an existing reservation and it has a non-unknown
554 // register allocated, and that register is in our allowed mask,
555 // return it.
556 if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
557 _allocator.useActive(r);
558 return r;
559 }
560
561 // figure out what registers are preferred for this instruction
562 RegisterMask prefer = hint(i, allow);
563
564 // if we didn't have a reservation, allocate one now
565 if (!resv)
566 resv = reserveAlloc(i);
567
568 r = resv->reg;
569
570 #ifdef AVMPLUS_IA32
571 if (r != UnknownReg &&
572 ((rmask(r)&XmmRegs) && !(allow&XmmRegs) ||
573 (rmask(r)&x87Regs) && !(allow&x87Regs)))
574 {
575 // x87 <-> xmm copy required
576 //_nvprof("fpu-evict",1);
577 evict(r);
578 r = UnknownReg;
579 }
580 #endif
581
582 if (r == UnknownReg)
583 {
584 r = resv->reg = registerAlloc(prefer);
585 _allocator.addActive(r, i);
586 return r;
587 }
588 else
589 {
590 // the already-allocated register isn't in the allowed mask;
591 // we need to grab a new one and then copy over the old
592 // contents to the new.
593 resv->reg = UnknownReg;
594 _allocator.retire(r);
595 Register s = resv->reg = registerAlloc(prefer);
596 _allocator.addActive(s, i);
597 if ((rmask(r) & GpRegs) && (rmask(s) & GpRegs)) {
598 MR(r, s);
599 }
600 else {
601 asm_nongp_copy(r, s);
602 }
603 return s;
604 }
605 }
606
607 int Assembler::findMemFor(LIns *i)
608 {
609 Reservation* resv = getresv(i);
610 if (!resv)
611 resv = reserveAlloc(i);
612 if (!resv->arIndex) {
613 resv->arIndex = arReserve(i);
614 NanoAssert(resv->arIndex <= _activation.highwatermark);
615 }
616 return disp(resv);
617 }
618
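// Code is generated in reverse, from the last LIR instruction back to the
// first, so an instruction's uses are assembled before its definition.
// prepResultReg() is called at the definition: it picks the register that
// will hold the result and then releases the instruction's reservation via
// freeRsrcOf(), since no earlier (still-to-be-assembled) code can refer to
// the value.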
619 Register Assembler::prepResultReg(LIns *i, RegisterMask allow)
620 {
621 Reservation* resv = getresv(i);
622 const bool pop = !resv || resv->reg == UnknownReg;
623 Register rr = findRegFor(i, allow);
624 freeRsrcOf(i, pop);
625 return rr;
626 }
627
628 void Assembler::asm_spilli(LInsp i, Reservation *resv, bool pop)
629 {
630 int d = disp(resv);
631 Register rr = resv->reg;
632 bool quad = i->opcode() == LIR_param || i->isQuad();
633 asm_spill(rr, d, pop, quad);
634 if (d)
635 {
636 verbose_only(if (_verbose) {
637 outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
638 })
639 }
640 }
641
642 void Assembler::freeRsrcOf(LIns *i, bool pop)
643 {
644 Reservation* resv = getresv(i);
645 int index = resv->arIndex;
646 Register rr = resv->reg;
647
648 if (rr != UnknownReg)
649 {
650 asm_spilli(i, resv, pop);
651 _allocator.retire(rr); // free any register associated with entry
652 }
653 if (index)
654 arFree(index); // free any stack space associated with entry
655 reserveFree(i); // clear fields of entry and add it to free list
656 }
657
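// Force register r to be free: registerAlloc(rmask(r)) steals r from its
// current owner (emitting a restore for that value), and the register is
// then returned to the free set.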
658 void Assembler::evict(Register r)
659 {
660 registerAlloc(rmask(r));
661 _allocator.addFree(r);
662 }
663
664 void Assembler::patch(GuardRecord *lr)
665 {
666 Fragment *frag = lr->exit->target;
667 NanoAssert(frag->fragEntry != 0);
668 NIns* was = nPatchBranch((NIns*)lr->jmpToTarget, frag->fragEntry);
669 verbose_only(verbose_outputf("patching jump at %p to target %p (was %p)\n",
670 lr->jmpToTarget, frag->fragEntry, was);)
671 (void)was;
672 }
673
674 void Assembler::patch(SideExit *exit)
675 {
676 GuardRecord *rec = exit->guards;
677 AvmAssert(rec);
678 while (rec) {
679 patch(rec);
680 rec = rec->next;
681 }
682 }
683
684 void Assembler::disconnectLoop(GuardRecord *lr)
685 {
686 NanoAssert(lr->stubEntry);
687 NIns* was = nPatchBranch((NIns*)lr->jmpToStub, (NIns*)lr->stubEntry);
688 verbose_only(verbose_outputf("disconnected loop-jump at %p: exiting to %p (was looping to %p)\n",
689 lr->jmpToStub, lr->stubEntry, was);)
690 NanoAssert(lr->exit->from->loopEntry == was);
691 }
692
693 void Assembler::reconnectLoop(GuardRecord *lr)
694 {
695 NanoAssert(lr->exit->from->loopEntry);
696 NIns* was = nPatchBranch((NIns*)lr->jmpToStub, lr->exit->from->loopEntry);
697 verbose_only(verbose_outputf("reconnected loop-jump at %p: looping to %p (was exiting to %p)\n",
698 lr->jmpToStub, lr->exit->from->loopEntry, was);)
699 NanoAssert(lr->stubEntry == was);
700 }
701
702 NIns* Assembler::asm_exit(LInsp guard)
703 {
704 SideExit *exit = guard->record()->exit;
705 NIns* at = 0;
706 if (!_branchStateMap->get(exit))
707 {
708 at = asm_leave_trace(guard);
709 }
710 else
711 {
712 RegAlloc* captured = _branchStateMap->get(exit);
713 intersectRegisterState(*captured);
714 verbose_only(
715 verbose_outputf(" merging trunk with %s",
716 _frago->labels->format(exit->target));
717 verbose_outputf(" %p:",_nIns);
718 )
719 at = exit->target->fragEntry;
720 NanoAssert(at != 0);
721 _branchStateMap->remove(exit);
722 }
723 return at;
724 }
725
726 NIns* Assembler::asm_leave_trace(LInsp guard)
727 {
728 verbose_only(bool priorVerbose = _verbose; )
729 verbose_only( _verbose = verbose_enabled() && _frago->core()->config.verbose_exits; )
730 verbose_only( int32_t nativeSave = _stats.native );
731 verbose_only(verbose_outputf("--------------------------------------- end exit block %p", guard);)
732
733 RegAlloc capture = _allocator;
734
735 // this point is unreachable. so free all the registers.
736 // if an instruction has a stack entry we will leave it alone,
737 // otherwise we free it entirely. intersectRegisterState will restore.
738 releaseRegisters();
739
740 swapptrs();
741 _inExit = true;
742
743 //verbose_only( verbose_outputf(" LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
744 debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
745
746 nFragExit(guard);
747
748 // restore the callee-saved registers and parameters
749 assignSavedRegs();
750 assignParamRegs();
751
752 intersectRegisterState(capture);
753
754 // this can be useful for breaking whenever an exit is taken
755 //INT3();
756 //NOP();
757
758 // we are done producing the exit logic for the guard so demark where our exit block code begins
759 guard->record()->stubEntry = _nIns; // target in exit path for our mainline conditional jump
760
761 // swap back pointers, effectively storing the last location used in the exit path
762 swapptrs();
763 _inExit = false;
764
765 //verbose_only( verbose_outputf(" LIR_xt/xf swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
766 verbose_only( verbose_outputf(" %p:",guard->record()->stubEntry);)
767 verbose_only( verbose_outputf("--------------------------------------- exit block (LIR_xt|LIR_xf)") );
768
769 #ifdef NANOJIT_IA32
770 NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
771 debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
772 #endif
773
774 verbose_only( _verbose = priorVerbose; )
775 verbose_only(_stats.exitnative += (_stats.native-nativeSave));
776
777 return (NIns*) guard->record()->stubEntry;
778 }
779
780 void Assembler::beginAssembly(Fragment *frag, RegAllocMap* branchStateMap)
781 {
782 _thisfrag = frag;
783 _activation.lowwatermark = 1;
784 _activation.tos = _activation.lowwatermark;
785 _activation.highwatermark = _activation.tos;
786
787 counter_reset(native);
788 counter_reset(exitnative);
789 counter_reset(steals);
790 counter_reset(spills);
791 counter_reset(remats);
792
793 setError(None);
794
795 // native code gen buffer setup
796 nativePageSetup();
797
798 #ifdef AVMPLUS_PORTING_API
799 _endJit1Addr = _nIns;
800 _endJit2Addr = _nExitIns;
801 #endif
802
803 // make sure we got at least one page of memory
804 if (error()) return;
805
806 #ifdef PERFM
807 _stats.pages = 0;
808 _stats.codeStart = _nIns-1;
809 _stats.codeExitStart = _nExitIns-1;
810 //fprintf(stderr,"pageReset %d start %x exit start %x\n", _stats.pages, (int)_stats.codeStart, (int)_stats.codeExitStart);
811 #endif /* PERFM */
812
813 _epilogue = genEpilogue();
814 _branchStateMap = branchStateMap;
815 _labels.clear();
816 _patches.clear();
817
818 verbose_only( verbose_outputf(" %p:",_nIns) );
819 verbose_only( verbose_output(" epilogue:") );
820 }
821
822 void Assembler::assemble(Fragment* frag, NInsList& loopJumps)
823 {
824 if (error()) return;
825 AvmCore *core = _frago->core();
826 _thisfrag = frag;
827
828 // set up backwards pipeline: assembler -> StackFilter -> LirReader
829 LirReader bufreader(frag->lastIns);
830 avmplus::GC *gc = core->gc;
831 StackFilter storefilter1(&bufreader, gc, frag->lirbuf, frag->lirbuf->sp);
832 StackFilter storefilter2(&storefilter1, gc, frag->lirbuf, frag->lirbuf->rp);
833 DeadCodeFilter deadfilter(&storefilter2, frag->lirbuf->_functions);
834 LirFilter* rdr = &deadfilter;
835 verbose_only(
836 VerboseBlockReader vbr(rdr, this, frag->lirbuf->names);
837 if (verbose_enabled())
838 rdr = &vbr;
839 )
840
841 verbose_only(_thisfrag->compileNbr++; )
842 verbose_only(_frago->_stats.compiles++; )
843 verbose_only(_frago->_stats.totalCompiles++; )
844 _inExit = false;
845 gen(rdr, loopJumps);
846 frag->loopEntry = _nIns;
847 //frag->outbound = core->config.tree_opt? _latestGuard : 0;
848 //fprintf(stderr, "assemble frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
849
850 if (!error()) {
851 // patch all branches
852 while(!_patches.isEmpty())
853 {
854 NIns* where = _patches.lastKey();
855 LInsp targ = _patches.removeLast();
856 LabelState *label = _labels.get(targ);
857 NIns* ntarg = label->addr;
858 if (ntarg) {
859 nPatchBranch(where,ntarg);
860 }
861 else {
862 _err = UnknownBranch;
863 break;
864 }
865 }
866 }
867 }
868
869 void Assembler::endAssembly(Fragment* frag, NInsList& loopJumps)
870 {
871 NIns* SOT = 0;
872 if (frag->isRoot()) {
873 SOT = frag->loopEntry;
874 verbose_only( verbose_outputf(" %p:",_nIns); )
875 } else {
876 SOT = frag->root->fragEntry;
877 }
878 AvmAssert(SOT);
879 while(!loopJumps.isEmpty())
880 {
881 NIns* loopJump = (NIns*)loopJumps.removeLast();
882 verbose_only( verbose_outputf("patching %p to %p", loopJump, SOT); )
883 nPatchBranch(loopJump, SOT);
884 }
885
886 NIns* fragEntry = 0;
887
888 if (!error())
889 {
890 fragEntry = genPrologue();
891 verbose_only( verbose_outputf(" %p:",_nIns); )
892 verbose_only( verbose_output(" prologue"); )
893 }
894
895 // something bad happened?
896 if (!error())
897 {
898 // check for resource leaks
899 debug_only(
900 for(uint32_t i=_activation.lowwatermark;i<_activation.highwatermark; i++) {
901 NanoAssertMsgf(_activation.entry[i] == 0, "frame entry %d wasn't freed",-4*i);
902 }
903 )
904
905 frag->fragEntry = fragEntry;
906 NIns* code = _nIns;
907 #ifdef PERFM
908 _nvprof("code", codeBytes()); // requires that all pages are released between begin/endAssembly(), otherwise we double count
909 #endif
910 // let the fragment manage the pages if we're using trees and there are branches
911 Page* manage = (_frago->core()->config.tree_opt) ? handoverPages() : 0;
912 frag->setCode(code, manage); // root of tree should manage all pages
913 //fprintf(stderr, "endAssembly frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
914 }
915
916 NanoAssertMsgf(error() || _fpuStkDepth == 0,"_fpuStkDepth %d",_fpuStkDepth);
917
918 internalReset(); // clear the reservation tables and regalloc
919 NanoAssert(!_branchStateMap || _branchStateMap->isEmpty());
920 _branchStateMap = 0;
921
922 #ifdef AVMPLUS_ARM
923 // If we've modified the code, we need to flush so we don't end up trying
924 // to execute junk
925 # if defined(UNDER_CE)
926 FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
927 # elif defined(AVMPLUS_UNIX)
928 for (int i = 0; i < 2; i++) {
929 Page *p = (i == 0) ? _nativePages : _nativeExitPages;
930
931 Page *first = p;
932 while (p) {
933 if (!p->next || p->next != p+1) {
934 __clear_cache((char*)first, (char*)(p+1));
935 first = p->next;
936 }
937 p = p->next;
938 }
939 }
940 # endif
941 #endif
942
943 # ifdef AVMPLUS_PORTING_API
944 NanoJIT_PortAPI_FlushInstructionCache(_nIns, _endJit1Addr);
945 NanoJIT_PortAPI_FlushInstructionCache(_nExitIns, _endJit2Addr);
946 # endif
947 }
948
949 void Assembler::copyRegisters(RegAlloc* copyTo)
950 {
951 *copyTo = _allocator;
952 }
953
954 void Assembler::releaseRegisters()
955 {
956 for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
957 {
958 LIns *i = _allocator.getActive(r);
959 if (i)
960 {
961 // clear reg allocation, preserve stack allocation.
962 Reservation* resv = getresv(i);
963 NanoAssert(resv != 0);
964 _allocator.retire(r);
965 if (r == resv->reg)
966 resv->reg = UnknownReg;
967
968 if (!resv->arIndex && resv->reg == UnknownReg)
969 {
970 reserveFree(i);
971 }
972 }
973 }
974 }
975
976 #ifdef PERFM
977 #define countlir_live() _nvprof("lir-live",1)
978 #define countlir_ret() _nvprof("lir-ret",1)
979 #define countlir_alloc() _nvprof("lir-alloc",1)
980 #define countlir_var() _nvprof("lir-var",1)
981 #define countlir_use() _nvprof("lir-use",1)
982 #define countlir_def() _nvprof("lir-def",1)
983 #define countlir_imm() _nvprof("lir-imm",1)
984 #define countlir_param() _nvprof("lir-param",1)
985 #define countlir_cmov() _nvprof("lir-cmov",1)
986 #define countlir_ld() _nvprof("lir-ld",1)
987 #define countlir_ldq() _nvprof("lir-ldq",1)
988 #define countlir_alu() _nvprof("lir-alu",1)
989 #define countlir_qjoin() _nvprof("lir-qjoin",1)
990 #define countlir_qlo() _nvprof("lir-qlo",1)
991 #define countlir_qhi() _nvprof("lir-qhi",1)
992 #define countlir_fpu() _nvprof("lir-fpu",1)
993 #define countlir_st() _nvprof("lir-st",1)
994 #define countlir_stq() _nvprof("lir-stq",1)
995 #define countlir_jmp() _nvprof("lir-jmp",1)
996 #define countlir_jcc() _nvprof("lir-jcc",1)
997 #define countlir_label() _nvprof("lir-label",1)
998 #define countlir_xcc() _nvprof("lir-xcc",1)
999 #define countlir_x() _nvprof("lir-x",1)
1000 #define countlir_loop() _nvprof("lir-loop",1)
1001 #define countlir_call() _nvprof("lir-call",1)
1002 #else
1003 #define countlir_live()
1004 #define countlir_ret()
1005 #define countlir_alloc()
1006 #define countlir_var()
1007 #define countlir_use()
1008 #define countlir_def()
1009 #define countlir_imm()
1010 #define countlir_param()
1011 #define countlir_cmov()
1012 #define countlir_ld()
1013 #define countlir_ldq()
1014 #define countlir_alu()
1015 #define countlir_qjoin()
1016 #define countlir_qlo()
1017 #define countlir_qhi()
1018 #define countlir_fpu()
1019 #define countlir_st()
1020 #define countlir_stq()
1021 #define countlir_jmp()
1022 #define countlir_jcc()
1023 #define countlir_label()
1024 #define countlir_xcc()
1025 #define countlir_x()
1026 #define countlir_loop()
1027 #define countlir_call()
1028 #endif
1029
1030 void Assembler::gen(LirFilter* reader, NInsList& loopJumps)
1031 {
1032 // trace must start with LIR_x or LIR_loop
1033 NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop));
1034
1035 for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
1036 {
1037 LOpcode op = ins->opcode();
1038 switch(op)
1039 {
1040 default:
1041 NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)", op, op&~LIR64);
1042 break;
1043
1044 case LIR_live: {
1045 countlir_live();
1046 pending_lives.add(ins->oprnd1());
1047 break;
1048 }
1049
1050 case LIR_ret: {
1051 countlir_ret();
1052 if (_nIns != _epilogue) {
1053 JMP(_epilogue);
1054 }
1055 assignSavedRegs();
1056 #ifdef NANOJIT_ARM
1057 // the epilogue moves R2 to R0; we may want to do this
1058 // after assignSavedRegs
1059 findSpecificRegFor(ins->oprnd1(), R2);
1060 #else
1061 findSpecificRegFor(ins->oprnd1(), retRegs[0]);
1062 #endif
1063 break;
1064 }
1065
1066 case LIR_fret: {
1067 countlir_ret();
1068 if (_nIns != _epilogue) {
1069 JMP(_epilogue);
1070 }
1071 assignSavedRegs();
1072 #ifdef NANOJIT_IA32
1073 findSpecificRegFor(ins->oprnd1(), FST0);
1074 #else
1075 NanoAssert(false);
1076 #endif
1077 fpu_pop();
1078 break;
1079 }
1080
1081 // allocate some stack space. the value of this instruction
1082 // is the address of the stack space.
1083 case LIR_alloc: {
1084 countlir_alloc();
1085 Reservation *resv = getresv(ins);
1086 NanoAssert(resv->arIndex != 0);
1087 Register r = resv->reg;
1088 if (r != UnknownReg) {
1089 _allocator.retire(r);
1090 resv->reg = UnknownReg;
1091 asm_restore(ins, resv, r);
1092 }
1093 freeRsrcOf(ins, 0);
1094 break;
1095 }
1096 case LIR_short:
1097 {
1098 countlir_imm();
1099 asm_short(ins);
1100 break;
1101 }
1102 case LIR_int:
1103 {
1104 countlir_imm();
1105 asm_int(ins);
1106 break;
1107 }
1108 case LIR_quad:
1109 {
1110 countlir_imm();
1111 asm_quad(ins);
1112 break;
1113 }
1114 #if !defined NANOJIT_64BIT
1115 case LIR_callh:
1116 {
1117 // return result of quad-call in register
1118 prepResultReg(ins, rmask(retRegs[1]));
1119 // if hi half was used, we must use the call to ensure it happens
1120 findRegFor(ins->oprnd1(), rmask(retRegs[0]));
1121 break;
1122 }
1123 #endif
1124 case LIR_param:
1125 {
1126 countlir_param();
1127 asm_param(ins);
1128 break;
1129 }
1130 case LIR_qlo:
1131 {
1132 countlir_qlo();
1133 asm_qlo(ins);
1134 break;
1135 }
1136 case LIR_qhi:
1137 {
1138 countlir_qhi();
1139 asm_qhi(ins);
1140 break;
1141 }
1142 case LIR_qcmov:
1143 case LIR_cmov:
1144 {
1145 countlir_cmov();
1146 asm_cmov(ins);
1147 break;
1148 }
1149 case LIR_ld:
1150 case LIR_ldc:
1151 case LIR_ldcb:
1152 case LIR_ldcs:
1153 {
1154 countlir_ld();
1155 asm_ld(ins);
1156 break;
1157 }
1158 case LIR_ldq:
1159 case LIR_ldqc:
1160 {
1161 countlir_ldq();
1162 asm_load64(ins);
1163 break;
1164 }
1165 case LIR_neg:
1166 case LIR_not:
1167 {
1168 countlir_alu();
1169 asm_neg_not(ins);
1170 break;
1171 }
1172 case LIR_qjoin:
1173 {
1174 countlir_qjoin();
1175 asm_qjoin(ins);
1176 break;
1177 }
1178
1179 #if defined NANOJIT_64BIT
1180 case LIR_qiadd:
1181 case LIR_qiand:
1182 case LIR_qilsh:
1183 case LIR_qior:
1184 {
1185 asm_qbinop(ins);
1186 break;
1187 }
1188 #endif
1189
1190 case LIR_add:
1191 case LIR_addp:
1192 case LIR_sub:
1193 case LIR_mul:
1194 case LIR_and:
1195 case LIR_or:
1196 case LIR_xor:
1197 case LIR_lsh:
1198 case LIR_rsh:
1199 case LIR_ush:
1200 {
1201 countlir_alu();
1202 asm_arith(ins);
1203 break;
1204 }
1205 #ifndef NJ_SOFTFLOAT
1206 case LIR_fneg:
1207 {
1208 countlir_fpu();
1209 asm_fneg(ins);
1210 break;
1211 }
1212 case LIR_fadd:
1213 case LIR_fsub:
1214 case LIR_fmul:
1215 case LIR_fdiv:
1216 {
1217 countlir_fpu();
1218 asm_fop(ins);
1219 break;
1220 }
1221 case LIR_i2f:
1222 {
1223 countlir_fpu();
1224 asm_i2f(ins);
1225 break;
1226 }
1227 case LIR_u2f:
1228 {
1229 countlir_fpu();
1230 asm_u2f(ins);
1231 break;
1232 }
1233 #endif // NJ_SOFTFLOAT
1234 case LIR_st:
1235 case LIR_sti:
1236 {
1237 countlir_st();
1238 asm_store32(ins->oprnd1(), ins->immdisp(), ins->oprnd2());
1239 break;
1240 }
1241 case LIR_stq:
1242 case LIR_stqi:
1243 {
1244 countlir_stq();
1245 LIns* value = ins->oprnd1();
1246 LIns* base = ins->oprnd2();
1247 int dr = ins->immdisp();
1248 if (value->isop(LIR_qjoin))
1249 {
1250 // this is correct for little-endian only
1251 asm_store32(value->oprnd1(), dr, base);
1252 asm_store32(value->oprnd2(), dr+4, base);
1253 }
1254 else
1255 {
1256 asm_store64(value, dr, base);
1257 }
1258 break;
1259 }
1260
1261 case LIR_j:
1262 {
1263 countlir_jmp();
1264 LInsp to = ins->getTarget();
1265 LabelState *label = _labels.get(to);
1266 // the jump is always taken so whatever register state we
1267 // have from downstream code, is irrelevant to code before
1268 // this jump. so clear it out. we will pick up register
1269 // state from the jump target, if we have seen that label.
1270 releaseRegisters();
1271 if (label && label->addr) {
1272 // forward jump - pick up register state from target.
1273 unionRegisterState(label->regs);
1274 JMP(label->addr);
1275 }
1276 else {
1277 // backwards jump
1278 hasLoop = true;
1279 handleLoopCarriedExprs();
1280 if (!label) {
1281 // save empty register state at loop header
1282 _labels.add(to, 0, _allocator);
1283 }
1284 else {
1285 intersectRegisterState(label->regs);
1286 }
1287 JMP(0);
1288 _patches.put(_nIns, to);
1289 verbose_only(
1290 verbose_outputf(" Loop %s -> %s",
1291 lirNames[ins->opcode()],
1292 _thisfrag->lirbuf->names->formatRef(to));
1293 )
1294 }
1295 break;
1296 }
1297
1298 case LIR_jt:
1299 case LIR_jf:
1300 {
1301 countlir_jcc();
1302 LInsp to = ins->getTarget();
1303 LIns* cond = ins->oprnd1();
1304 LabelState *label = _labels.get(to);
1305 if (label && label->addr) {
1306 // forward jump to known label. need to merge with label's register state.
1307 unionRegisterState(label->regs);
1308 asm_branch(op == LIR_jf, cond, label->addr, false);
1309 }
1310 else {
1311 // back edge.
1312 hasLoop = true;
1313 handleLoopCarriedExprs();
1314 if (!label) {
1315 // evict all registers, most conservative approach.
1316 evictRegs(~_allocator.free);
1317 _labels.add(to, 0, _allocator);
1318 }
1319 else {
1320 // evict all registers, most conservative approach.
1321 intersectRegisterState(label->regs);
1322 }
1323 NIns *branch = asm_branch(op == LIR_jf, cond, 0, false);
1324 _patches.put(branch,to);
1325 verbose_only(
1326 verbose_outputf("Loop %s -> %s",
1327 lirNames[ins->opcode()],
1328 _thisfrag->lirbuf->names->formatRef(to));
1329 )
1330 }
1331 break;
1332 }
1333 case LIR_label:
1334 {
1335 countlir_label();
1336 LabelState *label = _labels.get(ins);
1337 if (!label) {
1338 // label seen first, normal target of forward jump, save addr & allocator
1339 _labels.add(ins, _nIns, _allocator);
1340 }
1341 else {
1342 // we're at the top of a loop
1343 hasLoop = true;
1344 NanoAssert(label->addr == 0 && label->regs.isValid());
1345 //evictRegs(~_allocator.free);
1346 intersectRegisterState(label->regs);
1347 //asm_align_code();
1348 label->addr = _nIns;
1349 verbose_only(
1350 verbose_outputf("Loop %s", _thisfrag->lirbuf->names->formatRef(ins));
1351 )
1352 }
1353 break;
1354 }
1355
1356 case LIR_xt:
1357 case LIR_xf:
1358 {
1359 countlir_xcc();
1360 // we only support cmp with guard right now, also assume it is 'close' and only emit the branch
1361 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1362 LIns* cond = ins->oprnd1();
1363 asm_branch(op == LIR_xf, cond, exit, false);
1364 break;
1365 }
1366 case LIR_x:
1367 {
1368 countlir_x();
1369 verbose_only(verbose_output(""));
1370 // generate the side exit branch on the main trace.
1371 NIns *exit = asm_exit(ins);
1372 JMP( exit );
1373 break;
1374 }
1375 case LIR_loop:
1376 {
1377 countlir_loop();
1378 asm_loop(ins, loopJumps);
1379 assignSavedRegs();
1380 assignParamRegs();
1381 break;
1382 }
1383
1384 #ifndef NJ_SOFTFLOAT
1385 case LIR_feq:
1386 case LIR_fle:
1387 case LIR_flt:
1388 case LIR_fgt:
1389 case LIR_fge:
1390 {
1391 countlir_fpu();
1392 asm_fcond(ins);
1393 break;
1394 }
1395 #endif
1396 case LIR_eq:
1397 case LIR_ov:
1398 case LIR_cs:
1399 case LIR_le:
1400 case LIR_lt:
1401 case LIR_gt:
1402 case LIR_ge:
1403 case LIR_ult:
1404 case LIR_ule:
1405 case LIR_ugt:
1406 case LIR_uge:
1407 {
1408 countlir_alu();
1409 asm_cond(ins);
1410 break;
1411 }
1412
1413 #ifndef NJ_SOFTFLOAT
1414 case LIR_fcall:
1415 case LIR_fcalli:
1416 #endif
1417 #if defined NANOJIT_64BIT
1418 case LIR_callh:
1419 #endif
1420 case LIR_call:
1421 case LIR_calli:
1422 {
1423 countlir_call();
1424 Register rr = UnknownReg;
1425 #ifndef NJ_SOFTFLOAT
1426 if ((op&LIR64))
1427 {
1428 // fcall or fcalli
1429 Reservation* rR = getresv(ins);
1430 rr = asm_prep_fcall(rR, ins);
1431 }
1432 else
1433 #endif
1434 {
1435 rr = retRegs[0];
1436 prepResultReg(ins, rmask(rr));
1437 }
1438
1439 // do this after we've handled the call result, so we don't
1440 // force the call result to be spilled unnecessarily.
1441
1442 evictScratchRegs();
1443
1444 asm_call(ins);
1445 }
1446 }
1447
1448 if (error())
1449 return;
1450
1451 // check that all is well (don't check in exit paths since it's more complicated)
1452 debug_only( pageValidate(); )
1453 debug_only( resourceConsistencyCheck(); )
1454 }
1455 }
1456
1457 void Assembler::assignSavedRegs()
1458 {
1459 // restore saved regs
1460 releaseRegisters();
1461 LirBuffer *b = _thisfrag->lirbuf;
1462 for (int i=0, n = NumSavedRegs; i < n; i++) {
1463 LIns *p = b->savedRegs[i];
1464 if (p)
1465 findSpecificRegFor(p, savedRegs[p->imm8()]);
1466 }
1467 }
1468
1469 void Assembler::reserveSavedRegs()
1470 {
1471 LirBuffer *b = _thisfrag->lirbuf;
1472 for (int i=0, n = NumSavedRegs; i < n; i++) {
1473 LIns *p = b->savedRegs[i];
1474 if (p)
1475 findMemFor(p);
1476 }
1477 }
1478
1479 // restore parameter registers
1480 void Assembler::assignParamRegs()
1481 {
1482 LInsp state = _thisfrag->lirbuf->state;
1483 if (state)
1484 findSpecificRegFor(state, argRegs[state->imm8()]);
1485 LInsp param1 = _thisfrag->lirbuf->param1;
1486 if (param1)
1487 findSpecificRegFor(param1, argRegs[param1->imm8()]);
1488 }
1489
1490 void Assembler::handleLoopCarriedExprs()
1491 {
1492 // ensure that exprs spanning the loop are marked live at the end of the loop
1493 reserveSavedRegs();
1494 for (int i=0, n=pending_lives.size(); i < n; i++) {
1495 findMemFor(pending_lives[i]);
1496 }
1497 }
1498
1499 void Assembler::arFree(uint32_t idx)
1500 {
1501 AR &ar = _activation;
1502 LIns *i = ar.entry[idx];
1503 NanoAssert(i != 0);
1504 do {
1505 ar.entry[idx] = 0;
1506 idx--;
1507 } while (ar.entry[idx] == i);
1508 }
1509
1510 #ifdef NJ_VERBOSE
1511 void Assembler::printActivationState()
1512 {
1513 bool verbose_activation = false;
1514 if (!verbose_activation)
1515 return;
1516
1517 #ifdef NANOJIT_ARM
1518 // @todo Why is this here?!? This routine should be indep. of platform
1519 verbose_only(
1520 if (_verbose) {
1521 char* s = &outline[0];
1522 memset(s, ' ', 51); s[51] = '\0';
1523 s += strlen(s);
1524 sprintf(s, " SP ");
1525 s += strlen(s);
1526 for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) {
1527 LInsp ins = _activation.entry[i];
1528 if (ins && ins !=_activation.entry[i+1]) {
1529 sprintf(s, "%d(%s) ", 4*i, _thisfrag->lirbuf->names->formatRef(ins));
1530 s += strlen(s);
1531 }
1532 }
1533 output(&outline[0]);
1534 }
1535 )
1536 #else
1537 verbose_only(
1538 char* s = &outline[0];
1539 if (_verbose) {
1540 memset(s, ' ', 51); s[51] = '\0';
1541 s += strlen(s);
1542 sprintf(s, " ebp ");
1543 s += strlen(s);
1544
1545 for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) {
1546 LInsp ins = _activation.entry[i];
1547 if (ins /* && _activation.entry[i]!=_activation.entry[i+1]*/) {
1548 sprintf(s, "%d(%s) ", -4*i,_thisfrag->lirbuf->names->formatRef(ins));
1549 s += strlen(s);
1550 }
1551 }
1552 output(&outline[0]);
1553 }
1554 )
1555 #endif
1556 }
1557 #endif
1558
1559 bool canfit(int32_t size, int32_t loc, AR &ar) {
1560 for (int i=0; i < size; i++) {
1561 if (ar.entry[loc+stack_direction(i)])
1562 return false;
1563 }
1564 return true;
1565 }
1566
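// Reserve activation-record (stack) slots for l.  Slot sizes are in 4-byte
// units: quads take 2 slots, LIR_alloc blocks take size()>>2 slots, and
// ordinary values take sizeof(intptr_t)>>2.  Multi-slot entries start on an
// even slot index so they are 8-byte aligned; the search grows tos and
// highwatermark as needed and sets StackFull if the frame would overflow.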
1567 uint32_t Assembler::arReserve(LIns* l)
1568 {
1569 NanoAssert(!l->isTramp());
1570
1571 //verbose_only(printActivationState());
1572 int32_t size = l->isop(LIR_alloc) ? (l->size()>>2) : l->isQuad() ? 2 : sizeof(intptr_t)>>2;
1573 AR &ar = _activation;
1574 const int32_t tos = ar.tos;
1575 int32_t start = ar.lowwatermark;
1576 int32_t i = 0;
1577 NanoAssert(start>0);
1578
1579 if (size == 1) {
1580 // easy most common case -- find a hole, or make the frame bigger
1581 for (i=start; i < NJ_MAX_STACK_ENTRY; i++) {
1582 if (ar.entry[i] == 0) {
1583 // found a hole
1584 ar.entry[i] = l;
1585 break;
1586 }
1587 }
1588 }
1589 else if (size == 2) {
1590 if ( (start&1)==1 ) start++; // even 8 boundary
1591 for (i=start; i < NJ_MAX_STACK_ENTRY; i+=2) {
1592 if ( (ar.entry[i+stack_direction(1)] == 0) && (i==tos || (ar.entry[i] == 0)) ) {
1593 // found 2 adjacent aligned slots
1594 NanoAssert(_activation.entry[i] == 0);
1595 NanoAssert(_activation.entry[i+stack_direction(1)] == 0);
1596 ar.entry[i] = l;
1597 ar.entry[i+stack_direction(1)] = l;
1598 break;
1599 }
1600 }
1601 }
1602 else {
1603 // alloc larger block on 8byte boundary.
1604 if (start < size) start = size;
1605 if ((start&1)==1) start++;
1606 for (i=start; i < NJ_MAX_STACK_ENTRY; i+=2) {
1607 if (canfit(size, i, ar)) {
1608 // place the entry in the table and mark the instruction with it
1609 for (int32_t j=0; j < size; j++) {
1610 NanoAssert(_activation.entry[i+stack_direction(j)] == 0);
1611 _activation.entry[i+stack_direction(j)] = l;
1612 }
1613 break;
1614 }
1615 }
1616 }
1617 if (i >= (int32_t)ar.tos) {
1618 ar.tos = ar.highwatermark = i+1;
1619 }
1620 if (tos+size >= NJ_MAX_STACK_ENTRY) {
1621 setError(StackFull);
1622 }
1623 return i;
1624 }
1625
1626 /**
1627 * move regs around so the SavedRegs contains the highest priority regs.
1628 */
1629 void Assembler::evictScratchRegs()
1630 {
1631 // find the top GpRegs that are candidates to put in SavedRegs
1632
1633 // tosave is a binary heap stored in an array. the root is tosave[0],
1634 // left child is at i+1, right child is at i+2.
1635
1636 Register tosave[LastReg-FirstReg+1];
1637 int len=0;
1638 RegAlloc *regs = &_allocator;
1639 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
1640 if (rmask(r) & GpRegs) {
1641 LIns *i = regs->getActive(r);
1642 if (i) {
1643 if (canRemat(i)) {
1644 evict(r);
1645 }
1646 else {
1647 int32_t pri = regs->getPriority(r);
1648 // add to heap by adding to end and bubbling up
1649 int j = len++;
1650 while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
1651 tosave[j] = tosave[j/2];
1652 j /= 2;
1653 }
1654 NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
1655 tosave[j] = r;
1656 }
1657 }
1658 }
1659 }
1660
1661 // now tosave[] has the live exprs in priority order.
1662 // allocate each of the top priority exprs to a SavedReg
1663
1664 RegisterMask allow = SavedRegs;
1665 while (allow && len > 0) {
1666 // get the highest priority var
1667 Register hi = tosave[0];
1668 LIns *i = regs->getActive(hi);
1669 Register r = findRegFor(i, allow);
1670 allow &= ~rmask(r);
1671
1672 // remove from heap by replacing root with end element and bubbling down.
1673 if (allow && --len > 0) {
1674 Register last = tosave[len];
1675 int j = 0;
1676 while (j+1 < len) {
1677 int child = j+1;
1678 if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
1679 child++;
1680 if (regs->getPriority(last) > regs->getPriority(tosave[child]))
1681 break;
1682 tosave[j] = tosave[child];
1683 j = child;
1684 }
1685 tosave[j] = last;
1686 }
1687 }
1688
1689 // now evict everything else.
1690 evictRegs(~SavedRegs);
1691 }
1692
1693 void Assembler::evictRegs(RegisterMask regs)
1694 {
1695 // generate code to restore callee saved registers
1696 // @todo speed this up
1697 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
1698 if ((rmask(r) & regs) && _allocator.getActive(r)) {
1699 evict(r);
1700 }
1701 }
1702 }
1703
1704 /**
1705 * Merge the current state of the registers with a previously stored version
1706 * current == saved skip
1707 * current & saved evict current, keep saved
1708 * current & !saved evict current (unionRegisterState would keep)
1709 * !current & saved keep saved
1710 */
1711 void Assembler::intersectRegisterState(RegAlloc& saved)
1712 {
1713 // evictions and pops first
1714 RegisterMask skip = 0;
1715 for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
1716 {
1717 LIns * curins = _allocator.getActive(r);
1718 LIns * savedins = saved.getActive(r);
1719 if (curins == savedins)
1720 {
1721 verbose_only( if (curins) verbose_outputf(" skip %s", regNames[r]); )
1722 skip |= rmask(r);
1723 }
1724 else
1725 {
1726 if (curins) {
1727 //_nvprof("intersect-evict",1);
1728 evict(r);
1729 }
1730
1731 #ifdef NANOJIT_IA32
1732 if (savedins && (rmask(r) & x87Regs))
1733 FSTP(r);
1734 #endif
1735 }
1736 }
1737 assignSaved(saved, skip);
1738 }
1739
1740 /**
1741 * Merge the current state of the registers with a previously stored version.
1742 *
1743 * current == saved skip
1744 * current & saved evict current, keep saved
1745 * current & !saved keep current (intersectRegisterState would evict)
1746 * !current & saved keep saved
1747 */
1748 void Assembler::unionRegisterState(RegAlloc& saved)
1749 {
1750 // evictions and pops first
1751 RegisterMask skip = 0;
1752 for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
1753 {
1754 LIns * curins = _allocator.getActive(r);
1755 LIns * savedins = saved.getActive(r);
1756 if (curins == savedins)
1757 {
1758 verbose_only( if (curins) verbose_outputf(" skip %s", regNames[r]); )
1759 skip |= rmask(r);
1760 }
1761 else
1762 {
1763 if (curins && savedins) {
1764 //_nvprof("union-evict",1);
1765 evict(r);
1766 }
1767
1768 #ifdef NANOJIT_IA32
1769 if (rmask(r) & x87Regs) {
1770 if (savedins) {
1771 FSTP(r);
1772 }
1773 else {
1774 // saved state did not have fpu reg allocated,
1775 // so we must evict here to keep x87 stack balanced.
1776 evict(r);
1777 }
1778 }
1779 #endif
1780 }
1781 }
1782 assignSaved(saved, skip);
1783 }
1784
1785 void Assembler::assignSaved(RegAlloc &saved, RegisterMask skip)
1786 {
1787 // now reassign mainline registers
1788 for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
1789 {
1790 LIns *i = saved.getActive(r);
1791 if (i && !(skip&rmask(r)))
1792 findSpecificRegFor(i, r);
1793 }
1794 debug_only(saved.used = 0); // marker that we are no longer in exit path
1795 }
1796
1797 void Assembler::setCallTable(const CallInfo* functions)
1798 {
1799 _functions = functions;
1800 }
1801
1802 #ifdef NJ_VERBOSE
1803 char Assembler::outline[8192];
1804
1805 void Assembler::outputf(const char* format, ...)
1806 {
1807 va_list args;
1808 va_start(args, format);
1809 outline[0] = '\0';
1810 vsprintf(outline, format, args);
1811 output(outline);
1812 }
1813
1814 void Assembler::output(const char* s)
1815 {
1816 if (_outputCache)
1817 {
1818 char* str = (char*)_gc->Alloc(strlen(s)+1);
1819 strcpy(str, s);
1820 _outputCache->add(str);
1821 }
1822 else
1823 {
1824 _frago->core()->console << s << "\n";
1825 }
1826 }
1827
1828 void Assembler::output_asm(const char* s)
1829 {
1830 if (!verbose_enabled())
1831 return;
1832 if (*s != '^')
1833 output(s);
1834 }
1835
1836 char* Assembler::outputAlign(char *s, int col)
1837 {
1838 int len = strlen(s);
1839 int add = ((col-len)>0) ? col-len : 1;
1840 memset(&s[len], ' ', add);
1841 s[col] = '\0';
1842 return &s[col];
1843 }
1844 #endif // verbose
1845
1846 #endif /* FEATURE_NANOJIT */
1847
1848 #if defined(FEATURE_NANOJIT) || defined(NJ_VERBOSE)
1849 uint32_t CallInfo::_count_args(uint32_t mask) const
1850 {
1851 uint32_t argc = 0;
1852 uint32_t argt = _argtypes;
1853 for (uint32_t i = 0; i < MAXARGS; ++i) {
1854 argt >>= 2;
1855 argc += (argt & mask) != 0;
1856 }
1857 return argc;
1858 }
1859
1860 uint32_t CallInfo::get_sizes(ArgSize* sizes) const
1861 {
1862 uint32_t argt = _argtypes;
1863 uint32_t argc = 0;
1864 for (uint32_t i = 0; i < MAXARGS; i++) {
1865 argt >>= 2;
1866 ArgSize a = ArgSize(argt&3);
1867 #ifdef NJ_SOFTFLOAT
1868 if (a == ARGSIZE_F) {
1869 sizes[argc++] = ARGSIZE_LO;
1870 sizes[argc++] = ARGSIZE_LO;
1871 continue;
1872 }
1873 #endif
1874 if (a != ARGSIZE_NONE) {
1875 sizes[argc++] = a;
1876 }
1877 }
1878 if (isIndirect()) {
1879 // add one more arg for indirect call address
1880 argc++;
1881 }
1882 return argc;
1883 }
1884
1885 void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
1886 LabelState *st = new (gc) LabelState(addr, regs);
1887 labels.put(label, st);
1888 }
1889
1890 LabelStateMap::~LabelStateMap() {
1891 clear();
1892 }
1893
1894 void LabelStateMap::clear() {
1895 LabelState *st;
1896
1897 while (!labels.isEmpty()) {
1898 st = labels.removeLast();
1899 delete st;
1900 }
1901 }
1902
1903 LabelState* LabelStateMap::get(LIns *label) {
1904 return labels.get(label);
1905 }
1906 }
1907 #endif // FEATURE_NANOJIT

