mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
2 lines
36 KiB
Plaintext
2 lines
36 KiB
Plaintext
<div class="pd w0 h0"><div id="pfc" class="pf" data-page-no="c"><div class="pc pcc"><img class="bi x2f y244 w4 h23" alt="" src=""/><div class="t m0 x5f h5 y208 ff2 fs3 fc0 sc0 ls0 ws0">Loops<span class="_ _22"> </span>T<span class="_ _2"></span>rees<span class="_ _2b"> </span>T<span class="_ _2"></span>races<span class="_ _2b"> </span>Aborts<span class="_ _22"> </span>Flushes<span class="_ _22"> </span>T<span class="_ _2"></span>re<span class="_ _2"></span>es/Loop<span class="_ _22"> </span>T<span class="_ _2"></span>races/T<span class="_ _b"></span>ree<span class="_ _22"> </span>T<span class="_ _2"></span>races/Loop<span class="_ _2b"> </span>Speedup</div><div class="t m0 xb9 h5 y209 ff2 fs3 fc0 sc0 ls0 ws0">3d-cube<span class="_ _2c"> </span>25<span class="_ _2d"> </span>27<span class="_ _28"> </span>29<span class="_ _2e"> </span>3<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.1<span class="_ _1e"> </span>1.1<span class="_ _31"> </span>1.2<span class="_ _2d"> </span>2.20x</div><div class="t m0 xb9 h5 y20a ff2 fs3 fc0 sc0 ls0 ws0">3d-morph<span class="_ _32"> </span>5<span class="_ _33"> </span>8<span class="_ _34"> </span>8<span class="_ _2e"> </span>2<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.6<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>1.6<span class="_ _2d"> </span>2.86x</div><div class="t m0 xb9 h5 y20b ff2 fs3 fc0 sc0 ls0 ws0">3d-raytrace<span class="_ _35"> </span>10<span class="_ _2d"> </span>25<span class="_ _25"> </span>100<span class="_ _33"> </span>10<span class="_ _2f"> </span>1<span class="_ _30"> </span>2.5<span class="_ _1e"> </span>4.0<span class="_ _36"> </span>10.0<span class="_ _2d"> </span>1.18x</div><div class="t m0 xb9 h5 y20c ff2 fs3 fc0 sc0 ls0 ws0">access-binary-trees<span class="_ _37"> </span>0<span class="_ _33"> </span>0<span class="_ _34"> </span>0<span class="_ _2e"> </span>5<span class="_ _2f"> </span>0<span class="_ _38"> </span>-<span class="_ _39"> </span>-<span class="_ _3a"> </span>-<span class="_ _2d"> </span>0.93x</div><div class="t m0 xb9 h5 y20d ff2 fs3 fc0 sc0 ls0 ws0">access-fannkuch<span class="_ _3b"> </span>10<span class="_ _2d"> </span>34<span class="_ _28"> </span>57<span class="_ _3c"> </span>24<span class="_ _2f"> </span>0<span class="_ _30"> </span>3.4<span class="_ _1e"> </span>1.7<span class="_ _31"> </span>5.7<span class="_ _2d"> </span>2.20x</div><div class="t m0 xb9 h5 y20e ff2 fs3 fc0 sc0 ls0 ws0">access-nbody<span class="_ _3d"> </span>8<span class="_ _2d"> </span>16<span class="_ _28"> </span>18<span class="_ _2e"> </span>5<span class="_ _2f"> </span>0<span class="_ _30"> </span>2.0<span class="_ _1e"> </span>1.1<span class="_ _31"> </span>2.3<span class="_ _2d"> </span>4.19x</div><div class="t m0 xb9 h5 y20f ff2 fs3 fc0 sc0 ls0 ws0">access-nsiev<span class="_ _2"></span>e<span class="_ _3d"> </span>3<span class="_ _28"> </span>6<span class="_ _34"> </span>8<span class="_ _3e"> </span>3<span class="_ _2f"> </span>0<span class="_ _30"> </span>2.0<span class="_ _1e"> </span>1.3<span class="_ _31"> </span>2.7<span class="_ _2d"> </span>3.05x</div><div class="t m0 xb9 h5 y27d ff2 fs3 fc0 sc0 ls0 ws0">bitops-3bit-bits-in-byte<span class="_ _3f"> </span>2<span class="_ _33"> </span>2<span class="_ _34"> </span>2<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>1.0<span class="_ _26"> </span>25.47x</div><div class="t m0 xb9 h5 y27e ff2 fs3 fc0 sc0 ls0 ws0">bitops-bits-in-byte<span class="_ _13"> </span>3<span class="_ _28"> </span>3<span class="_ _34"> </span>4<span class="_ _2e"> </span>1<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.3<span class="_ _31"> </span>1.3<span class="_ _2d"> </span>8.67x</div><div class="t m0 xb9 h5 y27f ff2 fs3 fc0 sc0 ls0 ws0">bitops-bitwise-and<span class="_ _40"> </span>1<span class="_ _33"> </span>1<span class="_ _34"> </span>1<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>1.0<span class="_ _26"> </span>25.20x</div><div class="t m0 xb9 h5 y280 ff2 fs3 fc0 sc0 ls0 ws0">bitops-nsiev<span class="_ _2"></span>e-bits<span class="_ _3b"> </span>3<span class="_ _33"> </span>3<span class="_ _34"> </span>5<span class="_ _3e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.7<span class="_ _31"> </span>1.7<span class="_ _2d"> </span>2.75x</div><div class="t m0 xb9 h5 y281 ff2 fs3 fc0 sc0 ls0 ws0">controlflow-recursi<span class="_ _2"></span>v<span class="_ _2"></span>e<span class="_ _41"> </span>0<span class="_ _33"> </span>0<span class="_ _34"> </span>0<span class="_ _2e"> </span>1<span class="_ _2f"> </span>0<span class="_ _38"> </span>-<span class="_ _39"> </span>-<span class="_ _3a"> </span>-<span class="_ _2d"> </span>0.98x</div><div class="t m0 xb9 h5 y282 ff2 fs3 fc0 sc0 ls0 ws0">crypto-aes<span class="_ _42"> </span>50<span class="_ _2d"> </span>72<span class="_ _28"> </span>78<span class="_ _3c"> </span>19<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.4<span class="_ _1e"> </span>1.1<span class="_ _31"> </span>1.6<span class="_ _2d"> </span>1.64x</div><div class="t m0 xb9 h5 y283 ff2 fs3 fc0 sc0 ls0 ws0">crypto-md5<span class="_ _42"> </span>4<span class="_ _33"> </span>4<span class="_ _34"> </span>5<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.3<span class="_ _31"> </span>1.3<span class="_ _2d"> </span>2.30x</div><div class="t m0 xb9 h5 y284 ff2 fs3 fc0 sc0 ls0 ws0">crypto-sha1<span class="_ _19"> </span>5<span class="_ _33"> </span>5<span class="_ _28"> </span>10<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>2.0<span class="_ _31"> </span>2.0<span class="_ _2d"> </span>5.95x</div><div class="t m0 xb9 h5 y233 ff2 fs3 fc0 sc0 ls0 ws0">date-format-tofte<span class="_ _12"> </span>3<span class="_ _33"> </span>3<span class="_ _34"> </span>4<span class="_ _2e"> </span>7<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.3<span class="_ _31"> </span>1.3<span class="_ _2d"> </span>1.07x</div><div class="t m0 xb9 h5 y285 ff2 fs3 fc0 sc0 ls0 ws0">date-format-xparb<span class="_ _43"> </span>3<span class="_ _33"> </span>3<span class="_ _28"> </span>11<span class="_ _2e"> </span>3<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>3.7<span class="_ _31"> </span>3.7<span class="_ _2d"> </span>0.98x</div><div class="t m0 xb9 h5 y286 ff2 fs3 fc0 sc0 ls0 ws0">math-cordic<span class="_ _44"> </span>2<span class="_ _33"> </span>4<span class="_ _34"> </span>5<span class="_ _2e"> </span>1<span class="_ _2f"> </span>0<span class="_ _30"> </span>2.0<span class="_ _1e"> </span>1.3<span class="_ _31"> </span>2.5<span class="_ _2d"> </span>4.92x</div><div class="t m0 xb9 h5 y287 ff2 fs3 fc0 sc0 ls0 ws0">math-partial-sums<span class="_ _43"> </span>2<span class="_ _33"> </span>4<span class="_ _34"> </span>4<span class="_ _2e"> </span>1<span class="_ _2f"> </span>0<span class="_ _30"> </span>2.0<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>2.0<span class="_ _2d"> </span>5.90x</div><div class="t m0 xb9 h5 y288 ff2 fs3 fc0 sc0 ls0 ws0">math-spectral-norm<span class="_ _14"> </span>15<span class="_ _2d"> </span>20<span class="_ _28"> </span>20<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.3<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>1.3<span class="_ _45"> </span>7.12x</div><div class="t m0 xb9 h5 y289 ff2 fs3 fc0 sc0 ls0 ws0">rege<span class="_ _2"></span>xp-dna<span class="_ _46"> </span>2<span class="_ _33"> </span>2<span class="_ _34"> </span>2<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>1.0<span class="_ _2d"> </span>4.21x</div><div class="t m0 xb9 h5 y28a ff2 fs3 fc0 sc0 ls0 ws0">string-base64<span class="_ _3d"> </span>3<span class="_ _33"> </span>5<span class="_ _34"> </span>7<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.7<span class="_ _1e"> </span>1.4<span class="_ _31"> </span>2.3<span class="_ _2d"> </span>2.53x</div><div class="t m0 xb9 h5 y1f0 ff2 fs3 fc0 sc0 ls0 ws0">string-fasta<span class="_ _46"> </span>5<span class="_ _25"> </span>11<span class="_ _28"> </span>15<span class="_ _2e"> </span>6<span class="_ _2f"> </span>0<span class="_ _30"> </span>2.2<span class="_ _1e"> </span>1.4<span class="_ _31"> </span>3.0<span class="_ _2d"> </span>1.49x</div><div class="t m0 xb9 h5 y1f1 ff2 fs3 fc0 sc0 ls0 ws0">string-tagcloud<span class="_ _47"> </span>3<span class="_ _33"> </span>6<span class="_ _34"> </span>6<span class="_ _2e"> </span>5<span class="_ _2f"> </span>0<span class="_ _30"> </span>2.0<span class="_ _1e"> </span>1.0<span class="_ _31"> </span>2.0<span class="_ _2d"> </span>1.09x</div><div class="t m0 xb9 h5 y21b ff2 fs3 fc0 sc0 ls0 ws0">string-unpack-code<span class="_ _48"> </span>4<span class="_ _33"> </span>4<span class="_ _28"> </span>37<span class="_ _2e"> </span>0<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.0<span class="_ _1e"> </span>9.3<span class="_ _31"> </span>9.3<span class="_ _2d"> </span>1.20x</div><div class="t m0 xb9 h5 y28b ff2 fs3 fc0 sc0 ls0 ws0">string-validate-input<span class="_ _49"> </span>6<span class="_ _2d"> </span>10<span class="_ _28"> </span>13<span class="_ _2e"> </span>1<span class="_ _2f"> </span>0<span class="_ _30"> </span>1.7<span class="_ _1e"> </span>1.3<span class="_ _31"> </span>2.2<span class="_ _2d"> </span>1.86x</div><div class="t m0 x59 h5 y1f6 ff1 fs3 fc0 sc0 ls0 ws0">Figure<span class="_ _5"> </span>13.<span class="_ _1"> </span><span class="ff2">Detailed<span class="_ _5"> </span>trace<span class="_ _5"> </span>recording<span class="_ _5"> </span>statistics<span class="_ _3"> </span>for<span class="_ _5"> </span>the<span class="_ _5"> </span>SunSpider<span class="_ _5"> </span>benchmark<span class="_ _5"> </span>set.</span></div><div class="t m0 x2f h5 y28c ff2 fs3 fc0 sc0 ls0 ws0">mean).<span class="_ _3"> </span>W<span class="_ _b"></span>e<span class="_ _6"> </span>exclude<span class="_ _3"> </span><span class="ff7">regexp-dna<span class="_ _3"> </span></span>from<span class="_ _3"> </span>the<span class="_ _3"> </span>following<span class="_ _3"> </span>calculations,</div><div class="t m0 x2f h5 y28d ff2 fs3 fc0 sc0 ls0 ws0">because<span class="_ _5"> </span>most<span class="_ _5"> </span>of<span class="_ _5"> </span>its<span class="_ _5"> </span>time<span class="_ _3"> </span>is<span class="_ _5"> </span>spent<span class="_ _5"> </span>in<span class="_ _5"> </span>the<span class="_ _5"> </span>regular<span class="_ _5"> </span>e<span class="_ _2"></span>xpression<span class="_ _5"> </span>matcher<span class="_ _2"></span>,</div><div class="t m0 x2f h5 y28e ff2 fs3 fc0 sc0 ls0 ws0">which<span class="_ _1"> </span>has<span class="_ _d"> </span>much<span class="_ _1"> </span>dif<span class="_ _2"></span>ferent<span class="_ _d"> </span>performance<span class="_ _1"> </span>characteristics<span class="_ _1"> </span>from<span class="_ _d"> </span>the</div><div class="t m0 x2f h5 y28f ff2 fs3 fc0 sc0 ls0 ws0">other<span class="_ _6"> </span>programs.<span class="_ _6"> </span>(Note<span class="_ _6"> </span>that<span class="_ _6"> </span>this<span class="_ _6"> </span>only<span class="_ _6"> </span>makes<span class="_ _3"> </span>a<span class="_ _6"> </span>difference<span class="_ _6"> </span>of<span class="_ _6"> </span>about</div><div class="t m0 x2f h5 y290 ff2 fs3 fc0 sc0 ls0 ws0">10%<span class="_ _3"> </span>in<span class="_ _5"> </span>the<span class="_ _3"> </span>results.)<span class="_ _3"> </span>Di<span class="_ _2"></span>viding<span class="_ _3"> </span>the<span class="_ _5"> </span>total<span class="_ _3"> </span>ex<span class="_ _2"></span>ecution<span class="_ _3"> </span>time<span class="_ _5"> </span>in<span class="_ _3"> </span>processor</div><div class="t m0 x2f h5 y291 ff2 fs3 fc0 sc0 ls0 ws0">clock<span class="_ _d"> </span>cycles<span class="_ _1"> </span>by<span class="_ _d"> </span>the<span class="_ _d"> </span>number<span class="_ _1"> </span>of<span class="_ _d"> </span>bytecodes<span class="_ _d"> </span>executed<span class="_ _d"> </span>in<span class="_ _1"> </span>the<span class="_ _d"> </span>base</div><div class="t m0 x2f h5 y292 ff2 fs3 fc0 sc0 ls0 ws0">interpreter<span class="_ _8"> </span>shows<span class="_ _d"> </span>that<span class="_ _8"> </span>on<span class="_ _d"> </span>a<span class="_ _2"></span>verage,<span class="_ _8"> </span>a<span class="_ _d"> </span>bytecode<span class="_ _8"> </span>executes<span class="_ _8"> </span>in<span class="_ _d"> </span>about</div><div class="t m0 x2f h5 y293 ff2 fs3 fc0 sc0 ls0 ws0">35<span class="_ _6"> </span>cycles.<span class="_ _3"> </span>Native<span class="_ _3"> </span>traces<span class="_ _6"> </span>take<span class="_ _6"> </span>about<span class="_ _3"> </span>9<span class="_ _6"> </span>cycles<span class="_ _6"> </span>per<span class="_ _3"> </span>bytecode,<span class="_ _6"> </span>a<span class="_ _6"> </span>3.9x</div><div class="t m0 x2f h5 y294 ff2 fs3 fc0 sc0 ls0 ws0">speedup<span class="_ _5"> </span>over<span class="_ _5"> </span>the<span class="_ _5"> </span>interpreter<span class="_ _2"></span>.</div><div class="t m0 x34 h5 y295 ff2 fs3 fc0 sc0 ls0 ws0">Using<span class="_ _3"> </span>similar<span class="_ _3"> </span>computations,<span class="_ _3"> </span>we<span class="_ _3"> </span>find<span class="_ _3"> </span>that<span class="_ _6"> </span>trace<span class="_ _3"> </span>recording<span class="_ _3"> </span>takes</div><div class="t m0 x2f h5 y296 ff2 fs3 fc0 sc0 ls0 ws0">about<span class="_ _3"> </span>3800<span class="_ _3"> </span>cycles<span class="_ _3"> </span>per<span class="_ _3"> </span>bytecode,<span class="_ _6"> </span>and<span class="_ _3"> </span>compilation<span class="_ _3"> </span>3150<span class="_ _3"> </span>cycles<span class="_ _3"> </span>per</div><div class="t m0 x2f h5 y1d5 ff2 fs3 fc0 sc0 ls0 ws0">bytecode.<span class="_ _3"> </span>Hence,<span class="_ _3"> </span>during<span class="_ _3"> </span>recording<span class="_ _3"> </span>and<span class="_ _3"> </span>compiling<span class="_ _3"> </span>the<span class="_ _3"> </span>VM<span class="_ _6"> </span>runs<span class="_ _3"> </span>at</div><div class="t m0 x2f h5 y1d6 ff2 fs3 fc0 sc0 ls0 ws0">1/200<span class="_ _3"> </span>the<span class="_ _3"> </span>speed<span class="_ _3"> </span>of<span class="_ _3"> </span>the<span class="_ _3"> </span>interpreter<span class="_ _b"></span>.<span class="_ _3"> </span>Because<span class="_ _3"> </span>it<span class="_ _3"> </span>costs<span class="_ _3"> </span>6950<span class="_ _3"> </span>cycles<span class="_ _3"> </span>to</div><div class="t m0 x2f h5 y1d7 ff2 fs3 fc0 sc0 ls0 ws0">compile<span class="_ _3"> </span>a<span class="_ _3"> </span>bytecode,<span class="_ _3"> </span>and<span class="_ _3"> </span>we<span class="_ _3"> </span>sa<span class="_ _2"></span>ve<span class="_ _3"> </span>26<span class="_ _3"> </span>c<span class="_ _2"></span>ycles<span class="_ _3"> </span>each<span class="_ _3"> </span>time<span class="_ _3"> </span>that<span class="_ _3"> </span>code<span class="_ _3"> </span>is</div><div class="t m0 x2f h5 y1d8 ff2 fs3 fc0 sc0 ls0 ws0">run<span class="_ _5"> </span>nativ<span class="_ _2"></span>ely<span class="_ _2"></span>,<span class="_ _5"> </span>we<span class="_ _5"> </span>break<span class="_ _5"> </span>e<span class="_ _2"></span>ven<span class="_ _5"> </span>after<span class="_ _5"> </span>running<span class="_ _5"> </span>a<span class="_ _5"> </span>trace<span class="_ _3"> </span>270<span class="_ _5"> </span>times.</div><div class="t m0 x34 h5 y1d9 ff2 fs3 fc0 sc0 ls0 ws0">The<span class="_ _3"> </span>other<span class="_ _3"> </span>VMs<span class="_ _6"> </span>we<span class="_ _3"> </span>compared<span class="_ _3"> </span>with<span class="_ _3"> </span>achieve<span class="_ _3"> </span>an<span class="_ _3"> </span>overall<span class="_ _3"> </span>speedup</div><div class="t m0 x2f h5 y172 ff2 fs3 fc0 sc0 ls0 ws0">of<span class="_ _8"> </span>3.0x<span class="_ _8"> </span>relative<span class="_ _6"> </span>to<span class="_ _d"> </span>our<span class="_ _8"> </span>baseline<span class="_ _8"> </span>interpreter<span class="_ _2"></span>.<span class="_ _8"> </span>Our<span class="_ _8"> </span>estimated<span class="_ _8"> </span>native</div><div class="t m0 x2f h5 y1db ff2 fs3 fc0 sc0 ls0 ws0">code<span class="_ _d"> </span>speedup<span class="_ _1"> </span>of<span class="_ _d"> </span>3.9x<span class="_ _1"> </span>is<span class="_ _d"> </span>significantly<span class="_ _1"> </span>better<span class="_ _b"></span>.<span class="_ _1"> </span>This<span class="_ _d"> </span>suggests<span class="_ _1"> </span>that</div><div class="t m0 x2f h5 y203 ff2 fs3 fc0 sc0 ls0 ws0">our<span class="_ _5"> </span>compilation<span class="_ _5"> </span>techniques<span class="_ _5"> </span>can<span class="_ _5"> </span>generate<span class="_ _5"> </span>more<span class="_ _5"> </span>ef<span class="_ _2"></span>ficient<span class="_ _5"> </span>nati<span class="_ _2"></span>ve<span class="_ _5"> </span>code</div><div class="t m0 x2f h5 y204 ff2 fs3 fc0 sc0 ls0 ws0">than<span class="_ _5"> </span>any<span class="_ _5"> </span>other<span class="_ _5"> </span>current<span class="_ _5"> </span>JavaScript<span class="_ _5"> </span>VM.</div><div class="t m0 x34 h5 y1dc ff2 fs3 fc0 sc0 ls0 ws0">These<span class="_ _5"> </span>estimates<span class="_ _7"> </span>also<span class="_ _5"> </span>indicate<span class="_ _7"> </span>that<span class="_ _7"> </span>our<span class="_ _5"> </span>startup<span class="_ _7"> </span>performance<span class="_ _5"> </span>could</div><div class="t m0 x2f h5 y1dd ff2 fs3 fc0 sc0 ls0 ws0">be<span class="_ _3"> </span>substantially<span class="_ _5"> </span>better<span class="_ _5"> </span>if<span class="_ _3"> </span>we<span class="_ _5"> </span>improved<span class="_ _5"> </span>the<span class="_ _3"> </span>speed<span class="_ _5"> </span>of<span class="_ _3"> </span>trace<span class="_ _5"> </span>recording</div><div class="t m0 x2f h5 y1b1 ff2 fs3 fc0 sc0 ls0 ws0">and<span class="_ _3"> </span>compilation.<span class="_ _3"> </span>The<span class="_ _5"> </span>estimated<span class="_ _3"> </span>200x<span class="_ _3"> </span>slo<span class="_ _2"></span>wdo<span class="_ _2"></span>wn<span class="_ _3"> </span>for<span class="_ _5"> </span>recording<span class="_ _3"> </span>and</div><div class="t m0 x2f h5 y1b3 ff2 fs3 fc0 sc0 ls0 ws0">compilation<span class="_ _5"> </span>is<span class="_ _7"> </span>very<span class="_ _5"> </span>rough,<span class="_ _5"> </span>and<span class="_ _7"> </span>may<span class="_ _5"> </span>be<span class="_ _5"> </span>influenced<span class="_ _7"> </span>by<span class="_ _5"> </span>startup<span class="_ _5"> </span>f<span class="_ _2"></span>actors</div><div class="t m0 x2f h5 y1b4 ff2 fs3 fc0 sc0 ls0 ws0">in<span class="_ _3"> </span>the<span class="_ _5"> </span>interpreter<span class="_ _3"> </span>(e.g.,<span class="_ _3"> </span>caches<span class="_ _3"> </span>that<span class="_ _5"> </span>have<span class="_ _5"> </span>not<span class="_ _3"> </span>warmed<span class="_ _3"> </span>up<span class="_ _5"> </span>yet<span class="_ _3"> </span>during</div><div class="t m0 x2f h5 y1b5 ff2 fs3 fc0 sc0 ls0 ws0">recording).<span class="_ _8"> </span>One<span class="_ _8"> </span>observation<span class="_ _6"> </span>supporting<span class="_ _8"> </span>this<span class="_ _8"> </span>conjecture<span class="_ _8"> </span>is<span class="_ _8"> </span>that<span class="_ _8"> </span>in</div><div class="t m0 x2f h5 y1b6 ff2 fs3 fc0 sc0 ls0 ws0">the<span class="_ _5"> </span>tracer<span class="_ _2"></span>,<span class="_ _5"> </span>interpreted<span class="_ _5"> </span>bytecodes<span class="_ _5"> </span>take<span class="_ _5"> </span>about<span class="_ _3"> </span>180<span class="_ _5"> </span>c<span class="_ _2"></span>ycles<span class="_ _5"> </span>to<span class="_ _5"> </span>run.<span class="_ _5"> </span>Still,</div><div class="t m0 x2f h5 y297 ff2 fs3 fc0 sc0 ls0 ws0">recording<span class="_ _3"> </span>and<span class="_ _5"> </span>compilation<span class="_ _3"> </span>are<span class="_ _3"> </span>clearly<span class="_ _5"> </span>both<span class="_ _3"> </span>e<span class="_ _2"></span>xpensiv<span class="_ _2"></span>e,<span class="_ _3"> </span>and<span class="_ _5"> </span>a<span class="_ _3"> </span>better</div><div class="t m0 x2f h5 y298 ff2 fs3 fc0 sc0 ls0 ws0">implementation,<span class="_ _8"> </span>possibly<span class="_ _d"> </span>including<span class="_ _8"> </span>redesign<span class="_ _d"> </span>of<span class="_ _8"> </span>the<span class="_ _d"> </span>LIR<span class="_ _8"> </span>abstract</div><div class="t m0 x2f h5 y299 ff2 fs3 fc0 sc0 ls0 ws0">syntax<span class="_ _5"> </span>or<span class="_ _5"> </span>encoding,<span class="_ _3"> </span>w<span class="_ _2"></span>ould<span class="_ _5"> </span>improv<span class="_ _2"></span>e<span class="_ _5"> </span>startup<span class="_ _5"> </span>performance.</div><div class="t m0 x34 h5 y29a ff2 fs3 fc0 sc0 ls0 ws0">Our<span class="_ _3"> </span>performance<span class="_ _3"> </span>results<span class="_ _3"> </span>confirm<span class="_ _3"> </span>that<span class="_ _3"> </span>type<span class="_ _3"> </span>specialization<span class="_ _3"> </span>using</div><div class="t m0 x2f h5 y29b ff2 fs3 fc0 sc0 ls0 ws0">trace<span class="_ _1"> </span>trees<span class="_ _d"> </span>substantially<span class="_ _1"> </span>improv<span class="_ _2"></span>es<span class="_ _1"> </span>performance.<span class="_ _d"> </span>W<span class="_ _b"></span>e<span class="_ _1"> </span>are<span class="_ _1"> </span>able<span class="_ _d"> </span>to</div><div class="t m0 x2f h5 y190 ff2 fs3 fc0 sc0 ls0 ws0">outperform<span class="_ _3"> </span>the<span class="_ _6"> </span>fastest<span class="_ _3"> </span>av<span class="_ _2"></span>ailable<span class="_ _3"> </span>Jav<span class="_ _2"></span>aScript<span class="_ _3"> </span>compiler<span class="_ _6"> </span>(V8)<span class="_ _3"> </span>and<span class="_ _3"> </span>the</div><div class="t m0 x32 h5 y29c ff2 fs3 fc0 sc0 ls0 ws0">fastest<span class="_ _3"> </span>av<span class="_ _2"></span>ailable<span class="_ _3"> </span>Jav<span class="_ _2"></span>aScript<span class="_ _3"> </span>inline<span class="_ _3"> </span>threaded<span class="_ _3"> </span>interpreter<span class="_ _6"> </span>(SFX)<span class="_ _3"> </span>on<span class="_ _3"> </span>9</div><div class="t m0 x32 h5 y28d ff2 fs3 fc0 sc0 ls0 ws0">of<span class="_ _5"> </span>26<span class="_ _5"> </span>benchmarks.</div><div class="t m0 x32 h9 y169 ff1 fs1 fc0 sc0 ls0 ws0">8.<span class="_ _a"> </span>Related<span class="_ _3"> </span>W<span class="_ _b"></span>ork</div><div class="t m0 x32 h5 y124 ff1 fs3 fc0 sc0 ls0 ws0">T<span class="_ _b"></span>race<span class="_ _6"> </span>optimization<span class="_ _6"> </span>f<span class="_ _2"></span>or<span class="_ _3"> </span>dynamic<span class="_ _6"> </span>languages.<span class="_ _6"> </span><span class="ff2">The<span class="_ _3"> </span>closest<span class="_ _6"> </span>area<span class="_ _6"> </span>of</span></div><div class="t m0 x32 h5 y125 ff2 fs3 fc0 sc0 ls0 ws0">related<span class="_ _6"> </span>work<span class="_ _6"> </span>is<span class="_ _8"> </span>on<span class="_ _6"> </span>applying<span class="_ _8"> </span>trace<span class="_ _6"> </span>optimization<span class="_ _8"> </span>to<span class="_ _6"> </span>type-specialize</div><div class="t m0 x32 h5 y126 ff2 fs3 fc0 sc0 ls0 ws0">dynamic<span class="_ _8"> </span>languages.<span class="_ _8"> </span>Existing<span class="_ _8"> </span>work<span class="_ _8"> </span>shares<span class="_ _8"> </span>the<span class="_ _d"> </span>idea<span class="_ _6"> </span>of<span class="_ _d"> </span>generating</div><div class="t m0 x32 h5 y127 ff2 fs3 fc0 sc0 ls0 ws0">type-specialized<span class="_ _6"> </span>code<span class="_ _8"> </span>speculati<span class="_ _2"></span>vely<span class="_ _6"> </span>with<span class="_ _6"> </span>guards<span class="_ _8"> </span>along<span class="_ _6"> </span>interpreter</div><div class="t m0 x32 h5 y128 ff2 fs3 fc0 sc0 ls0 ws0">traces.</div><div class="t m0 x33 h5 y129 ff2 fs3 fc0 sc0 ls0 ws0">T<span class="_ _b"></span>o<span class="_ _1"> </span>our<span class="_ _d"> </span>knowledge,<span class="_ _d"> </span>Rigo’<span class="_ _2"></span>s<span class="_ _d"> </span>Psyco<span class="_ _d"> </span>(16)<span class="_ _1"> </span>is<span class="_ _d"> </span>the<span class="_ _d"> </span>only<span class="_ _1"> </span>published</div><div class="t m0 x32 h5 y12a ff2 fs3 fc0 sc0 ls0 ws0">type-specializing<span class="_ _3"> </span>trace<span class="_ _5"> </span>compiler<span class="_ _3"> </span>for<span class="_ _5"> </span>a<span class="_ _3"> </span>dynamic<span class="_ _5"> </span>language<span class="_ _3"> </span>(Python).</div><div class="t m0 x32 h5 y12b ff2 fs3 fc0 sc0 ls0 ws0">Psyco<span class="_ _5"> </span>does<span class="_ _7"> </span>not<span class="_ _5"> </span>attempt<span class="_ _7"> </span>to<span class="_ _5"> </span>identify<span class="_ _5"> </span>hot<span class="_ _7"> </span>loops<span class="_ _5"> </span>or<span class="_ _7"> </span>inline<span class="_ _5"> </span>function<span class="_ _7"> </span>calls.</div><div class="t m0 x32 h5 y12c ff2 fs3 fc0 sc0 ls0 ws0">Instead,<span class="_ _5"> </span>Psyco<span class="_ _5"> </span>transforms<span class="_ _7"> </span>loops<span class="_ _5"> </span>to<span class="_ _5"> </span>mutual<span class="_ _7"> </span>recursion<span class="_ _5"> </span>before<span class="_ _5"> </span>running</div><div class="t m0 x32 h5 y12d ff2 fs3 fc0 sc0 ls0 ws0">and<span class="_ _5"> </span>traces<span class="_ _5"> </span>all<span class="_ _3"> </span>operations.</div><div class="t m0 x33 h5 ycc ff2 fs3 fc0 sc0 ls0 ws0">Pall’<span class="_ _b"></span>s<span class="_ _3"> </span>LuaJIT<span class="_ _5"> </span>is<span class="_ _5"> </span>a<span class="_ _3"> </span>Lua<span class="_ _5"> </span>VM<span class="_ _5"> </span>in<span class="_ _5"> </span>development<span class="_ _5"> </span>that<span class="_ _5"> </span>uses<span class="_ _3"> </span>tra<span class="_ _2"></span>ce<span class="_ _5"> </span>com-</div><div class="t m0 x32 h5 ycd ff2 fs3 fc0 sc0 ls0 ws0">pilation<span class="_ _5"> </span>ideas.<span class="_ _5"> </span>(1).<span class="_ _5"> </span>There<span class="_ _5"> </span>are<span class="_ _7"> </span>no<span class="_ _5"> </span>publications<span class="_ _5"> </span>on<span class="_ _5"> </span>LuaJIT<span class="_ _5"> </span>b<span class="_ _2"></span>ut<span class="_ _5"> </span>the<span class="_ _5"> </span>cre-</div><div class="t m0 x32 h5 yce ff2 fs3 fc0 sc0 ls0 ws0">ator<span class="_ _5"> </span>has<span class="_ _3"> </span>told<span class="_ _5"> </span>us<span class="_ _5"> </span>that<span class="_ _3"> </span>LuaJIT<span class="_ _5"> </span>has<span class="_ _5"> </span>a<span class="_ _3"> </span>similar<span class="_ _5"> </span>design<span class="_ _5"> </span>to<span class="_ _5"> </span>our<span class="_ _3"> </span>system,<span class="_ _5"> </span>but</div><div class="t m0 x32 h5 ycf ff2 fs3 fc0 sc0 ls0 ws0">will<span class="_ _6"> </span>use<span class="_ _3"> </span>a<span class="_ _6"> </span>less<span class="_ _3"> </span>aggressive<span class="_ _3"> </span>type<span class="_ _6"> </span>speculation<span class="_ _3"> </span>(e.g.,<span class="_ _6"> </span>using<span class="_ _6"> </span>a<span class="_ _3"> </span>floating-</div><div class="t m0 x32 h5 yd0 ff2 fs3 fc0 sc0 ls0 ws0">point<span class="_ _3"> </span>representation<span class="_ _6"> </span>for<span class="_ _3"> </span>all<span class="_ _6"> </span>number<span class="_ _3"> </span>values)<span class="_ _3"> </span>and<span class="_ _3"> </span>does<span class="_ _6"> </span>not<span class="_ _3"> </span>generate</div><div class="t m0 x32 h5 yd1 ff2 fs3 fc0 sc0 ls0 ws0">nested<span class="_ _5"> </span>traces<span class="_ _5"> </span>for<span class="_ _3"> </span>nested<span class="_ _5"> </span>loops.</div><div class="t m0 x33 h5 yd2 ff1 fs3 fc0 sc0 ls0 ws0">General<span class="_ _d"> </span>trace<span class="_ _d"> </span>optimization.<span class="_ _d"> </span><span class="ff2">General<span class="_ _d"> </span>trace<span class="_ _8"> </span>optimization<span class="_ _d"> </span>has</span></div><div class="t m0 x32 h5 yd3 ff2 fs3 fc0 sc0 ls0 ws0">a<span class="_ _1"> </span>longer<span class="_ _d"> </span>history<span class="_ _d"> </span>that<span class="_ _1"> </span>has<span class="_ _d"> </span>treated<span class="_ _1"> </span>mostly<span class="_ _d"> </span>native<span class="_ _d"> </span>code<span class="_ _1"> </span>and<span class="_ _d"> </span>typed</div><div class="t m0 x32 h5 y94 ff2 fs3 fc0 sc0 ls0 ws0">languages<span class="_ _5"> </span>like<span class="_ _5"> </span>Java.<span class="_ _5"> </span>Thus,<span class="_ _5"> </span>these<span class="_ _5"> </span>systems<span class="_ _3"> </span>ha<span class="_ _2"></span>v<span class="_ _2"></span>e<span class="_ _5"> </span>focused<span class="_ _5"> </span>less<span class="_ _3"> </span>on<span class="_ _5"> </span>type</div><div class="t m0 x32 h5 y95 ff2 fs3 fc0 sc0 ls0 ws0">specialization<span class="_ _5"> </span>and<span class="_ _5"> </span>more<span class="_ _3"> </span>on<span class="_ _5"> </span>other<span class="_ _5"> </span>optimizations.</div><div class="t m0 x33 h5 y96 ff2 fs3 fc0 sc0 ls0 ws0">Dynamo<span class="_ _3"> </span>(7)<span class="_ _6"> </span>by<span class="_ _3"> </span>Bala<span class="_ _6"> </span>et<span class="_ _3"> </span>al,<span class="_ _3"> </span>introduced<span class="_ _6"> </span>nati<span class="_ _2"></span>ve<span class="_ _3"> </span>code<span class="_ _3"> </span>tracing<span class="_ _6"> </span>as<span class="_ _3"> </span>a</div><div class="t m0 x32 h5 y97 ff2 fs3 fc0 sc0 ls0 ws0">replacement<span class="_ _3"> </span>for<span class="_ _3"> </span>profile-guided<span class="_ _3"> </span>optimization<span class="_ _6"> </span>(PGO).<span class="_ _3"> </span>A<span class="_ _3"> </span>major<span class="_ _3"> </span>goal</div><div class="t m0 x32 h5 y98 ff2 fs3 fc0 sc0 ls0 ws0">was<span class="_ _d"> </span>to<span class="_ _1"> </span>perform<span class="_ _d"> </span>PGO<span class="_ _d"> </span>online<span class="_ _d"> </span>so<span class="_ _1"> </span>that<span class="_ _d"> </span>the<span class="_ _d"> </span>profile<span class="_ _1"> </span>w<span class="_ _2"></span>as<span class="_ _d"> </span>specific<span class="_ _1"> </span>to</div><div class="t m0 x32 h5 y99 ff2 fs3 fc0 sc0 ls0 ws0">the<span class="_ _3"> </span>current<span class="_ _5"> </span>ex<span class="_ _2"></span>ecution.<span class="_ _3"> </span>Dynamo<span class="_ _5"> </span>used<span class="_ _3"> </span>loop<span class="_ _5"> </span>headers<span class="_ _3"> </span>as<span class="_ _5"> </span>candidate<span class="_ _3"> </span>hot</div><div class="t m0 x32 h5 y9a ff2 fs3 fc0 sc0 ls0 ws0">traces,<span class="_ _5"> </span>but<span class="_ _5"> </span>did<span class="_ _5"> </span>not<span class="_ _5"> </span>try<span class="_ _3"> </span>to<span class="_ _5"> </span>create<span class="_ _5"> </span>loop<span class="_ _5"> </span>traces<span class="_ _5"> </span>specifically<span class="_ _2"></span>.</div><div class="t m0 x33 h5 y9b ff2 fs3 fc0 sc0 ls0 ws0">T<span class="_ _2"></span>race<span class="_ _6"> </span>trees<span class="_ _6"> </span>were<span class="_ _6"> </span>originally<span class="_ _6"> </span>proposed<span class="_ _6"> </span>by<span class="_ _6"> </span>Gal<span class="_ _6"> </span>et<span class="_ _6"> </span>al.<span class="_ _6"> </span>(11)<span class="_ _6"> </span>in<span class="_ _6"> </span>the</div><div class="t m0 x32 h5 y9c ff2 fs3 fc0 sc0 ls0 ws0">context<span class="_ _6"> </span>of<span class="_ _6"> </span>Ja<span class="_ _2"></span>va,<span class="_ _3"> </span>a<span class="_ _8"> </span>statically<span class="_ _6"> </span>typed<span class="_ _6"> </span>language.<span class="_ _6"> </span>Their<span class="_ _6"> </span>trace<span class="_ _6"> </span>trees<span class="_ _6"> </span>ac-</div><div class="t m0 x32 h5 y9d ff2 fs3 fc0 sc0 ls0 ws0">tually<span class="_ _3"> </span>inlined<span class="_ _3"> </span>parts<span class="_ _3"> </span>of<span class="_ _3"> </span>outer<span class="_ _6"> </span>loops<span class="_ _3"> </span>within<span class="_ _3"> </span>the<span class="_ _3"> </span>inner<span class="_ _3"> </span>loops<span class="_ _3"> </span>(because</div></div><div class="pi" data-data='{"ctm":[1.673203,0.000000,0.000000,1.673203,0.000000,0.000000]}'></div></div></div>
|