LCOV - code coverage report
Current view: directory - js/src/yarr - YarrJIT.cpp (source / functions) Found Hit Coverage
Test: app.info Lines: 1118 1009 90.3 %
Date: 2012-04-07 Functions: 70 66 94.3 %

       1                 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2                 :  * vim: set ts=8 sw=4 et tw=99 ft=cpp:
       3                 :  *
       4                 :  * ***** BEGIN LICENSE BLOCK *****
       5                 :  * Copyright (C) 2009 Apple Inc. All rights reserved.
       6                 :  *
       7                 :  * Redistribution and use in source and binary forms, with or without
       8                 :  * modification, are permitted provided that the following conditions
       9                 :  * are met:
      10                 :  * 1. Redistributions of source code must retain the above copyright
      11                 :  *    notice, this list of conditions and the following disclaimer.
      12                 :  * 2. Redistributions in binary form must reproduce the above copyright
      13                 :  *    notice, this list of conditions and the following disclaimer in the
      14                 :  *    documentation and/or other materials provided with the distribution.
      15                 :  *
      16                 :  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
      17                 :  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      18                 :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
      19                 :  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
      20                 :  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      21                 :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      22                 :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
      23                 :  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
      24                 :  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      25                 :  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      26                 :  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
      27                 :  *
      28                 :  * ***** END LICENSE BLOCK ***** */
      29                 : 
      30                 : #include "YarrJIT.h"
      31                 : 
      32                 : #include "assembler/assembler/LinkBuffer.h"
      33                 : #include "Yarr.h"
      34                 : 
      35                 : #if ENABLE_YARR_JIT
      36                 : 
      37                 : using namespace WTF;
      38                 : 
      39                 : namespace JSC { namespace Yarr {
      40                 : 
      41           24046 : class YarrGenerator : private MacroAssembler {
      42                 :     friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
      43                 : 
      44                 : #if WTF_CPU_ARM // Fixed register roles used by the generator, selected per target CPU.
      45                 :     static const RegisterID input = ARMRegisters::r0; // base register of the character loads (see readCharacter)
      46                 :     static const RegisterID index = ARMRegisters::r1; // scaled index into input; compared with 'length' by the input checks
      47                 :     static const RegisterID length = ARMRegisters::r2; // bound that 'index' is tested against
      48                 :     static const RegisterID output = ARMRegisters::r4;
      49                 : 
      50                 :     static const RegisterID regT0 = ARMRegisters::r5;
      51                 :     static const RegisterID regT1 = ARMRegisters::r6;
      52                 : 
      53                 :     static const RegisterID returnRegister = ARMRegisters::r0;
      54                 : #elif WTF_CPU_MIPS
      55                 :     static const RegisterID input = MIPSRegisters::a0;
      56                 :     static const RegisterID index = MIPSRegisters::a1;
      57                 :     static const RegisterID length = MIPSRegisters::a2;
      58                 :     static const RegisterID output = MIPSRegisters::a3;
      59                 : 
      60                 :     static const RegisterID regT0 = MIPSRegisters::t4;
      61                 :     static const RegisterID regT1 = MIPSRegisters::t5;
      62                 : 
      63                 :     static const RegisterID returnRegister = MIPSRegisters::v0;
      64                 : #elif WTF_CPU_SH4
      65                 :     static const RegisterID input = SH4Registers::r4;
      66                 :     static const RegisterID index = SH4Registers::r5;
      67                 :     static const RegisterID length = SH4Registers::r6;
      68                 :     static const RegisterID output = SH4Registers::r7;
      69                 : 
      70                 :     static const RegisterID regT0 = SH4Registers::r0;
      71                 :     static const RegisterID regT1 = SH4Registers::r1;
      72                 : 
      73                 :     static const RegisterID returnRegister = SH4Registers::r0;
      74                 : #elif WTF_CPU_SPARC
      75                 :     static const RegisterID input = SparcRegisters::i0;
      76                 :     static const RegisterID index = SparcRegisters::i1;
      77                 :     static const RegisterID length = SparcRegisters::i2;
      78                 :     static const RegisterID output = SparcRegisters::i3;
      79                 : 
      80                 :     static const RegisterID regT0 = SparcRegisters::i4;
      81                 :     static const RegisterID regT1 = SparcRegisters::i5;
      82                 : 
      83                 :     static const RegisterID returnRegister = SparcRegisters::i0;
      84                 : #elif WTF_CPU_X86
      85                 :     static const RegisterID input = X86Registers::eax;
      86                 :     static const RegisterID index = X86Registers::edx;
      87                 :     static const RegisterID length = X86Registers::ecx;
      88                 :     static const RegisterID output = X86Registers::edi;
      89                 : 
      90                 :     static const RegisterID regT0 = X86Registers::ebx;
      91                 :     static const RegisterID regT1 = X86Registers::esi;
      92                 : 
      93                 :     static const RegisterID returnRegister = X86Registers::eax;
      94                 : #elif WTF_CPU_X86_64
      95                 : #if WTF_PLATFORM_WIN // Windows x86-64 assigns input/index/length/output to different registers than other x86-64 targets.
      96                 :     static const RegisterID input = X86Registers::ecx;
      97                 :     static const RegisterID index = X86Registers::edx;
      98                 :     static const RegisterID length = X86Registers::r8;
      99                 :     static const RegisterID output = X86Registers::r9;
     100                 : #else
     101                 :     static const RegisterID input = X86Registers::edi;
     102                 :     static const RegisterID index = X86Registers::esi;
     103                 :     static const RegisterID length = X86Registers::edx;
     104                 :     static const RegisterID output = X86Registers::ecx;
     105                 : #endif
     106                 : 
     107                 :     static const RegisterID regT0 = X86Registers::eax; // NOTE(review): same register as returnRegister on this target
     108                 :     static const RegisterID regT1 = X86Registers::ebx;
     109                 : 
     110                 :     static const RegisterID returnRegister = X86Registers::eax;
     111                 : #endif
     112                 : 
     113           48717 :     void optimizeAlternative(PatternAlternative* alternative) // Swaps each fixed-count character class that is directly followed by a fixed-count pattern character, so the character term comes first.
     114                 :     {
     115           48717 :         if (!alternative->m_terms.size())
     116           12735 :             return; // no terms: nothing to reorder, and this guards the size() - 1 loop bound below
     117                 : 
     118          115572 :         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { // visits every adjacent (i, i + 1) pair
     119           79590 :             PatternTerm& term = alternative->m_terms[i];
     120           79590 :             PatternTerm& nextTerm = alternative->m_terms[i + 1];
     121                 : 
     122           79590 :             if ((term.type == PatternTerm::TypeCharacterClass)
     123                 :                 && (term.quantityType == QuantifierFixedCount)
     124                 :                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
     125                 :                 && (nextTerm.quantityType == QuantifierFixedCount)) {
     126            1053 :                 PatternTerm termCopy = term; // copy by value: 'term' refers to the slot overwritten on the next line
     127            1053 :                 alternative->m_terms[i] = nextTerm;
     128            1053 :                 alternative->m_terms[i + 1] = termCopy; // the class now sits at i + 1, so it can be swapped again on the next iteration
     129                 :             }
     130                 :         }
     131                 :     }
     132                 : 
     133            1290 :     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) // Emits compares of 'character' against ranges[0..count) and the single 'matches' values below them; hits jump to matchDest, known misses are appended to failures. Requires count > 0 (ranges[count >> 1] is read unconditionally).
     134                 :     {
     135            1290 :         do {
     136                 :             // pick which range we're going to generate
     137            1290 :             int which = count >> 1; // middle range: lower ones are handled by recursion, higher ones by looping
     138            1290 :             char lo = ranges[which].begin; // NOTE(review): narrowed to char; assumes these range bounds are ASCII - confirm
     139            1290 :             char hi = ranges[which].end;
     140                 : 
     141                 :             // check if there are any ranges or matches below lo.  If not, just jl to failure -
     142                 :             // if there is anything else to check, check that first, if it falls through jmp to failure.
     143            1290 :             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
     144             306 :                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
     145                 : 
     146                 :                 // generate code for all ranges before this one
     147             306 :                 if (which)
     148             117 :                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
     149                 : 
     150             900 :                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { // remaining single values below lo (presumably 'matches' is sorted ascending - confirm)
     151             288 :                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
     152             288 :                     ++*matchIndex;
     153                 :                 }
     154             306 :                 failures.append(jump()); // character is below lo and matched nothing
     155                 : 
     156             306 :                 loOrAbove.link(this);
     157             984 :             } else if (which) {
     158              45 :                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
     159                 : 
     160              45 :                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
     161              45 :                 failures.append(jump());
     162                 : 
     163              45 :                 loOrAbove.link(this);
     164                 :             } else
     165             939 :                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); // nothing lies below this range, so below lo is a direct failure
     166                 : 
     167            2580 :             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) // skip single values up to hi without emitting a test for them
     168               0 :                 ++*matchIndex;
     169                 : 
     170            1290 :             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); // reached only with character >= lo, so <= hi means inside the range
     171                 :             // fall through to here, the value is above hi.
     172                 : 
     173                 :             // shuffle along & loop around if there are any more matches to handle.
     174            1290 :             unsigned next = which + 1;
     175            1290 :             ranges += next; // continue with the ranges after 'which'
     176            1290 :             count -= next;
     177                 :         } while (count);
     178            1191 :     }
     179                 : 
     180            8652 :     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) // Emits code that jumps to matchDest when 'character' belongs to charClass and falls through otherwise.
     181                 :     {
     182            8652 :         if (charClass->m_table) { // table available: one byte test indexed by 'character' replaces all compares
     183            2700 :             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
     184            2700 :             matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry)); // inverted tables match on a zero entry
     185            2700 :             return;
     186                 :         }
     187            5952 :         Jump unicodeFail; // only assigned and linked when the class has non-ASCII entries
     188            5952 :         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
     189            4446 :             Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); // values <= 0x7f skip the non-ASCII tests
     190                 : 
     191            4446 :             if (charClass->m_matchesUnicode.size()) {
     192           14112 :                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
     193            9675 :                     UChar ch = charClass->m_matchesUnicode[i];
     194            9675 :                     matchDest.append(branch32(Equal, character, Imm32(ch)));
     195                 :                 }
     196                 :             }
     197                 : 
     198            4446 :             if (charClass->m_rangesUnicode.size()) {
     199             288 :                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
     200             144 :                     UChar lo = charClass->m_rangesUnicode[i].begin;
     201             144 :                     UChar hi = charClass->m_rangesUnicode[i].end;
     202                 : 
     203             144 :                     Jump below = branch32(LessThan, character, Imm32(lo)); // below this range: go on to the next one
     204             144 :                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
     205             144 :                     below.link(this);
     206                 :                 }
     207                 :             }
     208                 : 
     209            4446 :             unicodeFail = jump(); // value above 0x7f matched nothing: jump past the ASCII tests
     210            4446 :             isAscii.link(this);
     211                 :         }
     212                 : 
     213            5952 :         if (charClass->m_ranges.size()) {
     214            1029 :             unsigned matchIndex = 0;
     215            2058 :             JumpList failures;
     216            1029 :             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
     217            2256 :             while (matchIndex < charClass->m_matches.size()) // single values the range code did not consume
     218             198 :                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
     219                 : 
     220            1029 :             failures.link(this); // range failures land here and fall through as "no match"
     221            4923 :         } else if (charClass->m_matches.size()) {
     222                 :             // optimization: gather 'a','A' etc back together, can mask & test once.
     223            9846 :             Vector<char> matchesAZaz;
     224                 : 
     225           15444 :             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
     226           10521 :                 char ch = charClass->m_matches[i];
     227           10521 :                 if (m_pattern.m_ignoreCase) {
     228            1638 :                     if (isASCIILower(ch)) {
     229             432 :                         matchesAZaz.append(ch); // lower-case letters are tested after the or32 below
     230             432 :                         continue;
     231                 :                     }
     232            1206 :                     if (isASCIIUpper(ch))
     233             432 :                         continue; // no test emitted; presumably the lower-case twin is also in m_matches - confirm
     234                 :                 }
     235            9657 :                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
     236                 :             }
     237                 : 
     238            4923 :             if (unsigned countAZaz = matchesAZaz.size()) {
     239             144 :                 or32(TrustedImm32(32), character); // modifies 'character' in place; setting bit 0x20 maps ASCII upper case onto lower case
     240             576 :                 for (unsigned i = 0; i < countAZaz; ++i)
     241             432 :                     matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
     242                 :             }
     243                 :         }
     244                 : 
     245            5952 :         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) // same condition under which unicodeFail was assigned above
     246            4446 :             unicodeFail.link(this);
     247                 :     }
     248                 : 
     249                 :     // Jumps if input is not available. Note that index has already been advanced by countToCheck when the jump is taken!
     250           49269 :     Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
     251                 :     {
     252           49269 :         if (countToCheck)
     253           24946 :             add32(Imm32(countToCheck), index); // skipped for 0, so the default argument only compares
     254           49269 :         return branch32(Above, index, length); // taken when index > length
     255                 :     }
     256                 : 
     257                 :     Jump jumpIfAvailableInput(unsigned countToCheck) // Advances index by countToCheck, then returns a jump taken when index <= length.
     258                 :     {
     259                 :         add32(Imm32(countToCheck), index); // unconditional, unlike jumpIfNoAvailableInput
     260                 :         return branch32(BelowOrEqual, index, length);
     261                 :     }
     262                 : 
     263           23873 :     Jump checkInput() // Returns a jump taken when index <= length; index is not modified.
     264                 :     {
     265           23873 :         return branch32(BelowOrEqual, index, length);
     266                 :     }
     267                 : 
     268            2661 :     Jump atEndOfInput() // Returns a jump taken when index == length.
     269                 :     {
     270            2661 :         return branch32(Equal, index, length);
     271                 :     }
     272                 : 
     273             468 :     Jump notAtEndOfInput() // Returns a jump taken when index != length.
     274                 :     {
     275             468 :         return branch32(NotEqual, index, length);
     276                 :     }
     277                 : 
     278                 :     Jump jumpIfCharEquals(UChar ch, int inputPosition) // Returns a jump taken when the 16-bit value at input + index * 2 + inputPosition * sizeof(UChar) equals ch.
     279                 :     {
     280                 :         return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); // inputPosition is a character offset, converted to bytes here
     281                 :     }
     282                 : 
     283           23734 :     Jump jumpIfCharNotEquals(UChar ch, int inputPosition) // Returns a jump taken when the 16-bit value at input + index * 2 + inputPosition * sizeof(UChar) differs from ch.
     284                 :     {
     285           23734 :         return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch)); // inputPosition is a character offset, converted to bytes here
     286                 :     }
     287                 : 
     288            9031 :     void readCharacter(int inputPosition, RegisterID reg) // Emits a 16-bit load from input + index * 2 + inputPosition * sizeof(UChar) into reg.
     289                 :     {
     290            9031 :         load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); // inputPosition is a character offset, converted to bytes here
     291            9031 :     }
     292                 : 
     293            2481 :     void storeToFrame(RegisterID reg, unsigned frameLocation) // Stores reg into frame slot frameLocation via poke().
     294                 :     {
     295            2481 :         poke(reg, frameLocation); // frameLocation is a slot index, not a byte offset (compare storeToFrameWithPatch) - presumably poke() scales it; confirm
     296            2481 :     }
     297                 : 
     298             243 :     void storeToFrame(TrustedImm32 imm, unsigned frameLocation) // Immediate overload: stores the constant imm into frame slot frameLocation via poke().
     299                 :     {
     300             243 :         poke(imm, frameLocation);
     301             243 :     }
     302                 : 
     303            5436 :     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) // Stores a null placeholder pointer into frame slot frameLocation and returns the DataLabelPtr of that store.
     304                 :     {
     305            5436 :         return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); // slot index scaled to a byte offset from the stack pointer; presumably the 0 is patched later - confirm
     306                 :     }
     307                 : 
     308            2193 :     void loadFromFrame(unsigned frameLocation, RegisterID reg) // Loads frame slot frameLocation into reg via peek().
     309                 :     {
     310            2193 :         peek(reg, frameLocation); // note the argument order: destination register first, then slot
     311            2193 :     }
     312                 : 
     313            2376 :     void loadFromFrameAndJump(unsigned frameLocation) // Emits a jump through the address held in frame slot frameLocation.
     314                 :     {
     315            2376 :         jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); // same slot addressing as storeToFrameWithPatch
     316            2376 :     }
     317                 : 
     318                 :     enum YarrOpCode {
     319                 :         // These nodes wrap body alternatives - those in the main disjunction,
     320                 :         // rather than subpatterns or assertions. These are chained together in
     321                 :         // a doubly linked list, with a 'begin' node for the first alternative,
     322                 :         // a 'next' node for each subsequent alternative, and an 'end' node at
     323                 :         // the end. In the case of repeating alternatives, the 'end' node also
     324                 :         // has a reference back to 'begin'.
     325                 :         OpBodyAlternativeBegin,
     326                 :         OpBodyAlternativeNext,
     327                 :         OpBodyAlternativeEnd,
     328                 :         // Similar to the body alternatives, but used for subpatterns with two
     329                 :         // or more alternatives.
     330                 :         OpNestedAlternativeBegin,
     331                 :         OpNestedAlternativeNext,
     332                 :         OpNestedAlternativeEnd,
     333                 :         // Used for alternatives in subpatterns where there is only a single
     334                 :         // alternative (backtracking is easier in these cases), or for alternatives
     335                 :         // which never need to be backtracked (those in parenthetical assertions,
     336                 :         // terminal subpatterns).
     337                 :         OpSimpleNestedAlternativeBegin,
     338                 :         OpSimpleNestedAlternativeNext,
     339                 :         OpSimpleNestedAlternativeEnd,
     340                 :         // Used to wrap 'Once' subpattern matches (quantityCount == 1).
     341                 :         OpParenthesesSubpatternOnceBegin,
     342                 :         OpParenthesesSubpatternOnceEnd,
     343                 :         // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
     344                 :         OpParenthesesSubpatternTerminalBegin,
     345                 :         OpParenthesesSubpatternTerminalEnd,
     346                 :         // Used to wrap parenthetical assertions.
     347                 :         OpParentheticalAssertionBegin,
     348                 :         OpParentheticalAssertionEnd,
     349                 :         // Wraps all simple terms (pattern characters, character classes).
     350                 :         OpTerm,
     351                 :         // Where an expression contains only 'once through' body alternatives
     352                 :         // and no repeating ones, this op is used to return match failure.
     353                 :         OpMatchFailed
     354                 :     };
     355                 : 
     356                 :     // This structure is used to hold the compiled opcode information,
     357                 :     // including reference back to the original PatternTerm/PatternAlternatives,
     358                 :     // and JIT compilation data structures.
     359         1639044 :     struct YarrOp {
     360           94762 :         explicit YarrOp(PatternTerm* term) // Builds an OpTerm node for 'term'.
     361                 :             : m_op(OpTerm)
     362                 :             , m_term(term)
     363           94762 :             , m_isDeadCode(false)
     364                 :         {
     365           94762 :         } // NOTE: m_alternative, m_previousOp, m_nextOp and m_checkAdjust are not initialized by this constructor.
     366                 : 
     367          135168 :         explicit YarrOp(YarrOpCode op) // Builds a node of the given opcode with no term attached.
     368                 :             : m_op(op)
     369          135168 :             , m_isDeadCode(false)
     370                 :         {
     371          135168 :         } // NOTE: m_term, m_alternative, m_previousOp, m_nextOp and m_checkAdjust are not initialized by this constructor.
     372                 : 
     373                 :         // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
     374                 :         YarrOpCode m_op;
     375                 :         PatternTerm* m_term; // only set by the PatternTerm* constructor
     376                 : 
     377                 :         // For alternatives, this holds the PatternAlternative and doubly linked
     378                 :         // references to this alternative's siblings. In the case of the
     379                 :         // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
     380                 :         // m_nextOp will reference the OpBodyAlternativeBegin node of the first
     381                 :         // repeating alternative.
     382                 :         PatternAlternative* m_alternative;
     383                 :         size_t m_previousOp; // sibling links are stored as size_t indices rather than pointers (presumably into the ops vector - confirm)
     384                 :         size_t m_nextOp;
     385                 : 
     386                 :         // Used to record a set of Jumps out of the generated code, typically
     387                 :         // used for jumps out to backtracking code, and a single reentry back
     388                 :         // into the code for a node (likely where a backtrack will trigger
     389                 :         // rematching).
     390                 :         Label m_reentry;
     391                 :         JumpList m_jumps;
     392                 : 
     393                 :         // This flag is used to null out the second pattern character, when
     394                 :         // two are fused to match a pair together.
     395                 :         bool m_isDeadCode; // false after either constructor
     396                 : 
     397                 :         // Currently used in the case of some of the more complex management of
     398                 :         // 'm_checked', to cache the offset used in this alternative, to avoid
     399                 :         // recalculating it.
     400                 :         int m_checkAdjust;
     401                 : 
     402                 :         // Used by OpNestedAlternativeNext/End to hold the pointer to the
     403                 :         // value that will be pushed into the pattern's frame to return to,
     404                 :         // upon backtracking back into the disjunction.
     405                 :         DataLabelPtr m_returnAddress;
     406                 :     };
     407                 : 
     408                 :     // BacktrackingState
     409                 :     // This class encapsulates information about the state of code generation
     410                 :     // whilst generating the code for backtracking, when a term fails to match.
     411                 :     // Upon entry to code generation of the backtracking code for a given node,
     412                 :     // the Backtracking state will hold references to all control flow sources
     413                 :     // that are outputs in need of further backtracking from the prior node
     414                 :     // generated (which is the subsequent operation in the regular expression,
     415                 :     // and in the m_ops Vector, since we generated backtracking backwards).
     416                 :     // These references to control flow take the form of:
     417                 :     //  - A jump list of jumps, to be linked to code that will backtrack them
     418                 :     //    further.
     419                 :     //  - A set of DataLabelPtr values, to be populated with values to be
     420                 :     //    treated effectively as return addresses backtracking into complex
     421                 :     //    subpatterns.
     422                 :     //  - A flag indicating that the current sequence of generated code up to
     423                 :     //    this point requires backtracking.
     424           24046 :     class BacktrackingState {
     425                 :     public:
     426           24046 :         BacktrackingState()
     427           24046 :             : m_pendingFallthrough(false)
     428                 :         {
     429           24046 :         }
     430                 : 
     431                 :         // Add a jump or jumps, a return address, or set the flag indicating
     432                 :         // that the current 'fallthrough' control flow requires backtracking.
     433            2112 :         void append(const Jump& jump)
     434                 :         {
     435            2112 :             m_laterFailures.append(jump);
     436            2112 :         }
     437          156295 :         void append(JumpList& jumpList)
     438                 :         {
     439          156295 :             m_laterFailures.append(jumpList);
     440          156295 :         }
     441            5436 :         void append(const DataLabelPtr& returnAddress)
     442                 :         {
     443            5436 :             m_pendingReturns.append(returnAddress);
     444            5436 :         }
     445           20322 :         void fallthrough()
     446                 :         {
     447           20322 :             ASSERT(!m_pendingFallthrough);
     448           20322 :             m_pendingFallthrough = true;
     449           20322 :         }
     450                 : 
     451                 :         // These methods clear the backtracking state, either linking to the
     452                 :         // current location, a provided label, or copying the backtracking out
     453                 :         // to a JumpList. All actions may require code generation to take place,
     454                 :         // and as such are passed a pointer to the assembler.
     455           49142 :         void link(MacroAssembler* assembler)
     456                 :         {
     457           49142 :             if (m_pendingReturns.size()) {
     458             810 :                 Label here(assembler);
     459            1620 :                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
     460             810 :                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
     461             810 :                 m_pendingReturns.clear();
     462                 :             }
     463           49142 :             m_laterFailures.link(assembler);
     464           49142 :             m_laterFailures.clear();
     465           49142 :             m_pendingFallthrough = false;
     466           49142 :         }
     467            3773 :         void linkTo(Label label, MacroAssembler* assembler) // Links all pending backtracking state to the supplied label and clears it.
     468                 :         {
     469            3773 :             if (m_pendingReturns.size()) {
     470            4914 :                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
     471            2457 :                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); // patched later by linkDataLabels()
     472            2457 :                 m_pendingReturns.clear();
     473                 :             }
     474            3773 :             if (m_pendingFallthrough)
     475             405 :                 assembler->jump(label); // a pending fallthrough becomes an explicit jump to label
     476            3773 :             m_laterFailures.linkTo(label, assembler);
     477            3773 :             m_laterFailures.clear();
     478            3773 :             m_pendingFallthrough = false;
     479            3773 :         }
     480            2205 :         void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) // Hands all pending backtracking state over to jumpList and clears it.
     481                 :         {
     482            2205 :             if (m_pendingReturns.size()) {
     483            2169 :                 Label here(assembler);
     484            4338 :                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
     485            2169 :                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
     486            2169 :                 m_pendingReturns.clear();
     487            2169 :                 m_pendingFallthrough = true; // the returns were bound to `here`, so force the jump below to route them into jumpList
     488                 :             }
     489            2205 :             if (m_pendingFallthrough)
     490            2169 :                 jumpList.append(assembler->jump());
     491            2205 :             jumpList.append(m_laterFailures);
     492            2205 :             m_laterFailures.clear();
     493            2205 :             m_pendingFallthrough = false;
     494            2205 :         }
     495                 : 
     496           47966 :         bool isEmpty() // True when no failure jumps, return addresses, or fallthrough are pending.
     497                 :         {
     498           47966 :             return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
     499                 :         }
     500                 : 
     501                 :         // Called at the end of code generation to link all return addresses: each recorded data label is patched with the location of its backtrack label.
     502           23884 :         void linkDataLabels(LinkBuffer& linkBuffer)
     503                 :         {
     504           23884 :             ASSERT(isEmpty()); // all pending state must already have been linked or taken
     505           29320 :             for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
     506            5436 :                 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
     507           23884 :         }
     508                 : 
     509                 :     private:
     510            7236 :         struct ReturnAddressRecord { // Pairs a data label with the Label whose location linkDataLabels() patches into it.
     511            5436 :             ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
     512                 :                 : m_dataLabel(dataLabel)
     513            5436 :                 , m_backtrackLocation(backtrackLocation)
     514                 :             {
     515            5436 :             }
     516                 : 
     517                 :             DataLabelPtr m_dataLabel; // the data label to be patched
     518                 :             Label m_backtrackLocation; // the label whose final location is written into m_dataLabel
     519                 :         };
     520                 : 
     521                 :         JumpList m_laterFailures; // failure jumps not yet linked to a backtrack target
     522                 :         bool m_pendingFallthrough; // set by fallthrough(); cleared whenever the state is linked or taken
     523                 :         Vector<DataLabelPtr, 4> m_pendingReturns; // data labels not yet bound to a backtrack Label
     524                 :         Vector<ReturnAddressRecord, 4> m_backtrackRecords; // bound (data label, Label) pairs, patched in linkDataLabels()
     525                 :     };
     526                 : 
     527                 :     // Generation methods:
     528                 :     // ===================
     529                 : 
     530                 :     // This method provides a default implementation of backtracking common
     531                 :     // to many terms; terms commonly jump out of the forwards matching path
     532                 :     // on any failed conditions, and add these jumps to the m_jumps list. If
     533                 :     // no special handling is required we can often just backtrack to m_jumps.
     534           92101 :     void backtrackTermDefault(size_t opIndex)
     535                 :     {
     536           92101 :         YarrOp& op = m_ops[opIndex];
     537           92101 :         m_backtrackingState.append(op.m_jumps); // hand the op's failure jumps to the shared backtracking state
     538           92101 :     }
     539                 : 
     540             470 :     void generateAssertionBOL(size_t opIndex) // Emits the beginning-of-line assertion; failing paths are appended to op.m_jumps.
     541                 :     {
     542             470 :         YarrOp& op = m_ops[opIndex];
     543             470 :         PatternTerm* term = op.m_term;
     544                 :         // Multiline: pass when index == m_checked (tested only when inputPosition is 0) or when the character just before the term's position is in the newline class.
     545             470 :         if (m_pattern.m_multiline) {
     546               0 :             const RegisterID character = regT0;
     547                 : 
     548               0 :             JumpList matchDest;
     549               0 :             if (!term->inputPosition)
     550               0 :                 matchDest.append(branch32(Equal, index, Imm32(m_checked)));
     551                 : 
     552               0 :             readCharacter((term->inputPosition - m_checked) - 1, character);
     553               0 :             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
     554               0 :             op.m_jumps.append(jump()); // reached when the class match did not jump to matchDest: fail
     555                 : 
     556               0 :             matchDest.link(this);
     557                 :         } else {
     558                 :             // Erk, really should poison out these alternatives early. :-/
     559             470 :             if (term->inputPosition)
     560               0 :                 op.m_jumps.append(jump()); // a non-zero inputPosition fails unconditionally
     561                 :             else
     562             470 :                 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
     563                 :         }
     564             470 :     }
     565             470 :     void backtrackAssertionBOL(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     566                 :     {
     567             470 :         backtrackTermDefault(opIndex);
     568             470 :     }
     569                 : 
     570            5868 :     void generateAssertionEOL(size_t opIndex) // Emits the end-of-line assertion; failing paths are appended to op.m_jumps.
     571                 :     {
     572            5868 :         YarrOp& op = m_ops[opIndex];
     573            5868 :         PatternTerm* term = op.m_term;
     574                 :         // Multiline: pass at end of input (tested only when inputPosition == m_checked) or when the character at the term's position is in the newline class.
     575            5868 :         if (m_pattern.m_multiline) {
     576               0 :             const RegisterID character = regT0;
     577                 : 
     578               0 :             JumpList matchDest;
     579               0 :             if (term->inputPosition == m_checked)
     580               0 :                 matchDest.append(atEndOfInput());
     581                 : 
     582               0 :             readCharacter((term->inputPosition - m_checked), character);
     583               0 :             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
     584               0 :             op.m_jumps.append(jump()); // reached when the class match did not jump to matchDest: fail
     585                 : 
     586               0 :             matchDest.link(this);
     587                 :         } else {
     588            5868 :             if (term->inputPosition == m_checked)
     589             468 :                 op.m_jumps.append(notAtEndOfInput());
     590                 :             // Erk, really should poison out these alternatives early. :-/
     591                 :             else
     592            5400 :                 op.m_jumps.append(jump()); // any other inputPosition fails unconditionally
     593                 :         }
     594            5868 :     }
     595            5868 :     void backtrackAssertionEOL(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     596                 :     {
     597            5868 :         backtrackTermDefault(opIndex);
     598            5868 :     }
     599                 : 
     600                 :     // Appends to nextIsWordChar the jumps taken when the character at the term's position is a word character, and to nextIsNotWordChar an end-of-input jump (emitted only when inputPosition == m_checked). Also falls through on nextIsNotWordChar.
     601             828 :     void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
     602                 :     {
     603             828 :         YarrOp& op = m_ops[opIndex];
     604             828 :         PatternTerm* term = op.m_term;
     605                 : 
     606             828 :         const RegisterID character = regT0;
     607                 : 
     608             828 :         if (term->inputPosition == m_checked)
     609             504 :             nextIsNotWordChar.append(atEndOfInput());
     610                 : 
     611             828 :         readCharacter((term->inputPosition - m_checked), character);
     612             828 :         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
     613             828 :     }
     614                 : 
     615             414 :     void generateAssertionWordBoundary(size_t opIndex) // Emits the word-boundary assertion by classifying the characters before and at the term's position; failing combinations go to op.m_jumps.
     616                 :     {
     617             414 :         YarrOp& op = m_ops[opIndex];
     618             414 :         PatternTerm* term = op.m_term;
     619                 : 
     620             414 :         const RegisterID character = regT0;
     621                 :         // Classify the preceding character. When inputPosition is 0 and index == m_checked the read is skipped and control continues on the "not a wordchar" path.
     622             414 :         Jump atBegin;
     623             828 :         JumpList matchDest;
     624             414 :         if (!term->inputPosition)
     625             162 :             atBegin = branch32(Equal, index, Imm32(m_checked));
     626             414 :         readCharacter((term->inputPosition - m_checked) - 1, character);
     627             414 :         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
     628             414 :         if (!term->inputPosition)
     629             162 :             atBegin.link(this);
     630                 : 
     631                 :         // We fall through to here if the last character was not a wordchar.
     632             828 :         JumpList nonWordCharThenWordChar;
     633             828 :         JumpList nonWordCharThenNonWordChar;
     634             414 :         if (term->invert()) { // inverted: the lists are passed swapped, so the list appended to op.m_jumps below receives the jumps taken on a word character
     635               0 :             matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
     636               0 :             nonWordCharThenWordChar.append(jump());
     637                 :         } else {
     638             414 :             matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
     639             414 :             nonWordCharThenNonWordChar.append(jump());
     640                 :         }
     641             414 :         op.m_jumps.append(nonWordCharThenNonWordChar);
     642                 : 
     643                 :         // We jump here if the last character was a wordchar.
     644             414 :         matchDest.link(this);
     645             828 :         JumpList wordCharThenWordChar;
     646             828 :         JumpList wordCharThenNonWordChar;
     647             414 :         if (term->invert()) { // inverted: lists swapped again, so op.m_jumps below receives the end-of-input and fall-through cases
     648               0 :             matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
     649               0 :             wordCharThenWordChar.append(jump());
     650                 :         } else {
     651             414 :             matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
     652                 :             // This can fall through!
     653                 :         }
     654                 :         // The list appended here is a failure path; the two lists linked after it continue at the end of the generated assertion code.
     655             414 :         op.m_jumps.append(wordCharThenWordChar);
     656                 : 
     657             414 :         nonWordCharThenWordChar.link(this);
     658             414 :         wordCharThenNonWordChar.link(this);
     659             414 :     }
     660             414 :     void backtrackAssertionWordBoundary(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     661                 :     {
     662             414 :         backtrackTermDefault(opIndex);
     663             414 :     }
     664                 : 
     665           79898 :     void generatePatternCharacterOnce(size_t opIndex) // Emits a match for one pattern character, fusing it with an immediately following single character into one 32-bit compare when possible.
     666                 :     {
     667           79898 :         YarrOp& op = m_ops[opIndex];
     668                 : 
     669                 :         // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
     670                 :         // node, so there must always be at least one more node.
     671           79898 :         ASSERT(opIndex + 1 < m_ops.size());
     672           79898 :         YarrOp& nextOp = m_ops[opIndex + 1];
     673                 :         // Ops flagged dead emit nothing, e.g. an op whose character was folded into the previous op's paired compare (flag set below).
     674           79898 :         if (op.m_isDeadCode)
     675           27978 :             return;
     676                 : 
     677           51920 :         PatternTerm* term = op.m_term;
     678           51920 :         UChar ch = term->patternCharacter;
     679                 : 
     680           51920 :         const RegisterID character = regT0;
     681                 :         // Pair with the next op only if it is a single fixed-count pattern character at the adjacent input position.
     682           51920 :         if (nextOp.m_op == OpTerm) {
     683           28788 :             PatternTerm* nextTerm = nextOp.m_term;
     684           28788 :             if (nextTerm->type == PatternTerm::TypePatternCharacter
     685                 :                 && nextTerm->quantityType == QuantifierFixedCount
     686                 :                 && nextTerm->quantityCount == 1
     687                 :                 && nextTerm->inputPosition == (term->inputPosition + 1)) {
     688                 : 
     689           27978 :                 UChar ch2 = nextTerm->patternCharacter;
     690                 :                 // chPair packs both characters into 32 bits, with the halves ordered according to endianness.
     691           27978 :                 int mask = 0;
     692                 : #if WTF_CPU_BIG_ENDIAN
     693                 :                 int chPair = ch2 | (ch << 16);
     694                 : #else
     695           27978 :                 int chPair = ch | (ch2 << 16);
     696                 : #endif
     697                 :                 // Under ignoreCase, set the value 32 in the mask half of each ASCII letter; it is ORed into both sides of the compare below.
     698           27978 :                 if (m_pattern.m_ignoreCase) {
     699                 : #if WTF_CPU_BIG_ENDIAN
     700                 :                     if (isASCIIAlpha(ch))
     701                 :                         mask |= 32 << 16;
     702                 :                     if (isASCIIAlpha(ch2))
     703                 :                         mask |= 32;
     704                 : #else
     705            3107 :                     if (isASCIIAlpha(ch))
     706            2333 :                         mask |= 32;
     707            3107 :                     if (isASCIIAlpha(ch2))
     708            2288 :                         mask |= 32 << 16;
     709                 : #endif
     710                 :                 }
     711                 :                 // Read both UChars with one 32-bit access; with a mask, OR it into the loaded value and compare against chPair | mask.
     712           27978 :                 BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
     713           27978 :                 if (mask) {
     714            2387 :                     load32WithUnalignedHalfWords(address, character);
     715            2387 :                     or32(Imm32(mask), character);
     716            2387 :                     op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
     717                 :                 } else
     718           25591 :                     op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
     719                 :                 // The next op's character was checked here, so suppress its own code generation.
     720           27978 :                 nextOp.m_isDeadCode = true;
     721           27978 :                 return;
     722                 :             }
     723                 :         }
     724                 :         // Single-character path: an ASCII letter under ignoreCase is compared after ORing in 32 against its lower-case form; anything else is compared directly.
     725           23942 :         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
     726             406 :             readCharacter(term->inputPosition - m_checked, character);
     727             406 :             or32(TrustedImm32(32), character);
     728             406 :             op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
     729                 :         } else {
     730           23536 :             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
     731           23536 :             op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
     732                 :         }
     733                 :     }
     734           79898 :     void backtrackPatternCharacterOnce(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     735                 :     {
     736           79898 :         backtrackTermDefault(opIndex);
     737           79898 :     }
     738                 : 
     739               0 :     void generatePatternCharacterFixed(size_t opIndex) // Emits a loop that matches the same pattern character quantityCount times; a mismatch jumps to op.m_jumps.
     740                 :     {
     741               0 :         YarrOp& op = m_ops[opIndex];
     742               0 :         PatternTerm* term = op.m_term;
     743               0 :         UChar ch = term->patternCharacter;
     744                 : 
     745               0 :         const RegisterID character = regT0;
     746               0 :         const RegisterID countRegister = regT1;
     747                 :         // countRegister runs from index - quantityCount up to index; the address offset below adds quantityCount back.
     748               0 :         move(index, countRegister);
     749               0 :         sub32(Imm32(term->quantityCount), countRegister);
     750                 : 
     751               0 :         Label loop(this);
     752               0 :         BaseIndex address(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar));
     753                 : 
     754               0 :         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
     755               0 :             load16(address, character);
     756               0 :             or32(TrustedImm32(32), character);
     757               0 :             op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
     758                 :         } else {
     759               0 :             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
     760               0 :             op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
     761                 :         }
     762               0 :         add32(TrustedImm32(1), countRegister);
     763               0 :         branch32(NotEqual, countRegister, index).linkTo(loop, this); // repeat until countRegister reaches index
     764               0 :     }
     765               0 :     void backtrackPatternCharacterFixed(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     766                 :     {
     767               0 :         backtrackTermDefault(opIndex);
     768               0 :     }
     769                 : 
     770             198 :     void generatePatternCharacterGreedy(size_t opIndex) // Emits a loop that consumes as many copies of the pattern character as it can (up to quantityCount) and saves the count in the frame.
     771                 :     {
     772             198 :         YarrOp& op = m_ops[opIndex];
     773             198 :         PatternTerm* term = op.m_term;
     774             198 :         UChar ch = term->patternCharacter;
     775                 : 
     776             198 :         const RegisterID character = regT0;
     777             198 :         const RegisterID countRegister = regT1;
     778                 : 
     779             198 :         move(TrustedImm32(0), countRegister);
     780                 :         // Loop body: leave via `failures` at end of input or on a mismatch; otherwise count and consume one character.
     781             396 :         JumpList failures;
     782             198 :         Label loop(this);
     783             198 :         failures.append(atEndOfInput());
     784             198 :         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
     785               0 :             readCharacter(term->inputPosition - m_checked, character);
     786               0 :             or32(TrustedImm32(32), character);
     787               0 :             failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
     788                 :         } else {
     789             198 :             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
     790             198 :             failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
     791                 :         }
     792                 : 
     793             198 :         add32(TrustedImm32(1), countRegister);
     794             198 :         add32(TrustedImm32(1), index);
     795             198 :         if (term->quantityCount == quantifyInfinite) // unbounded: always loop; bounded: loop until the count reaches quantityCount, then fall out
     796              63 :             jump(loop);
     797                 :         else
     798             135 :             branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this);
     799                 :         // Loop exit, and the point backtrackPatternCharacterGreedy() jumps back to after giving up one character.
     800             198 :         failures.link(this);
     801             198 :         op.m_reentry = label();
     802                 : 
     803             198 :         storeToFrame(countRegister, term->frameLocation);
     804                 : 
     805             198 :     }
     806             198 :     void backtrackPatternCharacterGreedy(size_t opIndex) // Emits backtracking for a greedy character: give back one matched character and retry from op.m_reentry.
     807                 :     {
     808             198 :         YarrOp& op = m_ops[opIndex];
     809             198 :         PatternTerm* term = op.m_term;
     810                 : 
     811             198 :         const RegisterID countRegister = regT1;
     812                 :         // Pending backtracks land here. If the saved count is zero, backtrack further; otherwise decrement count and index and re-enter.
     813             198 :         m_backtrackingState.link(this);
     814                 : 
     815             198 :         loadFromFrame(term->frameLocation, countRegister);
     816             198 :         m_backtrackingState.append(branchTest32(Zero, countRegister));
     817             198 :         sub32(TrustedImm32(1), countRegister);
     818             198 :         sub32(TrustedImm32(1), index);
     819             198 :         jump(op.m_reentry);
     820             198 :     }
     821                 : 
     822               0 :     void generatePatternCharacterNonGreedy(size_t opIndex) // Forward path consumes nothing: zeroes the count, records op.m_reentry and saves the count to the frame.
     823                 :     {
     824               0 :         YarrOp& op = m_ops[opIndex];
     825               0 :         PatternTerm* term = op.m_term;
     826                 : 
     827               0 :         const RegisterID countRegister = regT1;
     828                 : 
     829               0 :         move(TrustedImm32(0), countRegister);
     830               0 :         op.m_reentry = label(); // backtrackPatternCharacterNonGreedy() jumps back here with an incremented count
     831               0 :         storeToFrame(countRegister, term->frameLocation);
     832               0 :     }
     833               0 :     void backtrackPatternCharacterNonGreedy(size_t opIndex) // Emits backtracking for a non-greedy character: try to consume one more character and re-enter, else rewind and backtrack further.
     834                 :     {
     835               0 :         YarrOp& op = m_ops[opIndex];
     836               0 :         PatternTerm* term = op.m_term;
     837               0 :         UChar ch = term->patternCharacter;
     838                 : 
     839               0 :         const RegisterID character = regT0;
     840               0 :         const RegisterID countRegister = regT1;
     841                 : 
     842               0 :         JumpList nonGreedyFailures;
     843                 : 
     844               0 :         m_backtrackingState.link(this);
     845                 : 
     846               0 :         loadFromFrame(term->frameLocation, countRegister);
     847                 :         // Give up at end of input, at the quantityCount limit (checked only when finite), or on a character mismatch.
     848               0 :         nonGreedyFailures.append(atEndOfInput());
     849               0 :         if (term->quantityCount != quantifyInfinite)
     850               0 :             nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount)));
     851               0 :         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
     852               0 :             readCharacter(term->inputPosition - m_checked, character);
     853               0 :             or32(TrustedImm32(32), character);
     854               0 :             nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
     855                 :         } else {
     856               0 :             ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
     857               0 :             nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
     858                 :         }
     859                 : 
     860               0 :         add32(TrustedImm32(1), countRegister);
     861               0 :         add32(TrustedImm32(1), index);
     862                 : 
     863               0 :         jump(op.m_reentry);
     864                 :         // Failure: subtract the count from index and leave a pending fallthrough for the next backtrack link.
     865               0 :         nonGreedyFailures.link(this);
     866               0 :         sub32(countRegister, index);
     867               0 :         m_backtrackingState.fallthrough();
     868               0 :     }
     869                 : 
     870            5424 :     void generateCharacterClassOnce(size_t opIndex) // Emits a single character-class match at the term's position; failing paths are appended to op.m_jumps.
     871                 :     {
     872            5424 :         YarrOp& op = m_ops[opIndex];
     873            5424 :         PatternTerm* term = op.m_term;
     874                 : 
     875            5424 :         const RegisterID character = regT0;
     876                 : 
     877           10848 :         JumpList matchDest;
     878            5424 :         readCharacter((term->inputPosition - m_checked), character);
     879            5424 :         matchCharacterClass(character, matchDest, term->characterClass);
     880                 :         // matchDest holds the jumps taken on a class match: for an inverted term those are the failures, otherwise falling through is the failure.
     881            5424 :         if (term->invert())
     882            4128 :             op.m_jumps.append(matchDest);
     883                 :         else {
     884            1296 :             op.m_jumps.append(jump());
     885            1296 :             matchDest.link(this);
     886                 :         }
     887            5424 :     }
     888            5424 :     void backtrackCharacterClassOnce(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     889                 :     {
     890            5424 :         backtrackTermDefault(opIndex);
     891            5424 :     }
     892                 : 
     893              27 :     void generateCharacterClassFixed(size_t opIndex) // Emits a loop that matches the character class quantityCount times; failing paths are appended to op.m_jumps.
     894                 :     {
     895              27 :         YarrOp& op = m_ops[opIndex];
     896              27 :         PatternTerm* term = op.m_term;
     897                 : 
     898              27 :         const RegisterID character = regT0;
     899              27 :         const RegisterID countRegister = regT1;
     900                 :         // countRegister runs from index - quantityCount up to index; the load offset below adds quantityCount back.
     901              27 :         move(index, countRegister);
     902              27 :         sub32(Imm32(term->quantityCount), countRegister);
     903                 : 
     904              27 :         Label loop(this);
     905              54 :         JumpList matchDest;
     906              27 :         load16(BaseIndex(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)), character);
     907              27 :         matchCharacterClass(character, matchDest, term->characterClass);
     908                 :         // As in generateCharacterClassOnce(): a class match is the failure for inverted terms, a fall-through otherwise.
     909              27 :         if (term->invert())
     910              18 :             op.m_jumps.append(matchDest);
     911                 :         else {
     912               9 :             op.m_jumps.append(jump());
     913               9 :             matchDest.link(this);
     914                 :         }
     915                 : 
     916              27 :         add32(TrustedImm32(1), countRegister);
     917              27 :         branch32(NotEqual, countRegister, index).linkTo(loop, this); // repeat until countRegister reaches index
     918              27 :     }
     919              27 :     void backtrackCharacterClassFixed(size_t opIndex) // No term-specific backtracking code: queues op.m_jumps via backtrackTermDefault().
     920                 :     {
     921              27 :         backtrackTermDefault(opIndex);
     922              27 :     }
     923                 : 
     924            1914 :     void generateCharacterClassGreedy(size_t opIndex) // Emits a loop that consumes as many class-matching characters as it can (up to quantityCount) and saves the count in the frame.
     925                 :     {
     926            1914 :         YarrOp& op = m_ops[opIndex];
     927            1914 :         PatternTerm* term = op.m_term;
     928                 : 
     929            1914 :         const RegisterID character = regT0;
     930            1914 :         const RegisterID countRegister = regT1;
     931                 : 
     932            1914 :         move(TrustedImm32(0), countRegister);
     933                 :         // Loop body: leave via `failures` at end of input or when the character does not satisfy the (possibly inverted) class.
     934            3828 :         JumpList failures;
     935            1914 :         Label loop(this);
     936            1914 :         failures.append(atEndOfInput());
     937                 : 
     938            1914 :         if (term->invert()) { // inverted: a class match is itself the failure
     939             600 :             readCharacter(term->inputPosition - m_checked, character);
     940             600 :             matchCharacterClass(character, failures, term->characterClass);
     941                 :         } else {
     942            2628 :             JumpList matchDest;
     943            1314 :             readCharacter(term->inputPosition - m_checked, character);
     944            1314 :             matchCharacterClass(character, matchDest, term->characterClass);
     945            1314 :             failures.append(jump());
     946            1314 :             matchDest.link(this);
     947                 :         }
     948                 : 
     949            1914 :         add32(TrustedImm32(1), countRegister);
     950            1914 :         add32(TrustedImm32(1), index);
     951            1914 :         if (term->quantityCount != quantifyInfinite) {
     952             126 :             branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this);
     953             126 :             failures.append(jump()); // count reached quantityCount: leave through `failures`, which is linked just below
     954                 :         } else
     955            1788 :             jump(loop);
     956                 :         // Loop exit, and the point backtrackCharacterClassGreedy() jumps back to after giving up one character.
     957            1914 :         failures.link(this);
     958            1914 :         op.m_reentry = label();
     959                 : 
     960            1914 :         storeToFrame(countRegister, term->frameLocation);
     961            1914 :     }
     962            1914 :     void backtrackCharacterClassGreedy(size_t opIndex) // Emits backtracking for a greedy character class: give back one matched character and retry from op.m_reentry.
     963                 :     {
     964            1914 :         YarrOp& op = m_ops[opIndex];
     965            1914 :         PatternTerm* term = op.m_term;
     966                 : 
     967            1914 :         const RegisterID countRegister = regT1;
     968                 :         // Pending backtracks land here. If the saved count is zero, backtrack further; otherwise decrement count and index and re-enter.
     969            1914 :         m_backtrackingState.link(this);
     970                 : 
     971            1914 :         loadFromFrame(term->frameLocation, countRegister);
     972            1914 :         m_backtrackingState.append(branchTest32(Zero, countRegister));
     973            1914 :         sub32(TrustedImm32(1), countRegister);
     974            1914 :         sub32(TrustedImm32(1), index);
     975            1914 :         jump(op.m_reentry);
     976            1914 :     }
     977                 : 
     978              45 :     void generateCharacterClassNonGreedy(size_t opIndex) // Forward path consumes nothing: zeroes the count, records op.m_reentry and saves the count to the frame.
     979                 :     {
     980              45 :         YarrOp& op = m_ops[opIndex];
     981              45 :         PatternTerm* term = op.m_term;
     982                 : 
     983              45 :         const RegisterID countRegister = regT1;
     984                 : 
     985              45 :         move(TrustedImm32(0), countRegister);
     986              45 :         op.m_reentry = label(); // the matching backtrack method is expected to jump back here — its tail is outside this view, TODO confirm
     987              45 :         storeToFrame(countRegister, term->frameLocation);
     988              45 :     }
     989              45 :     void backtrackCharacterClassNonGreedy(size_t opIndex)
     990                 :     {
     991              45 :         YarrOp& op = m_ops[opIndex];
     992              45 :         PatternTerm* term = op.m_term;
     993                 :         // Backtracking path for a non-greedy character class term: try to consume one more character.
     994              45 :         const RegisterID character = regT0;
     995              45 :         const RegisterID countRegister = regT1;
     996                 :         // Collects the jumps taken when this term cannot consume another character.
     997              90 :         JumpList nonGreedyFailures;
     998                 : 
     999              45 :         m_backtrackingState.link(this);
    1000                 :         // NOTE(review): backtrackBegin is not referenced again within this function.
    1001              45 :         Label backtrackBegin(this);
    1002              45 :         loadFromFrame(term->frameLocation, countRegister);
    1003                 :         // Fail if (presumably) the input is exhausted, or if the count already equals quantityCount.
    1004              45 :         nonGreedyFailures.append(atEndOfInput());
    1005              45 :         nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount)));
    1006                 :         // Read the character at this term's input position (adjusted by m_checked) and test it against the class.
    1007              90 :         JumpList matchDest;
    1008              45 :         readCharacter(term->inputPosition - m_checked, character);
    1009              45 :         matchCharacterClass(character, matchDest, term->characterClass);
    1010                 :         // matchDest presumably holds the jumps taken on a class match: failures when inverted, otherwise the success target linked below.
    1011              45 :         if (term->invert())
    1012              45 :             nonGreedyFailures.append(matchDest);
    1013                 :         else {
    1014               0 :             nonGreedyFailures.append(jump());
    1015               0 :             matchDest.link(this);
    1016                 :         }
    1017                 :         // Consume the character: bump both the count and the input index.
    1018              45 :         add32(TrustedImm32(1), countRegister);
    1019              45 :         add32(TrustedImm32(1), index);
    1020                 :         // Resume at the reentry label set by generateCharacterClassNonGreedy, which re-saves the count.
    1021              45 :         jump(op.m_reentry);
    1022                 :         // Failure path: subtract the loaded count from index, then hand off to m_backtrackingState.
    1023              45 :         nonGreedyFailures.link(this);
    1024              45 :         sub32(countRegister, index);
    1025              45 :         m_backtrackingState.fallthrough();
    1026              45 :     }
    1027                 : 
    1028                 :     // Code generation/backtracking for simple terms
    1029                 :     // (pattern characters, character classes, and assertions).
    1030                 :     // These methods farm out work to the set of functions above.
    1031           94258 :     void generateTerm(size_t opIndex)
    1032                 :     {
    1033           94258 :         YarrOp& op = m_ops[opIndex];
    1034           94258 :         PatternTerm* term = op.m_term;
    1035                 :         // Dispatch on the term type, and for characters/classes on the quantifier kind, to the matching generate* helper.
    1036           94258 :         switch (term->type) {
    1037                 :         case PatternTerm::TypePatternCharacter:
    1038           80096 :             switch (term->quantityType) {
    1039                 :             case QuantifierFixedCount:
    1040           79898 :                 if (term->quantityCount == 1)
    1041           79898 :                     generatePatternCharacterOnce(opIndex);
    1042                 :                 else
    1043               0 :                     generatePatternCharacterFixed(opIndex);
    1044           79898 :                 break;
    1045                 :             case QuantifierGreedy:
    1046             198 :                 generatePatternCharacterGreedy(opIndex);
    1047             198 :                 break;
    1048                 :             case QuantifierNonGreedy:
    1049               0 :                 generatePatternCharacterNonGreedy(opIndex);
    1050               0 :                 break;
    1051                 :             }
    1052           80096 :             break;
    1053                 :         // Character classes follow the same Once/Fixed/Greedy/NonGreedy split as pattern characters.
    1054                 :         case PatternTerm::TypeCharacterClass:
    1055            7410 :             switch (term->quantityType) {
    1056                 :             case QuantifierFixedCount:
    1057            5451 :                 if (term->quantityCount == 1)
    1058            5424 :                     generateCharacterClassOnce(opIndex);
    1059                 :                 else
    1060              27 :                     generateCharacterClassFixed(opIndex);
    1061            5451 :                 break;
    1062                 :             case QuantifierGreedy:
    1063            1914 :                 generateCharacterClassGreedy(opIndex);
    1064            1914 :                 break;
    1065                 :             case QuantifierNonGreedy:
    1066              45 :                 generateCharacterClassNonGreedy(opIndex);
    1067              45 :                 break;
    1068                 :             }
    1069            7410 :             break;
    1070                 :         // Assertions are dispatched without looking at the quantifier.
    1071                 :         case PatternTerm::TypeAssertionBOL:
    1072             470 :             generateAssertionBOL(opIndex);
    1073             470 :             break;
    1074                 : 
    1075                 :         case PatternTerm::TypeAssertionEOL:
    1076            5868 :             generateAssertionEOL(opIndex);
    1077            5868 :             break;
    1078                 : 
    1079                 :         case PatternTerm::TypeAssertionWordBoundary:
    1080             414 :             generateAssertionWordBoundary(opIndex);
    1081             414 :             break;
    1082                 : 
    1083                 :         case PatternTerm::TypeForwardReference:
    1084               0 :             break; // no code is generated for forward references
    1085                 :         // Parenthesised terms are not expected here; note there is no break after the assertion.
    1086                 :         case PatternTerm::TypeParenthesesSubpattern:
    1087                 :         case PatternTerm::TypeParentheticalAssertion:
    1088               0 :             ASSERT_NOT_REACHED(); // control continues into the TypeBackReference case below
    1089                 :         case PatternTerm::TypeBackReference:
    1090               0 :             m_shouldFallBack = true; // NOTE(review): presumably signals the caller to abandon the JIT for this pattern - confirm
    1091               0 :             break;
    1092                 :         }
    1093           94258 :     }
    1094           94258 :     void backtrackTerm(size_t opIndex)
    1095                 :     {
    1096           94258 :         YarrOp& op = m_ops[opIndex];
    1097           94258 :         PatternTerm* term = op.m_term;
    1098                 :         // Mirror of generateTerm: dispatch on term type and quantifier kind to the matching backtrack* helper.
    1099           94258 :         switch (term->type) {
    1100                 :         case PatternTerm::TypePatternCharacter:
    1101           80096 :             switch (term->quantityType) {
    1102                 :             case QuantifierFixedCount:
    1103           79898 :                 if (term->quantityCount == 1)
    1104           79898 :                     backtrackPatternCharacterOnce(opIndex);
    1105                 :                 else
    1106               0 :                     backtrackPatternCharacterFixed(opIndex);
    1107           79898 :                 break;
    1108                 :             case QuantifierGreedy:
    1109             198 :                 backtrackPatternCharacterGreedy(opIndex);
    1110             198 :                 break;
    1111                 :             case QuantifierNonGreedy:
    1112               0 :                 backtrackPatternCharacterNonGreedy(opIndex);
    1113               0 :                 break;
    1114                 :             }
    1115           80096 :             break;
    1116                 :         // Character classes follow the same Once/Fixed/Greedy/NonGreedy split as pattern characters.
    1117                 :         case PatternTerm::TypeCharacterClass:
    1118            7410 :             switch (term->quantityType) {
    1119                 :             case QuantifierFixedCount:
    1120            5451 :                 if (term->quantityCount == 1)
    1121            5424 :                     backtrackCharacterClassOnce(opIndex);
    1122                 :                 else
    1123              27 :                     backtrackCharacterClassFixed(opIndex);
    1124            5451 :                 break;
    1125                 :             case QuantifierGreedy:
    1126            1914 :                 backtrackCharacterClassGreedy(opIndex);
    1127            1914 :                 break;
    1128                 :             case QuantifierNonGreedy:
    1129              45 :                 backtrackCharacterClassNonGreedy(opIndex);
    1130              45 :                 break;
    1131                 :             }
    1132            7410 :             break;
    1133                 :         // Assertions are dispatched without looking at the quantifier.
    1134                 :         case PatternTerm::TypeAssertionBOL:
    1135             470 :             backtrackAssertionBOL(opIndex);
    1136             470 :             break;
    1137                 : 
    1138                 :         case PatternTerm::TypeAssertionEOL:
    1139            5868 :             backtrackAssertionEOL(opIndex);
    1140            5868 :             break;
    1141                 : 
    1142                 :         case PatternTerm::TypeAssertionWordBoundary:
    1143             414 :             backtrackAssertionWordBoundary(opIndex);
    1144             414 :             break;
    1145                 : 
    1146                 :         case PatternTerm::TypeForwardReference:
    1147               0 :             break; // no backtracking code is generated for forward references
    1148                 :         // Parenthesised terms are not expected here; note there is no break after the assertion.
    1149                 :         case PatternTerm::TypeParenthesesSubpattern:
    1150                 :         case PatternTerm::TypeParentheticalAssertion:
    1151               0 :             ASSERT_NOT_REACHED(); // control continues into the TypeBackReference case below
    1152                 :         case PatternTerm::TypeBackReference:
    1153               0 :             m_shouldFallBack = true; // NOTE(review): presumably signals the caller to abandon the JIT for this pattern - confirm
    1154               0 :             break;
    1155                 :         }
    1156           94258 :     }
    1157                 : 
    1158           23884 :     void generate()
    1159                 :     {
    1160                 :         // Forwards generate the matching code.
    1161           23884 :         ASSERT(m_ops.size());
    1162           23884 :         size_t opIndex = 0;
    1163                 : 
    1164          227977 :         do {
    1165          227977 :             YarrOp& op = m_ops[opIndex];
    1166          227977 :             switch (op.m_op) {
    1167                 : 
    1168                 :             case OpTerm:
    1169           94258 :                 generateTerm(opIndex);
    1170           94258 :                 break;
    1171                 : 
    1172                 :             // OpBodyAlternativeBegin/Next/End
    1173                 :             //
    1174                 :             // These nodes wrap the set of alternatives in the body of the regular expression.
    1175                 :             // There may be either one or two chains of OpBodyAlternative nodes, one representing
    1176                 :             // the 'once through' sequence of alternatives (if any exist), and one representing
    1177                 :             // the repeating alternatives (again, if any exist).
    1178                 :             //
    1179                 :             // Upon normal entry to the Begin alternative, we will check that input is available.
    1180                 :             // Reentry to the Begin alternative will take place after the check has taken place,
    1181                 :             // and will assume that the input position has already been progressed as appropriate.
    1182                 :             //
    1183                 :             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
    1184                 :             // successfully completed a match - return a success state from JIT code.
    1185                 :             //
    1186                 :             // Next alternatives allow for reentry optimized to suit backtracking from its
    1187                 :             // preceding alternative. It expects the input position to still be set to a position
    1188                 :             // appropriate to its predecessor, and it will only perform an input check if the
    1189                 :             // predecessor had a minimum size less than its own.
    1190                 :             //
    1191                 :             // In the case of 'once through' expressions, the End node will also have a reentry
    1192                 :             // point to jump to when the last alternative fails. Again, this expects the input
    1193                 :             // position to still reflect that expected by the prior alternative.
    1194                 :             case OpBodyAlternativeBegin: {
    1195           24037 :                 PatternAlternative* alternative = op.m_alternative;
    1196                 : 
    1197                 :                 // Upon entry at the head of the set of alternatives, check if input is available
    1198                 :                 // to run the first alternative. (This progresses the input position).
    1199           24037 :                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
    1200                 :                 // We will reenter after the check, and assume the input position to have been
    1201                 :                 // set as appropriate to this alternative.
    1202           24037 :                 op.m_reentry = label();
    1203                 : 
    1204           24037 :                 m_checked += alternative->m_minimumSize;
    1205           24037 :                 break;
    1206                 :             }
    1207                 :             case OpBodyAlternativeNext:
    1208                 :             case OpBodyAlternativeEnd: {
    1209           24793 :                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
    1210           24793 :                 PatternAlternative* alternative = op.m_alternative;
    1211                 : 
    1212                 :                 // If we get here, the prior alternative matched - return success.
    1213                 :                 
    1214                 :                 // Adjust the stack pointer to remove the pattern's frame.
    1215                 : #if !WTF_CPU_SPARC
    1216           24793 :                 if (m_pattern.m_body->m_callFrameSize)
    1217            3156 :                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
    1218                 : #endif
    1219                 : 
    1220                 :                 // Load appropriate values into the return register and the first output
    1221                 :                 // slot, and return. In the case of a pattern with a fixed size, we will
    1222                 :                 // not yet have set the value in the first output slot, so store it now.
    1223                 :                 ASSERT(index != returnRegister);
    1224           24793 :                 if (m_pattern.m_body->m_hasFixedSize) {
    1225            4913 :                     move(index, returnRegister);
    1226            4913 :                     if (priorAlternative->m_minimumSize)
    1227            4868 :                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
    1228            4913 :                     store32(returnRegister, output);
    1229                 :                 } else
    1230           19880 :                     load32(Address(output), returnRegister);
    1231           24793 :                 store32(index, Address(output, 4));
    1232           24793 :                 generateReturn();
    1233                 : 
    1234                 :                 // This is the divide between the tail of the prior alternative, above, and
    1235                 :                 // the head of the subsequent alternative, below.
    1236                 : 
    1237           24793 :                 if (op.m_op == OpBodyAlternativeNext) {
    1238                 :                     // This is the reentry point for the Next alternative. We expect any code
    1239                 :                     // that jumps here to do so with the input position matching that of the
    1240                 :                     // PRIOR alternative, and we will only check input availability if we
    1241                 :                     // need to progress it forwards.
    1242             756 :                     op.m_reentry = label();
    1243             756 :                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
    1244             180 :                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
    1245             180 :                         op.m_jumps.append(jumpIfNoAvailableInput());
    1246             576 :                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
    1247             414 :                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
    1248           24037 :                 } else if (op.m_nextOp == notFound) {
    1249                 :                     // This is the reentry point for the End of 'once through' alternatives,
    1250                 :                     // jumped to when the last alternative fails to match.
    1251             470 :                     op.m_reentry = label();
    1252             470 :                     sub32(Imm32(priorAlternative->m_minimumSize), index);
    1253                 :                 }
    1254                 : 
    1255           24793 :                 if (op.m_op == OpBodyAlternativeNext)
    1256             756 :                     m_checked += alternative->m_minimumSize;
    1257           24793 :                 m_checked -= priorAlternative->m_minimumSize;
    1258           24793 :                 break;
    1259                 :             }
    1260                 : 
    1261                 :             // OpSimpleNestedAlternativeBegin/Next/End
    1262                 :             // OpNestedAlternativeBegin/Next/End
    1263                 :             //
    1264                 :             // These nodes are used to handle sets of alternatives that are nested within
    1265                 :             // subpatterns and parenthetical assertions. The 'simple' forms are used where
    1266                 :             // we do not need to be able to backtrack back into any alternative other than
    1267                 :             // the last, the normal forms allow backtracking into any alternative.
    1268                 :             //
    1269                 :             // Each Begin/Next node is responsible for planting an input check to ensure
    1270                 :             // sufficient input is available on entry. Next nodes additionally need to
    1271                 :             // jump to the end - Next nodes use the End node's m_jumps list to hold this
    1272                 :             // set of jumps.
    1273                 :             //
    1274                 :             // In the non-simple forms, successful alternative matches must store a
    1275                 :             // 'return address' using a DataLabelPtr, used to store the address to jump
    1276                 :             // to when backtracking, to get to the code for the appropriate alternative.
    1277                 :             case OpSimpleNestedAlternativeBegin:
    1278                 :             case OpNestedAlternativeBegin: {
    1279           20378 :                 PatternTerm* term = op.m_term;
    1280           20378 :                 PatternAlternative* alternative = op.m_alternative;
    1281           20378 :                 PatternDisjunction* disjunction = term->parentheses.disjunction;
    1282                 : 
    1283                 :                 // Calculate how much input we need to check for, and if non-zero check.
    1284           20378 :                 op.m_checkAdjust = alternative->m_minimumSize;
    1285           20378 :                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
    1286           20054 :                     op.m_checkAdjust -= disjunction->m_minimumSize;
    1287           20378 :                 if (op.m_checkAdjust)
    1288             558 :                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
    1289                 :  
    1290           20378 :                 m_checked += op.m_checkAdjust;
    1291           20378 :                 break;
    1292                 :             }
    1293                 :             case OpSimpleNestedAlternativeNext:
    1294                 :             case OpNestedAlternativeNext: {
    1295            3060 :                 PatternTerm* term = op.m_term;
    1296            3060 :                 PatternAlternative* alternative = op.m_alternative;
    1297            3060 :                 PatternDisjunction* disjunction = term->parentheses.disjunction;
    1298                 : 
    1299                 :                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
    1300            3060 :                 if (op.m_op == OpNestedAlternativeNext) {
    1301            3060 :                     unsigned parenthesesFrameLocation = term->frameLocation;
    1302            3060 :                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
    1303            3060 :                     if (term->quantityType != QuantifierFixedCount)
    1304               0 :                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
    1305            3060 :                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
    1306                 :                 }
    1307                 : 
    1308                 :                 // If we reach here then the last alternative has matched - jump to the
    1309                 :                 // End node, to skip over any further alternatives.
    1310                 :                 //
    1311                 :                 // FIXME: this is logically O(N^2) (though N can be expected to be very
    1312                 :                 // small). We could avoid this either by adding an extra jump to the JIT
    1313                 :                 // data structures, or by making backtracking code that jumps to Next
    1314                 :                 // alternatives responsible for checking that input is available (if
    1315                 :                 // we didn't need to plant the input checks, then m_jumps would be free).
    1316            3060 :                 YarrOp* endOp = &m_ops[op.m_nextOp];
    1317           10278 :                 while (endOp->m_nextOp != notFound) {
    1318            4158 :                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
    1319            4158 :                     endOp = &m_ops[endOp->m_nextOp];
    1320                 :                 }
    1321            3060 :                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
    1322            3060 :                 endOp->m_jumps.append(jump());
    1323                 : 
    1324                 :                 // This is the entry point for the next alternative.
    1325            3060 :                 op.m_reentry = label();
    1326                 : 
    1327                 :                 // Calculate how much input we need to check for, and if non-zero check.
    1328            3060 :                 op.m_checkAdjust = alternative->m_minimumSize;
    1329            3060 :                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
    1330            3060 :                     op.m_checkAdjust -= disjunction->m_minimumSize;
    1331            3060 :                 if (op.m_checkAdjust)
    1332             513 :                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
    1333                 : 
    1334            3060 :                 YarrOp& lastOp = m_ops[op.m_previousOp];
    1335            3060 :                 m_checked -= lastOp.m_checkAdjust;
    1336            3060 :                 m_checked += op.m_checkAdjust;
    1337            3060 :                 break;
    1338                 :             }
    1339                 :             case OpSimpleNestedAlternativeEnd:
    1340                 :             case OpNestedAlternativeEnd: {
    1341           20378 :                 PatternTerm* term = op.m_term;
    1342                 : 
    1343                 :                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
    1344           20378 :                 if (op.m_op == OpNestedAlternativeEnd) {
    1345            2376 :                     unsigned parenthesesFrameLocation = term->frameLocation;
    1346            2376 :                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
    1347            2376 :                     if (term->quantityType != QuantifierFixedCount)
    1348               0 :                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
    1349            2376 :                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
    1350                 :                 }
    1351                 : 
    1352                 :                 // If this set of alternatives contains more than one alternative,
    1353                 :                 // then the Next nodes will have planted jumps to the End, and added
    1354                 :                 // them to this node's m_jumps list.
    1355           20378 :                 op.m_jumps.link(this);
    1356           20378 :                 op.m_jumps.clear();
    1357                 : 
    1358           20378 :                 YarrOp& lastOp = m_ops[op.m_previousOp];
    1359           20378 :                 m_checked -= lastOp.m_checkAdjust;
    1360           20378 :                 break;
    1361                 :             }
    1362                 : 
    1363                 :             // OpParenthesesSubpatternOnceBegin/End
    1364                 :             //
    1365                 :             // These nodes support (optionally) capturing subpatterns, that have a
    1366                 :             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). 
    1367                 :             case OpParenthesesSubpatternOnceBegin: {
    1368           20297 :                 PatternTerm* term = op.m_term;
    1369           20297 :                 unsigned parenthesesFrameLocation = term->frameLocation;
    1370           20297 :                 const RegisterID indexTemporary = regT0;
    1371           20297 :                 ASSERT(term->quantityCount == 1);
    1372                 : 
    1373                 :                 // Upon entry to a Greedy quantified set of parentheses, store the index.
    1374                 :                 // We'll use this for two purposes:
    1375                 :                 //  - To indicate which iteration we are on of matching the remainder of
    1376                 :                 //    the expression after the parentheses - the first, including the
    1377                 :                 //    match within the parentheses, or the second having skipped over them.
    1378                 :                 //  - To check for empty matches, which must be rejected.
    1379                 :                 //
    1380                 :                 // At the head of a NonGreedy set of parentheses we'll immediately set the
    1381                 :                 // value on the stack to -1 (indicating a match skipping the subpattern),
    1382                 :                 // and plant a jump to the end. We'll also plant a label to backtrack to
    1383                 :                 // to reenter the subpattern later, with a store to set up index on the
    1384                 :                 // second iteration.
    1385                 :                 //
    1386                 :                 // FIXME: for capturing parens, could use the index in the capture array?
    1387           20297 :                 if (term->quantityType == QuantifierGreedy)
    1388             243 :                     storeToFrame(index, parenthesesFrameLocation);
    1389           20054 :                 else if (term->quantityType == QuantifierNonGreedy) {
    1390               0 :                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
    1391               0 :                     op.m_jumps.append(jump());
    1392               0 :                     op.m_reentry = label();
    1393               0 :                     storeToFrame(index, parenthesesFrameLocation);
    1394                 :                 }
    1395                 : 
    1396                 :                 // If the parentheses are capturing, store the starting index value to the
    1397                 :                 // captures array, offsetting as necessary.
    1398                 :                 //
    1399                 :                 // FIXME: could avoid offsetting this value in JIT code, apply
    1400                 :                 // offsets only afterwards, at the point the results array is
    1401                 :                 // being accessed.
    1402           20297 :                 if (term->capture()) {
    1403           19629 :                     int offsetId = term->parentheses.subpatternId << 1;
    1404           19629 :                     int inputOffset = term->inputPosition - m_checked;
    1405           19629 :                     if (term->quantityType == QuantifierFixedCount)
    1406           19476 :                         inputOffset -= term->parentheses.disjunction->m_minimumSize;
    1407           19629 :                     if (inputOffset) {
    1408           19197 :                         move(index, indexTemporary);
    1409           19197 :                         add32(Imm32(inputOffset), indexTemporary);
    1410           19197 :                         store32(indexTemporary, Address(output, offsetId * sizeof(int)));
    1411                 :                     } else
    1412             432 :                         store32(index, Address(output, offsetId * sizeof(int)));
    1413                 :                 }
    1414           20297 :                 break;
    1415                 :             }
    1416                 :             case OpParenthesesSubpatternOnceEnd: {
    1417           20297 :                 PatternTerm* term = op.m_term;
    1418           20297 :                 unsigned parenthesesFrameLocation = term->frameLocation;
    1419           20297 :                 const RegisterID indexTemporary = regT0;
    1420           20297 :                 ASSERT(term->quantityCount == 1);
    1421                 : 
    1422                 :                 // For Greedy/NonGreedy quantified parentheses, we must reject zero length
    1423                 :                 // matches. If the minimum size is known to be non-zero we need not check.
    1424           20297 :                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
    1425               9 :                     op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
    1426                 : 
    1427                 :                 // If the parentheses are capturing, store the ending index value to the
    1428                 :                 // captures array, offsetting as necessary.
    1429                 :                 //
    1430                 :                 // FIXME: could avoid offsetting this value in JIT code, apply
    1431                 :                 // offsets only afterwards, at the point the results array is
    1432                 :                 // being accessed.
    1433           20297 :                 if (term->capture()) {
    1434           19629 :                     int offsetId = (term->parentheses.subpatternId << 1) + 1;
    1435           19629 :                     int inputOffset = term->inputPosition - m_checked;
    1436           19629 :                     if (inputOffset) {
    1437           18360 :                         move(index, indexTemporary);
    1438           18360 :                         add32(Imm32(inputOffset), indexTemporary);
    1439           18360 :                         store32(indexTemporary, Address(output, offsetId * sizeof(int)));
    1440                 :                     } else
    1441            1269 :                         store32(index, Address(output, offsetId * sizeof(int)));
    1442                 :                 }
    1443                 : 
    1444                 :                 // If the parentheses are quantified Greedy then add a label to jump back
    1445                 :                 // to if we get a failed match from after the parentheses. For NonGreedy
    1446                 :                 // parentheses, link the jump from before the subpattern to here.
    1447           20297 :                 if (term->quantityType == QuantifierGreedy)
    1448             243 :                     op.m_reentry = label();
    1449           20054 :                 else if (term->quantityType == QuantifierNonGreedy) {
    1450               0 :                     YarrOp& beginOp = m_ops[op.m_previousOp];
    1451               0 :                     beginOp.m_jumps.link(this);
    1452                 :                 }
    1453           20297 :                 break;
    1454                 :             }
    1455                 : 
    1456                 :             // OpParenthesesSubpatternTerminalBegin/End
    1457                 :             case OpParenthesesSubpatternTerminalBegin: {
    1458              45 :                 PatternTerm* term = op.m_term;
    1459              45 :                 ASSERT(term->quantityType == QuantifierGreedy);
    1460              45 :                 ASSERT(term->quantityCount == quantifyInfinite);
    1461              45 :                 ASSERT(!term->capture());
    1462                 : 
    1463                 :                 // Upon entry set a label to loop back to.
    1464              45 :                 op.m_reentry = label();
    1465                 : 
    1466                 :                 // Store the start index of the current match; we need to reject zero
    1467                 :                 // length matches.
    1468              45 :                 storeToFrame(index, term->frameLocation);
    1469              45 :                 break;
    1470                 :             }
    1471                 :             case OpParenthesesSubpatternTerminalEnd: {
    1472              45 :                 PatternTerm* term = op.m_term;
    1473                 : 
    1474                 :                 // Check for zero length matches - if the match is non-zero, then we
    1475                 :                 // can accept it & loop back up to the head of the subpattern.
    1476              45 :                 YarrOp& beginOp = m_ops[op.m_previousOp];
    1477              45 :                 branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
    1478                 : 
    1479                 :                 // Reject the match - backtrack back into the subpattern.
    1480              45 :                 op.m_jumps.append(jump());
    1481                 : 
    1482                 :                 // This is the entry point to jump to when we stop matching - we will
    1483                 :                 // do so once the subpattern cannot match any more.
    1484              45 :                 op.m_reentry = label();
    1485              45 :                 break;
    1486                 :             }
    1487                 : 
    1488                 :             // OpParentheticalAssertionBegin/End
    1489                 :             case OpParentheticalAssertionBegin: {
    1490              36 :                 PatternTerm* term = op.m_term;
    1491                 : 
    1492                 :                 // Store the current index - assertions should not update index, so
    1493                 :                 // we will need to restore it upon a successful match.
    1494              36 :                 unsigned parenthesesFrameLocation = term->frameLocation;
    1495              36 :                 storeToFrame(index, parenthesesFrameLocation);
    1496                 : 
    1497                 :                 // Move index back by the distance between m_checked and the term's input position.
    1498              36 :                 op.m_checkAdjust = m_checked - term->inputPosition;
    1499              36 :                 if (op.m_checkAdjust)
    1500              18 :                     sub32(Imm32(op.m_checkAdjust), index);
    1501                 : 
    1502              36 :                 m_checked -= op.m_checkAdjust;
    1503              36 :                 break;
    1504                 :             }
    1505                 :             case OpParentheticalAssertionEnd: {
    1506              36 :                 PatternTerm* term = op.m_term;
    1507                 : 
    1508                 :                 // Restore the input index value.
    1509              36 :                 unsigned parenthesesFrameLocation = term->frameLocation;
    1510              36 :                 loadFromFrame(parenthesesFrameLocation, index);
    1511                 : 
    1512                 :                 // If inverted, a successful match of the assertion must be treated
    1513                 :                 // as a failure, so jump to backtracking.
    1514              36 :                 if (term->invert()) {
    1515               9 :                     op.m_jumps.append(jump());
    1516               9 :                     op.m_reentry = label();
    1517                 :                 }
    1518                 : 
    1519              36 :                 YarrOp& lastOp = m_ops[op.m_previousOp];
    1520              36 :                 m_checked += lastOp.m_checkAdjust;
    1521              36 :                 break;
    1522                 :             }
    1523                 : 
    1524                 :             case OpMatchFailed:
    1525                 : #if !WTF_CPU_SPARC
    1526             317 :                 if (m_pattern.m_body->m_callFrameSize)
    1527             234 :                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
    1528                 : #endif
    1529             317 :                 move(TrustedImm32(-1), returnRegister);
    1530             317 :                 generateReturn();
    1531             317 :                 break;
    1532                 :             }
    1533                 : 
    1534          227977 :             ++opIndex;
    1535          227977 :         } while (opIndex < m_ops.size());
    1536           23884 :     }
    1537                 : 
    1538           23884 :     void backtrack()
    1539                 :     {
    1540                 :         // Backwards generate the backtracking code.
    1541           23884 :         size_t opIndex = m_ops.size();
    1542           23884 :         ASSERT(opIndex);
    1543                 : 
    1544          227977 :         do {
    1545          227977 :             --opIndex;
    1546          227977 :             YarrOp& op = m_ops[opIndex];
    1547          227977 :             switch (op.m_op) {
    1548                 : 
    1549                 :             case OpTerm:
    1550           94258 :                 backtrackTerm(opIndex);
    1551           94258 :                 break;
    1552                 : 
    1553                 :             // OpBodyAlternativeBegin/Next/End
    1554                 :             //
    1555                 :             // For each Begin/Next node representing an alternative, we need to decide what to do
    1556                 :             // in two circumstances:
    1557                 :             //  - If we backtrack back into this node, from within the alternative.
    1558                 :             //  - If the input check at the head of the alternative fails (if this exists).
    1559                 :             //
    1560                 :             // We treat these two cases differently since in the former case we have slightly
    1561                 :             // more information - since we are backtracking out of a prior alternative we know
    1562                 :             // that at least enough input was available to run it. For example, given the regular
    1563                 :             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
    1564                 :             // character match of 'a'), then we need not perform an additional input availability
    1565                 :             // check before running the second alternative.
    1566                 :             //
    1567                 :             // Backtracking required differs for the last alternative, which in the case of the
    1568                 :             // repeating set of alternatives must loop. The code generated for the last alternative
    1569                 :             // will also be used to handle all input check failures from any prior alternatives -
    1570                 :             // these require similar functionality, in seeking the next available alternative for
    1571                 :             // which there is sufficient input.
    1572                 :             //
    1573                 :             // Since backtracking of all other alternatives simply requires us to link backtracks
    1574                 :             // to the reentry point for the subsequent alternative, we will only be generating any
    1575                 :             // code when backtracking the last alternative.
    1576                 :             case OpBodyAlternativeBegin:
    1577                 :             case OpBodyAlternativeNext: {
    1578           24793 :                 PatternAlternative* alternative = op.m_alternative;
    1579                 : 
    1580           24793 :                 if (op.m_op == OpBodyAlternativeNext) {
    1581             756 :                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
    1582             756 :                     m_checked += priorAlternative->m_minimumSize;
    1583                 :                 }
    1584           24793 :                 m_checked -= alternative->m_minimumSize;
    1585                 : 
    1586                 :                 // Is this the last alternative? If not, then if we backtrack to this point we just
    1587                 :                 // need to jump to try to match the next alternative.
    1588           24793 :                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
    1589             756 :                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
    1590             756 :                     break;
    1591                 :                 }
    1592           24037 :                 YarrOp& endOp = m_ops[op.m_nextOp];
    1593                 : 
    1594           24037 :                 YarrOp* beginOp = &op;
    1595           48830 :                 while (beginOp->m_op != OpBodyAlternativeBegin) {
    1596             756 :                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
    1597             756 :                     beginOp = &m_ops[beginOp->m_previousOp];
    1598                 :                 }
    1599                 : 
    1600           24037 :                 bool onceThrough = endOp.m_nextOp == notFound;
    1601                 : 
    1602                 :                 // First, generate code to handle cases where we backtrack out of an attempted match
    1603                 :                 // of the last alternative. If this is a 'once through' set of alternatives then we
    1604                 :                 // have nothing to do - link this straight through to the End.
    1605           24037 :                 if (onceThrough)
    1606             470 :                     m_backtrackingState.linkTo(endOp.m_reentry, this);
    1607                 :                 else {
    1608                 :                     // If we don't need to move the input position, and the pattern has a fixed size
    1609                 :                     // (in which case we omit the store of the start index until the pattern has matched)
    1610                 :                     // then we can just link the backtrack out of the last alternative straight to the
    1611                 :                     // head of the first alternative.
    1612           23567 :                     if (m_pattern.m_body->m_hasFixedSize
    1613                 :                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
    1614                 :                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
    1615               9 :                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
    1616                 :                     else {
    1617                 :                         // We need to generate a trampoline of code to execute before looping back
    1618                 :                         // around to the first alternative.
    1619           23558 :                         m_backtrackingState.link(this);
    1620                 : 
    1621                 :                         // If the pattern size is not fixed, then store the start index, for use if we match.
    1622           23558 :                         if (!m_pattern.m_body->m_hasFixedSize) {
    1623           19158 :                             if (alternative->m_minimumSize == 1)
    1624             573 :                                 store32(index, Address(output));
    1625                 :                             else {
    1626           18585 :                                 move(index, regT0);
    1627           18585 :                                 if (alternative->m_minimumSize)
    1628           18522 :                                     sub32(Imm32(alternative->m_minimumSize - 1), regT0);
    1629                 :                                 else
    1630              63 :                                     add32(Imm32(1), regT0);
    1631           18585 :                                 store32(regT0, Address(output));
    1632                 :                             }
    1633                 :                         }
    1634                 : 
    1635                 :                         // Generate code to loop. Check whether the last alternative is longer than the
    1636                 :                         // first (e.g. /a|xy/ or /a|xyz/).
    1637           23558 :                         if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
    1638                 :                             // We want to loop, and increment input position. If the delta is 1, it is
    1639                 :                             // already correctly incremented, if more than one then decrement as appropriate.
    1640              45 :                             unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
    1641              45 :                             ASSERT(delta);
    1642              45 :                             if (delta != 1)
    1643              45 :                                 sub32(Imm32(delta - 1), index);
    1644              45 :                             jump(beginOp->m_reentry);
    1645                 :                         } else {
    1646                 :                             // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
    1647                 :                             // be sufficient input available to handle this, so just fall through.
    1648           23513 :                             unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
    1649           23513 :                             if (delta != 0xFFFFFFFFu) {
    1650                 :                                 // We need to check input because we are incrementing the input.
    1651           23513 :                                 add32(Imm32(delta + 1), index);
    1652           23513 :                                 checkInput().linkTo(beginOp->m_reentry, this);
    1653                 :                             }
    1654                 :                         }
    1655                 :                     }
    1656                 :                 }
    1657                 : 
    1658                 :                 // We can reach this point in the code in two ways:
    1659                 :                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
    1660                 :                 //    last alternative, and did not have sufficient input to run the first).
    1661                 :                 //  - We will loop back up to the following label when a repeating alternative loops,
    1662                 :                 //    following a failed input check.
    1663                 :                 //
    1664                 :                 // Either way, we have just failed the input check for the first alternative.
    1665           24037 :                 Label firstInputCheckFailed(this);
    1666                 : 
    1667                 :                 // Generate code to handle input check failures from alternatives except the last.
    1668                 :                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
    1669                 :                 // nextOp is the alternative we will be attempting to reenter into.
    1670                 :                 // 
    1671                 :                 // We will link input check failures from the forwards matching path back to the code
    1672                 :                 // that can handle them.
    1673           24037 :                 YarrOp* prevOp = beginOp;
    1674           24037 :                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
    1675           48830 :                 while (nextOp->m_op != OpBodyAlternativeEnd) {
    1676             756 :                     prevOp->m_jumps.link(this);
    1677                 : 
    1678                 :                     // We only get here if an input check fails, it is only worth checking again
    1679                 :                     // if the next alternative has a minimum size less than the last.
    1680             756 :                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
    1681                 :                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
    1682                 :                         // subtract delta back out, and reduce this code. Should performance test
    1683                 :                         // the benefit of this.
    1684             414 :                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
    1685             414 :                         sub32(Imm32(delta), index);
    1686             414 :                         Jump fail = jumpIfNoAvailableInput();
    1687             414 :                         add32(Imm32(delta), index);
    1688             414 :                         jump(nextOp->m_reentry);
    1689             414 :                         fail.link(this);
    1690             342 :                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
    1691             180 :                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
    1692             756 :                     prevOp = nextOp;
    1693             756 :                     nextOp = &m_ops[nextOp->m_nextOp];
    1694                 :                 }
    1695                 : 
    1696                 :                 // We fall through to here if there is insufficient input to run the last alternative.
    1697                 : 
    1698                 :                 // If there is insufficient input to run the last alternative, then for 'once through'
    1699                 :                 // alternatives we are done - just jump back up into the forwards matching path at the End.
    1700           24037 :                 if (onceThrough) {
    1701             470 :                     op.m_jumps.linkTo(endOp.m_reentry, this);
    1702             470 :                     jump(endOp.m_reentry);
    1703             470 :                     break;
    1704                 :                 }
    1705                 : 
    1706                 :                 // For repeating alternatives, link any input check failure from the last alternative to
    1707                 :                 // this point.
    1708           23567 :                 op.m_jumps.link(this);
    1709                 : 
    1710           23567 :                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
    1711                 : 
    1712                 :                 // Check for cases where input position is already incremented by 1 for the last
    1713                 :                 // alternative (this is particularly useful where the minimum size of the body
    1714                 :                 // disjunction is 0, e.g. /a*|b/).
    1715           23567 :                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
    1716                 :                     // index is already incremented by 1, so just store it now!
    1717             573 :                     store32(index, Address(output));
    1718             573 :                     needsToUpdateMatchStart = false;
    1719                 :                 }
    1720                 : 
    1721                 :                 // Check whether there is sufficient input to loop. Increment the input position by
    1722                 :                 // one, and check. Also add in the minimum disjunction size before checking - there
    1723                 :                 // is no point in looping if we're just going to fail all the input checks around
    1724                 :                 // the next iteration.
    1725           23567 :                 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
    1726           23567 :                 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
    1727                 :                     // If the last alternative had the same minimum size as the disjunction,
    1728                 :                     // just simply increment input pos by 1, no adjustment based on minimum size.
    1729           23387 :                     add32(Imm32(1), index);
    1730                 :                 } else {
    1731                 :                     // If the minimum for the last alternative was one greater than that
    1732                 :                     // for the disjunction, we've already progressed by 1, nothing to do!
    1733             180 :                     unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
    1734             180 :                     if (delta)
    1735              54 :                         sub32(Imm32(delta), index);
    1736                 :                 }
    1737           23567 :                 Jump matchFailed = jumpIfNoAvailableInput();
    1738                 : 
    1739           23567 :                 if (needsToUpdateMatchStart) {
    1740           18585 :                     if (!m_pattern.m_body->m_minimumSize)
    1741              72 :                         store32(index, Address(output));
    1742                 :                     else {
    1743           18513 :                         move(index, regT0);
    1744           18513 :                         sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
    1745           18513 :                         store32(regT0, Address(output));
    1746                 :                     }
    1747                 :                 }
    1748                 : 
    1749                 :                 // Calculate how much more input the first alternative requires than the minimum
    1750                 :                 // for the body as a whole. If no more is needed then we don't need an additional
    1751                 :                 // input check here - jump straight back up to the start of the first alternative.
    1752           23567 :                 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
    1753           23207 :                     jump(beginOp->m_reentry);
    1754                 :                 else {
    1755             360 :                     if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
    1756             360 :                         add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
    1757                 :                     else
    1758               0 :                         sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
    1759             360 :                     checkInput().linkTo(beginOp->m_reentry, this);
    1760             360 :                     jump(firstInputCheckFailed);
    1761                 :                 }
    1762                 : 
    1763                 :                 // We jump to here if we iterate to the point that there is insufficient input to
    1764                 :                 // run any matches, and need to return a failure state from JIT code.
    1765           23567 :                 matchFailed.link(this);
    1766                 : 
    1767                 : #if !WTF_CPU_SPARC
    1768           23567 :                 if (m_pattern.m_body->m_callFrameSize)
    1769            2634 :                     addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
    1770                 : #endif
    1771           23567 :                 move(TrustedImm32(-1), returnRegister);
    1772           23567 :                 generateReturn();
    1773           23567 :                 break;
    1774                 :             }
    1775                 :             case OpBodyAlternativeEnd: {
    1776                 :                 // We should never backtrack back into a body disjunction.
    1777           24037 :                 ASSERT(m_backtrackingState.isEmpty());
    1778                 : 
    1779           24037 :                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
    1780           24037 :                 m_checked += priorAlternative->m_minimumSize;
    1781           24037 :                 break;
    1782                 :             }
    1783                 : 
    1784                 :             // OpSimpleNestedAlternativeBegin/Next/End
    1785                 :             // OpNestedAlternativeBegin/Next/End
    1786                 :             //
    1787                 :             // Generate code for when we backtrack back out of an alternative into
    1788                 :             // a Begin or Next node, or when the entry input count check fails. If
    1789                 :             // there are more alternatives we need to jump to the next alternative,
    1790                 :             // if not we backtrack back out of the current set of parentheses.
    1791                 :             //
    1792                 :             // In the case of non-simple nested assertions we need to also link the
    1793                 :             // 'return address' appropriately to backtrack back out into the correct
    1794                 :             // alternative.
    1795                 :             case OpSimpleNestedAlternativeBegin:
    1796                 :             case OpSimpleNestedAlternativeNext:
    1797                 :             case OpNestedAlternativeBegin:
    1798                 :             case OpNestedAlternativeNext: {
    1799           23438 :                 YarrOp& nextOp = m_ops[op.m_nextOp];
    1800           23438 :                 bool isBegin = op.m_previousOp == notFound;
    1801           23438 :                 bool isLastAlternative = nextOp.m_nextOp == notFound;
    1802           23438 :                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
    1803           23438 :                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
    1804                 : 
    1805                 :                 // Treat an input check failure the same as a failed match.
    1806           23438 :                 m_backtrackingState.append(op.m_jumps);
    1807                 : 
    1808                 :                 // Set the backtracks to jump to the appropriate place. We may need
    1809                 :                 // to link the backtracks in one of three different ways depending on
    1810                 :                 // the type of alternative we are dealing with:
    1811                 :                 //  - A single alternative, with no siblings.
    1812                 :                 //  - The last alternative of a set of two or more.
    1813                 :                 //  - An alternative other than the last of a set of two or more.
    1814                 :                 //
    1815                 :                 // In the case of a single alternative on its own, we don't need to
    1816                 :                 // jump anywhere - if the alternative fails to match we can just
    1817                 :                 // continue to backtrack out of the parentheses without jumping.
    1818                 :                 //
    1819                 :                 // In the case of the last alternative in a set of more than one, we
    1820                 :                 // need to jump to return back out to the beginning. We'll do so by
    1821                 :                 // adding a jump to the End node's m_jumps list, and linking this
    1822                 :                 // when we come to generate the Begin node. For alternatives other
    1823                 :                 // than the last, we need to jump to the next alternative.
    1824                 :                 //
    1825                 :                 // If the alternative had adjusted the input position we must link
    1826                 :                 // backtracking to here, correct, and then jump on. If not we can
    1827                 :                 // link the backtracks directly to their destination.
    1828           23438 :                 if (op.m_checkAdjust) {
    1829                 :                     // Handle the cases where we need to link the backtracks here.
    1830            1071 :                     m_backtrackingState.link(this);
    1831            1071 :                     sub32(Imm32(op.m_checkAdjust), index);
    1832            1071 :                     if (!isLastAlternative) {
    1833                 :                         // An alternative that is not the last should jump to its successor.
    1834             567 :                         jump(nextOp.m_reentry);
    1835             504 :                     } else if (!isBegin) {
    1836                 :                         // The last of more than one alternatives must jump back to the beginning.
    1837             207 :                         nextOp.m_jumps.append(jump());
    1838                 :                     } else {
    1839                 :                         // A single alternative on its own can fall through.
    1840             297 :                         m_backtrackingState.fallthrough();
    1841                 :                     }
    1842                 :                 } else {
    1843                 :                     // Handle the cases where we can link the backtracks directly to their destinations.
    1844           22367 :                     if (!isLastAlternative) {
    1845                 :                         // An alternative that is not the last should jump to its successor.
    1846            2493 :                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
    1847           19874 :                     } else if (!isBegin) {
    1848                 :                         // The last of more than one alternatives must jump back to the beginning.
    1849            2169 :                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
    1850                 :                     }
    1851                 :                     // In the case of a single alternative on its own do nothing - it can fall through.
    1852                 :                 }
    1853                 : 
    1854                 :                 // At this point we've handled the backtracking back into this node.
    1855                 :                 // Now link any backtracks that need to jump to here.
    1856                 : 
    1857                 :                 // For non-simple alternatives, link the alternative's 'return address'
    1858                 :                 // so that we backtrack back out into the previous alternative.
    1859           23438 :                 if (op.m_op == OpNestedAlternativeNext)
    1860            3060 :                     m_backtrackingState.append(op.m_returnAddress);
    1861                 : 
    1862                 :                 // If there is more than one alternative, then the last alternative will
    1863                 :                 // have planted a jump to be linked to the end. This jump was added to the
    1864                 :                 // End node's m_jumps list. If we are back at the beginning, link it here.
    1865           23438 :                 if (isBegin) {
    1866           20378 :                     YarrOp* endOp = &m_ops[op.m_nextOp];
    1867           43816 :                     while (endOp->m_nextOp != notFound) {
    1868            3060 :                         ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
    1869            3060 :                         endOp = &m_ops[endOp->m_nextOp];
    1870                 :                     }
    1871           20378 :                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
    1872           20378 :                     m_backtrackingState.append(endOp->m_jumps);
    1873                 :                 }
    1874                 : 
    1875           23438 :                 if (!isBegin) {
    1876            3060 :                     YarrOp& lastOp = m_ops[op.m_previousOp];
    1877            3060 :                     m_checked += lastOp.m_checkAdjust;
    1878                 :                 }
    1879           23438 :                 m_checked -= op.m_checkAdjust;
    1880           23438 :                 break;
    1881                 :             }
    1882                 :             case OpSimpleNestedAlternativeEnd:
    1883                 :             case OpNestedAlternativeEnd: {
    1884           20378 :                 PatternTerm* term = op.m_term;
    1885                 : 
    1886                 :                 // If we backtrack into the end of a simple subpattern do nothing;
    1887                 :                 // just continue through into the last alternative. If we backtrack
    1888                 :                 // into the end of a non-simple set of alternatives we need to jump
    1889                 :                 // to the backtracking return address set up during generation.
    1890           20378 :                 if (op.m_op == OpNestedAlternativeEnd) {
    1891            2376 :                     m_backtrackingState.link(this);
    1892                 : 
    1893                 :                     // Plant a jump to the return address.
    1894            2376 :                     unsigned parenthesesFrameLocation = term->frameLocation;
    1895            2376 :                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
    1896            2376 :                     if (term->quantityType != QuantifierFixedCount)
    1897               0 :                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
    1898            2376 :                     loadFromFrameAndJump(alternativeFrameLocation);
    1899                 : 
    1900                 :                     // Link the DataLabelPtr associated with the end of the last
    1901                 :                     // alternative to this point.
    1902            2376 :                     m_backtrackingState.append(op.m_returnAddress);
    1903                 :                 }
    1904                 : 
    1905           20378 :                 YarrOp& lastOp = m_ops[op.m_previousOp];
    1906           20378 :                 m_checked += lastOp.m_checkAdjust;
    1907           20378 :                 break;
    1908                 :             }
    1909                 : 
    1910                 :             // OpParenthesesSubpatternOnceBegin/End
    1911                 :             //
    1912                 :             // When we are backtracking back out of a capturing subpattern we need
    1913                 :             // to clear the start index in the matches output array, to record that
    1914                 :             // this subpattern has not been captured.
    1915                 :             //
    1916                 :             // When backtracking back out of a Greedy quantified subpattern we need
    1917                 :             // to catch this, and try running the remainder of the alternative after
    1918                 :             // the subpattern again, skipping the parentheses.
    1919                 :             //
    1920                 :             // Upon backtracking back into a quantified set of parentheses we need to
    1921                 :             // check whether we were currently skipping the subpattern. If not, we
    1922                 :             // can backtrack into them, if we were we need to either backtrack back
    1923                 :             // out of the start of the parentheses, or jump back to the forwards
    1924                 :             // matching start, depending on whether the match is Greedy or NonGreedy.
    1925                 :             case OpParenthesesSubpatternOnceBegin: {
    1926           20297 :                 PatternTerm* term = op.m_term;
    1927           20297 :                 ASSERT(term->quantityCount == 1);
    1928                 : 
    1929                 :                 // We only need to backtrack to this point if capturing or greedy.
    1930           20297 :                 if (term->capture() || term->quantityType == QuantifierGreedy) {
    1931           19719 :                     m_backtrackingState.link(this);
    1932                 : 
    1933                 :                     // If capturing, clear the capture (we only need to reset start).
    1934           19719 :                     if (term->capture())
    1935           19629 :                         store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
    1936                 : 
    1937                 :                     // If Greedy, jump to the end.
    1938           19719 :                     if (term->quantityType == QuantifierGreedy) {
    1939                 :                         // Clear the flag in the stackframe indicating we ran through the subpattern.
    1940             243 :                         unsigned parenthesesFrameLocation = term->frameLocation;
    1941             243 :                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
    1942                 :                         // Jump to after the parentheses, skipping the subpattern.
    1943             243 :                         jump(m_ops[op.m_nextOp].m_reentry);
    1944                 :                         // A backtrack from after the parentheses, when skipping the subpattern,
    1945                 :                         // will jump back to here.
    1946             243 :                         op.m_jumps.link(this);
    1947                 :                     }
    1948                 : 
    1949           19719 :                     m_backtrackingState.fallthrough();
    1950                 :                 }
    1951           20297 :                 break;
    1952                 :             }
    1953                 :             case OpParenthesesSubpatternOnceEnd: {
    1954           20297 :                 PatternTerm* term = op.m_term;
    1955                 : 
    1956           20297 :                 if (term->quantityType != QuantifierFixedCount) {
    1957             243 :                     m_backtrackingState.link(this);
    1958                 : 
    1959                 :                     // Check whether we should backtrack back into the parentheses, or if we
    1960                 :                     // are currently in a state where we had skipped over the subpattern
    1961                 :                     // (in which case the flag value on the stack will be -1).
    1962             243 :                     unsigned parenthesesFrameLocation = term->frameLocation;
    1963             243 :                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
    1964                 : 
    1965             243 :                     if (term->quantityType == QuantifierGreedy) {
    1966                 :                         // For Greedy parentheses, we skip after having already tried going
    1967                 :                         // through the subpattern, so if we get here we're done.
    1968             243 :                         YarrOp& beginOp = m_ops[op.m_previousOp];
    1969             243 :                         beginOp.m_jumps.append(hadSkipped);
    1970                 :                     } else {
    1971                 :                         // For NonGreedy parentheses, we try skipping the subpattern first,
    1972                 :                         // so if we get here we need to try running through the subpattern
    1973                 :                         // next. Jump back to the start of the parentheses in the forwards
    1974                 :                         // matching path.
    1975               0 :                         ASSERT(term->quantityType == QuantifierNonGreedy);
    1976               0 :                         YarrOp& beginOp = m_ops[op.m_previousOp];
    1977               0 :                         hadSkipped.linkTo(beginOp.m_reentry, this);
    1978                 :                     }
    1979                 : 
    1980             243 :                     m_backtrackingState.fallthrough();
    1981                 :                 }
    1982                 : 
    1983           20297 :                 m_backtrackingState.append(op.m_jumps);
    1984           20297 :                 break;
    1985                 :             }
    1986                 : 
    1987                 :             // OpParenthesesSubpatternTerminalBegin/End
    1988                 :             //
    1989                 :             // Terminal subpatterns will always match - there is nothing after them to
    1990                 :             // force a backtrack, and they have a minimum count of 0, and as such will
    1991                 :             // always produce an acceptable result.
    1992                 :             case OpParenthesesSubpatternTerminalBegin: {
    1993                 :                 // We will backtrack to this point once the subpattern cannot match any
    1994                 :                 // more. Since no match is accepted as a successful match (we are Greedy
    1995                 :                 // quantified with a minimum of zero) jump back to the forwards matching
    1996                 :                 // path at the end.
    1997              45 :                 YarrOp& endOp = m_ops[op.m_nextOp];
    1998              45 :                 m_backtrackingState.linkTo(endOp.m_reentry, this);
    1999              45 :                 break;
    2000                 :             }
    2001                 :             case OpParenthesesSubpatternTerminalEnd:
    2002                 :                 // We should never be backtracking to here (hence the 'terminal' in the name).
    2003              45 :                 ASSERT(m_backtrackingState.isEmpty());
    2004              45 :                 m_backtrackingState.append(op.m_jumps);
    2005              45 :                 break;
    2006                 : 
    2007                 :             // OpParentheticalAssertionBegin/End
    2008                 :             case OpParentheticalAssertionBegin: {
    2009              36 :                 PatternTerm* term = op.m_term;
    2010              36 :                 YarrOp& endOp = m_ops[op.m_nextOp];
    2011                 : 
    2012                 :                 // We need to handle the backtracks upon backtracking back out
    2013                 :                 // of a parenthetical assertion if either we need to correct
    2014                 :                 // the input index, or the assertion was inverted.
    2015              36 :                 if (op.m_checkAdjust || term->invert()) {
    2016              18 :                      m_backtrackingState.link(this);
    2017                 : 
    2018              18 :                     if (op.m_checkAdjust)
    2019              18 :                         add32(Imm32(op.m_checkAdjust), index);
    2020                 : 
    2021                 :                     // In an inverted assertion failure to match the subpattern
    2022                 :                     // is treated as a successful match - jump to the end of the
    2023                 :                     // subpattern. We already have adjusted the input position
    2024                 :                     // back to that before the assertion, which is correct.
    2025              18 :                     if (term->invert())
    2026               9 :                         jump(endOp.m_reentry);
    2027                 : 
    2028              18 :                     m_backtrackingState.fallthrough();
    2029                 :                 }
    2030                 : 
    2031                 :                 // The End node's jump list will contain any backtracks into
    2032                 :                 // the end of the assertion. Also, if inverted, we will have
    2033                 :                 // added the failure caused by a successful match to this.
    2034              36 :                 m_backtrackingState.append(endOp.m_jumps);
    2035                 : 
    2036              36 :                 m_checked += op.m_checkAdjust;
    2037              36 :                 break;
    2038                 :             }
    2039                 :             case OpParentheticalAssertionEnd: {
    2040                 :                 // FIXME: We should really be clearing any nested subpattern
    2041                 :                 // matches on bailing out from after the pattern. Firefox has
    2042                 :                 // this bug too (presumably because they use YARR!)
    2043                 : 
    2044                 :                 // Never backtrack into an assertion; later failures bail to before the begin.
    2045              36 :                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
    2046                 : 
    2047              36 :                 YarrOp& lastOp = m_ops[op.m_previousOp];
    2048              36 :                 m_checked -= lastOp.m_checkAdjust;
    2049              36 :                 break;
    2050                 :             }
    2051                 : 
    2052                 :             case OpMatchFailed:
    2053             317 :                 break;
    2054                 :             }
    2055                 : 
    2056                 :         } while (opIndex);
    2057           23884 :     }
    2058                 : 
    2059                 :     // Compilation methods:
    2060                 :     // ====================
    2061                 : 
    2062                 :     // opCompileParenthesesSubpattern
    2063                 :     // Emits ops for a subpattern (set of parentheses). These consist
    2064                 :     // of a set of alternatives wrapped in an outer set of nodes for
    2065                 :     // the parentheses.
    2066                 :     // Supported types of parentheses are 'Once' (quantityCount == 1)
    2067                 :     // and 'Terminal' (non-capturing parentheses quantified as greedy
    2068                 :     // and infinite).
    2069                 :     // Alternatives will use the 'Simple' set of ops if either the
    2070                 :     // subpattern is terminal (in which case we will never need to
    2071                 :     // backtrack), or if the subpattern only contains one alternative.
    2072           20765 :     void opCompileParenthesesSubpattern(PatternTerm* term)
    2073                 :     {
    2074                 :         YarrOpCode parenthesesBeginOpCode;
    2075                 :         YarrOpCode parenthesesEndOpCode;
    2076           20765 :         YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
    2077           20765 :         YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
    2078           20765 :         YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
    2079                 : 
    2080                 :         // We can currently only compile quantity 1 subpatterns that are
    2081                 :         // not copies. We generate a copy in the case of a range quantifier,
    2082                 :         // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
    2083                 :         // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
    2084                 :         // comes where the subpattern is capturing, in which case we would
    2085                 :         // need to restore the capture from the first subpattern upon a
    2086                 :         // failure in the second.
    2087           20765 :         if (term->quantityCount == 1 && !term->parentheses.isCopy) {
    2088                 :             // Select the 'Once' nodes.
    2089           20531 :             parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
    2090           20531 :             parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
    2091                 : 
    2092                 :             // If there is more than one alternative we cannot use the 'simple' nodes.
    2093           41062 :             if (term->parentheses.disjunction->m_alternatives.size() != 1) {
    2094            2439 :                 alternativeBeginOpCode = OpNestedAlternativeBegin;
    2095            2439 :                 alternativeNextOpCode = OpNestedAlternativeNext;
    2096            2439 :                 alternativeEndOpCode = OpNestedAlternativeEnd;
    2097                 :             }
    2098             234 :         } else if (term->parentheses.isTerminal) {
    2099                 :             // Terminal groups are optimized on the assumption that matching will never
    2100                 :             // backtrack into the terminal group. But this is false if there is more
    2101                 :             // than one alternative and one of the alternatives can match empty. In that
    2102                 :             // case, the empty match is counted as a failure, so we would need to backtrack.
    2103                 :             // The backtracking code doesn't handle this case correctly, so we fall back
    2104                 :             // to the interpreter.
    2105              54 :             Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
    2106              54 :             if (alternatives.size() != 1) {
    2107               9 :                 for (unsigned i = 0; i < alternatives.size(); ++i) {
    2108               9 :                     if (alternatives[i]->m_minimumSize == 0) {
    2109               9 :                         m_shouldFallBack = true;
    2110               9 :                         return;
    2111                 :                     }
    2112                 :                 }
    2113                 :             }
    2114                 :                         
    2115                 :             // Select the 'Terminal' nodes.
    2116              45 :             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
    2117              45 :             parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
    2118                 :         } else {
    2119                 :             // This subpattern is not supported by the JIT.
    2120             180 :             m_shouldFallBack = true;
    2121             180 :             return;
    2122                 :         }
    2123                 : 
    2124           20576 :         size_t parenBegin = m_ops.size();
    2125           20576 :         m_ops.append(parenthesesBeginOpCode);
    2126                 : 
    2127           20576 :         m_ops.append(alternativeBeginOpCode);
    2128           20576 :         m_ops.last().m_previousOp = notFound;
    2129           20576 :         m_ops.last().m_term = term;
    2130           20576 :         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
    2131           44275 :         for (unsigned i = 0; i < alternatives.size(); ++i) {
    2132           23699 :             size_t lastOpIndex = m_ops.size() - 1;
    2133                 : 
    2134           23699 :             PatternAlternative* nestedAlternative = alternatives[i];
    2135           23699 :             opCompileAlternative(nestedAlternative);
    2136                 : 
    2137           23699 :             size_t thisOpIndex = m_ops.size();
    2138           23699 :             m_ops.append(YarrOp(alternativeNextOpCode));
    2139                 : 
    2140           23699 :             YarrOp& lastOp = m_ops[lastOpIndex];
    2141           23699 :             YarrOp& thisOp = m_ops[thisOpIndex];
    2142                 : 
    2143           23699 :             lastOp.m_alternative = nestedAlternative;
    2144           23699 :             lastOp.m_nextOp = thisOpIndex;
    2145           23699 :             thisOp.m_previousOp = lastOpIndex;
    2146           23699 :             thisOp.m_term = term;
    2147                 :         }
    2148           20576 :         YarrOp& lastOp = m_ops.last();
    2149           20576 :         ASSERT(lastOp.m_op == alternativeNextOpCode);
    2150           20576 :         lastOp.m_op = alternativeEndOpCode;
    2151           20576 :         lastOp.m_alternative = 0;
    2152           20576 :         lastOp.m_nextOp = notFound;
    2153                 : 
    2154           20576 :         size_t parenEnd = m_ops.size();
    2155           20576 :         m_ops.append(parenthesesEndOpCode);
    2156                 : 
    2157           20576 :         m_ops[parenBegin].m_term = term;
    2158           20576 :         m_ops[parenBegin].m_previousOp = notFound;
    2159           20576 :         m_ops[parenBegin].m_nextOp = parenEnd;
    2160           20576 :         m_ops[parenEnd].m_term = term;
    2161           20576 :         m_ops[parenEnd].m_previousOp = parenBegin;
    2162           20576 :         m_ops[parenEnd].m_nextOp = notFound;
    2163                 :     }
    2164                 : 
    2165                 :     // opCompileParentheticalAssertion
    2166                 :     // Emits ops for a parenthetical assertion. These consist of an
    2167                 :     // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
    2168                 :     // the alternatives, with these wrapped by an outer pair of
    2169                 :     // OpParentheticalAssertionBegin/End nodes.
    2170                 :     // We can always use the OpSimpleNestedAlternative nodes in the
    2171                 :     // case of parenthetical assertions since these only ever match
    2172                 :     // once, and will never backtrack back into the assertion.
    2173              45 :     void opCompileParentheticalAssertion(PatternTerm* term)
    2174                 :     {
    2175              45 :         size_t parenBegin = m_ops.size();
    2176              45 :         m_ops.append(OpParentheticalAssertionBegin);
    2177                 : 
    2178              45 :         m_ops.append(OpSimpleNestedAlternativeBegin);
    2179              45 :         m_ops.last().m_previousOp = notFound;
    2180              45 :         m_ops.last().m_term = term;
    2181              45 :         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
    2182              90 :         for (unsigned i = 0; i < alternatives.size(); ++i) {
    2183              45 :             size_t lastOpIndex = m_ops.size() - 1;
    2184                 : 
    2185              45 :             PatternAlternative* nestedAlternative = alternatives[i];
    2186              45 :             opCompileAlternative(nestedAlternative);
    2187                 : 
    2188              45 :             size_t thisOpIndex = m_ops.size();
    2189              45 :             m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
    2190                 : 
    2191              45 :             YarrOp& lastOp = m_ops[lastOpIndex];
    2192              45 :             YarrOp& thisOp = m_ops[thisOpIndex];
    2193                 : 
    2194              45 :             lastOp.m_alternative = nestedAlternative;
    2195              45 :             lastOp.m_nextOp = thisOpIndex;
    2196              45 :             thisOp.m_previousOp = lastOpIndex;
    2197              45 :             thisOp.m_term = term;
    2198                 :         }
    2199              45 :         YarrOp& lastOp = m_ops.last();
    2200              45 :         ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
    2201              45 :         lastOp.m_op = OpSimpleNestedAlternativeEnd;
    2202              45 :         lastOp.m_alternative = 0;
    2203              45 :         lastOp.m_nextOp = notFound;
    2204                 : 
    2205              45 :         size_t parenEnd = m_ops.size();
    2206              45 :         m_ops.append(OpParentheticalAssertionEnd);
    2207                 : 
    2208              45 :         m_ops[parenBegin].m_term = term;
    2209              45 :         m_ops[parenBegin].m_previousOp = notFound;
    2210              45 :         m_ops[parenBegin].m_nextOp = parenEnd;
    2211              45 :         m_ops[parenEnd].m_term = term;
    2212              45 :         m_ops[parenEnd].m_previousOp = parenBegin;
    2213              45 :         m_ops[parenEnd].m_nextOp = notFound;
    2214              45 :     }
    2215                 : 
    2216                 :     // opCompileAlternative
    2217                 :     // Called to emit nodes for all terms in an alternative.
    2218           48717 :     void opCompileAlternative(PatternAlternative* alternative)
    2219                 :     {
    2220           48717 :         optimizeAlternative(alternative);
    2221                 : 
    2222          164289 :         for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
    2223          115572 :             PatternTerm* term = &alternative->m_terms[i];
    2224                 : 
    2225          115572 :             switch (term->type) {
    2226                 :             case PatternTerm::TypeParenthesesSubpattern:
    2227           20765 :                 opCompileParenthesesSubpattern(term);
    2228           20765 :                 break;
    2229                 : 
    2230                 :             case PatternTerm::TypeParentheticalAssertion:
    2231              45 :                 opCompileParentheticalAssertion(term);
    2232              45 :                 break;
    2233                 : 
    2234                 :             default:
    2235           94762 :                 m_ops.append(term);
    2236                 :             }
    2237                 :         }
    2238           48717 :     }
    2239                 : 
    2240                 :     // opCompileBody
    2241                 :     // This method compiles the body disjunction of the regular expression.
    2242                 :     // The body consists of two sets of alternatives - zero or more 'once
    2243                 :     // through' (BOL anchored) alternatives, followed by zero or more
    2244                 :     // repeated alternatives.
    2245                 :     // For each of these two sets of alternatives, if not empty they will be
    2246                 :     // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
    2247                 :     // 'begin' node referencing the first alternative, and 'next' nodes
    2248                 :     // referencing any further alternatives). The begin/next/end nodes are
    2249                 :     // linked together in a doubly linked list. In the case of repeating
    2250                 :     // alternatives, the end node is also linked back to the beginning.
    2251                 :     // If no repeating alternatives exist, then an OpMatchFailed node exists
    2252                 :     // to return the failing result.
    2253           24046 :     void opCompileBody(PatternDisjunction* disjunction) // Appends the ops for the whole body disjunction to m_ops.
    2254                 :     {
    2255           24046 :         Vector<PatternAlternative*>& alternatives =  disjunction->m_alternatives;
    2256           24046 :         size_t currentAlternativeIndex = 0; // index of the next alternative still to be compiled
    2257                 : 
    2258                 :         // Emit the 'once through' alternatives.
    2259           24046 :         if (alternatives.size() && alternatives[0]->onceThrough()) {
    2260             542 :             m_ops.append(YarrOp(OpBodyAlternativeBegin));
    2261             542 :             m_ops.last().m_previousOp = notFound; // Begin is the head of the list: it has no predecessor
    2262                 : 
    2263             767 :             do {
    2264             578 :                 size_t lastOpIndex = m_ops.size() - 1; // the Begin/Next op that opens this alternative
    2265             578 :                 PatternAlternative* alternative = alternatives[currentAlternativeIndex];
    2266             578 :                 opCompileAlternative(alternative);
    2267                 : 
    2268             578 :                 size_t thisOpIndex = m_ops.size(); // index that the Next op appended on the following line will occupy
    2269             578 :                 m_ops.append(YarrOp(OpBodyAlternativeNext));
    2270                 : 
    2271             578 :                 YarrOp& lastOp = m_ops[lastOpIndex]; // looked up by index only after this iteration's appends are done
    2272             578 :                 YarrOp& thisOp = m_ops[thisOpIndex];
    2273                 : 
    2274             578 :                 lastOp.m_alternative = alternative; // the opening op records which alternative it introduces
    2275             578 :                 lastOp.m_nextOp = thisOpIndex; // forward link
    2276             578 :                 thisOp.m_previousOp = lastOpIndex; // backward link
    2277                 :                 
    2278             578 :                 ++currentAlternativeIndex;
    2279             767 :             } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough()); // stop at the first alternative that is not 'once through'
    2280                 : 
    2281             542 :             YarrOp& lastOp = m_ops.last();
    2282                 : 
    2283             542 :             ASSERT(lastOp.m_op == OpBodyAlternativeNext);
    2284             542 :             lastOp.m_op = OpBodyAlternativeEnd; // retag the trailing Next op as the End of this list
    2285             542 :             lastOp.m_alternative = 0;
    2286             542 :             lastOp.m_nextOp = notFound; // the once-through list is not linked back to its Begin
    2287                 :         }
    2288                 : 
    2289           24046 :         if (currentAlternativeIndex == alternatives.size()) { // nothing left to repeat (also true when there are no alternatives at all)
    2290             389 :             m_ops.append(YarrOp(OpMatchFailed));
    2291             389 :             return;
    2292                 :         }
    2293                 : 
    2294                 :         // Emit the repeated alternatives.
    2295           23657 :         size_t repeatLoop = m_ops.size(); // index of the Begin op appended on the next line
    2296           23657 :         m_ops.append(YarrOp(OpBodyAlternativeBegin));
    2297           23657 :         m_ops.last().m_previousOp = notFound;
    2298           24395 :         do {
    2299           24395 :             size_t lastOpIndex = m_ops.size() - 1; // the Begin/Next op that opens this alternative
    2300           24395 :             PatternAlternative* alternative = alternatives[currentAlternativeIndex];
    2301           24395 :             ASSERT(!alternative->onceThrough()); // every remaining alternative is expected to be a repeated one
    2302           24395 :             opCompileAlternative(alternative);
    2303                 : 
    2304           24395 :             size_t thisOpIndex = m_ops.size(); // index that the Next op appended on the following line will occupy
    2305           24395 :             m_ops.append(YarrOp(OpBodyAlternativeNext));
    2306                 : 
    2307           24395 :             YarrOp& lastOp = m_ops[lastOpIndex]; // looked up by index only after this iteration's appends are done
    2308           24395 :             YarrOp& thisOp = m_ops[thisOpIndex];
    2309                 : 
    2310           24395 :             lastOp.m_alternative = alternative;
    2311           24395 :             lastOp.m_nextOp = thisOpIndex;
    2312           24395 :             thisOp.m_previousOp = lastOpIndex;
    2313                 :             
    2314           24395 :             ++currentAlternativeIndex;
    2315           24395 :         } while (currentAlternativeIndex < alternatives.size());
    2316           23657 :         YarrOp& lastOp = m_ops.last();
    2317           23657 :         ASSERT(lastOp.m_op == OpBodyAlternativeNext);
    2318           23657 :         lastOp.m_op = OpBodyAlternativeEnd; // retag the trailing Next op as the End of this list
    2319           23657 :         lastOp.m_alternative = 0;
    2320           23657 :         lastOp.m_nextOp = repeatLoop; // End points back at the repeated list's Begin op
    2321                 :     }
    2322                 : 
    2323           24046 :     void generateEnter() // Emits the per-CPU entry sequence; compile() calls this before emitting anything else.
    2324                 :     {
    2325                 : #if WTF_CPU_X86_64
    2326                 :         push(X86Registers::ebp);
    2327                 :         move(stackPointerRegister, X86Registers::ebp);
    2328                 :         push(X86Registers::ebx);
    2329                 : #elif WTF_CPU_X86
    2330           24046 :         push(X86Registers::ebp);
    2331           24046 :         move(stackPointerRegister, X86Registers::ebp);
    2332                 :         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
    2333           24046 :         push(X86Registers::ebx);
    2334           24046 :         push(X86Registers::edi);
    2335           24046 :         push(X86Registers::esi);
    2336                 :         // load output into edi (2 = saved ebp + return address).
    2337                 :     #if WTF_COMPILER_MSVC || WTF_COMPILER_SUNCC
    2338                 :         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); // with these compilers all four values are read from ebp-relative slots 2..5
    2339                 :         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
    2340                 :         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
    2341                 :         loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
    2342                 :     #else
    2343           24046 :         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); // only 'output' is read from the stack here
    2344                 :     #endif
    2345                 : #elif WTF_CPU_ARM
    2346                 :         push(ARMRegisters::r4);
    2347                 :         push(ARMRegisters::r5);
    2348                 :         push(ARMRegisters::r6);
    2349                 : #if WTF_CPU_ARM_TRADITIONAL
    2350                 :         push(ARMRegisters::r8); // scratch register
    2351                 : #endif
    2352                 :         move(ARMRegisters::r3, output); // presumably the output pointer arrives in r3 - confirm against the calling convention
    2353                 : #elif WTF_CPU_SH4
    2354                 :         push(SH4Registers::r11);
    2355                 :         push(SH4Registers::r13);
    2356                 : #elif WTF_CPU_SPARC
    2357                 :         save(Imm32(-m_pattern.m_body->m_callFrameSize * sizeof(void*))); // NOTE(review): compile() skips its own subPtr under WTF_CPU_SPARC; presumably this save covers the call frame - confirm
    2358                 : #elif WTF_CPU_MIPS
    2359                 :         // Do nothing.
    2360                 : #endif
    2361           24046 :     }
    2362                 : 
    2363           48677 :     void generateReturn() // Emits the per-CPU exit sequence: pops what generateEnter() pushed, in reverse order, then returns.
    2364                 :     {
    2365                 : #if WTF_CPU_X86_64
    2366                 :         pop(X86Registers::ebx);
    2367                 :         pop(X86Registers::ebp);
    2368                 : #elif WTF_CPU_X86
    2369           48677 :         pop(X86Registers::esi);
    2370           48677 :         pop(X86Registers::edi);
    2371           48677 :         pop(X86Registers::ebx);
    2372           48677 :         pop(X86Registers::ebp);
    2373                 : #elif WTF_CPU_ARM
    2374                 : #if WTF_CPU_ARM_TRADITIONAL
    2375                 :         pop(ARMRegisters::r8); // scratch register
    2376                 : #endif
    2377                 :         pop(ARMRegisters::r6);
    2378                 :         pop(ARMRegisters::r5);
    2379                 :         pop(ARMRegisters::r4);
    2380                 : #elif WTF_CPU_SH4
    2381                 :         pop(SH4Registers::r13);
    2382                 :         pop(SH4Registers::r11);
    2383                 : #elif WTF_CPU_SPARC
    2384                 :         ret_and_restore();
    2385                 :         return; // SPARC emits its own combined return above, so the generic ret() below is skipped
    2386                 : #elif WTF_CPU_MIPS
    2387                 :         // Do nothing
    2388                 : #endif
    2389           48677 :         ret(); // shared return instruction for every CPU except SPARC
    2390           48677 :     }
    2391                 : 
    2392                 : public:
    2393           24046 :     YarrGenerator(YarrPattern& pattern) // Stores a reference to 'pattern' (m_pattern is a reference member), so 'pattern' must outlive this object; nothing is emitted here.
    2394                 :         : m_pattern(pattern)
    2395                 :         , m_shouldFallBack(false)
    2396           24046 :         , m_checked(0)
    2397                 :     {
    2398           24046 :     }
    2399                 : 
    2400           24046 :     void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) // Emits and links code for m_pattern into jitObject, or marks jitObject as fall-back.
    2401                 :     {
    2402           24046 :         generateEnter();
    2403                 : 
    2404           24046 :         if (!m_pattern.m_body->m_hasFixedSize)
    2405           19574 :             store32(index, Address(output)); // emits a store of 'index' to the address held in 'output' (offset 0)
    2406                 : 
    2407                 : #if !WTF_CPU_SPARC
    2408           24046 :         if (m_pattern.m_body->m_callFrameSize)
    2409            3030 :             subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister); // reserve m_callFrameSize pointer-sized slots on the stack
    2410                 : #endif
    2411                 : 
    2412                 :         // Compile the pattern to the internal 'YarrOp' representation.
    2413           24046 :         opCompileBody(m_pattern.m_body);
    2414                 : 
    2415                 :         // If we encountered anything we can't handle in the JIT code
    2416                 :         // (e.g. backreferences) then return early.
    2417           24046 :         if (m_shouldFallBack) {
    2418             162 :             jitObject.setFallBack(true);
    2419             162 :             return; // the instructions emitted above are never linked on this path
    2420                 :         }
    2421                 : 
    2422           23884 :         generate();
    2423           23884 :         backtrack();
    2424                 : 
    2425                 :         // Link & finalize the code.
    2426                 :         // XXX yarr-oom
    2427                 :         ExecutablePool *pool; // passed to LinkBuffer as &pool; not used again in this function
    2428                 :         bool ok; // NOTE(review): passed to LinkBuffer as &ok but never read here - confirm how allocation failure is meant to be handled
    2429           47768 :         LinkBuffer linkBuffer(this, globalData->regexAllocator, &pool, &ok, REGEXP_CODE);
    2430           23884 :         m_backtrackingState.linkDataLabels(linkBuffer);
    2431           23884 :         jitObject.set(linkBuffer.finalizeCode());
    2432           23884 :         jitObject.setFallBack(m_shouldFallBack);
    2433                 :     }
    2434                 : 
    2435                 : private:
    2436                 :     YarrPattern& m_pattern;
    2437                 : 
    2438                 :     // Used to detect regular expression constructs that are not currently
    2439                 :     // supported in the JIT; fall back to the interpreter when this is detected.
    2440                 :     bool m_shouldFallBack;
    2441                 : 
    2442                 :     // The regular expression expressed as a linear sequence of operations.
    2443                 :     Vector<YarrOp, 128> m_ops;
    2444                 : 
    2445                 :     // This records the current input offset being applied due to the current
    2446                 :     // set of alternatives we are nested within. E.g. when matching the
    2447                 :     // character 'b' within the regular expression /abc/, we will know that
    2448                 :     // the minimum size for the alternative is 3, checked upon entry to the
    2449                 :     // alternative, and that 'b' is at offset 1 from the start, and as such
    2450                 :     // when matching 'b' we need to apply an offset of -2 to the load.
    2451                 :     //
    2452                 :     // FIXME: This should go away. Rather than tracking this value throughout
    2453                 :     // code generation, we should gather this information up front & store it
    2454                 :     // on the YarrOp structure.
    2455                 :     int m_checked;
    2456                 : 
    2457                 :     // This class records state whilst generating the backtracking path of code.
    2458                 :     BacktrackingState m_backtrackingState;
    2459                 : };
    2460                 : 
    2461           24046 : void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject) // Compiles 'pattern' into 'jitObject' using a temporary YarrGenerator.
    2462                 : {
    2463           24046 :     YarrGenerator(pattern).compile(globalData, jitObject); // the generator is a temporary that lives only for this statement
    2464           24046 : }
    2465                 : 
    2466         2994535 : int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output) // Thin forwarder to YarrCodeBlock::execute().
    2467                 : {
    2468         2994535 :     return jitObject.execute(input, start, length, output); // arguments are passed through in order; the result is returned unchanged
    2469                 : }
    2470                 : 
    2471                 : }}
    2472                 : 
    2473                 : #endif

Generated by: LCOV version 1.7