• Welcome to Jose's Read Only Forum 2023.
 

Fast INSTR Replacement by Hutch

Started by Theo Gottwald, April 24, 2018, 06:12:04 PM

Previous topic - Next topic

0 Members and 1 Guest are viewing this topic.

Theo Gottwald

ASM-INSTR revisited.



'##################################################################################################
' This small modification allows for negative startpos.
'##################################################################################################
' P1 - LONG Variable
' P2 Label to Jump to
' Jump on Positive or zero (Not Minus)
MACRO JOP_LNG(P1,P2)
! BT P1,31
! JNC P2
END MACRO

'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
' INSTR replacement, very fast when doing repeated search for long strings
' in long strings. Original code by Steve Hutchesson.
' Adjusted to work as direct replacement for INSTR by Borje Hagsten.
'
' startpos can range from 1 to LEN(MainStr) - LEN(SearchStr).
' Returned position is 1-based, just like INSTR. Return is zero if not found.
'¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
FUNCTION asmINSTR(BYVAL startpos AS LONG, m AS STRING, s AS STRING) AS LONG
  #REGISTER NONE
  LOCAL eLen     AS LONG, cval     AS LONG
  LOCAL lpSource AS LONG, lnSource AS LONG, lpSearch AS LONG, lnSearch AS LONG
  LOCAL shift_table AS STRING * 1024
  ' Testen ob startpos negativ
  JOP_LNG(startpos,Lab_Posi)
  startpos=LEN(m)-startpos
  Lab_Posi:
  ! mov esi, s              ; store Search string ptr and len
  ! mov esi, [esi]
  ! cmp esi, 0              ; if edi is zero - no search string
  ! je Cleanup2             ; then get out
  ! mov lpSearch, esi
  ! mov esi, [esi-4]
  ! mov lnSearch, esi

  ! cmp lnSearch, 3         ; check search len
  ! jb ShortWordScan        ; if shorter than 3, use INSTR instead

  ! mov esi, m              ; store Main string ptr and len
  ! mov esi, [esi]
  ! cmp esi, 0              ; if esi is zero - no search string
  ! je Cleanup2             ; then get out
  ! mov lpSource, esi
  ! mov esi, [esi-4]
  ! mov lnSource, esi

  ! cmp startpos, 0         ; check startpos
  ! je OKsize2              ; if zero, ok
  ! dec startpos            ; else assume > 0 - decrease, since bmINSTR is 0-based

  OKsize2:
    ! mov esi, lpSource
    ! add esi, lnSource
    ! sub esi, lnSearch
    ! mov eLen, esi            ; set Exit Length

    ' ----------------------------------------
    ' load shift table with value in lnSearch
    ' ----------------------------------------
    ! mov ecx, 256
    ! mov eax, lnSearch
    ! lea edi, shift_table
    ! rep stosd

    ' ----------------------------------------------
    ' load decending count values into shift table
    ' ----------------------------------------------
    ! mov ecx, lnSearch           ; SubString length in ECX
    ! dec ecx                     ; correct for zero based index
    ! mov esi, lpSearch           ; address of SubString in ESI
    ! lea edi, shift_table
    ! xor eax, eax

  Write_Shift_Chars2:
    ! mov al, [esi]               ; get the character
    ! inc esi                     ; next one
    ! mov [edi+eax*4], ecx        ; write shift for each character
    ! dec ecx                     ; to ascii location in table
    ! jnz Write_Shift_Chars2

    ' -----------------------------
    ' set up for main compare loop
    ' -----------------------------
    ! mov ecx, lnSearch
    ! dec ecx
    ! mov cval, ecx

    ! mov esi, lpSource
    ! mov edi, lpSearch
    ! add esi, startpos           ; add starting position

    ! mov edx, lnSearch           ; pattern length in edx
    ! jmp Cmp_Loop2

  '*********************** Loop Code ***************************
  Set_Suffix_Shift2:
    ! add eax, ecx                ; add CMP count
    ! sub eax, cval               ; sub loop count
    ! cmp eax, 0                  ; test eax for zero
    ! jg  Add_Suffix_Shift2
    ! mov eax, 1                  ; minimum shift is 1

  Add_Suffix_Shift2:
    ! add esi, eax                ; add suffix shift

  Pre_Cmp2:
    ! cmp esi, eLen               ; test exit length
    ! ja  No_Match2
    ! xor eax, eax                ; reset EAX
    ! mov ecx, cval               ; reset counter in compare loop

  Cmp_Loop2:
    ! mov al, [esi+ecx]
    ! cmp al, [edi+ecx]           ; cmp characters in ESI / EDI
    ! jne Get_Shift2              ; if not equal, get next shift
    ! dec ecx
    ! jns Cmp_Loop2

    ! jmp Match2

  Get_Shift2:
    ! mov eax, shift_table[eax*4] ; get char shift value
    ! cmp eax, edx                ; is eax pattern length ?
    ! jne Set_Suffix_Shift2       ; if not, jump to Calc_Suffix_Shift
    ! lea esi, [esi+ecx+1]        ; add bad char shift
    ! jmp Pre_Cmp2
    ' ***************************************************************

  Match2:
    ! sub esi, lpSource           ; sub source from ESI
    ! mov eax, esi                ; put length in eax
    ! inc eax                     ; adjust for 1-based return
    ! jmp Cleanup2                ; exit

  No_Match2:
     ! mov eax, 0                  ; set value for no match

  Cleanup2:
    ! mov FUNCTION, eax           ; return 1-based position
    EXIT FUNCTION

ShortWordScan:                     ' if search len is < 3
  FUNCTION = INSTR(startpos, m, s)

END FUNCTION