'2017-05-17 T 08:00:37

'PARSEUTIL.INC

'assumes indexbase 1


uses StringUtil

'parseutil state varables used by getword etc
=============================================
  int sttw '<word position
  int ascb '<word ascii code actual
  int ascw '<word ascii code presented
  int lenw '<word length
  int opw  '<operator token


function instrword(string s,k) as int
=====================================
/**
  to find whole words in a string

  @param s is the string to be searched
  @param k is the keyword to be located in s
  @return the index of the first position of k in s

  if the search fails then the return value is 0 
**/
int  lk=len k
int ,lb,rb
byte b at strptr(s)
if not k then return 0
int i=1
do
  i=instr(i,s,k)
  if i=0
    return 0
  endif
  lb=i-1
  rb=i+lk
  if b[rb]<48
    if i=1
      return 1
    elseif b[lb]<48
      return i
    endif
  endif
  i+=lk
loop
end function


function lookupindex(string s,k) as int
=======================================
/**
  searches dictionary for keywords and
  returns its number

  example:
  i=lookupindex("left 10, mid 20, right 30, ",k)

  if the key is not found then i will be 0

  @param s is the dictionary
  @param k is the keyword to search for
  @return is the code number following the keyword
**/
int a
a=instrword(s,k)
if a then
  return valat a+len(k)+strptr s
end if
end function


function matchbytes(byte*bb,*bk) as int
=======================================
/**
  checks keyword bytes in a string search

  @param *bb is is a byte pointer in the main string
  @param *bk is a byte pointer for the keyword
  @return 1 if the key matches, otherwise 0
**/
int b
do
  b=bb
  select bk
  case 0    : return 1
  case b    : 'match chars
  case else : return 0
  end select
  @bk++
  @bb++
end do
end function


function instrev(int i,string s,k) as int
=========================================
/**
  reverse search for a keyword in a string

  @param i is the start index (base 1)
  @param s is the string to be searched
  @param k is the key string to be located in s
  @return the index of the first match, or 0 if k is not found
**/
indexbase 1
int  a=asc k
int  lk=len k
int  ls=len s
if i<0
  i+=ls+1
endif
if i=0
  i=ls
elseif i<0
  i=1 'return 0
elseif i>ls
  i=ls
end if
'
byte b at strptr s
byte c at strptr k
do
  if i<1 then exit do
  select b[i]
  case a : if matchbytes(b[i],c) then exit do
  end select
  i--
loop
return i
end function


function replace(string t,w,r) as string
========================================
/**
  substitutes each w with r in t

  @param t is the main string where substitutions are made
  @param w is the key string to be replaced
  @param r is the replacement string
  @return the main string with replacements
**/
int a,b,lw,lr
string s=t
'
lw=len(w)
lr=len(r)
a=1
do
  a=instr(a,s,w)
  if a=0 then exit do
  s=left(s,a-1)+r+mid(s,a+lw)
  a+=lr
loop
return s
end function


/**
  DEFAULT ASCII EQUATES USED BY POSWORD ETC
  
  configured for demo language LeanLisp 
**/
%% SymbolTerm     40,41,44
%% SymbolQuote    34,39,96
%% SymbolComment  59
%% SymbolEndLine  13,10,11,12
%% SymbolEndState 41


function skipspace(string s, int*i)
===================================
/**
  skips white space until a SymbolEndState
  or other ascii is encountered
**/
byte b at strptr s
do
  ascb=b[i]
  ascw=ascb
  select ascb
  case 0              : exit do
  case SymbolEndState : ascw=0 : exit do
  case 33 to 255 :    : exit do
  end select
  i++
end do
end function



function skiplspace(string s, int*i)
====================================
/**
  skips white space until a SymbolEndState
  or SymbolEndLine or other ascii is encountered
**/
byte b at strptr s
do
  ascb=b[i]
  ascw=ascb
  select ascb
  case 0              : exit do
  case SymbolEndLine  : exit do
  case SymbolEndState : ascw=0 : exit do
  case 33 to 255      : exit do
  end select
  i++
end do
end function


function startline(string s, int*i)
===================================
/**
  skips to next line
**/
byte b at strptr s
do
  if i<2 then exit do
  select b[i]
  case 0             : exit do
  case SymbolEndLine : i++ : exit do
  end select
  i--
end do
end function


function endline(string s, int*i)
=================================
/**
  skips to end of line
**/
byte b at strptr s
do
  select b[i]
  case 0             : exit do
  case SymbolEndLine : exit do
  end select
  i++
end do
end function



function nextline(string s, int*i)
==================================
/**
  skips to next line
  using fixed ascii codes
**/
byte b at strptr s
do
  select b[i]
  case 0        : exit do 'END OF STRING
  case 10,11,12 : i++ : exit do
  case 13       : i++
    if b[i]=10 then i++
    exit do
  case else : i++
  end select
end do
end function



function GetRestOfLine(string s, int *b) as string
==================================================
/**
  @param s is the text
  @param b is the character index in s
  @return the current line (ending with cr)

  note: the index is adjusted by this function
**/
if b=0 then b=1
int a=instr(b,s,cr)
if a then
  function=mid(s,b,a-b)
  b=a+2
end if
end function


function SameName(string a,b) as int
====================================
/**
  compares the lowercase of two strings
  for case-insesitive matching.
  @param a is the first string
  @param b is the second string
  @return -1 when the string params match
**/
if lcase(a) = lcase(b) then return -1
end function


macro posword(s,i)
==================
/**
  reads the next word in text s,
  and sets state variables
  @param s is a text string
  @param i is the current index

  state variables affected:

  @ascw is the ascii of the first char
  @sttw is the starting position
  @lenw is the length of the word

  note: 'i' will index the char 
  immediately after the word
**/
byte b at strptr s
'
rwordpos:
'
skipspace s,i
sttw=i
select ascb 'FIRST CHAR
case SymbolTerm
  i++ : jmp fwd nwordpos 'normally brackets    (  )
case 0
  jmp fwd nwordpos 'END
case SymbolQuote
  do
    i++
    select b[i]
    case 0    : jmp fwd nwordpos
    case ascw : i++ : jmp fwd nwordpos
    end select
  end do
  jmp fwd nwordpos
case SymbolComment
  endline(s,i) : jmp rwordpos      ' ;   comment ;
case 255
  i++ : opw=b[i] : i++ : jmp fwd nwordpos 'EMBEDDED TOKEN
end select
do 'MOVE TO WORD BOUNDARY
  select b[i]
  case 0 to 32
    exit do
  case SymbolTerm
    exit do
  case SymbolQuote
    exit do
  case SymbolComment
    exit do
  case 255
    exit do ' embedded token marker
  end select
  i++
end do
nwordpos:
lenw=i-sttw
end macro


function stepword(string s,int*i) as int
========================================
/**
  steps over the next word in text s
**/
posword(s,i)
return lenw
end function



function getword(string s, int*i) as string
===========================================
/**
  returns the next word in text s
**/
posword(s,i)
return mid s,sttw,lenw
end function



function maygetword(string s, int*i) as string
==============================================
/**
  returns the next word in text 's' if it
  exists on the current line. Otherwise an
  empty string is returned
**/
skiplspace s,i
if ascb>32 then return mid s,sttw,lenw
end function



function isnumber() as int
==========================
/**
  checks whether the first character of the 
  current word is a number or not.

  '-' signs as well as 0..9 are included

  @return boolean value
**/
select ascb
case 47
case 45 to 57 :return -1
end select
end function



function isalpha() as int
=========================
/**
  checks whether the first character of the 
  current word is an alpha or not.

  both uppercase and lowercase ranges are checked

  @return boolean value
**/
select ascb
case 0x41 to 0x5a : return -1
case 0x61 to 0x7a : return -1
case "_" : return -1
end select
end function



function stepitem(string sr, int*i)
===================================
/**
  to identify the start and end of an
  item which could be a word or expression
  enclosed by brackets (..)

  inner brackets are included: ( (..) (..) )

  @param sr source string
  @param character index

**/
string wr
int bc
stepword(sr,i)
if ascw<>40 then return 'skip the word only
'otherwiwise skip block including any nested blocks
bc++
do 'STEP OVER NESTED BRACKETS
  stepword(sr,i)
  select ascw
  case 0 :
    if ascb = 41 then
      if bc <= 0 then exit do
      bc--
    else
      exit do
    end if
  case 40 : bc++
  case 41 : bc--
  end select
  if bc=0 then exit do
end do
end function


function getitem(string sr, int*i) as string
============================================
/**
  captures an item in the source's string,
  using stepitem

  @param sr is the source string
  @param i is the character index
  @return the item string

  getitem is used to delineate the item.
  Boundary brackets are included if they occur.

**/
int b,c
skipspace(sr,i)
b=i
c=ascw
stepitem(sr,i)
sttw=b
ascw=c
return mid sr, b, i-b
end function


function unquote(string s) as string
====================================
/**
  removes left ant right quote characters
  from a string expression if they occur

  @param s the input string
  @return the string without outer quotes

**/
ascw=asc s
select ascw
case SymbolQuote 
  if asc(s,-1)=ascw then
    ascw=asc s,2
    return mid s, 2, len(s)-2
  end if
end select
return s
end function


function inner(string s) as string
==================================
/**
  trims off spaces left and right, 
  then removes outer chars left and right

  @param s is the inptut string
  @return the inner part of s
**/
int i=1, le
'EFFECTIVE LEFT TRIM
skipspace(s,i)
'EFFECTIVE RIGHT TRIM
le=len(s)
byte b at le+strptr s
le=le-i+1
do
  if le<=0 then exit do
  @b--
  if b>32 then exit do
  le--
end do
'REMOVING OUTER SYMBOLS
le-=2
i++
if le>0 then return mid s,i,le
end function


'https://en.wikipedia.org/wiki/Escape_sequences_in_C
'
function EscSeq(string s) as string
===================================
/**
  converts ascii escape codings into ascii
  characters in a string

  @param s is the input string
  @return the result after character conversion
**/
string t=s
byte   b1 at strptr s
byte   b2 at strptr t
byte   c
int    i, v
do
  c=b1
  select c
  case 0 : exit do
  case 92
    @b1++
    select b1
   'case "\","?","'",34
    case "n" : b2=13 : @b2++ : c=10 'new line
    case "a" : c=7  '<beep
    case "b" : c=8  '<backspace
    case "t" : c=9  '<htab
    case "v" : c=11 '<vtab
    case "f" : c=12 '<formfeed
    case "r" : c=13 '<return
    case "e" : c=27 '<esc
    case "x"        '<hexadecimal asci value
      v=0 : i=0
      do
        i++ : if i>2 then exit do
        @b1++
        if b1<48 then @b1-- : exit do
        c=b1-48
        if c>9  then c-=7  'hex adjust
        if c>15 then c-=32 'hex lowercase
        select c
        case 0 to 15 : v=v*16+c
        case else : @b1-- : exit do
        end select
      end do
      c=v
      case  48 to 57 'octal ascii value
      v=b1-48 : i=1
      do
        i++ : if i>3 then exit do
        @b1++
        c=b1-48
        select b1
        case 48 to 55 : v=v*8+c
        case else     : @b1-- : exit do
        end select
      end do
      c=v
    end select
  end select
  b2=c
  @b1++
  @b2++
end do
return left t, @b2-strptr t
end function
'
'print escseq "Escape Sequences\n1\n2\n3\n\101\t\061\t\x4d\n"


function BlockData(string*s, int*i) as string
=============================================
/**
  returns data from inside a pair of brackets

  @param s is the source string
  @param i is the char index for s
  @return the string data between outer brackets
**/
skipspace(s,i)
byte b at i-1+strptr(s)
int lb,rb,d,e,k
def setb k=i+1 : d++
do
  select b
  case 0 : e=i : exit do
  case 1 to 31 : if not lb then e=i : exit do
  case "("  : if not lb then lb=40  : rb=41  : setb
  case "<"  : if not lb then lb=60  : rb=62  : setb
  case "["  : if not lb then lb=91  : rb=93  : setb
  case "{"  : if not lb then lb=123 : rb=125 : setb
  case lb   : d++ 'nesting
  case rb   : d-- : if d<=0 then e=i : i++ : exit do
  case else : if not k then k=i : lb=-1
  end select
  @b++
  i++
end do
skipspace(s,i)
if k then return mid(s,k,e-k)
end function


function ExtractData(string s,w, int i=1) as string
===================================================
/**
  locates a key symbol then returns its
  corresponding data contained between brackets

  @param s is the source string
  @param w is the key symbol/word
  @param i is the char index within string s
  @return the data string excluding outer brackets
**/
'format: $keyword lbracket data rbracket
i=instr(i,s,w)
if i then return BlockData(s,i+len(w))
end function



macro split(s, d, max, n,  i,w)
===============================
/**
  @param s is the data string to be split
  @param d is the array for the split data
  @param max is the max number of elements in d
  @param n is the count of elements split
**/
scope
  indexbase 1
  int i = 1
  string w
  do
    w=GetWord s,i
    if ascb=0 then exit do
    if n>=max then exit do
    n++
    d[n]=unquote(w)
  end do
end scope
end macro


macro join(d,e,b,g,t,  i,lw,p,w)
================================
/**
  joins an array of data to make a
  string of data delimited by commas

 the data may be enclosed by quotes

  param d is the array of data
  @param e is the count of array elements to join
  @param b is is left side element marker
  @param  g is the right side element marker + delimiter
  @param t is the  string buffer for joined data

  normally, b is a quote char and g is the
  closing quote and a comment char

  if qoutes are not required then b is left
  empty and g is a comma char only.
**/
scope
  indexbase 1
  int    i,lw
  int    p=1
  string w
  t=""
  for i=1 to e
    w=b+d[i]+g 'autoconvert d[i]
    lw=len(w)
    if lw+p>=len t then t+=nuls 16000+lw 'stretch buffer
    mid t,p,w
    p+=lw
  next
  if len(g) then p--
  t=left t,p-1
end scope
end macro



class StrBuffer
===============

  /**
   used for efficiently
   accumulating string data
  **/
  '
  bstring prbuf '<stretchable buffer
  int prpos     '<position for next string
  int prslen    '<length of incoming string
  int prblen    '<length of buffer
  '
  method in(string s)
  ===================
  /**
    appends string s into the buffer
    @param s is the string to be added
  **/
  prslen=len(s)
  if prpos+prslen>prblen then
    prbuf+=nuls 0x4000 + prslen*2 : prblen=len(prbuf)
  end if
  mid prbuf,prpos+1,s
  prpos+=prslen
  end method
  '
  method empty()
  ==============
  /**
    clears the buffer
  **/
  prblen=0 : prpos=0 : del prbuf
  end method
  '
  method out() as string
  ======================
  /**
    out returns the buffer contents and empties the buffer
    @return the accumulated buffer contents
  **/
  method=left prbuf,prpos
  empty()
  end method
  '
  end class




