Mercurial > repos > rhope
view string.rhope @ 100:f51c4c17457c
Broken port of parser to compiler
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Mon, 09 Aug 2010 02:03:57 -0400 |
parents | e09c2d1d6d5b |
children | 2d2da148d844 43cc42df26cc |
line wrap: on
line source
// Consumes the continuation bytes of a multi-byte UTF-8 sequence while counting
// code points.
//   num      - number of continuation bytes still expected
//   arr      - byte array being scanned
//   index    - index of the byte to examine
//   count    - code points counted before this sequence started
//   consumed - bytes of this sequence consumed so far (lead byte included)
//   out      - total code point count for the whole array
// A malformed sequence is counted as one character per byte consumed.
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	// Only reached on the paths that set ncount: keep scanning with the next byte
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

// Counts the UTF-8 code points in arr starting at index, adding them to count.
// The lead byte selects how many continuation bytes UTF8 Expect must consume:
// below 128 is ASCII, 192-223 expects 1, 224-239 expects 2, 240-244 expects 3.
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	If[[128u8]>[byte]]
	{
		ncount <- [1i32]+[count]
	}{
		If[[192u8]>[byte]]
		{
			//Error: Encoding for 2nd,3rd or 4th byte of sequence
			//treat as a single character
			ncount <- [1i32]+[count]
		}{
			If[[224u8]>[byte]]
			{
				out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
			}{
				If[[240u8]>[byte]]
				{
					out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
				}{
					If[[245u8]>[byte]]
					{
						out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
					}{
						//Error: Out of range of Unicode standard
						//treat as a single character
						ncount <- [1i32]+[count]
					}
				}
			}
		}
	}
	[arr]Next[index]
	{
		out <- Count UTF8[arr, ~, ncount]
	}{
		out <- Val[ncount]
	}
}

// Flat string: a byte buffer plus its length in code points.
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

// Builds a String from a byte array; Length is the UTF-8 code point count
// (0 when the array has no first element).
String@Array[in:out(String)]
{
	[in]First
	{
		len <- Count UTF8[in, ~, 0i32]
	}{
		len <- 0i32
	}
	out <- [[Build[String()]]Buffer <<[in]]Length <<[len]
}

// Writes the string's bytes to descriptor 1, then a single byte 10 (newline).
Print@String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	write[1i32, [string]Buffer >>, Int64[[[string]Buffer >>]Length >>]]
	{
		out <- write[1i32, [Array[]]Append[10u8], 1i64]
	}
}

// Reads one byte from descriptor 0 into a one-element array and wraps the
// second output of read in a String.
// NOTE(review): assumes read's second output is the filled buffer - confirm.
Get Char[:out]
{
	read[0, [Array[]]Set[0, 0u8], 1i64]
	{}
	{
		out <- String[~]
	}
}

// Decimal digit accumulator shared by the integer conversions.
//   current - value accumulated so far
//   index   - index of the next byte to examine in array
//   ten     - the constant 10 in the target integer type
//   conv    - worker converting a UInt8 digit value to the target type
// Stops at the first byte outside '0'-'9' (48-57) or at the end of the array.
_String to Int[current,index,array,ten,conv:out]
{
	char <- [array]Index[index]
	{
		If[[char]<[48u8]]
		{
			out <- Val[current]
		}{
			If[[char]>[57u8]]
			{
				out <- Val[current]
			}{
				out <- _String to Int[[[current]*[ten]]+[[conv]Call[[char]-[48u8]]], [index]+[1], array, ten, conv]
			}
		}
	}{
		out <- Val[current]
	}
}

// Converts a UInt8 to Int8 by widening to Int16 and truncating.
U8toI8[val:out]
{
	out <- Trunc Int8[Int16[val]]
}

// Signed conversions: a leading '-' (45) negates the parsed digits; a string
// with no first byte yields 0.
Int8@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			out <- [0i8]-[_String to Int[0i8, 1, buf, 10i8, U8toI8[?]]]
		}{
			out <- _String to Int[0i8, 0, buf, 10i8, U8toI8[?]]
		}
	}{
		out <- 0i8
	}
}

Int16@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			out <- [0i16]-[_String to Int[0i16, 1, buf, 10i16, Int16[?]]]
		}{
			out <- _String to Int[0i16, 0, buf, 10i16, Int16[?]]
		}
	}{
		out <- 0i16
	}
}

Int32@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			out <- [0i32]-[_String to Int[0i32, 1, buf, 10i32, Int32[?]]]
		}{
			out <- _String to Int[0i32, 0, buf, 10i32, Int32[?]]
		}
	}{
		out <- 0i32
	}
}

// Hexadecimal accumulator: folds bytes of str from idx into cur until a byte
// that is not a hex digit, or the end of the string.
// adjust is the value subtracted from the byte: 48 for '0'-'9',
// 55 for 'A'-'F' ('A' - 10) and 87 for 'a'-'f' ('a' - 10).
// NOTE(review): Between appears to use exclusive bounds here - confirm.
_Hex Int32[str,cur,idx:out]
{
	char <- [str]Byte[idx]
	{
		//0-9
		[char]Between[47u8, 58u8]
		{
			adjust <- 48u8
		}{
			//A-F
			[char]Between[64u8, 71u8]
			{
				adjust <- 55u8
			}{
				//a-f
				[char]Between[96u8, 103u8]
				{
					adjust <- 87u8
				}{
					out <- cur
				}
			}
		}
		Val[adjust]
		{
			out <- _Hex Int32[str, [[cur]*[16i32]]+[Int32[[char]-[adjust]]], [idx]+[1]]
		}
	}{
		out <- cur
	}
}

// Parses the leading hexadecimal digits of str as an Int32.
Hex Int32[str:out]
{
	out <- _Hex Int32[str, 0i32, 0]
}

Int64@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			out <- [0i64]-[_String to Int[0i64, 1, buf, 10i64, Int64[?]]]
		}{
			out <- _String to Int[0i64, 0, buf, 10i64, Int64[?]]
		}
	}{
		out <- 0i64
	}
}

// Unsigned conversions: no sign handling, digits only.
UInt8@String[string:out]
{
	out <- _String to Int[0u8, 0, [string]Buffer >>, 10u8, Val[?]]
}

UInt16@String[string:out]
{
	out <- _String to Int[0u16, 0, [string]Buffer >>, 10u16, UInt16[?]]
}

UInt32@String[string:out]
{
	out <- _String to Int[0u32, 0, [string]Buffer >>, 10u32, UInt32[?]]
}

UInt64@String[string:out]
{
	out <- _String to Int[0u64, 0, [string]Buffer >>, 10u64, UInt64[?]]
}

// Integer conversions for String Slice and String Cat flatten to a plain
// String first and then reuse the String conversion.
Int8@String Slice[string:out]
{
	out <- Int8[[string]Flatten]
}

Int8@String Cat[string:out]
{
	out <- Int8[[string]Flatten]
}

Int16@String Slice[string:out]
{
	out <- Int16[[string]Flatten]
}

Int16@String Cat[string:out]
{
	out <- Int16[[string]Flatten]
}

Int32@String Slice[string:out]
{
	out <- Int32[[string]Flatten]
}

Int32@String Cat[string:out]
{
	out <- Int32[[string]Flatten]
}

Int64@String Slice[string:out]
{
	out <- Int64[[string]Flatten]
}

Int64@String Cat[string:out]
{
	out <- Int64[[string]Flatten]
}

UInt8@String Slice[string:out]
{
	out <- UInt8[[string]Flatten]
}

UInt8@String Cat[string:out]
{
	out <- UInt8[[string]Flatten]
}

UInt16@String Slice[string:out]
{
	out <- UInt16[[string]Flatten]
}

UInt16@String Cat[string:out]
{
	out <- UInt16[[string]Flatten]
}

UInt32@String Slice[string:out]
{
	out <- UInt32[[string]Flatten]
}

UInt32@String Cat[string:out]
{
	out <- UInt32[[string]Flatten]
}

UInt64@String Slice[string:out]
{
	out <- UInt64[[string]Flatten]
}

UInt64@String Cat[string:out]
{
	out <- UInt64[[string]Flatten]
}

//TODO: Implement me once Real64 support is added
// Placeholder: currently returns its input unchanged.
Real64[string:out]
{
	out <- string
}

// A plain String is already flat. (This file used to define this worker a
// second time further down with an identical body; only this copy is kept.)
Flatten@String[string:out]
{
	out <- string
}

// Helper for CPOff to BOff: walks the continuation bytes of one multi-byte
// sequence.
//   buff     - byte array
//   cur      - code point index of the sequence's lead byte
//   boff     - byte offset of the next byte to examine
//   cpoff    - target code point offset
//   expected - continuation bytes still expected
//   used     - bytes of this sequence consumed so far (lead byte included)
_CPOff to BOff[buff,cur,boff,cpoff,expected,used:out]
{
	If[expected]
	{
		byte <- [buff]Index[boff]
		// Continuation bytes are 128-191; anything else is an error.
		// (191, not 192, so this agrees with UTF8 Expect.)
		err <- If[[byte]>[191u8]] {}
		{
			err <- If[[byte]<[128u8]] {}
			{
				out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, [expected]-[1i32], [used]+[1i32]]
			}
		}
		Val[err]
		{
			// Malformed sequence: each consumed byte counts as its own character
			ncur <- [cur]+[used]
			If[[ncur]>[cpoff]]
			{
				// Target lies inside the consumed bytes: back up by the overshoot
				out <- [boff]-[[ncur]-[cpoff]]
			}{
				out <- CPOff to BOff[buff,ncur,boff,cpoff]
			}
		}
	}{
		// Sequence complete: it counts as a single code point
		out <- CPOff to BOff[buff,[cur]+[1i32],boff,cpoff]
	}
}

// Translates a code point offset into a byte offset.
//   buff  - byte array holding UTF-8 data
//   cur   - code point index corresponding to boff
//   boff  - byte offset currently being examined
//   cpoff - target code point offset
//   out   - byte offset at which code point cpoff starts
CPOff to BOff[buff,cur,boff,cpoff:out]
{
	If[[cur]=[cpoff]]
	{
		out <- boff
	}{
		byte <- [buff]Index[boff]
		If[[byte] < [128u8]]
		{
			nboff <- [boff]+[1i32]
			ncur <- [cur]+[1i32]
		}{
			If[[byte]<[192u8]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				nboff <- [boff]+[1i32]
				ncur <- [cur]+[1i32]
			}{
				If[[byte]<[224u8]]
				{
					expect <- 1i32
				}{
					If[[byte]<[240u8]]
					{
						expect <- 2i32
					}{
						If[[byte]<[245u8]]
						{
							expect <- 3i32
						}{
							//Error
							nboff <- [boff]+[1i32]
							ncur <- [cur]+[1i32]
						}
					}
				}
				Val[expect]
				{
					out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, expect, 1i32] {}
				}
			}
		}
		// Single-byte step. Arguments follow the signature order
		// (buff, cur, boff, cpoff); boff and cpoff used to be swapped here.
		out <- CPOff to BOff[buff, ncur, nboff, cpoff]
	}
}

// Splits a String at code point offset slicepoint.
// At or past the end: left is the whole string, right is "".
// At or before 0: left is "", right is the whole string.
// Otherwise both halves are String Slice views over the same String.
Slice@String[string,slicepoint:left,right]
{
	If[[slicepoint]>=[[string]Length]]
	{
		left <- string
		right <- ""
	}{
		If[[slicepoint]<=[0]]
		{
			left <- ""
			right <- string
		}{
			sliceoffset <- CPOff to BOff[[string]Buffer >>, 0i32, 0i32, slicepoint]
			left <- String Slice[string, 0i32, slicepoint, sliceoffset]
			right <- String Slice[string, sliceoffset, [[string]Length >>]-[slicepoint], [[[string]Buffer >>]Length]-[sliceoffset]]
		}
	}
}

// Fetches the byte at index; invalid carries the buffer's Index failure output.
Byte@String[string,index:out,invalid]
{
	out,invalid <- [[string]Buffer >>]Index[index]
}

// Length in code points.
Length@String[string:out]
{
	out <- [string]Length >>
}

// Byte-wise comparison helper for Eq String, starting at index.
// Yields Yes once left has no byte at index. If right has no byte where left
// does, diagnostics are printed and the result is No.
_=String[left,right,index:out]
{
	[left]Byte[index]
	{
		rbyte <- [right]Byte[index] {}
		{
			Print[["Could not fetch byte from right string at offset:"]Append[String[index]]]
			{ Print[["Right string has type ID: "]Append[ String[ID[Blueprint Of[right]]] ]]
			{ Print[[right]Byte Length]
			{ Print[[right]Length]
			{ Print[["Left string has type ID: "]Append[ String[ID[Blueprint Of[left]]] ]]
			{ Print[[left]Byte Length]
			{ Print[[left]Length]
			}}}}}}
			out <- No
		}
		,out <- If[[~]=[rbyte]]
		{
			out <- _=String[left,right,[index]+[1]]
		}
	}{
		out <- Yes
	}
}

// Equality for any of the string types: the code point lengths must match
// before the bytes are compared.
Eq String[left,right:out]
{
	,out <- If[[[left]Length] = [[right]Length]]
	{
		out <- _=String[left,right,0]
	}
}

=@String[left,right:out]
{
	out <- Eq String[left,right]
}

// Length in bytes.
Byte Length@String[string:out]
{
	out <- [[string]Buffer >>]Length
}

// Appending builds a String Cat node rather than copying bytes.
Append@String[left,right:out]
{
	out <- String Cat[left,right]
}

// View onto part of another string.
//   Source  - string the bytes live in
//   Offset  - byte offset of the view inside Source
//   Length  - length of the view in code points
//   ByteLen - length of the view in bytes
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)
	Length(Int32,Naked)
	ByteLen(Int32,Naked)
}

String Slice[source,offset,length,bytelen:out(String Slice)]
{
	out <- [[[[Build[String Slice()]]Source <<[source]]Offset <<[offset]]Length <<[length]]ByteLen <<[bytelen]
}

// Fetches byte index of the view; indexes at or beyond ByteLen are invalid.
Byte@String Slice[string,index:out,invalid]
{
	,invalid <- If[[index]<[[string]ByteLen >>]]
	{
		out,invalid <- [[string]Source >>]Byte[[index]+[[string]Offset >>]]
	}
}

Byte Length@String Slice[string:out]
{
	out <- [string]ByteLen >>
}

Length@String Slice[string:out]
{
	out <- [string]Length >>
}

=@String Slice[left,right:out]
{
	out <- Eq String[left,right]
}

// Appends count bytes of string, starting at byte offset, to the array dest.
_Flatten@String[string,dest,offset,count:out]
{
	If[count]
	{
		out <- [string]_Flatten[[dest]Append[ [[string]Buffer >>]Index[offset] ], [offset]+[1i32], [count]-[1i32]]
	}{
		out <- dest
	}
}

// Delegates to the source string, shifting offset by the view's own Offset.
_Flatten@String Slice[string,dest,offset,count:out]
{
	out <- [[string]Source >>]_Flatten[dest, [[string]Offset >>]+[offset], count]
}

// Copies the viewed bytes into a fresh String.
Flatten@String Slice[string:out]
{
	out <- [[Build[String()]]Buffer <<[ [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>] ] ]Length <<[[string]Length >>]
}

Print@String Slice[string:out]
{
	out <- Print[Flatten[string]]
}

Append@String Slice[left,right:out]
{
	out <- String Cat[left,right]
}

// Splits a view at code point offset slicepoint; both halves are views onto
// the same Source.
Slice@String Slice[string,slicepoint:left,right]
{
	If[[slicepoint]>=[[string]Length]]
	{
		left <- string
		right <- ""
	}{
		If[[slicepoint]<=[0]]
		{
			left <- ""
			right <- string
		}{
			sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, slicepoint]
			left <- String Slice[[string]Source >>, [string]Offset >>, slicepoint, [sliceoffset]-[[string]Offset >>]]
			right <- String Slice[[string]Source >>, sliceoffset, [[string]Length >>]-[slicepoint], [[string]Byte Length]-[[sliceoffset]-[[string]Offset >>]]]
		}
	}
}

// Concatenation node: two strings plus their combined code point and byte
// lengths.
Blueprint String Cat
{
	Left
	Right
	Length
	ByteLen
}

String Cat[left,right:out]
{
	out <- [[[[Build[String Cat()]
		]Left <<[left]
		]Right <<[right]
		]Length <<[ [[left]Length]+[[right]Length] ]
		]ByteLen <<[ [[left]Byte Length]+[[right]Byte Length] ]
}

Append@String Cat[left,right:out]
{
	out <- String Cat[left,right]
}

// Fetches byte index from Left when it falls inside Left, otherwise from
// Right at the index rebased by Left's byte length.
Byte@String Cat[string,index:out,invalid]
{
	leftlen <- [[string]Left >>]Byte Length
	If[[index]<[leftlen]]
	{
		out,invalid <- [[string]Left >>]Byte[index]
	}{
		rindex <- [index]-[leftlen]
		,invalid <- If[[rindex]<[[[string]Right >>]Byte Length]]
		{
			out,invalid <- [[string]Right >>]Byte[rindex]
		}
	}
}

Byte Length@String Cat[string:out]
{
	out <- [string]ByteLen >>
}

Length@String Cat[string:out]
{
	out <- [string]Length >>
}

// Appends count bytes of the concatenation, starting at byte offset, to dest.
_Flatten@String Cat[string,dest,offset,count:out]
{
	left <- [string]Left >>
	If[[offset] < [[left]Byte Length]]
	{
		// Only the bytes of Left from offset onwards are available; without
		// subtracting offset a non-zero offset could read past the end of Left.
		lcount <- Min[[[left]Byte Length]-[offset], count]
		ldest <- [left]_Flatten[dest, offset, lcount]
		rcount <- [count]-[lcount]
	}{
		ldest <- Val[dest]
		rcount <- count
	}
	If[[[offset]+[count]]>[[left]Byte Length]]
	{
		right <- [string]Right >>
		roffset <- Max[0, [offset]-[[left]Byte Length]]
		out <- [right]_Flatten[ldest, roffset, Min[[right]Byte Length, rcount]]
	}{
		out <- Val[ldest]
	}
}

// Copies Left then Right into one fresh byte array and wraps it in a String.
Flatten@String Cat[string:out]
{
	out <- [[Build[String()]
		]Buffer << [
			[[string]Right >>]_Flatten[
				[[string]Left >>]_Flatten[Array[], 0i32, [[string]Left >>]Byte Length],
				0i32, [[string]Right >>]Byte Length]]
		]Length << [[string]Length >>]
}

Print@String Cat[string:out]
{
	out <- Print[Flatten[string]]
}

// Splits a concatenation at code point offset slicepoint, slicing whichever
// operand contains the split point and re-wrapping the remainder.
Slice@String Cat[string,slicepoint:left,right]
{
	llen <- [[string]Left >>]Length
	If[[slicepoint]=[llen]]
	{
		left <- [string]Left >>
		right <- [string]Right >>
	}{
		If[[slicepoint]<[llen]]
		{
			left,lright <- [[string]Left >>]Slice[slicepoint]
			right <- String Cat[lright,[string]Right >>]
		}{
			rleft,right <- [[string]Right >>]Slice[ [slicepoint]-[llen] ]
			left <- String Cat[[string]Left >>, rleft]
		}
	}
}

=@String Cat[left,right:out]
{
	out <- Eq String[left,right]
}

// Tests whether string begins with one of delims, trying them in order from
// index.
//   outindex - index of the delimiter that matched
//   after    - the rest of string following the matched delimiter
//   nomatch  - fires when no delimiter from index onwards matches
=Delim[string,delims,index:outindex,after,nomatch]
{
	delim <- [delims]Index[index]
	If[[[string]Length]<[[delim]Length]]
	{
		try next <- Yes
	}{
		check,mafter <- [string]Slice[[delim]Length]
		,try next <- If[[check]=[delim]]
		{
			outindex <- index
			after <- Val[mafter]
		}
	}
	Val[try next]
	{
		,nomatch <- [delims]Next[index]
		{
			outindex,after,nomatch <- =Delim[string,delims,~]
		}
	}
}

// Scans string one code point at a time for the first position at which any
// of delims matches.
//   matched   - the delimiter that matched
//   after     - the text following it
//   not found - fires when string is exhausted without a match
_Partition[string,delims:matched,after,not found]
{
	not found <- If[[string]=[""]] {}
	{
		,after <- =Delim[string, delims, [delims]First]
		{
			matched <- [delims]Index[~]
		} {} {
			[string]Slice[1] {}
			{
				matched,after,not found <- _Partition[~,delims]
			}
		}
	}
}
// Splits string around the first delimiter found in it.
//   odelims   - a single string (String, String Slice or String Cat) or a
//               collection of delimiter strings
//   before    - text preceding the matched delimiter
//   matched   - the delimiter that matched
//   after     - text following the matched delimiter
//   not found - fires when no delimiter occurs in string
Partition[string,odelims:before,matched,after,not found]
{
	delim type <- Blueprint Of[odelims]
	// A lone string delimiter is wrapped in a one-element list
	If[ [[[delim type]=[String()]] Or [[delim type]=[String Slice()]]] Or [[delim type]=[String Cat()]] ]
	{
		delim list <- [List[]]Append[odelims]
	}{
		delim list <- Val[odelims]
	}
	matched,after,not found <- _Partition[string,delim list]
	{
		matched len <- Length[~]
	}{
		after len <- Length[~]
	}
	// Everything that is neither the delimiter nor the tail is the prefix
	tail len <- [matched len]+[after len]
	before <- [string]Slice[ [[string]Length]-[tail len] ]
}

// All three string types share the dictionary type ID of String.
Dict Type ID@String[string:out]
{
	out <- ID[String()]
}

Dict Type ID@String Cat[string:out]
{
	out <- ID[String()]
}

Dict Type ID@String Slice[string:out]
{
	out <- ID[String()]
}

// Dictionary key bits: the byte at index widened to UInt32; invalid when the
// string has no byte at that index.
Dict Bits@String[string,index:out,invalid]
{
	,invalid <- [string]Byte[index]
	{
		out <- UInt32[~]
	}
}

Dict Bits@String Cat[string,index:out,invalid]
{
	,invalid <- [string]Byte[index]
	{
		out <- UInt32[~]
	}
}

Dict Bits@String Slice[string,index:out,invalid]
{
	,invalid <- [string]Byte[index]
	{
		out <- UInt32[~]
	}
}

// Fold step for From Dict Key: appends one key element, truncated to a byte.
_From Dict String[arr,el:out]
{
	out <- [arr]Append[Trunc UInt8[el]]
}

// Rebuilds a String from dictionary key data by folding it into a byte array.
From Dict Key@String[string,data:out]
{
	bytes <- Fold[_From Dict String[?], Array[], data]
	out <- String[bytes]
}

// Converting a string to a string is the identity for all three types.
String@String[string:out]
{
	out <- string
}

String@String Cat[string:out]
{
	out <- string
}

String@String Slice[string:out]
{
	out <- string
}

// Replaces every occurrence of toreplace in string.
// When with is a string it is used for every match; otherwise the replacement
// is the element of with at the position the matched delimiter has in
// toreplace.
Replace[string,toreplace,with:out]
{
	,hit,tail <-[string]Partition[toreplace]
	{
		with type <- Blueprint Of[with]
		If[ [[[with type]=[String()]] Or [[with type]=[String Slice()]]] Or [[with type]=[String Cat()]] ]
		{
			substitute <- with
		}{
			substitute <- [with]Index[[toreplace]Find[=[hit,?]]]
		}
		// ~ is the text before the match; the remainder is handled recursively
		out <- [[~]Append[substitute]]Append[Replace[tail,toreplace,with]]
	} {} {} {
		out <- string
	}
}

// Join helper: appends delim and the next element for as long as list has a
// next index.
_Join[list,delim,current,index:out]
{
	[list]Next[index]
	{
		piece <- String[[list]Index[~]]
		joined <- [[current]Append[delim]]Append[piece]
		out <- _Join[list, delim, joined, ~]
	}{
		out <- current
	}
}

// Concatenates the String form of every element of list, separated by delim.
// A list with no first element yields "".
Join[list,delim:out]
{
	[list]First
	{
		head <- String[[list]Index[~]]
		out <- _Join[list, delim, head, ~]
	}{
		out <- ""
	}
}

// Yes when the leading code points of thing equal starts with.
Starts With[thing,starts with:out]
{
	head <- [thing]Slice[[starts with]Length]
	out <- [head] = [starts with]
}

// Yes when the trailing code points of thing equal ends with.
Ends With[thing,ends with:out]
{
	cut <- [[thing]Length] - [[ends with]Length]
	,suffix <- [thing]Slice[cut]
	out <- [suffix] = [ends with]
}

// A string is tested by testing its code point length with If.
If@String[str:yes,no]
{
	len <- [str]Length
	yes,no <- If[len]
}

If@String Cat[str:yes,no]
{
	len <- [str]Length
	yes,no <- If[len]
}

If@String Slice[str:yes,no]
{
	len <- [str]Length
	yes,no <- If[len]
}

// Split helper: appends each piece before a delimiter to list, then the final
// remainder once no delimiter is left.
_Split[list,string,delim:out]
{
	,,remainder <- [string]Partition[delim]
	{
		out <- _Split[[list]Append[~], remainder, delim]
	} {} {} {
		out <- [list]Append[string]
	}
}

// Splits string on delim into a list of pieces; when string tests false
// the result is an empty list.
Split[string,delim:out]
{
	If[string]
	{
		out <- _Split[(),string,delim]
	}{
		out <- ()
	}
}

// Yes when needle occurs anywhere in haystack. Checks the prefix at the
// current position, then retries with the first code point removed.
In[needle,haystack:out]
{
	If[haystack]
	{
		prefix <- [haystack]Slice[Length[needle]]
		out <- If[[prefix]=[needle]] {}
		{
			[haystack]Slice[1] {}
			{
				out <- [needle]In[~]
			}
		}
	}{
		out <- No
	}
}

// Strips leading code points that occur in trim.
Left Trim[string,trim:trimmed]
{
	If[ [[string]Length] > [0] ]
	{
		head,tail <- [string]Slice[1]
		If[ [head]In[trim] ]
		{
			trimmed <- Left Trim[tail, trim]
		}{
			trimmed <- string
		}
	}{
		trimmed <- string
	}
}

// Strips trailing code points that occur in trim.
Right Trim[string,trim:trimmed]
{
	If[ [[string]Length] > [0] ]
	{
		front,tail char <- [string]Slice[ [[string]Length] - [1]]
		If[ [tail char]In[trim] ]
		{
			trimmed <- Right Trim[front, trim]
		}{
			trimmed <- string
		}
	}{
		trimmed <- string
	}
}

// Strips code points that occur in trim from both ends: left first, then right.
Trim[string,trim:trimmed]
{
	trimmed <- Right Trim[Left Trim[string, trim], trim]
}

// Yes when Partition finds needle in haystack, No when it reports not found.
Contains[haystack,needle:out]
{
	[haystack]Partition[needle]
	{
		out <- Yes
	} {} {} {
		out <- No
	}
}