# HG changeset patch
# User Mike Pavone
# Date 1293069627 0
# Node ID ed70399a07aaa7a5a7cadd6e4162cdcf226c8058
# Parent  d59611dcec7178372a39aa2c5fd30768fb178c16
Add Substring method to string and improve performance of Partition

Summary of the change:
- Substring[string,start,length] is added for String, String Slice and
  String Cat. A length <= 0 is taken relative to the end of the string,
  so Substring[n, 0] returns everything from index n onwards.
- _Partition now dispatches on the delimiter. String and String Slice
  delimiters search the haystack with _internal_memcmp_offset via
  _Find Flat; Pattern delimiters keep the character-by-character Match
  loop, now in pattern.rhope.
- Match and Partition use Substring instead of Slice.

Review fixes folded in:
- The _Check Rest worker that reads Left >> and Right >> is declared on
  String Cat. It was previously a second String Slice worker.
- _Check Rest@String and _Check Rest@String Slice run the memcmp only
  when the haystack holds at least needlelen bytes; a shorter haystack
  yields not found. The test was inverted.

diff -r d59611dcec71 -r ed70399a07aa pattern.rhope
--- a/pattern.rhope	Tue Dec 21 04:12:11 2010 +0000
+++ b/pattern.rhope	Thu Dec 23 02:00:27 2010 +0000
@@ -87,7 +87,6 @@
 	out <- p
 }
 
-
 _Match@Empty Pattern[pattern,string,n:num,no match]
 {
 	no match <- Yes
@@ -129,4 +128,21 @@
 	}
 }
 
+_Partition@Pattern[delims,string:matched,after,not found]
+{
+	not found <- If[[string]=[""]] {}
+	{
+		[delims]Match[string]
+		{
+			matched,after <- [string]Slice[~]
+		}{
+			matched,after,not found <- _Partition[delims, [string]Substring[1, 0]]
+		}
+	}
+}
+_Partition@Empty Pattern[delims,string:matched,after,not found]
+{
+	not found <- Yes
+}
+
 
diff -r d59611dcec71 -r ed70399a07aa string.rhope
--- a/string.rhope	Tue Dec 21 04:12:11 2010 +0000
+++ b/string.rhope	Thu Dec 23 02:00:27 2010 +0000
@@ -455,6 +455,31 @@
 	}
 }
 
+Substring@String[string,start,length:out]
+{
+	If[[start]>=[[string]Length]]
+	{ out <- "" }
+	{
+		If[[length]<=[0]]
+		{ slength <- [[[string]Length]-[start]]+[length] }
+		{
+			If[[[start]+[length]] > [[string]Length]]
+			{ slength <- [[string]Length]-[start] }
+			{ slength <- Val[length] }
+		}
+
+		If[[[string]Length] = [[string]Byte Length]]
+		{
+			sliceoffset <- Val[start]
+			sbytelen <- Val[slength]
+		}{
+			sliceoffset <- CPOff to BOff[[string]Buffer >>, 0i32, 0i32, start]
+			sbytelen <- CPOff to BOff[[string]Buffer >>, 0i32, sliceoffset, slength]
+		}
+		out <- String Slice[string, sliceoffset, slength, sbytelen]
+	}
+}
+
 Byte@String[string,index:out,invalid]
 {
 	out,invalid <- [[string]Buffer >>]Index[index]
@@ -528,6 +553,123 @@
 	out <- Eq String[left,right]
 }
 
+_Find Memcmp[haybuf,hayoff,haylen,needlebuf,needleoff,needlelen:found,not found]
+{
+	not found <- If[[haylen]<[needlelen]] {}
+	{
+		If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, needlelen]]
+		{
+			found,not found <- _Find Memcmp[haybuf, [hayoff]+[1], [haylen]-[1], needlebuf, needleoff, needlelen]
+		}{
+			found <- hayoff
+		}
+	}
+}
+
+_Find Flat@String[haystack,needlebuf,off,bytelen:found,not found]
+{
+	found,not found <- _Find Memcmp[[haystack]Buffer >>, 0, [haystack]Byte Length, needlebuf, off, bytelen]
+}
+
+_Find Flat@String Slice[haystack,needlebuf,off,bytelen:found,not found]
+{
+	,not found <- _Find Memcmp[[[haystack]Source >>]Buffer >>, [haystack]Offset >>, [haystack]Byte Length, needlebuf, off, bytelen]
+	{ found <- [~]-[[haystack]Offset >>] }
+}
+
+_Find Partial Memcmp[haybuf,hayoff,haylen,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	If[[haylen]<[needlelen]]
+	{
+		,not found <- If[haylen]
+		{
+			cont <- If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, haylen]] {}
+			{
+				partial <- hayoff
+			}
+		}
+	}{
+		cont <- If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, needlelen]] {}
+		{
+			found <- hayoff
+		}
+	}
+	Val[cont]
+	{
+		found,partial,not found <- _Find Partial Memcmp[haybuf, [hayoff]+[1], [haylen]-[1], needlebuf, needleoff, needlelen]
+	}
+}
+
+_Find Partial@String[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	found,partial,not found <- _Find Partial Memcmp[[haystack]Buffer >>, hayoff, [[haystack]Byte Length]-[hayoff], needlebuf, needleoff, needlelen] {} {}
+}
+
+_Check Rest@String[haystack,needlebuf,needleoff,needlelen:found,not found]
+{
+	haylen <- [haystack]Byte Length
+	,not found <- If[[haylen]>=[needlelen]]
+	{
+		not found,found <- If[_internal_memcmp_offset[[haystack]Buffer >>, 0, needlebuf, needleoff, needlelen]]
+	}
+}
+
+_Check Rest@String Slice[haystack,needlebuf,needleoff,needlelen:found,not found]
+{
+	haylen <- [haystack]Byte Length
+	,not found <- If[[haylen]>=[needlelen]]
+	{
+		not found,found <- If[_internal_memcmp_offset[[[haystack]Source >>]Buffer >>, [haystack]Offset >>, needlebuf, needleoff, needlelen]]
+	}
+}
+
+_Check Rest@String Cat[haystack,needlebuf,needleoff,needlelen:found,not found]
+{
+	llen <- [[haystack]Left >>]Byte Length
+	If[[needlelen]<[llen]]
+	{ found,not found <- _Check Rest[[haystack]Left >>, needlebuf, needleoff, needlelen] }
+	{
+		,not found <- _Check Rest[[haystack]Left >>, needlebuf, needleoff, llen]
+		{
+			found,not found <- _Check Rest[[haystack]Right >>, needlebuf, [needleoff]+[llen], [needlelen]-[llen]]
+		}
+	}
+}
+
+_Find Partial@String Slice[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	,,not found <- _Find Partial Memcmp[[[haystack]Source >>]Buffer >>, [hayoff]+[[haystack]Offset >>], [[haystack]Byte Length]-[hayoff], needlebuf, needleoff, needlelen]
+	{ found <- [~]-[[haystack]Offset >>] }
+	{ partial <- [~]-[[haystack]Offset >>] }
+}
+
+_Find Partial@String Cat[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	llen <- [[haystack]Left >>]Byte Length
+	If[[hayoff] < [llen]]
+	{
+		found,p,checkright <- [[haystack]Left >>]_Find Partial[hayoff,needlebuf,needleoff,needlelen] {}
+		{
+			partlen <- [llen]-[~]
+			,cont <- [[haystack]Right >>]_Check Rest[needlebuf,[needleoff]+[partlen], [needlelen]-[partlen]]
+			{ found <- Val[p] }
+			{ found,partial,not found <- _Find Partial[haystack,[hayoff]+[1],needlebuf,needleoff,needlelen] }
+		}{
+			found,partial,not found <- _Find Partial[haystack,llen,needlebuf,needleoff,needlelen]
+		}
+	}{
+		,,not found <- [[haystack]Right >>]_Find Partial[[hayoff]-[llen],needlebuf,needleoff,needlelen]
+		{ found <- [~]+[llen] }
+		{ partial <- [~]+[llen] }
+	}
+}
+
+
+_Find Flat@String Cat[haystack,needlebuf,off,bytelen:found,not found]
+{
+	found,not found,not found <- _Find Partial[haystack, 0, needlebuf, off, bytelen] {}
+}
+
 =@String[left,right:out]
 {
 	out <- [right]_Flat=[left]
@@ -661,6 +803,31 @@
 	}
 }
 
+Substring@String Slice[string,start,length:out]
+{
+	If[[start]>=[[string]Length]]
+	{ out <- "" }
+	{
+		If[[length]<=[0]]
+		{ slength <- [[[string]Length]-[start]]+[length] }
+		{
+			If[[[start]+[length]] > [[string]Length]]
+			{ slength <- [[string]Length]-[start] }
+			{ slength <- Val[length] }
+		}
+
+		If[[[string]Length] = [[string]Byte Length]]
+		{
+			sliceoffset <- [[string]Offset >>]+[start]
+			sbytelen <- Val[slength]
+		}{
+			sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, start]
+			sbytelen <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, sliceoffset, slength]
+		}
+		out <- String Slice[[string]Source >>, sliceoffset, slength, sbytelen]
+	}
+}
+
 Blueprint String Cat
 {
 	Left
@@ -769,6 +936,36 @@
 	}
 }
 
+Substring@String Cat[string,start,length:out]
+{
+	llen <- [[string]Left >>]Length
+	If[[start] >= [llen]]
+	{
+		out <- [[string]Right >>]Substring[[start]-[llen],length]
+	}{
+		If[[length] <= [0]]
+		{ slength <- [[[string]Length]-[start]]+[length] }
+		{
+			If[[[start]+[length]] > [[string]Length]]
+			{ slength <- [[string]Length]-[start] }
+			{ slength <- Val[length] }
+		}
+		If[[[start]+[slength]]<=[llen]]
+		{
+			out <- [[string]Left >>]Substring[start, slength]
+		}{
+			new end <- [start]+[slength]
+			If[[new end]=[[string]Length]]
+			{ right <- Right >>[string] }
+			{ right <- [[string]Right >>]Substring[0, [new end]-[[string]Length]] }
+			If[[start]=[0]]
+			{ left <- Left >>[string] }
+			{ left <- [[string]Left >>]Substring[start, 0] }
+			out <- String Cat[left, right]
+		}
+	}
+}
+
 =@String Cat[left,right:out]
 {
 	out <- Eq String[left,right]
@@ -797,6 +994,46 @@
 	}
 }
 
+Match@String[string,cmp:num,no match,idx]
+{
+	n <- [string]Length
+	,no match <- If[[string]=[[cmp]Substring[0, n]]]
+	{
+		num <- Val[n]
+		idx <- 0
+	}
+}
+
+Match@String Slice[string,cmp:num,no match,idx]
+{
+	n <- [string]Length
+	,no match <- If[[string]=[[cmp]Substring[0, n]]]
+	{
+		num <- Val[n]
+		idx <- 0
+	}
+}
+
+_Partition@String[delim,string:matched,after,not found]
+{
+	,not found <- [string]_Find Flat[[delim]Buffer >>, 0, [delim]Byte Length]
+	{
+		//TODO: Translate byte offset to cp offset when necessary
+		matched <- delim
+		after <- [string]Substring[[~]+[[delim]Length], 0]
+	}
+}
+
+_Partition@String Slice[delim,string:matched,after,not found]
+{
+	,not found <- [string]_Find Flat[[[delim]Source >>]Buffer >>, [delim]Offset >>, [delim]Byte Length]
+	{
+		//TODO: Translate byte offset to cp offset when necessary
+		matched <- delim
+		after <- [string]Substring[[~]+[[delim]Length], 0]
+	}
+}
+
 Pattern@String[string:out]
 {
 	out <- string
@@ -812,50 +1049,17 @@
 	out <- Flatten[string]
 }
 
-Match@String[string,cmp:num,no match,idx]
-{
-	n <- [string]Length
-	,no match <- If[[string]=[[cmp]Slice[n]]]
-	{
-		num <- Val[n]
-		idx <- 0
-	}
-}
-
-Match@String Slice[string,cmp:num,no match,idx]
-{
-	n <- [string]Length
-	,no match <- If[[string]=[[cmp]Slice[n]]]
-	{
-		num <- Val[n]
-		idx <- 0
-	}
-}
-
-
-_Partition[string,delims:matched,after,not found]
-{
-	not found <- If[[string]=[""]] {}
-	{
-		[delims]Match[string]
-		{
-			matched,after <- [string]Slice[~]
-		}{
-			[string]Slice[1] {}
-			{ matched,after,not found <- _Partition[~,delims] }
-		}
-	}
-}
-
 
 Partition[string,delims:before,matched,after,not found]
 {
-	matched,after,not found <- _Partition[string,Pattern[delims]]
+	matched,after,not found <- _Partition[Pattern[delims],string]
 	{ dlen <- Length[~] }
 	{ alen <- Length[~] }
-	before <- [string]Slice[ [[string]Length]-[[dlen]+[alen]] ]
+	blen <- [[string]Length]-[[dlen]+[alen]]
+	If[blen]
+	{ before <- [string]Substring[0, blen] }
+	{ before <- "" }
 }
-
 Dict Type ID@String[string:out]
 {
 	out <- ID[String()]