changeset 156:ed70399a07aa

Add Substring method to string and improve performance of Partition
author Mike Pavone <pavone@retrodev.com>
date Thu, 23 Dec 2010 02:00:27 +0000
parents d59611dcec71
children 37b999dbd12e
files pattern.rhope string.rhope
diffstat 2 files changed, 259 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/pattern.rhope	Tue Dec 21 04:12:11 2010 +0000
+++ b/pattern.rhope	Thu Dec 23 02:00:27 2010 +0000
@@ -87,7 +87,6 @@
 	out <- p
 }
 
-
 _Match@Empty Pattern[pattern,string,n:num,no match]
 {
 	no match <- Yes
@@ -129,4 +128,21 @@
 	}
 }
 
+// Generic pattern partition: walks `string` one position at a time until
+// `delims` matches at the start of the remaining text.
+//   matched/after - the two results of slicing `string` at the value Match
+//                   hands to its success block
+//   not found     - fires when `string` is exhausted without a match
+_Partition@Pattern[delims,string:matched,after,not found]
+{
+        // An empty string cannot contain a delimiter.
+        not found <- If[[string]=[""]] {}
+        {
+                [delims]Match[string]
+                {
+                        // `~` is the first output of Match (presumably the matched length -- confirm)
+                        matched,after <- [string]Slice[~]
+                }{
+                        // No match here: drop the first position and retry on the rest
+                        // (a length of 0 makes Substring run to the end of the string).
+                        matched,after,not found <- _Partition[delims, [string]Substring[1, 0]]
+                }
+        }
+}
 
+// Partition against an empty pattern: unconditionally reports `not found`;
+// `matched` and `after` are never populated.
+_Partition@Empty Pattern[delims,string:matched,after,not found]
+{
+	not found <- Yes
+}
+
--- a/string.rhope	Tue Dec 21 04:12:11 2010 +0000
+++ b/string.rhope	Thu Dec 23 02:00:27 2010 +0000
@@ -455,6 +455,31 @@
 	}
 }
 
+// Returns the part of `string` that begins at `start`.
+//   start  - offset of the first element to keep; if it is at or past
+//            [string]Length the result is ""
+//   length - > 0: number of elements to keep, clamped to what remains
+//            <= 0: added to the remaining count, so 0 means "to the end"
+//                  and a negative value leaves that many off the end
+// The result is built with String Slice over `string` rather than by copying.
+Substring@String[string,start,length:out]
+{
+	If[[start]>=[[string]Length]]
+	{ out <- "" }
+	{
+		// slength: effective length of the result
+		If[[length]<=[0]] 
+		{ slength <- [[[string]Length]-[start]]+[length] }
+		{ 
+			If[[[start]+[length]] > [[string]Length]]
+			{ slength <- [[string]Length]-[start] }
+			{ slength <- Val[length] }
+		}
+	
+		// When Length equals Byte Length the offsets are used as byte offsets directly.
+		If[[[string]Length] = [[string]Byte Length]]
+		{
+			sliceoffset <- Val[start]
+			sbytelen <- Val[slength]
+		}{
+			// Otherwise the offsets are translated with CPOff to BOff.
+			// NOTE(review): its exact argument/return semantics are not visible here -- confirm
+			// that the second call yields a byte length rather than an end offset.
+			sliceoffset <- CPOff to BOff[[string]Buffer >>, 0i32, 0i32, start]
+			sbytelen <- CPOff to BOff[[string]Buffer >>, 0i32, sliceoffset, slength]
+		}
+		out <- String Slice[string, sliceoffset, slength, sbytelen]
+	}
+}
+
 Byte@String[string,index:out,invalid]
 {
 	out,invalid <- [[string]Buffer >>]Index[index]
@@ -528,6 +553,123 @@
 	out <- Eq String[left,right]
 }
 
+// Byte-wise search for a needle inside a single buffer.
+//   haybuf/hayoff/haylen          - buffer, current offset and remaining byte count of the haystack
+//   needlebuf/needleoff/needlelen - buffer, offset and byte count of the needle
+//   found     - offset in haybuf at which the comparison succeeded
+//   not found - fires once fewer than needlelen bytes remain
+_Find Memcmp[haybuf,hayoff,haylen,needlebuf,needleoff,needlelen:found,not found]
+{
+	not found <- If[[haylen]<[needlelen]] {}
+	{
+		// A truthy result from _internal_memcmp_offset is treated as a mismatch
+		// (presumably non-zero like C memcmp -- confirm).
+		If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, needlelen]]
+		{
+			// Mismatch: advance one byte and try again.
+			found,not found <- _Find Memcmp[haybuf, [hayoff]+[1], [haylen]-[1], needlebuf, needleoff, needlelen]
+		}{
+			found <- hayoff
+		}
+	}
+}
+
+// Searches a flat String for the needle bytes (needlebuf at `off`, `bytelen` long),
+// starting at byte 0 of the string's buffer and covering its whole Byte Length.
+// `found` is the offset reported by _Find Memcmp.
+_Find Flat@String[haystack,needlebuf,off,bytelen:found,not found]
+{
+	found,not found <- _Find Memcmp[[haystack]Buffer >>, 0, [haystack]Byte Length, needlebuf, off, bytelen]
+}
+
+// Searches a String Slice for the needle bytes by scanning the source string's
+// buffer from the slice's Offset for the slice's Byte Length.
+_Find Flat@String Slice[haystack,needlebuf,off,bytelen:found,not found]
+{
+	,not found <- _Find Memcmp[[[haystack]Source >>]Buffer >>, [haystack]Offset >>, [haystack]Byte Length, needlebuf, off, bytelen]
+	// _Find Memcmp reports an offset into the source buffer; subtract the slice
+	// Offset to make it relative to the slice.
+	{ found <- [~]-[[haystack]Offset >>] }
+}
+
+// Like _Find Memcmp, but also reports when the tail of the haystack matches
+// only the beginning of the needle.
+//   found     - offset at which all needlelen bytes compared equal
+//   partial   - offset at which every remaining haystack byte (fewer than
+//               needlelen) compared equal to the start of the needle
+//   not found - fires when the haystack is exhausted (haylen reaches 0)
+_Find Partial Memcmp[haybuf,hayoff,haylen,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	If[[haylen]<[needlelen]]
+	{
+		// Fewer bytes left than the needle needs: only a partial match is possible.
+		,not found <- If[haylen]
+		{
+			// Compare just the haylen bytes that remain; `cont` fires on mismatch.
+			cont <- If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, haylen]] {}
+			{
+				partial <- hayoff
+			}
+		}
+	}{
+		// Enough bytes for a full comparison; `cont` fires on mismatch.
+		cont <- If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, needlelen]] {}
+		{
+			found <- hayoff
+		}
+	}
+	// On a mismatch at this offset, move one byte forward and retry.
+	Val[cont]
+	{
+		found,partial,not found <- _Find Partial Memcmp[haybuf, [hayoff]+[1], [haylen]-[1], needlebuf, needleoff, needlelen]
+	}
+}
+
+// Partial-aware search of a flat String starting at byte offset `hayoff`;
+// the remaining length passed down is Byte Length minus hayoff.
+// Outputs are forwarded unchanged from _Find Partial Memcmp.
+_Find Partial@String[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	found,partial,not found <- _Find Partial Memcmp[[haystack]Buffer >>, hayoff, [[haystack]Byte Length]-[hayoff], needlebuf, needleoff, needlelen] {} {}
+}
+
+// Checks whether `haystack` begins with the needlelen needle bytes found at
+// needleoff in needlebuf.
+//   found     - fires when the bytes compare equal
+//   not found - fires when the haystack is shorter than needlelen or the
+//               bytes differ
+_Check Rest@String[haystack,needlebuf,needleoff,needlelen:found,not found]
+{
+	haylen <- [haystack]Byte Length
+	// Only compare when the haystack holds at least needlelen bytes. The guard
+	// was previously inverted, so the comparison ran only on too-short haystacks
+	// and every sufficiently long one was reported as not found.
+	,not found <- If[[haylen]>=[needlelen]]
+	{
+		// A truthy compare result means the bytes differ.
+		not found,found <- If[_internal_memcmp_offset[[haystack]Buffer >>, 0, needlebuf, needleoff, needlelen]]
+	}
+}
+
+// Checks whether the slice begins with the needlelen needle bytes found at
+// needleoff in needlebuf. The comparison starts at the slice's Offset in the
+// source string's buffer.
+//   found     - fires when the bytes compare equal
+//   not found - fires when the slice is shorter than needlelen or the
+//               bytes differ
+_Check Rest@String Slice[haystack,needlebuf,needleoff,needlelen:found,not found]
+{
+        haylen <- [haystack]Byte Length
+        // Only compare when the slice holds at least needlelen bytes. The guard
+        // was previously inverted, so the comparison ran only on too-short slices
+        // and every sufficiently long one was reported as not found.
+        ,not found <- If[[haylen]>=[needlelen]]
+        {
+                // A truthy compare result means the bytes differ.
+                not found,found <- If[_internal_memcmp_offset[[[haystack]Source >>]Buffer >>, [haystack]Offset >>, needlebuf, needleoff, needlelen]]
+        }
+}
+
+// Checks whether a concatenated string begins with the needlelen needle bytes
+// found at needleoff in needlebuf, splitting the check across the Left and
+// Right halves when the needle is not shorter than the left half.
+// This worker reads the Left/Right fields, so it is defined on String Cat;
+// it was previously declared on String Slice, duplicating the slice worker.
+_Check Rest@String Cat[haystack,needlebuf,needleoff,needlelen:found,not found]
+{
+	llen <- [[haystack]Left >>]Byte Length
+	If[[needlelen]<[llen]]
+	// The needle fits inside the left half: check there only.
+	{ found,not found <- _Check Rest[[haystack]Left >>, needlebuf, needleoff, needlelen] }
+	{
+		// The first llen needle bytes must match the whole left half...
+		,not found <- _Check Rest[[haystack]Left >>, needlebuf, needleoff, llen]
+		{
+			// ...and the remaining needle bytes must match the start of the right half.
+			found,not found <- _Check Rest[[haystack]Right >>, needlebuf, [needleoff]+[llen], [needlelen]-[llen]]
+		}
+	}
+}
+
+// Partial-aware search of a String Slice starting at slice-relative byte
+// offset `hayoff`. The scan runs over the source string's buffer, so the
+// slice Offset is added going in and subtracted from found/partial coming out.
+_Find Partial@String Slice[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	,,not found <- _Find Partial Memcmp[[[haystack]Source >>]Buffer >>, [hayoff]+[[haystack]Offset >>], [[haystack]Byte Length]-[hayoff], needlebuf, needleoff, needlelen]
+	{ found <- [~]-[[haystack]Offset >>] }
+	{ partial <- [~]-[[haystack]Offset >>] }
+}
+
+// Partial-aware search of a concatenated string starting at byte offset
+// `hayoff` (relative to the whole cat). Handles needles that straddle the
+// boundary between the Left and Right halves.
+_Find Partial@String Cat[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
+{
+	llen <- [[haystack]Left >>]Byte Length
+	If[[hayoff] < [llen]]
+	{
+		// Start inside the left half. A full match there is passed straight to `found`.
+		found,p,checkright <- [[haystack]Left >>]_Find Partial[hayoff,needlebuf,needleoff,needlelen] {}
+		{
+			// Left ended with a partial match at offset `~`; partlen needle bytes
+			// are already accounted for, so the rest must open the right half.
+			partlen <- [llen]-[~]
+			,cont <- [[haystack]Right >>]_Check Rest[needlebuf,[needleoff]+[partlen], [needlelen]-[partlen]]
+			{ found <- Val[p] }
+			// NOTE(review): the retry resumes at hayoff+1 rather than just past the
+			// partial offset, so the same partial may be rediscovered -- confirm intended.
+			{ found,partial,not found <- _Find Partial[haystack,[hayoff]+[1],needlebuf,needleoff,needlelen] }
+		}{
+			// Nothing in the left half: continue from the start of the right half.
+			found,partial,not found <- _Find Partial[haystack,llen,needlebuf,needleoff,needlelen]
+		}
+	}{
+		// Start inside the right half; add llen to turn its offsets back into cat offsets.
+		,,not found <- [[haystack]Right >>]_Find Partial[[hayoff]-[llen],needlebuf,needleoff,needlelen]
+		{ found <- [~]+[llen] }
+		{ partial <- [~]+[llen] }
+	}
+}
+
+
+// Searches a concatenated string for the needle bytes from offset 0.
+// Both the `partial` and `not found` results of _Find Partial are routed to
+// `not found`: a trailing partial match is not a match.
+_Find Flat@String Cat[haystack,needlebuf,off,bytelen:found,not found]
+{
+	found,not found,not found <- _Find Partial[haystack, 0, needlebuf, off, bytelen] {}
+}
+
 =@String[left,right:out]
 {
 	out <- [right]_Flat=[left]
@@ -661,6 +803,31 @@
 	}
 }
 
+// Returns the part of the slice that begins at `start`, as a new String Slice
+// over the same Source string (slices are not nested).
+//   start  - offset of the first element to keep; if it is at or past
+//            [string]Length the result is ""
+//   length - > 0: number of elements to keep, clamped to what remains
+//            <= 0: added to the remaining count, so 0 means "to the end"
+Substring@String Slice[string,start,length:out]
+{
+        If[[start]>=[[string]Length]]
+        { out <- "" }
+        {
+		// slength: effective length of the result
+		If[[length]<=[0]]
+                { slength <- [[[string]Length]-[start]]+[length] }
+                {
+                        If[[[start]+[length]] > [[string]Length]]
+                        { slength <- [[string]Length]-[start] }
+                        { slength <- Val[length] }
+                }
+
+                // When Length equals Byte Length, `start` is simply added to the slice's Offset.
+                If[[[string]Length] = [[string]Byte Length]]
+                {
+                        sliceoffset <- [[string]Offset >>]+[start]
+                        sbytelen <- Val[slength]
+                }{
+                        // Otherwise offsets are translated with CPOff to BOff, starting from the slice Offset.
+                        // NOTE(review): its argument/return semantics are not visible here -- confirm.
+                        sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, start]
+                        sbytelen <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, sliceoffset, slength]
+                }
+                out <- String Slice[[string]Source >>, sliceoffset, slength, sbytelen]
+        }
+}
+
 Blueprint String Cat
 {
 	Left
@@ -769,6 +936,36 @@
 	}
 }
 
+// Returns the part of a concatenated string that begins at `start`.
+// `length` follows the same convention as the other Substring workers:
+// > 0 is a count (clamped to what remains), <= 0 is added to the remaining count.
+// The request is delegated to one half when it fits there; otherwise a new
+// String Cat is built from the relevant pieces of both halves.
+Substring@String Cat[string,start,length:out]
+{
+	llen <- [[string]Left >>]Length
+	If[[start] >= [llen]]
+	{
+		// Entirely inside the right half: rebase `start` and delegate.
+		out <- [[string]Right >>]Substring[[start]-[llen],length]
+	}{
+		// slength: effective length of the result
+		If[[length] <= [0]]
+		{ slength <- [[[string]Length]-[start]]+[length] }
+		{
+                       	If[[[start]+[length]] > [[string]Length]]
+               	       	{ slength <- [[string]Length]-[start] }
+			{ slength <- Val[length] }
+		}
+		If[[[start]+[slength]]<=[llen]]
+		{
+			// Entirely inside the left half.
+			out <- [[string]Left >>]Substring[start, slength]
+		}{
+			// Spans both halves.
+			new end <- [start]+[slength]
+			If[[new end]=[[string]Length]]
+			// Runs to the very end: reuse the right half as is.
+			{ right <- Right >>[string] }
+			// Otherwise pass a non-positive length so the right half is cut
+			// short by the same amount the whole string is.
+			{ right <- [[string]Right >>]Substring[0, [new end]-[[string]Length]] }
+			If[[start]=[0]]
+			// Starts at the very beginning: reuse the left half as is.
+			{ left <- Left >>[string] }
+			// Otherwise keep the left half from `start` to its end.
+			{ left <- [[string]Left >>]Substring[start, 0] }
+			out <- String Cat[left, right]
+		}
+        }
+}
+
 =@String Cat[left,right:out]
 {
 	out <- Eq String[left,right]
@@ -797,6 +994,46 @@
 	}
 }
 
+// Pattern match for a literal String: succeeds when the first n positions of
+// `cmp` equal `string`, where n is [string]Length.
+//   num      - n, on a match
+//   idx      - always 0 on a match
+//   no match - fires when the comparison fails
+// NOTE(review): with n = 0 the visible Substring workers treat a length of 0
+// as "to the end", so an empty `string` would only match an empty `cmp` -- confirm intended.
+Match@String[string,cmp:num,no match,idx]
+{
+	n <- [string]Length
+	,no match <- If[[string]=[[cmp]Substring[0, n]]]
+	{
+		num <- Val[n]
+		idx <- 0
+	}
+}
+
+// Pattern match for a String Slice used as a literal: succeeds when the first
+// n positions of `cmp` equal `string`, where n is [string]Length.
+//   num      - n, on a match
+//   idx      - always 0 on a match
+//   no match - fires when the comparison fails
+Match@String Slice[string,cmp:num,no match,idx]
+{
+        n <- [string]Length
+        ,no match <- If[[string]=[[cmp]Substring[0, n]]]
+        {
+                num <- Val[n]
+		idx <- 0
+        }
+}
+
+// Partition worker for a literal String delimiter: locates the delimiter's
+// bytes in `string` with _Find Flat instead of stepping through Match.
+//   matched   - the delimiter itself
+//   after     - everything in `string` following the delimiter
+//   not found - fires when _Find Flat finds nothing
+_Partition@String[delim,string:matched,after,not found]
+{
+	,not found <- [string]_Find Flat[[delim]Buffer >>, 0, [delim]Byte Length]
+	{
+		//TODO: Translate byte offset to cp offset when necessary
+		// `~` is the byte offset reported by _Find Flat; it is combined with the
+		// delimiter's Length and passed to Substring as the start offset.
+		matched <- delim
+		after <- [string]Substring[[~]+[[delim]Length], 0]
+	}
+}
+
+// Partition worker for a String Slice delimiter: the needle bytes are taken
+// from the slice's Source buffer at the slice's Offset and located in
+// `string` with _Find Flat.
+//   matched   - the delimiter itself
+//   after     - everything in `string` following the delimiter
+//   not found - fires when _Find Flat finds nothing
+_Partition@String Slice[delim,string:matched,after,not found]
+{
+	,not found <- [string]_Find Flat[[[delim]Source >>]Buffer >>, [delim]Offset >>, [delim]Byte Length]
+	{
+		//TODO: Translate byte offset to cp offset when necessary
+		// `~` is the byte offset reported by _Find Flat; it is combined with the
+		// delimiter's Length and passed to Substring as the start offset.
+		matched <- delim
+		after <- [string]Substring[[~]+[[delim]Length], 0]
+	}
+}
+
 Pattern@String[string:out]
 {
 	out <- string
@@ -812,50 +1049,17 @@
 	out <- Flatten[string]
 }
 
-Match@String[string,cmp:num,no match,idx]
-{
-	n <- [string]Length
-	,no match <- If[[string]=[[cmp]Slice[n]]]
-	{
-		num <- Val[n]
-		idx <- 0
-	}
-}
-
-Match@String Slice[string,cmp:num,no match,idx]
-{
-        n <- [string]Length
-        ,no match <- If[[string]=[[cmp]Slice[n]]]
-        {
-                num <- Val[n]
-		idx <- 0
-        }
-}
-
-
-_Partition[string,delims:matched,after,not found]
-{
-	not found <- If[[string]=[""]] {}
-	{
-		[delims]Match[string]
-		{
-			matched,after <- [string]Slice[~]
-		}{
-			[string]Slice[1] {}
-			{ matched,after,not found <- _Partition[~,delims] }
-		}
-	}
-}
-
 Partition[string,delims:before,matched,after,not found]
 {
-	matched,after,not found <- _Partition[string,Pattern[delims]]
+	// The pattern is passed first, so the _Partition worker is chosen by the
+	// pattern's type (Pattern, Empty Pattern, String, String Slice).
+	matched,after,not found <- _Partition[Pattern[delims],string]
 	{ dlen <- Length[~] }
 	{ alen <- Length[~] }
-	before <- [string]Slice[ [[string]Length]-[[dlen]+[alen]] ]
+	// blen: length of the text preceding the match.
+	blen <- [[string]Length]-[[dlen]+[alen]]
+	// Substring treats a length of 0 as "to the end", so an empty prefix is
+	// produced explicitly instead of calling Substring[0, 0].
+	If[blen]
+	{ before <- [string]Substring[0, blen] }
+	{ before <- "" }
 }
 
-
 Dict Type ID@String[string:out]
 {
 	out <- ID[String()]