view string.rhope @ 131:0a4682be2db2

Modify lexer and new parser to work in compiler
author Mike Pavone <pavone@retrodev.com>
date Fri, 05 Nov 2010 02:43:34 +0000
parents fd23ab2c1a73
children fc3815b7462f
line wrap: on
line source


Import pattern.rhope

//Consumes the continuation bytes of a multi-byte UTF-8 sequence while counting characters.
//	num      - number of continuation bytes still expected
//	arr      - byte array being scanned
//	index    - index of the byte to examine
//	count    - characters counted before this sequence started
//	consumed - bytes of this sequence consumed so far
//out receives the character count once scanning finishes
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				//Byte is in the continuation range 128-191
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					//More continuation bytes expected, keep consuming
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	//Only runs once ncount has been produced by one of the branches above
	Val[ncount]
	{
		[arr]Next[index]
		{
			//More bytes remain, resume normal counting
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//Counts the characters in a UTF-8 encoded byte array.
//	arr   - byte array being scanned
//	index - index of the byte to examine
//	count - characters counted so far
//out receives the character count; malformed bytes are counted as one character each
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	If[[128u8]>[byte]]
	{ ncount <- [1i32]+[count] }
	{
		If[[192u8]>[byte]]
		{
			//Error: Encoding for 2nd,3rd or 4th byte of sequence
			//treat as a single character
			ncount <- [1i32]+[count]
		}{
			If[[224u8]>[byte]]
			{
				//Lead byte of a 2 byte sequence
				out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
			}{
				If[[240u8]>[byte]]
				{
					//Lead byte of a 3 byte sequence
					out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
				}{
					If[[245u8]>[byte]]
					{
						//Lead byte of a 4 byte sequence
						out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
					}{
						//Error: Out of range of Unicode standard
						//treat as a single character
						ncount <- [1i32]+[count]
					}
				}
			}
		}
	}
	//ncount is only assigned by the single-byte branches above; the
	//multi-byte branches hand the rest of the scan to UTF8 Expect instead
	[arr]Next[index]
	{
		out <- Count UTF8[arr, ~, ncount]
	}{
		out <- Val[ncount]
	}
}

//Flat string: a byte buffer plus a cached length.
//String@Array stores the byte array in Buffer and the Count UTF8 result in Length
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

//Builds a String from a byte array: the array becomes the buffer and the
//character count reported by Count UTF8 becomes the length
String@Array[in:out(String)]
{
	[in]First
	{
		charcount <- Count UTF8[in, ~, 0i32]
	}{
		//No first element, so there is nothing to count
		charcount <- 0i32
	}
	withbuf <- [Build[String()]]Buffer <<[in]
	out <- [withbuf]Length <<[charcount]
}

//Writes the string's buffer to file descriptor 1, then writes a single
//newline byte (10); out is the result of the second write
Print@String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	buf <- [string]Buffer >>
	nbytes <- Int64[[buf]Length >>]
	write[1i32, buf, nbytes]
	{
		newline <- [Array[]]Append[10u8]
		out <- write[1i32, newline, 1i64]
	}
}

//Reads one byte from file descriptor 0 into a one element array and
//converts the second output of read to a String
Get Char[:out]
{
	onebyte <- [Array[]]Set[0, 0u8]
	read[0, onebyte, 1i64]
	{}
	{
		out <- String[~]
	}
}

//Accumulates decimal digits from a byte array into an integer.
//	current - value accumulated so far
//	index   - index of the byte to examine
//	array   - byte array holding the digits
//	ten     - the value 10 in the target integer type
//	conv    - worker converting a UInt8 digit value to the target type
//Stops at the first byte outside '0'-'9' (48-57) or at the end of the array
_String to Int[current,index,array,ten,conv:out]
{
	char <- [array]Index[index]
	{
		If[[char]<[48u8]]
		{
			out <- Val[current]
		}{
			If[[char]>[57u8]]
			{
				out <- Val[current]
			}{
				digit <- [conv]Call[[char]-[48u8]]
				shifted <- [current]*[ten]
				out <- _String to Int[[shifted]+[digit], [index]+[1], array, ten, conv]
			}
		}
	}{
		out <- Val[current]
	}
}

//Converts an unsigned byte to Int8 by widening to Int16 and truncating
U8toI8[val:out]
{
	wide <- Int16[val]
	out <- Trunc Int8[wide]
}

//Parses a decimal Int8 from the start of the string's buffer.
//A leading '-' (byte 45) negates the value; a buffer with no first byte gives 0
Int8@String[string:out]
{
	buf <- [string]Buffer >>
	first <- [buf]Index[0]
	{
		If[[first]=[45u8]]
		{
			magnitude <- _String to Int[0i8, 1, buf, 10i8, U8toI8[?]]
			out <- [0i8]-[magnitude]
		}{
			out <- _String to Int[0i8, 0, buf, 10i8, U8toI8[?]]
		}
	}{
		out <- 0i8
	}
}


//Parses a decimal Int16 from the start of the string's buffer.
//A leading '-' (byte 45) negates the value; a buffer with no first byte gives 0
Int16@String[string:out]
{
	buf <- [string]Buffer >>
	first <- [buf]Index[0]
	{
		If[[first]=[45u8]]
		{
			magnitude <- _String to Int[0i16, 1, buf, 10i16, Int16[?]]
			out <- [0i16]-[magnitude]
		}{
			out <- _String to Int[0i16, 0, buf, 10i16, Int16[?]]
		}
	}{
		out <- 0i16
	}
}

//Parses a decimal Int32 from the start of the string's buffer.
//A leading '-' (byte 45) negates the value; a buffer with no first byte gives 0
Int32@String[string:out]
{
	buf <- [string]Buffer >>
	first <- [buf]Index[0]
	{
		If[[first]=[45u8]]
		{
			magnitude <- _String to Int[0i32, 1, buf, 10i32, Int32[?]]
			out <- [0i32]-[magnitude]
		}{
			out <- _String to Int[0i32, 0, buf, 10i32, Int32[?]]
		}
	}{
		out <- 0i32
	}
}

//Accumulates hexadecimal digits from a string into an Int32.
//	str - string being parsed
//	cur - value accumulated so far
//	idx - byte index of the digit to examine
//Stops at the first byte that is not a hex digit or when Byte reports no byte at idx
_Hex Int32[str,cur,idx:out]
{
	char <- [str]Byte[idx]
	{
		//adjust is the amount subtracted from the byte to get the digit's value
		//0-9
		[char]Between[47u8, 58u8]
		{
			adjust <- 48u8
		}{
			//A-F
			[char]Between[64u8, 71u8]
			{
				adjust <- 55u8
			}{
				//a-f
				[char]Between[96u8, 103u8]
				{
					adjust <- 87u8
				}{
					//Not a hex digit, stop here
					out <- cur
				}
			}
		}
		//Only runs when one of the ranges above matched
		Val[adjust]
		{
			out <- _Hex Int32[str, [[cur]*[16i32]]+[Int32[[char]-[adjust]]], [idx]+[1]]
		}
	}{
		out <- cur
	}
}

//Parses the hexadecimal digits at the start of str into an Int32,
//starting from an accumulator of 0 at byte index 0
Hex Int32[str:out]
{
	out <- [str]_Hex Int32[0i32, 0]
}

//Parses a decimal Int64 from the start of the string's buffer.
//A leading '-' (byte 45) negates the value; a buffer with no first byte gives 0
Int64@String[string:out]
{
	buf <- [string]Buffer >>
	first <- [buf]Index[0]
	{
		If[[first]=[45u8]]
		{
			magnitude <- _String to Int[0i64, 1, buf, 10i64, Int64[?]]
			out <- [0i64]-[magnitude]
		}{
			out <- _String to Int[0i64, 0, buf, 10i64, Int64[?]]
		}
	}{
		out <- 0i64
	}
}

//Parses a decimal UInt8 from the start of the string's buffer.
//Digit values are already UInt8, so Val is used as the converter
UInt8@String[string:out]
{
	buf <- [string]Buffer >>
	out <- _String to Int[0u8, 0, buf, 10u8, Val[?]]
}


//Parses a decimal UInt16 from the start of the string's buffer
UInt16@String[string:out]
{
	buf <- [string]Buffer >>
	out <- _String to Int[0u16, 0, buf, 10u16, UInt16[?]]
}

//Parses a decimal UInt32 from the start of the string's buffer
UInt32@String[string:out]
{
	buf <- [string]Buffer >>
	out <- _String to Int[0u32, 0, buf, 10u32, UInt32[?]]
}

//Parses a decimal UInt64 from the start of the string's buffer
UInt64@String[string:out]
{
	buf <- [string]Buffer >>
	out <- _String to Int[0u64, 0, buf, 10u64, UInt64[?]]
}

//Flattens the slice and converts the result with Int8
Int8@String Slice[string:out]
{
	out <- Int8[Flatten[string]]
}

//Flattens the concatenation and converts the result with Int8
Int8@String Cat[string:out]
{
	out <- Int8[Flatten[string]]
}

//Flattens the slice and converts the result with Int16
Int16@String Slice[string:out]
{
	out <- Int16[Flatten[string]]
}

//Flattens the concatenation and converts the result with Int16
Int16@String Cat[string:out]
{
	out <- Int16[Flatten[string]]
}

//Flattens the slice and converts the result with Int32
Int32@String Slice[string:out]
{
	out <- Int32[Flatten[string]]
}

//Flattens the concatenation and converts the result with Int32
Int32@String Cat[string:out]
{
	out <- Int32[Flatten[string]]
}

//Flattens the slice and converts the result with Int64
Int64@String Slice[string:out]
{
	out <- Int64[Flatten[string]]
}

//Flattens the concatenation and converts the result with Int64
Int64@String Cat[string:out]
{
	out <- Int64[Flatten[string]]
}

//Flattens the slice and converts the result with UInt8
UInt8@String Slice[string:out]
{
	out <- UInt8[Flatten[string]]
}

//Flattens the concatenation and converts the result with UInt8
UInt8@String Cat[string:out]
{
	out <- UInt8[Flatten[string]]
}

//Flattens the slice and converts the result with UInt16
UInt16@String Slice[string:out]
{
	out <- UInt16[Flatten[string]]
}

//Flattens the concatenation and converts the result with UInt16
UInt16@String Cat[string:out]
{
	out <- UInt16[Flatten[string]]
}

//Flattens the slice and converts the result with UInt32
UInt32@String Slice[string:out]
{
	out <- UInt32[Flatten[string]]
}

//Flattens the concatenation and converts the result with UInt32
UInt32@String Cat[string:out]
{
	out <- UInt32[Flatten[string]]
}

//Flattens the slice and converts the result with UInt64
UInt64@String Slice[string:out]
{
	out <- UInt64[Flatten[string]]
}

//Flattens the concatenation and converts the result with UInt64
UInt64@String Cat[string:out]
{
	out <- UInt64[Flatten[string]]
}

//TODO: Implement me once Real64 support is added
//Placeholder: no conversion is performed, the input is passed through unchanged
Real64[string:out]
{
	out <- string
}

//A String is returned unchanged by Flatten.
//NOTE(review): an identical Flatten@String definition appears again further
//down this file, after _Flatten@String; one of the two is redundant
Flatten@String[string:out]
{
	out <- string
}

//Helper for CPOff to BOff: consumes the continuation bytes of one
//multi-byte UTF-8 sequence.
//	buff     - byte array being scanned
//	cur      - code point offset of the sequence's lead byte
//	boff     - byte offset of the next byte to examine
//	cpoff    - code point offset being searched for
//	expected - number of continuation bytes still expected
//	used     - bytes of this sequence consumed so far
//out is the byte offset corresponding to cpoff
_CPOff to BOff[buff,cur,boff,cpoff,expected,used:out]
{
	If[expected]
	{
		byte <- [buff]Index[boff]
		//Continuation bytes are 128-191. 192 and above is not a continuation
		//byte, matching the ranges used by UTF8 Expect and CPOff to BOff
		err <- If[[byte]>=[192u8]] {}
		{
			err <- If[[byte]<[128u8]] {}
			{
				out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, [expected]-[1i32], [used]+[1i32]]
			}
		}

		//Only runs when the sequence turned out to be malformed
		Val[err]
		{
			//Each byte consumed so far is treated as a separate character
			ncur <- [cur]+[used]
			If[[ncur]>[cpoff]]
			{
				//The target lies inside the bytes already consumed:
				//step back by the number of characters overshot
				out <- [boff]-[[ncur]-[cpoff]]
			}{
				out <- CPOff to BOff[buff,ncur,boff,cpoff]
			}
		}
	}{
		//Sequence complete: it counts as a single character
		out <- CPOff to BOff[buff,[cur]+[1i32],boff,cpoff]
	}
}

//Converts a code point offset into a byte offset within a UTF-8 buffer.
//	buff  - byte array holding the UTF-8 data
//	cur   - code point offset reached so far
//	boff  - byte offset corresponding to cur
//	cpoff - code point offset to locate
//out is the byte offset reached when cur equals cpoff. Stray continuation
//bytes and bytes of 245 or above are counted as one character each
CPOff to BOff[buff,cur,boff,cpoff:out]
{
	If[[cur]=[cpoff]]
	{
		out <- boff
	}{
		byte <- [buff]Index[boff]
		If[[byte] < [128u8]]
		{
			//Single byte character
			nboff <- [boff]+[1i32]
			ncur <- [cur]+[1i32]
		}{
			If[[byte]<[192u8]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				nboff <- [boff]+[1i32]
				ncur <- [cur]+[1i32]
			}{
				//expect is the number of continuation bytes implied by the lead byte
				If[[byte]<[224u8]]
				{
					expect <- 1i32
				}{
					If[[byte]<[240u8]]
					{
						expect <- 2i32
					}{
						If[[byte]<[245u8]]
						{
							expect <- 3i32
						}{
							//Error
							nboff <- [boff]+[1i32]
							ncur <- [cur]+[1i32]
						}
					}
				}
				Val[expect]
				{
					//The helper consumes the continuation bytes and resumes the scan
					out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, expect, 1i32] {}
				}
			}
		}
		//Runs only when ncur and nboff were set by a single byte step above.
		//Arguments must follow the declared order: buff, cur, boff, cpoff
		out <- CPOff to BOff[buff, ncur, nboff, cpoff]
	}
}

//Splits a String at a character offset.
//left gets the first slicepoint characters and right gets the remainder.
//Offsets at or past the end give (string, ""); offsets <= 0 give ("", string)
Slice@String[string,slicepoint:left,right]
{
	If[[slicepoint]>=[[string]Length]]
	{
		left <- string
		right <- ""
	}{
		If[[slicepoint]<=[0]]
		{
			left <- ""
			right <- string
		}{
			buf <- [string]Buffer >>
			//Byte offset at which character number slicepoint begins
			sliceoffset <- CPOff to BOff[buf, 0i32, 0i32, slicepoint]
			left <- String Slice[string, 0i32, slicepoint, sliceoffset]
			rchars <- [[string]Length >>]-[slicepoint]
			rbytes <- [[buf]Length]-[sliceoffset]
			right <- String Slice[string, sliceoffset, rchars, rbytes]
		}
	}
}

//Looks up the byte at a byte index in the string's buffer; both outputs
//come straight from Index on the buffer
Byte@String[string,index:out,invalid]
{
	buf <- [string]Buffer >>
	out,invalid <- [buf]Index[index]
}

//Returns the value stored in the String's Length field
Length@String[string:out]
{
	out <- [string]Length >>
}

//Compares two strings byte by byte starting at index.
//out is Yes once left has no byte at index; on the first mismatch out is
//taken from the second output of If
_=String[left,right,index:out]
{
	lbyte <- [left]Byte[index]
	{
		rbyte <- [right]Byte[index]
		,out <- If[[lbyte]=[rbyte]]
		{
			out <- _=String[left, right, [index]+[1]]
		}
	}{
		out <- Yes
	}
}

//Generic string equality: Length values must match, then the bytes are
//compared one at a time with _=String
Eq String[left,right:out]
{
	llen <- Length[left]
	rlen <- Length[right]
	,out <- If[[llen] = [rlen]]
	{
		out <- _=String[left, right, 0]
	}
}

//Foreign declaration of memcmp from libc: two arrays passed as raw pointers
//and an Int64 length, producing an Int32 result
Foreign C:libc
{
	memcmp[a(Array,Raw Pointer),b(Array,Raw Pointer),len(Int64,Naked):out(Int32,Naked)]
}

//Foreign declaration of a runtime helper taking two arrays (raw pointers),
//an Int32 offset for each and an Int32 length, producing an Int32 result.
//Callers in this file treat a non-zero result as "not equal"
Foreign C:runtime
{
	_internal_memcmp_offset[left(Array,Raw Pointer),loffset(Int32,Naked),right(Array,Raw Pointer),roffset(Int32,Naked),len(Int32,Naked):out(Int32,Naked)]
}

//Equality of two flat Strings: byte lengths must match, then the buffers
//are compared with memcmp (non-zero result means different)
_Flat=@String[left,right:out]
{
	llen <- [left]Byte Length
	,out <- If[[llen] = [[right]Byte Length]]
	{
		diff <- memcmp[[left]Buffer >>, [right]Buffer >>, Int64[llen]]
		If[diff]
		{ out <- No }
		{ out <- Yes }
	}
}

//Equality of a String Slice (left) against a flat String (right): byte
//lengths must match, then the slice's bytes inside its source buffer are
//compared against the start of right's buffer
_Flat=@String Slice[left,right:out]
{
	nbytes <- [left]Byte Length
	,out <- If[[nbytes] = [[right]Byte Length]]
	{
		lbuf <- [[left]Source >>]Buffer >>
		diff <- _internal_memcmp_offset[lbuf, [left]Offset >>, [right]Buffer >>, 0i32, nbytes]
		If[diff]
		{ out <- No }
		{ out <- Yes }
	}
}

//A String Cat has no single buffer to memcmp, so use the generic
//byte by byte comparison
_Flat=@String Cat[left,right:out]
{
	out <- [left]Eq String[right]
}

//Equality with a flat String on the left: the operands are swapped so that
//_Flat= is selected by the type of right
=@String[left,right:out]
{
	out <- _Flat=[right, left]
}

//Number of bytes in the string, taken from the length of its buffer
Byte Length@String[string:out]
{
	buf <- [string]Buffer >>
	out <- [buf]Length
}

//Appending builds a String Cat node holding both operands
Append@String[left,right:out]
{
	out <- String Cat[left,right]
}

//A view onto part of another string.
//Byte@String Slice adds Offset to the index before reading from Source and
//rejects indices at or beyond ByteLen; Length is returned by Length@String Slice
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)	
	Length(Int32,Naked)
	ByteLen(Int32,Naked)
}

//Constructs a String Slice, storing each argument in the field of the
//matching name (Source, Offset, Length, ByteLen)
String Slice[source,offset,length,bytelen:out(String Slice)]
{
	base <- [Build[String Slice()]]Source <<[source]
	placed <- [base]Offset <<[offset]
	sized <- [placed]Length <<[length]
	out <- [sized]ByteLen <<[bytelen]
}

//Looks up the byte at a byte index relative to the start of the slice.
//Indices at or beyond ByteLen go to invalid; otherwise the lookup is
//forwarded to the source with the slice's offset added
Byte@String Slice[string,index:out,invalid]
{
	,invalid <- If[[index]<[[string]ByteLen >>]]
	{
		srcindex <- [index]+[[string]Offset >>]
		out,invalid <- [[string]Source >>]Byte[srcindex]
	}
}

//Returns the value stored in the slice's ByteLen field
Byte Length@String Slice[string:out]
{
	out <- [string]ByteLen >>
}

//Returns the value stored in the slice's Length field
Length@String Slice[string:out]
{
	out <- [string]Length >>
}

//Equality of two String Slices: byte lengths must match, then the two
//regions of the source buffers are compared at their respective offsets
_Slice=@String Slice[left,right:out]
{
	nbytes <- [left]Byte Length
	,out <- If[[nbytes]=[[right]Byte Length]]
	{
		lbuf <- [[left]Source >>]Buffer >>
		rbuf <- [[right]Source >>]Buffer >>
		diff <- _internal_memcmp_offset[lbuf, [left]Offset >>, rbuf, [right]Offset >>, nbytes]
		If[diff]
		{ out <- No }
		{ out <- Yes }
	}
}

//Flat String (left) compared with a String Slice (right): the operands are
//swapped and the comparison is handed to _Flat=
_Slice=@String[left,right:out]
{
	out <- _Flat=[right, left]
}

//String Cat compared with a String Slice: use the generic byte by byte
//comparison
_Slice=@String Cat[left,right:out]
{
	out <- [left]Eq String[right]
}

//Equality with a String Slice on the left: the operands are swapped so that
//_Slice= is selected by the type of right
=@String Slice[left,right:out]
{
	out <- _Slice=[right, left]
}

//Copies count bytes starting at offset in this string's buffer onto the end
//of dest and sets dest's length to its old length plus count.
//A count of zero returns dest untouched
_Flatten@String[string,dest,offset,count:out]
{
	If[count]
	{
		destlen <- [dest]Length
		copied <- _internal_array_copychunk[[string]Buffer >>, offset, dest, destlen, count]
		out <- [copied]Length <<[[destlen]+[count]]
	}{
		out <- dest
	}
}

//A String is returned unchanged by Flatten.
//NOTE(review): this repeats an identical Flatten@String definition that
//appears earlier in this file (just before _CPOff to BOff)
Flatten@String[string:out]
{
	out <- string
}

//Forwards the copy to the slice's source, shifting offset by the slice's
//own offset within that source
_Flatten@String Slice[string,dest,offset,count:out]
{
	srcoffset <- [[string]Offset >>]+[offset]
	out <- [[string]Source >>]_Flatten[dest, srcoffset, count]
}

//Produces a flat String holding the slice's bytes: a new UInt8 array is
//allocated and filled from the source. A zero ByteLen yields ""
Flatten@String Slice[string:out]
{
	nbytes <- [string]ByteLen >>
	If[nbytes]
	{
		fresh <- _internal_array_allocnaked[nbytes, UInt8()]
		filled <- [[string]Source >>]_Flatten[fresh, [string]Offset >>, nbytes]
		out <- [[Build[String()]]Buffer <<[filled]]Length <<[[string]Length >>]
	}{
		out <- ""
	}
}

//Prints the slice by flattening it first
Print@String Slice[string:out]
{
	out <- Print[[string]Flatten]
}

//Appending builds a String Cat node holding both operands
Append@String Slice[left,right:out]
{
	out <- String Cat[left,right]
}

//Splits a String Slice at a character offset. Both results are slices of
//the same source.
//Offsets at or past the end give (string, ""); offsets <= 0 give ("", string)
Slice@String Slice[string,slicepoint:left,right]
{
	If[[slicepoint]>=[[string]Length]]
	{
		left <- string
		right <- ""
	}{
		If[[slicepoint]<=[0]]
		{
			left <- ""
			right <- string
		}{
			src <- [string]Source >>
			start <- [string]Offset >>
			//Byte offset within the source buffer where the right part begins
			sliceoffset <- CPOff to BOff[[src]Buffer >>, 0i32, start, slicepoint]
			lbytes <- [sliceoffset]-[start]
			left <- String Slice[src, start, slicepoint, lbytes]
			rchars <- [[string]Length >>]-[slicepoint]
			rbytes <- [[string]Byte Length]-[lbytes]
			right <- String Slice[src, sliceoffset, rchars, rbytes]
		}
	}
}

//Concatenation node holding two strings.
//The String Cat worker fills Length and ByteLen with the sums of the
//operands' Length and Byte Length
Blueprint String Cat
{
	Left
	Right
	Length
	ByteLen
}

//Constructs a String Cat from two strings, caching the combined character
//and byte lengths
String Cat[left,right:out]
{
	chars <- [[left]Length]+[[right]Length]
	bytes <- [[left]Byte Length]+[[right]Byte Length]
	joined <- [[Build[String Cat()]]Left <<[left]]Right <<[right]
	out <- [[joined]Length <<[chars]]ByteLen <<[bytes]
}

//Appending builds a new String Cat node holding both operands
Append@String Cat[left,right:out]
{
	out <- String Cat[left,right]
}

//Looks up the byte at a byte index in the concatenation. Indices below the
//left half's byte length go to Left; others are rebased and go to Right,
//with indices beyond Right's byte length reported through invalid
Byte@String Cat[string,index:out,invalid]
{
	lhs <- [string]Left >>
	leftlen <- [lhs]Byte Length
	If[[index]<[leftlen]]
	{
		out,invalid <- [lhs]Byte[index]
	}{
		rhs <- [string]Right >>
		rindex <- [index]-[leftlen]
		,invalid <- If[[rindex]<[[rhs]Byte Length]]
		{
			out,invalid <- [rhs]Byte[rindex]
		}
	}
}

//Returns the value stored in the concatenation's ByteLen field
Byte Length@String Cat[string:out]
{
	out <- [string]ByteLen >>
}

//Returns the value stored in the concatenation's Length field
Length@String Cat[string:out]
{
	out <- [string]Length >>
}

//Copies count bytes, starting at byte offset within the concatenation, onto
//the end of dest.
//	string - the String Cat being copied from
//	dest   - destination byte array
//	offset - byte offset within the concatenation at which to start
//	count  - number of bytes to copy
//out is dest after both halves have contributed their share
_Flatten@String Cat[string,dest,offset,count:out]
{
	left <- [string]Left >>
	If[[offset] < [[left]Byte Length]]
	{
		//Only the bytes from offset to the end of the left half are available
		lcount <- Min[[[left]Byte Length]-[offset], count]
		ldest <- [left]_Flatten[dest, offset, lcount]
		rcount <- [count]-[lcount]
	}{
		//The requested range starts in the right half
		ldest <- Val[dest]
		rcount <- count
	}
	If[[[offset]+[count]]>[[left]Byte Length]]
	{
		right <- [string]Right >>
		roffset <- Max[0, [offset]-[[left]Byte Length]]
		//Likewise, never ask the right half for more than remains after roffset
		out <- [right]_Flatten[ldest, roffset, Min[[[right]Byte Length]-[roffset], rcount]]
	}{
		out <- Val[ldest]
	}
}

//Produces a flat String holding the concatenation's bytes: a new UInt8 array
//is allocated, filled from Left and then from Right. A zero ByteLen yields ""
Flatten@String Cat[string:out]
{
	total <- [string]ByteLen >>
	If[total]
	{
		lhs <- [string]Left >>
		rhs <- [string]Right >>
		fresh <- _internal_array_allocnaked[total, UInt8()]
		lfilled <- [lhs]_Flatten[fresh, 0i32, [lhs]Byte Length]
		filled <- [rhs]_Flatten[lfilled, 0i32, [rhs]Byte Length]
		out <- [[Build[String()]]Buffer <<[filled]]Length <<[[string]Length >>]
	}{
		out <- ""
	}
}

//Prints the concatenation by flattening it first
Print@String Cat[string:out]
{
	out <- Print[[string]Flatten]
}

//Splits a concatenation at a character offset.
//A split exactly on the seam returns the two halves as they are; otherwise
//the half containing the split point is sliced and the remaining half is
//joined back onto the appropriate side
Slice@String Cat[string,slicepoint:left,right]
{
	lhs <- [string]Left >>
	rhs <- [string]Right >>
	llen <- [lhs]Length
	If[[slicepoint]=[llen]]
	{
		left <- [string]Left >>
		right <- [string]Right >>
	}{
		If[[slicepoint]<[llen]]
		{
			left,lright <- [lhs]Slice[slicepoint]
			right <- String Cat[lright, rhs]
		}{
			rpoint <- [slicepoint]-[llen]
			rleft,right <- [rhs]Slice[rpoint]
			left <- String Cat[lhs, rleft]
		}
	}
}

//Equality with a String Cat on the left uses the generic byte by byte
//comparison
=@String Cat[left,right:out]
{
	out <- [left]Eq String[right]
}

//Tests whether string begins with one of the delimiters in delims,
//starting with the delimiter at index.
//	outindex - index in delims of the delimiter that matched
//	after    - remainder of string following the matched delimiter
//	nomatch  - set when no delimiter from index onward matches
=Delim[string,delims,index:outindex,after,nomatch]
{
	delim <- [delims]Index[index]
	If[[[string]Length]<[[delim]Length]]
	{
		//string is shorter than this delimiter, so it cannot match
		try next <- Yes
	}{
		check,mafter <- [string]Slice[[delim]Length]
		,try next <- If[[check]=[delim]]
		{
			outindex <- index
			after <- Val[mafter]
		}
	}
	//Only runs when the current delimiter did not match
	Val[try next]
	{
		,nomatch <- [delims]Next[index]
		{
			outindex,after,nomatch <- =Delim[string,delims,~]
		}
	}
}

//A String is used as a pattern as is
Pattern@String[string:out]
{
	out <- string
}

//A String Slice is used as a pattern as is
Pattern@String Slice[string:out]
{
	out <- string
}

//A String Cat is flattened before being used as a pattern
Pattern@String Cat[string:out]
{
	out <- [string]Flatten
}

//Tests whether cmp begins with this string.
//On success num is the string's length and idx is 0; otherwise no match
//is taken from the second output of If
Match@String[string,cmp:num,no match,idx]
{
	n <- [string]Length
	prefix <- [cmp]Slice[n]
	,no match <- If[[string]=[prefix]]
	{
		num <- Val[n]
		idx <- 0
	}
}

//Tests whether cmp begins with this slice.
//On success num is the slice's length and idx is 0; otherwise no match
//is taken from the second output of If
Match@String Slice[string,cmp:num,no match,idx]
{
	n <- [string]Length
	prefix <- [cmp]Slice[n]
	,no match <- If[[string]=[prefix]]
	{
		num <- Val[n]
		idx <- 0
	}
}


//Scans string for the first position at which delims matches.
//	matched   - the part of string that matched
//	after     - everything following the match
//	not found - set when string is exhausted without a match
_Partition[string,delims:matched,after,not found]
{
	not found <- If[[string]=[""]] {}
	{
		[delims]Match[string]
		{
			//Match's first output is used as the slice point
			matched,after <- [string]Slice[~]
		}{
			//No match here: drop the first character and try the remainder
			[string]Slice[1] {}
			{ matched,after,not found <- _Partition[~,delims] }
		}
	}
}

//Splits string around the first occurrence of delims.
//	before    - the part of string preceding the match
//	matched   - the part that matched
//	after     - the part following the match
//	not found - set by _Partition when there is no match
Partition[string,delims:before,matched,after,not found]
{
	matched,after,not found <- _Partition[string,Pattern[delims]]
	{ dlen <- Length[~] }
	{ alen <- Length[~] }
	//before is whatever remains once the match and the tail are removed
	before <- [string]Slice[ [[string]Length]-[[dlen]+[alen]] ]
}


//Returns the ID of the String blueprint
Dict Type ID@String[string:out]
{
	out <- ID[String()]
}

//Returns the ID of the String blueprint (not String Cat), the same value
//the String and String Slice variants return
Dict Type ID@String Cat[string:out]
{
	out <- ID[String()]
}

//Returns the ID of the String blueprint (not String Slice), the same value
//the String and String Cat variants return
Dict Type ID@String Slice[string:out]
{
	out <- ID[String()]
}

//Returns the byte at index widened to UInt32; invalid comes from Byte
//when there is no byte at index
Dict Bits@String[string,index:out,invalid]
{
	byte,invalid <- [string]Byte[index]
	out <- UInt32[byte]
}

//Returns the byte at index widened to UInt32; invalid comes from Byte
//when there is no byte at index
Dict Bits@String Cat[string,index:out,invalid]
{
	byte,invalid <- [string]Byte[index]
	out <- UInt32[byte]
}

//Returns the byte at index widened to UInt32; invalid comes from Byte
//when there is no byte at index
Dict Bits@String Slice[string,index:out,invalid]
{
	byte,invalid <- [string]Byte[index]
	out <- UInt32[byte]
}

//Fold step for From Dict Key: truncates el to UInt8 and appends it to arr
_From Dict String[arr,el:out]
{
	byte <- Trunc UInt8[el]
	out <- [arr]Append[byte]
}

//Rebuilds a String from data: every element is truncated to a byte and
//collected into an array, which is then converted with String
From Dict Key@String[string,data:out]
{
	bytes <- Fold[_From Dict String[?], Array[], data]
	out <- String[bytes]
}

//Converting a String to a string returns it unchanged
String@String[string:out]
{
	out <- string
}

//A String Cat is returned unchanged; it is not flattened here
String@String Cat[string:out]
{
	out <- string
}

//A String Slice is returned unchanged; it is not flattened here
String@String Slice[string:out]
{
	out <- string
}

//Replaces every occurrence of otoreplace in string.
//	string     - text to search
//	otoreplace - what to look for; converted with Pattern before use
//	with       - replacement: used directly when it is a String, String Slice
//	             or String Cat, otherwise indexed by the idx output of Match
//out is string itself when nothing matches
Replace[string,otoreplace,with:out]
{
	toreplace <- Pattern[otoreplace]
	,delim,after <-[string]Partition[toreplace]
	{
		wt <- Blueprint Of[with]
		If[ [[[wt]=[String()]] Or [[wt]=[String Slice()]]] Or [[wt]=[String Cat()]] ]
		{
			replacement <- with
		}{
			//with is a collection: pick the entry for the pattern that matched
			,,idx <- [toreplace]Match[delim]
			replacement <- [with]Index[idx]
		}
		//~ is the text before the match; the rest of the string is processed recursively
		out <- [[~]Append[replacement]]Append[Replace[after,toreplace,with]]
	} {} {} {
		//Partition reported not found
		out <- string
	}
}

//Appends the remaining elements of list to current, each preceded by delim.
//index is the position of the element that was added last
_Join[list,delim,current,index:out]
{
	[list]Next[index]
	{
		item <- String[[list]Index[~]]
		joined <- [[current]Append[delim]]Append[item]
		out <- _Join[list, delim, joined, ~]
	}{
		out <- current
	}
}

//Joins the elements of list into one string with delim between them.
//Each element is converted with String; a list with no first element gives ""
Join[list,delim:out]
{
	[list]First
	{
		head <- String[[list]Index[~]]
		out <- _Join[list, delim, head, ~]
	}{
		out <- ""
	}
}

//Compares the first Length[starts with] characters of thing with starts with
Starts With[thing,starts with:out]
{
	head <- [thing]Slice[[starts with]Length]
	out <- [head] = [starts with]
}

//Compares the last Length[ends with] characters of thing with ends with
Ends With[thing,ends with:out]
{
	cut <- [[thing]Length] - [[ends with]Length]
	,tail <- [thing]Slice[cut]
	out <- [tail] = [ends with]
}

//Truthiness of a String is decided by applying If to its Length
If@String[str:yes,no]
{
	yes,no <- If[Length[str]]
}

//Truthiness of a String Cat is decided by applying If to its Length
If@String Cat[str:yes,no]
{
	yes,no <- If[Length[str]]
}

//Truthiness of a String Slice is decided by applying If to its Length
If@String Slice[str:yes,no]
{
	yes,no <- If[Length[str]]
}

//Appends the pieces of string separated by delim to list.
//	list   - pieces collected so far
//	string - text still to be split
//	delim  - delimiter handed to Partition
_Split[list,string,delim:out]
{
	,,rest <- [string]Partition[delim]
	{
		//~ is the text before the delimiter; continue with what follows it
		out <- _Split[[list]Append[~], rest, delim]
	} {} {} {
		//No further delimiter: what is left is the final piece
		out <- [list]Append[string]
	}
}

//Splits string into a list of pieces separated by delim.
//A string that tests false under If yields the empty list ()
Split[string,delim:out]
{
	If[string]
	{ out <- _Split[(),string,delim] }
	{ out <- () }
}

//Tests whether needle occurs anywhere in haystack by comparing needle with
//the start of haystack, then retrying with the first character removed.
//out is No once haystack tests false under If
In[needle,haystack:out]
{
	If[haystack]
	{
		out <- If[[[haystack]Slice[Length[needle]]]=[needle]] {}
		{
			//No match at this position: drop one character and search the rest
			[haystack]Slice[1] {}
			{ out <- [needle]In[~] }
		}
	}{
		out <- No
	}
}

//Removes leading characters from string for as long as the first character
//is found in trim
Left Trim[string,trim:trimmed]
{
	len <- [string]Length
	If[ [len] > [0] ]
	{
		head,tail <- [string]Slice[1]
		If[ [head]In[trim] ]
		{
			trimmed <- Left Trim[tail, trim]
		}{
			trimmed <- string
		}
	}{
		trimmed <- string
	}
}

//Removes trailing characters from string for as long as the last character
//is found in trim
Right Trim[string,trim:trimmed]
{
	len <- [string]Length
	If[ [len] > [0] ]
	{
		body,tail <- [string]Slice[ [len] - [1] ]
		If[ [tail]In[trim] ]
		{
			trimmed <- Right Trim[body, trim]
		}{
			trimmed <- string
		}
	}{
		trimmed <- string
	}
}

//Removes characters found in trim from both ends of string: the left end
//is trimmed first, then the right
Trim[string,trim:trimmed]
{
	trimmed <- Right Trim[Left Trim[string, trim], trim]
}

//Tests whether needle occurs in haystack using Partition: Yes when the
//first output of Partition is produced, No when its not found output is
Contains[haystack,needle:out]
{
	[haystack]Partition[needle]
	{
		out <- Yes	
	} {} {} {
		out <- No
	}
}

//Strips prefix from the start of text.
//after is the remainder when text starts with prefix; otherwise not found
//receives text unchanged
After[text,prefix:after,not found]
{
	If[[text]Starts With[prefix]]
	{
		skip <- [prefix]Length
		,after <- [text]Slice[skip]
	}{
		not found <- text
	}
}