view string.rhope @ 51:7d6a6906b648

Added integer type conversions and started on the implementation of String
author Mike Pavone <pavone@retrodev.com>
date Thu, 22 Apr 2010 02:18:26 -0400
parents
children 079200bc3e75
line wrap: on
line source


//Type with no fields. It is created by the Null String worker and its
//Depth is always reported as 0.
//NOTE(review): presumably stands in for an empty string - confirm.
Blueprint Null String
{
}

//Constructor: takes no inputs and outputs a freshly built Null String.
Null String[:out(Null String)]
{
	out <- Build[Null String()]
}

//Depth of a Null String: always the Int32 constant 0.
//The receiver (in) is not inspected.
Depth@Null String[in:out(Int32)]
{
	out <- 0i32
}

//String backed directly by a byte array.
//	Buffer: the Array handed to the String constructor, stored unchanged
//	Length: the value the String constructor obtains from Count UTF8
//	        (a count of characters, not of bytes)
Blueprint Base String
{
	Buffer
	Length(Int32,Naked)
}

//Helper for Count UTF8: checks the continuation bytes of a multi-byte sequence
//	num:      continuation bytes still expected (1 means this is the last one)
//	arr:      byte array being scanned
//	index:    position of the byte to examine
//	count:    characters counted before the current sequence started
//	consumed: bytes of the current sequence that have already been read
//	out:      total character count for the whole array
//A well-formed sequence adds 1 to count. A malformed sequence adds one
//character for every byte read so far, including the offending byte.
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				//byte is in 128..191, i.e. a continuation byte
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					//More continuation bytes expected: examine the next position
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				//counted like the ASCII case: every byte read so far is a character
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		//the bytes already read are each counted as a character
		out <- [count]+[consumed]
	}
	//Only runs once ncount has a value, i.e. the sequence was finished
	//(successfully or not) at this byte
	Val[ncount]
	{
		[arr]Next[index]
		{
			//More bytes follow: resume normal counting at the next position
			out <- Count UTF8[arr, ~, ncount]
		}{
			//That was the last byte: ncount is the final result
			out <- Val[ncount]
		}
	}
}

//Counts the characters encoded as UTF-8 in a byte array
//	arr:   byte array being scanned
//	index: position of the byte to examine (callers in this file start at 0)
//	count: characters counted so far (callers in this file start at 0)
//	out:   total character count for the whole array
//The lead byte selects how many continuation bytes UTF8 Expect must check:
//	below 128  single byte character
//	128..191   stray continuation byte, counted as one character
//	192..223   1 continuation byte expected
//	224..239   2 continuation bytes expected
//	240..244   3 continuation bytes expected
//	245..255   outside the Unicode range, counted as one character
//If there is no byte at index (for example an empty array) the running
//count is returned unchanged instead of producing no output at all.
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			ncount <- [1i32]+[count]
		}{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//Nothing stored at index: there is nothing left to count
		out <- Val[count]
	}
	//ncount only has a value when this byte was counted here. When the byte
	//started a multi-byte sequence, UTF8 Expect supplies out instead and
	//neither assignment below fires.
	[arr]Next[index]
	{
		out <- Count UTF8[arr, ~, ncount]
	}{
		out <- Val[ncount]
	}
}

//Composite string type. None of its fields are read or written by the code
//visible in this file.
//NOTE(review): the field names suggest a node joining a Left and a Right
//substring, with L Offset/L Length describing the left part and Depth/Length
//describing the node as a whole - confirm against the code that uses it.
Blueprint String
{
	Left
	Right
	L Offset(Int32,Naked)
	L Length(Int32,Naked)
	Depth(Int32,Naked)
	Length(Int32,Naked)
}

//Constructor: wraps a byte array in a Base String
//	in:  Array of bytes, stored unchanged as the Buffer
//	out: Base String whose Length is the character count that Count UTF8
//	     reports for the array, scanning from index 0 with a total of 0
String[in(Array):out(Base String)]
{
	chars <- Count UTF8[in, 0, 0]
	base <- [Build[Base String()]]Buffer <<[in]
	out <- [base]Length <<[chars]
}

//Manual test for Count UTF8: builds a 13 byte array and prints its
//character count. Going by the lead-byte ranges in Count UTF8, the bytes are:
//	36                 single byte character
//	194 162            2 byte sequence
//	236 130 172        3 byte sequence
//	240 164 173 162    4 byte sequence
//	194 36             2 byte lead followed by an ASCII byte (malformed)
//	162                stray continuation byte (malformed)
//NOTE(review): tracing the counting rules by hand gives 4 well-formed
//characters plus 3 for the malformed tail, so 7 should be printed - confirm
//by running it.
Main[]
{
	text <- [[[[[[[[[[[[[Array[1]
		]Append[36u8]
		]Append[194u8]
		]Append[162u8]
		]Append[236u8]
		]Append[130u8]
		]Append[172u8]
		]Append[240u8]
		]Append[164u8]
		]Append[173u8]
		]Append[162u8]
		]Append[194u8]
		]Append[36u8]
		]Append[162u8]
	Print[Count UTF8[text, 0, 0]]
}