diff string.rhope @ 51:7d6a6906b648

Added integer type conversions and started on the implementation of String
author Mike Pavone <pavone@retrodev.com>
date Thu, 22 Apr 2010 02:18:26 -0400
parents
children 079200bc3e75
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/string.rhope	Thu Apr 22 02:18:26 2010 -0400
@@ -0,0 +1,134 @@
+
+//Type with no fields. Depth@Null String reports a depth of 0 for it.
+Blueprint Null String
+{
+}
+
+//Constructor: takes no inputs and returns a freshly built Null String.
+Null String[:out(Null String)]
+{
+	out <- Build[Null String()]
+}
+
+//Depth of a Null String is always 0 (as an Int32); the input is not inspected.
+Depth@Null String[in:out(Int32)]
+{
+	out <- 0i32
+}
+
+//Flat string: the String[] constructor stores the byte Array it was given in
+//Buffer and the result of Count UTF8 over that array in Length.
+Blueprint Base String
+{
+	Buffer
+	Length(Int32,Naked)
+}
+
+//Reads the continuation bytes of a multi-byte UTF-8 sequence, then resumes
+//counting with Count UTF8.
+//	num:      continuation bytes still expected
+//	arr:      byte array being scanned
+//	index:    position of the byte to examine
+//	count:    characters counted before the current sequence began
+//	consumed: bytes of the current sequence already read (lead byte included)
+//	out:      character count once the end of arr has been reached
+//A malformed sequence is counted as one character per byte already consumed.
+UTF8 Expect[num,arr,index,count,consumed:out]
+{
+	byte <- [arr]Index[index]
+	{
+		If[[128u8]>[byte]]
+		{
+			//Error: ASCII byte when we were expecting part of a multibyte sequence
+			//treat each byte as a separate character
+			ncount <- [1i32]+[[count]+[consumed]]
+		}{
+			If[[192u8]>[byte]]
+			{
+				If[[num]=[1]]
+				{
+					//Sequence is complete count as single character
+					ncount <- [1i32]+[count]
+				}{
+					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
+				}
+			}{
+				//Error: too high to be a continuation byte
+				//Count the bytes consumed so far as separate characters, then
+				//restart at this same byte: it may be the lead byte of a
+				//well-formed sequence, which must count as one character
+				out <- Count UTF8[arr, index, [count]+[consumed]]
+			}
+		}
+	}{
+		//Error: string ended in the middle of a multi-byte sequence
+		out <- [count]+[consumed]
+	}
+	//Only advance when this call produced a count itself; the recursive and
+	//end-of-array paths above populate out directly
+	Val[ncount]
+	{
+		[arr]Next[index]
+		{
+			out <- Count UTF8[arr, ~, ncount]
+		}{
+			out <- Val[ncount]
+		}
+	}
+}
+
+//Counts UTF-8 characters in arr starting at index.
+//	arr:   byte array being scanned
+//	index: position of the next lead byte to classify
+//	count: characters counted so far
+//	out:   character count once the end of arr has been reached
+//Bytes that cannot start a sequence are counted as one character each.
+Count UTF8[arr,index,count:out]
+{
+	byte <- [arr]Index[index]
+	{
+		If[[128u8]>[byte]]
+		{
+			//ASCII: one byte, one character
+			ncount <- [1i32]+[count]
+		}{
+			If[[192u8]>[byte]]
+			{
+				//Error: Encoding for 2nd,3rd or 4th byte of sequence
+				//treat as a single character
+				ncount <- [1i32]+[count]
+			}{
+				If[[224u8]>[byte]]
+				{
+					//Lead byte of a 2-byte sequence
+					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
+				}{
+					If[[240u8]>[byte]]
+					{
+						//Lead byte of a 3-byte sequence
+						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
+					}{
+						If[[245u8]>[byte]]
+						{
+							//Lead byte of a 4-byte sequence
+							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
+						}{
+							//Error: Out of range of Unicode standard
+							//treat as a single character
+							ncount <- [1i32]+[count]
+						}
+					}
+				}
+			}
+		}
+	}{
+		//No byte at index (for example arr is empty): nothing left to count
+		out <- Val[count]
+	}
+	//Only advance when this call produced a count itself; the UTF8 Expect
+	//and not-found paths above populate out directly
+	Val[ncount]
+	{
+		[arr]Next[index]
+		{
+			out <- Count UTF8[arr, ~, ncount]
+		}{
+			out <- Val[ncount]
+		}
+	}
+}
+
+//NOTE(review): nothing visible here constructs or reads this type yet. The
+//Left/Right/Depth fields suggest a tree node joining two strings - TODO confirm.
+Blueprint String
+{
+	Left
+	Right
+	L Offset(Int32,Naked)
+	L Length(Int32,Naked)
+	Depth(Int32,Naked)
+	Length(Int32,Naked)
+}
+
+//Wraps a byte Array in a Base String: the array becomes Buffer and the number
+//of UTF-8 characters found by Count UTF8 becomes Length.
+String[in(Array):out(Base String)]
+{
+	chars <- Count UTF8[in, 0, 0]
+	out <- [[Build[Base String()]]Buffer <<[in]]Length <<[chars]
+}
+
+//Manual test driver: builds a byte array and prints the character count that
+//Count UTF8 reports for it.
+Main[]
+{
+	//Bytes appended, in order, as classified by the thresholds in Count UTF8:
+	//	36                 ASCII
+	//	194 162            2-byte sequence
+	//	236 130 172        3-byte sequence
+	//	240 164 173 162    4-byte sequence
+	//	194 36             2-byte lead followed by an ASCII byte (error case)
+	//	162                stray continuation byte (error case)
+	//NOTE(review): tracing the counting rules by hand gives 7 for this input,
+	//assuming Next yields successive indices - confirm against a real run.
+	text <- [[[[[[[[[[[[[Array[1]
+		]Append[36u8]
+		]Append[194u8]
+		]Append[162u8]
+		]Append[236u8]
+		]Append[130u8]
+		]Append[172u8]
+		]Append[240u8]
+		]Append[164u8]
+		]Append[173u8]
+		]Append[162u8]
+		]Append[194u8]
+		]Append[36u8]
+		]Append[162u8]
+	Print[Count UTF8[text, 0, 0]]
+}
+