Mercurial > repos > rhope
view string.rhope @ 72:f7bcf3db1342
Add =@String, fix bool bug
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Fri, 18 Jun 2010 17:00:50 -0400 |
parents | c40c3d399133 |
children | a748300a4143 |
line wrap: on
line source
//UTF8 Expect: checks the continuation bytes of a multi-byte UTF-8 sequence
//while counting characters.
//  num      - number of continuation bytes still expected
//  arr      - byte array being scanned
//  index    - index of the byte to examine
//  count    - characters counted before the current sequence started
//  consumed - bytes of the current sequence already consumed
//  out      - character count once the rest of the array has been scanned
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				//byte is in 128..191, i.e. a continuation byte
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					//More continuation bytes expected, keep going
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				//treat each byte as a separate character
				ncount <- [1i32]+[[count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		//count each byte consumed so far as a separate character
		out <- [count]+[consumed]
	}
	//Only reached when ncount was produced above: resume normal counting
	//at the next index, or finish if there is none
	Val[ncount]
	{
		[arr]Next[index]
		{
			out <- Count UTF8[arr, ~, ncount]
		}{
			out <- Val[ncount]
		}
	}
}

//Count UTF8: counts the characters in a byte array holding UTF-8 text,
//starting at index with count characters already counted.
//Malformed input is tolerated: stray or out of range bytes are each
//counted as a single character.
//  arr   - byte array being scanned
//  index - index of the first byte of the next character
//  count - characters counted so far
//  out   - total character count
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//ASCII byte: one character
			ncount <- [1i32]+[count]
		}{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					//Lead byte of a 2 byte sequence
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						//Lead byte of a 3 byte sequence
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							//Lead byte of a 4 byte sequence
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//No byte at this index (e.g. the array is empty): nothing left to count.
		//Without this branch neither byte nor ncount is produced and out
		//would never be assigned.
		out <- Val[count]
	}
	//These assignments depend on ncount, which is only produced by the
	//single-character branches above
	[arr]Next[index]
	{
		out <- Count UTF8[arr, ~, ncount]
	}{
		out <- Val[ncount]
	}
}

//String: a byte buffer plus a length
//  Buffer - the underlying array (String[] stores its input array here)
//  Length - value computed by Count UTF8 over the buffer, i.e. a character
//           count rather than a byte count
Blueprint String
{
	Buffer
	Length(Int32,Naked)
}

//Builds a String from a byte array; Length is computed with Count UTF8
String[in(Array):out(String)]
{
	out <- [[Build[String()]]Buffer <<[in]]Length <<[Count UTF8[in, 0, 0]]
}

//Writes the string's buffer to file descriptor 1, then a single byte 10
//(line feed). The number of bytes written is the Length of the buffer array,
//not the String's character Length.
//out is whatever the second write call outputs
Print@String[string:out]
{
	//TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
	write[1i32, [string]Buffer >>, Int64[[[string]Buffer >>]Length >>]]
	{
		out <- write[1i32, [Array[]]Append[10u8], 1i64]
	}
}

//Reads 1 byte from file descriptor 0 into a one element array and wraps
//the result in a String.
//NOTE(review): the first output of read is deliberately ignored and the
//second is used as the buffer; confirm the meaning of read's outputs
//against its definition
Get Char[:out]
{
	read[0, [Array[]]Set[0, 0u8], 1i64]
	{}
	{
		out <- String[~]
	}
}

//Helper for Int32@String: accumulates decimal digits (bytes 48..57) from
//array starting at index on top of current. Stops at the first byte that
//is not a digit or when index is past the end of the array.
//  current - value accumulated so far
//  index   - index of the next byte to examine
//  array   - byte array to parse
//  out     - accumulated value
_String to Int32[current,index,array:out]
{
	char <- [array]Index[index]
	{
		If[[char]<[48u8]]
		{
			//Below '0': not a digit, stop
			out <- Val[current]
		}{
			If[[char]>[57u8]]
			{
				//Above '9': not a digit, stop
				out <- Val[current]
			}{
				out <- _String to Int32[[[current]*[10i32]]+[Int32[[char]-[48u8]]], [index]+[1], array]
			}
		}
	}{
		//End of array
		out <- Val[current]
	}
}

//Parses the leading decimal digits of the string as an Int32.
//A leading byte 45 ('-') negates the result. An empty buffer yields 0.
Int32@String[string:out]
{
	buf <- [string]Buffer >>
	[buf]Index[0]
	{
		If[[~]=[45u8]]
		{
			//Leading '-': parse from index 1 and negate
			out <- [0i32]-[_String to Int32[0i32, 1, buf]]
		}{
			out <- _String to Int32[0i32, 0, buf]
		}
	}{
		out <- 0i32
	}
}

//A String is already flat, so it is returned unchanged
Flatten@String[string:out]
{
	out <- string
}

//Splits the string at slicepoint into two String Slice values.
//  left  - offset 0, length slicepoint
//  right - offset slicepoint, length (string Length - slicepoint)
//NOTE(review): Length here is the Count UTF8 character count, while
//Byte@String indexes the raw buffer; confirm which unit String Slice
//offsets are meant to use
Slice@String[string,slicepoint:left,right]
{
	//TODO: Handle invalid slicepoints
	left <- String Slice[string, 0i32, slicepoint]
	right <- String Slice[string, slicepoint, [[string]Length >>]-[slicepoint]]
}

//Fetches the byte at index from the string's buffer.
//Both outputs of Index on the buffer are passed through as out and invalid
Byte@String[string,index:out,invalid]
{
	out,invalid <- [[string]Buffer >>]Index[index]
}

//Returns the stored Length field
Length@String[string:out]
{
	out <- [string]Length >>
}

//Helper for =@String: compares left and right byte by byte from index.
//Outputs Yes once left has no byte at index; on a mismatch the second
//output of If becomes the result.
//NOTE(review): the end of the comparison is decided by left alone. The
//caller only checks that the Length values match, so two strings with the
//same Length but different byte counts (possible with malformed UTF-8)
//may not be handled as intended - confirm
_=String[left,right,index:out]
{
	[left]Byte[index]
	{
		,out <- If[[~]=[[right]Byte[index]]]
		{
			out <- _=String[left,right,[index]+[1]]
		}
	}{
		out <- Yes
	}
}

//Equality for strings: the Length values are compared first and the bytes
//are only compared when they match. When the lengths differ the second
//output of If becomes the result.
=@String[left,right:out]
{
	,out <- If[[[left]Length] = [[right]Length]]
	{
		out <- _=String[left,right,0]
	}
}

//String Slice: a view onto part of another string
//  Source - the value being sliced
//  Offset - start of the slice within Source
//  Length - length of the slice
Blueprint String Slice
{
	Source
	Offset(Int32,Naked)
	Length(Int32,Naked)
}

//Builds a String Slice from its three fields
String Slice[source,offset,length:out(String Slice)]
{
	out <- [[[Build[String Slice()]]Source <<[source]]Offset <<[offset]]Length <<[length]
}