comparison string.rhope @ 51:7d6a6906b648

Added integer type conversions and started on the implementation of String
author Mike Pavone <pavone@retrodev.com>
date Thu, 22 Apr 2010 02:18:26 -0400
parents
children 079200bc3e75
comparison
equal deleted inserted replaced
50:689fb73e7612 51:7d6a6906b648
1
// Blueprint for Null String: declares no fields.
// Depth@Null String in this file reports a depth of 0 for it.
2 Blueprint Null String
3 {
4 }
5
// Constructor worker: takes no inputs and outputs the value produced by
// Build for the Null String blueprint.
6 Null String[:out(Null String)]
7 {
8 out <- Build[Null String()]
9 }
10
// Depth of a Null String: always outputs 0 as an Int32.
// The input value itself is not read.
11 Depth@Null String[in:out(Int32)]
12 {
13 out <- 0i32
14 }
15
// Blueprint for Base String.
//   Buffer - set by String[] to the array it was given
//   Length - set by String[] to the result of Count UTF8 over that array
// NOTE(review): "Naked" presumably means the Int32 is stored unboxed - confirm.
16 Blueprint Base String
17 {
18 Buffer
19 Length(Int32,Naked)
20 }
21
// Consumes the continuation bytes of a multi-byte UTF-8 sequence whose lead
// byte has already been read by Count UTF8, then resumes counting.
//   num      - continuation bytes still expected
//   arr      - byte array being scanned
//   index    - position of the byte to examine
//   count    - characters counted before this sequence started
//   consumed - bytes of this sequence read so far (lead byte included)
//   out      - character count for the array from its start through its end
// Malformed input is counted as one character per byte.
UTF8 Expect[num,arr,index,count,consumed:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{
			//Error: ASCII byte when we were expecting part of a multibyte sequence
			//treat each byte as a separate character
			ncount <- [1i32]+[[count]+[consumed]]
		}{
			If[[192u8]>[byte]]
			{
				If[[num]=[1]]
				{
					//Sequence is complete count as single character
					ncount <- [1i32]+[count]
				}{
					//More continuation bytes expected: keep going without touching count
					out <- UTF8 Expect[[num]-[1], arr, [index]+[1], count, [1i32]+[consumed]]
				}
			}{
				//Error: too high to be a continuation byte
				//Count every byte consumed so far as its own character, then let
				//Count UTF8 classify this byte again: it may be the lead byte of a
				//well-formed sequence, whose continuation bytes must not be counted
				//as stray characters.
				out <- Count UTF8[arr, index, [count]+[consumed]]
			}
		}
	}{
		//Error: string ended in the middle of a multi-byte sequence
		out <- [count]+[consumed]
	}
	//Only the branches that set ncount continue here; the others have already
	//routed a result to out.
	Val[ncount]
	{
		[arr]Next[index]
		{
			//Another byte follows: resume ordinary counting there
			out <- Count UTF8[arr, ~, ncount]
		}{
			//End of array: ncount is the final total
			out <- Val[ncount]
		}
	}
}
60
// Counts the characters in a UTF-8 encoded byte array.
//   arr   - byte array to scan
//   index - position of the byte to classify
//   count - characters counted so far
//   out   - total character count once the end of arr is reached
// The byte at index is classified by value:
//   < 128  single-byte (ASCII) character
//   < 192  continuation byte with no lead byte (error, counted as one character)
//   < 224  lead byte expecting 1 continuation byte
//   < 240  lead byte expecting 2 continuation bytes
//   < 245  lead byte expecting 3 continuation bytes
//   else   outside the Unicode range (error, counted as one character)
// Multi-byte sequences are handed to UTF8 Expect, which continues the scan.
Count UTF8[arr,index,count:out]
{
	byte <- [arr]Index[index]
	{
		If[[128u8]>[byte]]
		{ ncount <- [1i32]+[count] }
		{
			If[[192u8]>[byte]]
			{
				//Error: Encoding for 2nd,3rd or 4th byte of sequence
				//treat as a single character
				ncount <- [1i32]+[count]
			}{
				If[[224u8]>[byte]]
				{
					out <- UTF8 Expect[1, arr, [index]+[1], count, 1]
				}{
					If[[240u8]>[byte]]
					{
						out <- UTF8 Expect[2, arr, [index]+[1], count, 1]
					}{
						If[[245u8]>[byte]]
						{
							out <- UTF8 Expect[3, arr, [index]+[1], count, 1]
						}{
							//Error: Out of range of Unicode standard
							//treat as a single character
							ncount <- [1i32]+[count]
						}
					}
				}
			}
		}
	}{
		//No byte at index (for example an empty array): nothing left to count.
		//Without this branch neither ncount nor out would ever receive a value.
		out <- Val[count]
	}
	//Advance to the next byte. Both branches wait on ncount, so they only fire
	//when the current byte was counted directly above.
	[arr]Next[index]
	{
		out <- Count UTF8[arr, ~, ncount]
	}{
		out <- Val[ncount]
	}
}
100
// Blueprint for String. Nothing in this view constructs or reads it.
// NOTE(review): Left/Right together with Depth, L Offset and L Length look
// like the node of a tree of string pieces - confirm against later revisions.
// NOTE(review): "Naked" presumably means the Int32 is stored unboxed - confirm.
101 Blueprint String
102 {
103 Left
104 Right
105 L Offset(Int32,Naked)
106 L Length(Int32,Naked)
107 Depth(Int32,Naked)
108 Length(Int32,Naked)
109 }
110
// Wraps an array of UTF-8 bytes in a Base String.
//   in  - byte array; stored as the Buffer field
//   out - Base String whose Length field is the character count that
//         Count UTF8 reports for the array
String[in(Array):out(Base String)]
{
	//Count characters starting at index 0 with a running total of 0
	chars <- Count UTF8[in, 0, 0]
	//Fill in the fields one at a time on a freshly built Base String
	with buffer <- [Build[Base String()]]Buffer <<[in]
	out <- [with buffer]Length <<[chars]
}
115
// Smoke test for Count UTF8: builds a byte array mixing well-formed and
// malformed UTF-8 and prints the resulting character count.
Main[]
{
	//One ASCII byte: 36 is '$'
	one byte <- [Array[1]]Append[36u8]
	//Two-byte sequence C2 A2
	two byte <- [[one byte]Append[194u8]]Append[162u8]
	//Three-byte sequence EC 82 AC
	//NOTE(review): the euro sign would be E2 82 AC (lead byte 226); the count
	//is the same either way.
	three byte <- [[[two byte]Append[236u8]]Append[130u8]]Append[172u8]
	//Four-byte sequence F0 A4 AD A2
	four byte <- [[[[three byte]Append[240u8]]Append[164u8]]Append[173u8]]Append[162u8]
	//Malformed tail: a two-byte lead (194) cut short by ASCII '$' (36),
	//followed by a continuation byte (162) with no lead byte
	text <- [[[four byte]Append[194u8]]Append[36u8]]Append[162u8]
	Print[Count UTF8[text, 0, 0]]
}
134