comparison string.rhope @ 78:4d5ea487f810

Working String implementation and some basic (but nowhere near exhaustive) tests
author Mike Pavone <pavone@retrodev.com>
date Thu, 08 Jul 2010 21:55:47 -0400
parents a748300a4143
children 27bb051d631c
comparison
equal deleted inserted replaced
77:a748300a4143 78:4d5ea487f810
84 Length(Int32,Naked) 84 Length(Int32,Naked)
85 } 85 }
86 86
87 String@Array[in:out(String)] 87 String@Array[in:out(String)]
88 { 88 {
89 out <- [[Build[String()]]Buffer <<[in]]Length <<[Count UTF8[in, 0, 0]] 89 [in]First
90 { len <- Count UTF8[in, ~, 0i32] }
91 { len <- 0i32 }
92 out <- [[Build[String()]]Buffer <<[in]]Length <<[len]
90 } 93 }
91 94
92 Print@String[string:out] 95 Print@String[string:out]
93 { 96 {
94 //TODO: Sanitize string (remove terminal escapes and replace invalid UTF) 97 //TODO: Sanitize string (remove terminal escapes and replace invalid UTF)
144 Flatten@String[string:out] 147 Flatten@String[string:out]
145 { 148 {
146 out <- string 149 out <- string
147 } 150 }
148 151
149 _CPOff to BOff[buff,cur,expected:outcur,outboff] 152 _CPOff to BOff[buff,cur,boff,cpoff,expected,used:out]
150 { 153 {
151 If[expected] 154 If[expected]
152 { 155 {
153 outcur <- cur 156 byte <- [buff]Index[boff]
154 outboff <- 0i32
155 }{
156 err <- If[[byte]>[192u8]] {} 157 err <- If[[byte]>[192u8]] {}
157 { 158 {
158 err <- If[[byte]<[128u8]] {} 159 err <- If[[byte]<[128u8]] {}
159 { 160 {
160 outcur <- _CPOff to BOff[buff, [cur]+[1i32], [expected]-[1i32]] {} 161 out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, [expected]-[1i32], [used]+[1i32]]
161 { outboff <- [~]+[1i32] }
162 } 162 }
163 } 163 }
164 164
165 Val[err] 165 Val[err]
166 { 166 {
167 outcur <- [cur]+[1i32] 167 ncur <- [cur]+[used]
168 outboff <- 1i32 168 If[[ncur]>[cpoff]]
169 } 169 {
170 out <- [boff]-[[cpoff]-[ncur]]
171 }{
172 out <- CPOff to BOff[buff,ncur,boff,cpoff]
173 }
174 }
175 }{
176 out <- CPOff to BOff[buff,[cur]+[1i32],boff,cpoff]
170 } 177 }
171 } 178 }
172 179
173 CPOff to BOff[buff,cur,boff,cpoff:out] 180 CPOff to BOff[buff,cur,boff,cpoff:out]
174 { 181 {
175 If[[cur]=[cpoff]] 182 If[[cur]=[cpoff]]
176 { 183 {
177 out <- boff 184 out <- boff
178 }{ 185 }{
179 byte <- [buff]Index[cur] 186 byte <- [buff]Index[boff]
180 If[[byte] < [128u8]] 187 If[[byte] < [128u8]]
181 { 188 {
182 nboff <- [bof]+[1i32] 189 nboff <- [boff]+[1i32]
183 ncur <- [cur]+[1i32] 190 ncur <- [cur]+[1i32]
184 }{ 191 }{
185 If[[byte]<[192u8]] 192 If[[byte]<[192u8]]
186 { 193 {
187 //Error: Encoding for 2nd,3rd or 4th byte of sequence 194 //Error: Encoding for 2nd,3rd or 4th byte of sequence
188 //treat as a single character 195 //treat as a single character
189 nboff <- [bof]+[1i32] 196 nboff <- [boff]+[1i32]
190 ncur <- [cur]+[1i32] 197 ncur <- [cur]+[1i32]
191 }{ 198 }{
192 If[[byte]<[224u8]] 199 If[[byte]<[224u8]]
193 { 200 {
194 expect <- 1i32 201 expect <- 1i32
200 If[[byte]<[245u8]] 207 If[[byte]<[245u8]]
201 { 208 {
202 expect <- 3i32 209 expect <- 3i32
203 }{ 210 }{
204 //Error 211 //Error
205 nboff <- [bof]+[1i32] 212 nboff <- [boff]+[1i32]
206 ncur <- [cur]+[1i32] 213 ncur <- [cur]+[1i32]
207 } 214 }
208 } 215 }
209 } 216 }
210 Val[expect] 217 Val[expect]
211 { 218 {
212 ncur <- _CPOff to BOff[buff, [cur]+[1i32], expect] {} 219 out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, expect, 1i32] {}
213 { nboff <- [1i32]+[~] }
214 } 220 }
215 } 221 }
216 } 222 }
217 out <- CPOff to BOff[buff, ncur, cpoff, nboff] 223 out <- CPOff to BOff[buff, ncur, cpoff, nboff]
218 } 224 }
238 244
239 _=String[left,right,index:out] 245 _=String[left,right,index:out]
240 { 246 {
241 [left]Byte[index] 247 [left]Byte[index]
242 { 248 {
243 ,out <- If[[~]=[[right]Byte[index]]] 249 rbyte <- [right]Byte[index] {}
250 {
251 Print["Could not fetch byte from right string at offset:"]
252 { Print[index] }
253 out <- No
254 }
255 ,out <- If[[~]=[rbyte]]
244 { 256 {
245 out <- _=String[left,right,[index]+[1]] 257 out <- _=String[left,right,[index]+[1]]
246 } 258 }
247 }{ 259 }{
248 out <- Yes 260 out <- Yes
296 Byte Length@String Slice[string:out] 308 Byte Length@String Slice[string:out]
297 { 309 {
298 out <- [string]ByteLen >> 310 out <- [string]ByteLen >>
299 } 311 }
300 312
313 Length@String Slice[string:out]
314 {
315 out <- [string]Length >>
316 }
317
301 =@String Slice[left,right:out] 318 =@String Slice[left,right:out]
302 { 319 {
303 out <- Eq String[left,right] 320 out <- Eq String[left,right]
304 } 321 }
305 322
323 out <- [[string]Source >>]_Flatten[dest, [[string]Offset >>]+[offset], count] 340 out <- [[string]Source >>]_Flatten[dest, [[string]Offset >>]+[offset], count]
324 } 341 }
325 342
326 Flatten@String Slice[string:out] 343 Flatten@String Slice[string:out]
327 { 344 {
328 out <- String[ [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>] ] 345 out <- [[Build[String()]]Buffer <<[ [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>] ]
346 ]Length <<[[string]Length >>]
347 }
348
349 Print@String Slice[string:out]
350 {
351 out <- Print[Flatten[string]]
329 } 352 }
330 353
331 Append@String Slice[left,right:out] 354 Append@String Slice[left,right:out]
332 { 355 {
333 out <- String Cat[left,right] 356 out <- String Cat[left,right]
357 }
358
359 Slice@String Slice[string,slicepoint:left,right]
360 {
361 //TODO: Handle invalid slicepoints
362 sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, slicepoint]
363 left <- String Slice[[string]Source >>, [string]Offset >>, slicepoint, [sliceoffset]-[[string]Offset >>]]
364 right <- String Slice[[string]Source >>, sliceoffset, [[string]Length >>]-[slicepoint], [[[string]Source >>]Byte Length]-[sliceoffset]]
334 } 365 }
335 366
336 Blueprint String Cat 367 Blueprint String Cat
337 { 368 {
338 Left 369 Left
373 Byte Length@String Cat[string:out] 404 Byte Length@String Cat[string:out]
374 { 405 {
375 out <- [string]ByteLen >> 406 out <- [string]ByteLen >>
376 } 407 }
377 408
409 Length@String Cat[string:out]
410 {
411 out <- [string]Length >>
412 }
413
378 _Flatten@String Cat[string,dest,offset,count:out] 414 _Flatten@String Cat[string,dest,offset,count:out]
379 { 415 {
380 [string]Left >> 416 left <- [string]Left >>
381 } 417 If[[offset] < [[left]Byte Length]]
382 418 {
419 lcount <- Min[[left]Byte Length, count]
420 ldest <- [left]_Flatten[dest, offset, lcount]
421 rcount <- [count]-[lcount]
422 }{
423 ldest <- Val[dest]
424 rcount <- count
425 }
426 If[[[offset]+[count]]>[[left]Byte Length]]
427 {
428 right <- [string]Right >>
429 roffset <- Max[0, [offset]-[[left]Byte Length]]
430 out <- [right]_Flatten[ldest, roffset, Min[[right]Byte Length, rcount]]
431 }{
432 out <- Val[ldest]
433 }
434 }
435
436 Flatten@String Cat[string:out]
437 {
438 out <- [[Build[String()]
439 ]Buffer << [
440 [[string]Right >>]_Flatten[
441 [[string]Left >>]_Flatten[Array[], 0i32, [[string]Left >>]Byte Length],
442 0i32, [[string]Right >>]Byte Length]]
443 ]Length << [[string]Length >>]
444 }
445
446 Print@String Cat[string:out]
447 {
448 out <- Print[Flatten[string]]
449 }
450
451 Slice@String Cat[string,slicepoint:left,right]
452 {
453 llen <- [[string]Left >>]Length
454 If[[slicepoint]=[llen]]
455 {
456 left <- [string]Left >>
457 right <- [string]Right >>
458 }{
459 If[[slicepoint]<[llen]]
460 {
461 left,lright <- [[string]Left >>]Slice[slicepoint]
462 right <- String Cat[lright,[string]Right >>]
463 }{
464 rleft,right <- [[string]Right >>]Slice[ [slicepoint]-[llen] ]
465 left <- String Cat[[string]Left >>, rleft]
466 }
467 }
468 }
469
470 =@String Cat[left,right:out]
471 {
472 out <- Eq String[left,right]
473 }
474
475 =Delim[string,delims,index:outindex,after,nomatch]
476 {
477 delim <- [delims]Index[index]
478 If[[[string]Length]<[[delim]Length]]
479 {
480 try next <- Yes
481 }{
482 check,mafter <- [string]Slice[[delim]Length]
483 ,try next <- If[[check]=[delim]]
484 {
485 outindex <- index
486 after <- Val[mafter]
487 }
488 }
489 Val[try next]
490 {
491 ,nomatch <- [delims]Next[index]
492 {
493 outindex,after,nomatch <- =Delim[string,delims,~]
494 }
495 }
496 }
497
498
499 _Partition[string,delims:matched,after,not found]
500 {
501 not found <- If[[string]=[""]] {}
502 {
503 ,after <- =Delim[string, delims, [delims]First]
504 {
505 matched <- [delims]Index[~]
506 } {} {
507 [string]Slice[1] {}
508 { matched,after,not found <- _Partition[~,delims] }
509 }
510 }
511 }
512
513 Partition[string,odelims:before,matched,after,not found]
514 {
515 dt <- Blueprint Of[odelims]
516 If[ [[[dt]=[String()]] Or [[dt]=[String Slice()]]] Or [[dt]=[String Cat()]] ]
517 {
518 delims <- [List[]]Append[odelims]
519 }{
520 delims <- Val[odelims]
521 }
522 matched,after,not found <- _Partition[string,delims]
523 { dlen <- Length[~] }
524 { alen <- Length[~] }
525 before <- [string]Slice[ [[string]Length]-[[dlen]+[alen]] ]
526 }
527
528