Mercurial > repos > rhope
comparison string.rhope @ 78:4d5ea487f810
Working String implementation and some basic (but nowhere near exhaustive) tests
author | Mike Pavone <pavone@retrodev.com> |
---|---|
date | Thu, 08 Jul 2010 21:55:47 -0400 |
parents | a748300a4143 |
children | 27bb051d631c |
comparison
equal
deleted
inserted
replaced
77:a748300a4143 | 78:4d5ea487f810 |
---|---|
84 Length(Int32,Naked) | 84 Length(Int32,Naked) |
85 } | 85 } |
86 | 86 |
87 String@Array[in:out(String)] | 87 String@Array[in:out(String)] |
88 { | 88 { |
89 out <- [[Build[String()]]Buffer <<[in]]Length <<[Count UTF8[in, 0, 0]] | 89 [in]First |
90 { len <- Count UTF8[in, ~, 0i32] } | |
91 { len <- 0i32 } | |
92 out <- [[Build[String()]]Buffer <<[in]]Length <<[len] | |
90 } | 93 } |
91 | 94 |
92 Print@String[string:out] | 95 Print@String[string:out] |
93 { | 96 { |
94 //TODO: Sanitize string (remove terminal escapes and replace invalid UTF) | 97 //TODO: Sanitize string (remove terminal escapes and replace invalid UTF) |
144 Flatten@String[string:out] | 147 Flatten@String[string:out] |
145 { | 148 { |
146 out <- string | 149 out <- string |
147 } | 150 } |
148 | 151 |
149 _CPOff to BOff[buff,cur,expected:outcur,outboff] | 152 _CPOff to BOff[buff,cur,boff,cpoff,expected,used:out] |
150 { | 153 { |
151 If[expected] | 154 If[expected] |
152 { | 155 { |
153 outcur <- cur | 156 byte <- [buff]Index[boff] |
154 outboff <- 0i32 | |
155 }{ | |
156 err <- If[[byte]>[192u8]] {} | 157 err <- If[[byte]>[192u8]] {} |
157 { | 158 { |
158 err <- If[[byte]<[128u8]] {} | 159 err <- If[[byte]<[128u8]] {} |
159 { | 160 { |
160 outcur <- _CPOff to BOff[buff, [cur]+[1i32], [expected]-[1i32]] {} | 161 out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, [expected]-[1i32], [used]+[1i32]] |
161 { outboff <- [~]+[1i32] } | |
162 } | 162 } |
163 } | 163 } |
164 | 164 |
165 Val[err] | 165 Val[err] |
166 { | 166 { |
167 outcur <- [cur]+[1i32] | 167 ncur <- [cur]+[used] |
168 outboff <- 1i32 | 168 If[[ncur]>[cpoff]] |
169 } | 169 { |
170 out <- [boff]-[[cpoff]-[ncur]] | |
171 }{ | |
172 out <- CPOff to BOff[buff,ncur,boff,cpoff] | |
173 } | |
174 } | |
175 }{ | |
176 out <- CPOff to BOff[buff,[cur]+[1i32],boff,cpoff] | |
170 } | 177 } |
171 } | 178 } |
172 | 179 |
173 CPOff to BOff[buff,cur,boff,cpoff:out] | 180 CPOff to BOff[buff,cur,boff,cpoff:out] |
174 { | 181 { |
175 If[[cur]=[cpoff]] | 182 If[[cur]=[cpoff]] |
176 { | 183 { |
177 out <- boff | 184 out <- boff |
178 }{ | 185 }{ |
179 byte <- [buff]Index[cur] | 186 byte <- [buff]Index[boff] |
180 If[[byte] < [128u8]] | 187 If[[byte] < [128u8]] |
181 { | 188 { |
182 nboff <- [bof]+[1i32] | 189 nboff <- [boff]+[1i32] |
183 ncur <- [cur]+[1i32] | 190 ncur <- [cur]+[1i32] |
184 }{ | 191 }{ |
185 If[[byte]<[192u8]] | 192 If[[byte]<[192u8]] |
186 { | 193 { |
187 //Error: Encoding for 2nd,3rd or 4th byte of sequence | 194 //Error: Encoding for 2nd,3rd or 4th byte of sequence |
188 //treat as a single character | 195 //treat as a single character |
189 nboff <- [bof]+[1i32] | 196 nboff <- [boff]+[1i32] |
190 ncur <- [cur]+[1i32] | 197 ncur <- [cur]+[1i32] |
191 }{ | 198 }{ |
192 If[[byte]<[224u8]] | 199 If[[byte]<[224u8]] |
193 { | 200 { |
194 expect <- 1i32 | 201 expect <- 1i32 |
200 If[[byte]<[245u8]] | 207 If[[byte]<[245u8]] |
201 { | 208 { |
202 expect <- 3i32 | 209 expect <- 3i32 |
203 }{ | 210 }{ |
204 //Error | 211 //Error |
205 nboff <- [bof]+[1i32] | 212 nboff <- [boff]+[1i32] |
206 ncur <- [cur]+[1i32] | 213 ncur <- [cur]+[1i32] |
207 } | 214 } |
208 } | 215 } |
209 } | 216 } |
210 Val[expect] | 217 Val[expect] |
211 { | 218 { |
212 ncur <- _CPOff to BOff[buff, [cur]+[1i32], expect] {} | 219 out <- _CPOff to BOff[buff, cur, [boff]+[1i32], cpoff, expect, 1i32] {} |
213 { nboff <- [1i32]+[~] } | |
214 } | 220 } |
215 } | 221 } |
216 } | 222 } |
217 out <- CPOff to BOff[buff, ncur, cpoff, nboff] | 223 out <- CPOff to BOff[buff, ncur, cpoff, nboff] |
218 } | 224 } |
238 | 244 |
239 _=String[left,right,index:out] | 245 _=String[left,right,index:out] |
240 { | 246 { |
241 [left]Byte[index] | 247 [left]Byte[index] |
242 { | 248 { |
243 ,out <- If[[~]=[[right]Byte[index]]] | 249 rbyte <- [right]Byte[index] {} |
250 { | |
251 Print["Could not fetch byte from right string at offset:"] | |
252 { Print[index] } | |
253 out <- No | |
254 } | |
255 ,out <- If[[~]=[rbyte]] | |
244 { | 256 { |
245 out <- _=String[left,right,[index]+[1]] | 257 out <- _=String[left,right,[index]+[1]] |
246 } | 258 } |
247 }{ | 259 }{ |
248 out <- Yes | 260 out <- Yes |
296 Byte Length@String Slice[string:out] | 308 Byte Length@String Slice[string:out] |
297 { | 309 { |
298 out <- [string]ByteLen >> | 310 out <- [string]ByteLen >> |
299 } | 311 } |
300 | 312 |
313 Length@String Slice[string:out] | |
314 { | |
315 out <- [string]Length >> | |
316 } | |
317 | |
301 =@String Slice[left,right:out] | 318 =@String Slice[left,right:out] |
302 { | 319 { |
303 out <- Eq String[left,right] | 320 out <- Eq String[left,right] |
304 } | 321 } |
305 | 322 |
323 out <- [[string]Source >>]_Flatten[dest, [[string]Offset >>]+[offset], count] | 340 out <- [[string]Source >>]_Flatten[dest, [[string]Offset >>]+[offset], count] |
324 } | 341 } |
325 | 342 |
326 Flatten@String Slice[string:out] | 343 Flatten@String Slice[string:out] |
327 { | 344 { |
328 out <- String[ [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>] ] | 345 out <- [[Build[String()]]Buffer <<[ [[string]Source >>]_Flatten[Array[], [string]Offset >>, [string]ByteLen >>] ] |
346 ]Length <<[[string]Length >>] | |
347 } | |
348 | |
349 Print@String Slice[string:out] | |
350 { | |
351 out <- Print[Flatten[string]] | |
329 } | 352 } |
330 | 353 |
331 Append@String Slice[left,right:out] | 354 Append@String Slice[left,right:out] |
332 { | 355 { |
333 out <- String Cat[left,right] | 356 out <- String Cat[left,right] |
357 } | |
358 | |
359 Slice@String Slice[string,slicepoint:left,right] | |
360 { | |
361 //TODO: Handle invalid slicepoints | |
362 sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, slicepoint] | |
363 left <- String Slice[[string]Source >>, [string]Offset >>, slicepoint, [sliceoffset]-[[string]Offset >>]] | |
364 right <- String Slice[[string]Source >>, sliceoffset, [[string]Length >>]-[slicepoint], [[[string]Source >>]Byte Length]-[sliceoffset]] | |
334 } | 365 } |
335 | 366 |
336 Blueprint String Cat | 367 Blueprint String Cat |
337 { | 368 { |
338 Left | 369 Left |
373 Byte Length@String Cat[string:out] | 404 Byte Length@String Cat[string:out] |
374 { | 405 { |
375 out <- [string]ByteLen >> | 406 out <- [string]ByteLen >> |
376 } | 407 } |
377 | 408 |
409 Length@String Cat[string:out] | |
410 { | |
411 out <- [string]Length >> | |
412 } | |
413 | |
378 _Flatten@String Cat[string,dest,offset,count:out] | 414 _Flatten@String Cat[string,dest,offset,count:out] |
379 { | 415 { |
380 [string]Left >> | 416 left <- [string]Left >> |
381 } | 417 If[[offset] < [[left]Byte Length]] |
382 | 418 { |
419 lcount <- Min[[left]Byte Length, count] | |
420 ldest <- [left]_Flatten[dest, offset, lcount] | |
421 rcount <- [count]-[lcount] | |
422 }{ | |
423 ldest <- Val[dest] | |
424 rcount <- count | |
425 } | |
426 If[[[offset]+[count]]>[[left]Byte Length]] | |
427 { | |
428 right <- [string]Right >> | |
429 roffset <- Max[0, [offset]-[[left]Byte Length]] | |
430 out <- [right]_Flatten[ldest, roffset, Min[[right]Byte Length, rcount]] | |
431 }{ | |
432 out <- Val[ldest] | |
433 } | |
434 } | |
435 | |
436 Flatten@String Cat[string:out] | |
437 { | |
438 out <- [[Build[String()] | |
439 ]Buffer << [ | |
440 [[string]Right >>]_Flatten[ | |
441 [[string]Left >>]_Flatten[Array[], 0i32, [[string]Left >>]Byte Length], | |
442 0i32, [[string]Right >>]Byte Length]] | |
443 ]Length << [[string]Length >>] | |
444 } | |
445 | |
446 Print@String Cat[string:out] | |
447 { | |
448 out <- Print[Flatten[string]] | |
449 } | |
450 | |
451 Slice@String Cat[string,slicepoint:left,right] | |
452 { | |
453 llen <- [[string]Left >>]Length | |
454 If[[slicepoint]=[llen]] | |
455 { | |
456 left <- [string]Left >> | |
457 right <- [string]Right >> | |
458 }{ | |
459 If[[slicepoint]<[llen]] | |
460 { | |
461 left,lright <- [[string]Left >>]Slice[slicepoint] | |
462 right <- String Cat[lright,[string]Right >>] | |
463 }{ | |
464 rleft,right <- [[string]Right >>]Slice[ [slicepoint]-[llen] ] | |
465 left <- String Cat[[string]Left >>, rleft] | |
466 } | |
467 } | |
468 } | |
469 | |
470 =@String Cat[left,right:out] | |
471 { | |
472 out <- Eq String[left,right] | |
473 } | |
474 | |
475 =Delim[string,delims,index:outindex,after,nomatch] | |
476 { | |
477 delim <- [delims]Index[index] | |
478 If[[[string]Length]<[[delim]Length]] | |
479 { | |
480 try next <- Yes | |
481 }{ | |
482 check,mafter <- [string]Slice[[delim]Length] | |
483 ,try next <- If[[check]=[delim]] | |
484 { | |
485 outindex <- index | |
486 after <- Val[mafter] | |
487 } | |
488 } | |
489 Val[try next] | |
490 { | |
491 ,nomatch <- [delims]Next[index] | |
492 { | |
493 outindex,after,nomatch <- =Delim[string,delims,~] | |
494 } | |
495 } | |
496 } | |
497 | |
498 | |
499 _Partition[string,delims:matched,after,not found] | |
500 { | |
501 not found <- If[[string]=[""]] {} | |
502 { | |
503 ,after <- =Delim[string, delims, [delims]First] | |
504 { | |
505 matched <- [delims]Index[~] | |
506 } {} { | |
507 [string]Slice[1] {} | |
508 { matched,after,not found <- _Partition[~,delims] } | |
509 } | |
510 } | |
511 } | |
512 | |
513 Partition[string,odelims:before,matched,after,not found] | |
514 { | |
515 dt <- Blueprint Of[odelims] | |
516 If[ [[[dt]=[String()]] Or [[dt]=[String Slice()]]] Or [[dt]=[String Cat()]] ] | |
517 { | |
518 delims <- [List[]]Append[odelims] | |
519 }{ | |
520 delims <- Val[odelims] | |
521 } | |
522 matched,after,not found <- _Partition[string,delims] | |
523 { dlen <- Length[~] } | |
524 { alen <- Length[~] } | |
525 before <- [string]Slice[ [[string]Length]-[[dlen]+[alen]] ] | |
526 } | |
527 | |
528 |