comparison string.rhope @ 156:ed70399a07aa

Add Substring method to string and improve performance of Partition
author Mike Pavone <pavone@retrodev.com>
date Thu, 23 Dec 2010 02:00:27 +0000
parents d86df83402f3
children 37b999dbd12e
comparison
equal deleted inserted replaced
155:d59611dcec71 156:ed70399a07aa
453 right <- String Slice[string, sliceoffset, [[string]Length >>]-[slicepoint], [[[string]Buffer >>]Length]-[sliceoffset]] 453 right <- String Slice[string, sliceoffset, [[string]Length >>]-[slicepoint], [[[string]Buffer >>]Length]-[sliceoffset]]
454 } 454 }
455 } 455 }
456 } 456 }
457 457
// Substring@String: returns the part of a flat string that starts at code point
// offset `start`.
//   length > 0  : number of code points to take, clamped to the end of the string
//   length <= 0 : taken relative to the end of the string (0 means "through the end")
// When `start` is at or past the end of the string the result is "".
458 Substring@String[string,start,length:out]
459 {
460 If[[start]>=[[string]Length]]
461 { out <- "" }
462 {
463 If[[length]<=[0]]
464 { slength <- [[[string]Length]-[start]]+[length] }
465 {
466 If[[[start]+[length]] > [[string]Length]]
467 { slength <- [[string]Length]-[start] }
468 { slength <- Val[length] }
469 }
470
471 If[[[string]Length] = [[string]Byte Length]]
472 {
// Code point count equals byte count, so code point offsets double as byte offsets.
473 sliceoffset <- Val[start]
474 sbytelen <- Val[slength]
475 }{
476 sliceoffset <- CPOff to BOff[[string]Buffer >>, 0i32, 0i32, start]
// CPOff to BOff appears to return a byte offset into the buffer (that is how its
// result is used by the slicing code elsewhere in this file), so the byte length
// of the slice is that end offset minus the slice's starting byte offset.
477 sbytelen <- [CPOff to BOff[[string]Buffer >>, 0i32, sliceoffset, slength]]-[sliceoffset]
478 }
479 out <- String Slice[string, sliceoffset, slength, sbytelen]
480 }
481 }
482
458 Byte@String[string,index:out,invalid] 483 Byte@String[string,index:out,invalid]
459 { 484 {
460 out,invalid <- [[string]Buffer >>]Index[index] 485 out,invalid <- [[string]Buffer >>]Index[index]
461 } 486 }
462 487
524 } 549 }
525 550
526 _Flat=@String Cat[left,right:out] 551 _Flat=@String Cat[left,right:out]
527 { 552 {
528 out <- Eq String[left,right] 553 out <- Eq String[left,right]
554 }
555
// _Find Memcmp: byte-wise search for needlelen bytes of needlebuf (starting at
// needleoff) inside haybuf, beginning at byte offset hayoff with haylen bytes left.
//   found     : byte offset in haybuf where the needle matched
//   not found : fires once fewer than needlelen bytes remain
556 _Find Memcmp[haybuf,hayoff,haylen,needlebuf,needleoff,needlelen:found,not found]
557 {
558 not found <- If[[haylen]<[needlelen]] {}
559 {
// A true result from _internal_memcmp_offset is treated as a mismatch: retry one byte further on.
560 If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, needlelen]]
561 {
562 found,not found <- _Find Memcmp[haybuf, [hayoff]+[1], [haylen]-[1], needlebuf, needleoff, needlelen]
563 }{
564 found <- hayoff
565 }
566 }
567 }
568
// _Find Flat@String: searches the whole buffer of a flat string for bytelen bytes
// of needlebuf starting at off. The search begins at byte 0 and covers
// [haystack]Byte Length bytes; found/not found come straight from _Find Memcmp.
569 _Find Flat@String[haystack,needlebuf,off,bytelen:found,not found]
570 {
571 found,not found <- _Find Memcmp[[haystack]Buffer >>, 0, [haystack]Byte Length, needlebuf, off, bytelen]
572 }
573
// _Find Flat@String Slice: same search as the flat string version, but run over
// the slice's source buffer starting at the slice's Offset.
574 _Find Flat@String Slice[haystack,needlebuf,off,bytelen:found,not found]
575 {
576 ,not found <- _Find Memcmp[[[haystack]Source >>]Buffer >>, [haystack]Offset >>, [haystack]Byte Length, needlebuf, off, bytelen]
// The match position is an offset into the source buffer; subtract Offset to make it relative to the slice.
577 { found <- [~]-[[haystack]Offset >>] }
578 }
579
// _Find Partial Memcmp: like _Find Memcmp, but also reports a match that runs off
// the end of the haystack buffer.
//   found     : byte offset where all needlelen bytes matched
//   partial   : byte offset where the remaining haylen bytes matched the first
//               haylen bytes of the needle (fewer than needlelen bytes were left)
//   not found : fires when no bytes remain
580 _Find Partial Memcmp[haybuf,hayoff,haylen,needlebuf,needleoff,needlelen:found,partial,not found]
581 {
582 If[[haylen]<[needlelen]]
583 {
// Not enough bytes for a full match: compare only what is left, or give up when nothing is left.
584 ,not found <- If[haylen]
585 {
586 cont <- If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, haylen]] {}
587 {
588 partial <- hayoff
589 }
590 }
591 }{
592 cont <- If[_internal_memcmp_offset[haybuf, hayoff, needlebuf, needleoff, needlelen]] {}
593 {
594 found <- hayoff
595 }
596 }
// cont only receives a value when a comparison reported a mismatch; in that case retry one byte further on.
597 Val[cont]
598 {
599 found,partial,not found <- _Find Partial Memcmp[haybuf, [hayoff]+[1], [haylen]-[1], needlebuf, needleoff, needlelen]
600 }
601 }
602
// _Find Partial@String: runs _Find Partial Memcmp over a flat string's buffer,
// starting at byte offset hayoff with ([haystack]Byte Length - hayoff) bytes remaining.
603 _Find Partial@String[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
604 {
605 found,partial,not found <- _Find Partial Memcmp[[haystack]Buffer >>, hayoff, [[haystack]Byte Length]-[hayoff], needlebuf, needleoff, needlelen] {} {}
606 }
607
// _Check Rest@String: tests whether a flat string begins with needlelen bytes of
// needlebuf taken from needleoff.
//   found     : the first needlelen bytes of the string equal the needle bytes
//   not found : the string is shorter than needlelen, or the bytes differ
608 _Check Rest@String[haystack,needlebuf,needleoff,needlelen:found,not found]
609 {
610 haylen <- [haystack]Byte Length
// Too-short haystacks are rejected up front; the comparison only runs on the
// false branch, so it never reads past the end of the string's data.
611 not found <- If[[haylen]<[needlelen]] {}
612 {
// A true result from _internal_memcmp_offset is treated as a mismatch.
613 not found,found <- If[_internal_memcmp_offset[[haystack]Buffer >>, 0, needlebuf, needleoff, needlelen]]
614 }
615 }
616
// _Check Rest@String Slice: tests whether a slice begins with needlelen bytes of
// needlebuf taken from needleoff. The comparison is made against the slice's
// source buffer at the slice's Offset.
//   found     : the first needlelen bytes of the slice equal the needle bytes
//   not found : the slice is shorter than needlelen, or the bytes differ
617 _Check Rest@String Slice[haystack,needlebuf,needleoff,needlelen:found,not found]
618 {
619 haylen <- [haystack]Byte Length
// Too-short haystacks are rejected up front; the comparison only runs on the
// false branch, so it never reads past the end of the slice.
620 not found <- If[[haylen]<[needlelen]] {}
621 {
// A true result from _internal_memcmp_offset is treated as a mismatch.
622 not found,found <- If[_internal_memcmp_offset[[[haystack]Source >>]Buffer >>, [haystack]Offset >>, needlebuf, needleoff, needlelen]]
623 }
624 }
625
// _Check Rest@String Cat: tests whether a concatenated string begins with
// needlelen bytes of needlebuf taken from needleoff. This worker reads the
// Left and Right fields, so it is the String Cat variant of _Check Rest.
//   found     : the leading bytes of the concatenation equal the needle bytes
//   not found : some part of the comparison failed
626 _Check Rest@String Cat[haystack,needlebuf,needleoff,needlelen:found,not found]
627 {
628 llen <- [[haystack]Left >>]Byte Length
629 If[[needlelen]<[llen]]
// The needle fits entirely inside the left half.
630 { found,not found <- _Check Rest[[haystack]Left >>, needlebuf, needleoff, needlelen] }
631 {
// Otherwise the whole left half must match the first llen needle bytes...
632 ,not found <- _Check Rest[[haystack]Left >>, needlebuf, needleoff, llen]
633 {
// ...and the right half must start with whatever is left of the needle.
634 found,not found <- _Check Rest[[haystack]Right >>, needlebuf, [needleoff]+[llen], [needlelen]-[llen]]
635 }
636 }
637 }
638
// _Find Partial@String Slice: runs _Find Partial Memcmp over the slice's source
// buffer. hayoff is relative to the slice, so the slice's Offset is added before
// the search and subtracted again from the found/partial positions.
639 _Find Partial@String Slice[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
640 {
641 ,,not found <- _Find Partial Memcmp[[[haystack]Source >>]Buffer >>, [hayoff]+[[haystack]Offset >>], [[haystack]Byte Length]-[hayoff], needlebuf, needleoff, needlelen]
642 { found <- [~]-[[haystack]Offset >>] }
643 { partial <- [~]-[[haystack]Offset >>] }
644 }
645
// _Find Partial@String Cat: searches a concatenated string for the needle starting
// at byte offset hayoff, handling matches that straddle the Left/Right boundary.
//   found     : byte offset (relative to the concatenation) of a full match
//   partial   : byte offset of a match that runs off the end of the concatenation
//   not found : no match
646 _Find Partial@String Cat[haystack,hayoff,needlebuf,needleoff,needlelen:found,partial,not found]
647 {
648 llen <- [[haystack]Left >>]Byte Length
649 If[[hayoff] < [llen]]
650 {
// Search starts in the left half. A full match there is reported directly.
651 found,p,checkright <- [[haystack]Left >>]_Find Partial[hayoff,needlebuf,needleoff,needlelen] {}
652 {
// Partial match at the end of the left half: partlen bytes of the needle matched,
// so the right half must begin with the remaining needle bytes.
653 partlen <- [llen]-[~]
654 ,cont <- [[haystack]Right >>]_Check Rest[needlebuf,[needleoff]+[partlen], [needlelen]-[partlen]]
655 { found <- Val[p] }
// The right half did not complete the match: restart the search one byte after hayoff.
656 { found,partial,not found <- _Find Partial[haystack,[hayoff]+[1],needlebuf,needleoff,needlelen] }
657 }{
// Nothing in the left half: continue from the first byte of the right half.
658 found,partial,not found <- _Find Partial[haystack,llen,needlebuf,needleoff,needlelen]
659 }
660 }{
// Search starts in the right half: translate hayoff into the right half's
// coordinates, then shift the results back by llen.
661 ,,not found <- [[haystack]Right >>]_Find Partial[[hayoff]-[llen],needlebuf,needleoff,needlelen]
662 { found <- [~]+[llen] }
663 { partial <- [~]+[llen] }
664 }
665 }
666
667
// _Find Flat@String Cat: searches a concatenated string from byte 0 using
// _Find Partial. Both the partial and the not found outputs of _Find Partial are
// routed to not found, so only a complete match is reported as found.
668 _Find Flat@String Cat[haystack,needlebuf,off,bytelen:found,not found]
669 {
670 found,not found,not found <- _Find Partial[haystack, 0, needlebuf, off, bytelen] {}
529 } 671 }
530 672
531 =@String[left,right:out] 673 =@String[left,right:out]
532 { 674 {
533 out <- [right]_Flat=[left] 675 out <- [right]_Flat=[left]
657 { sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, slicepoint] } 799 { sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, slicepoint] }
658 left <- String Slice[[string]Source >>, [string]Offset >>, slicepoint, [sliceoffset]-[[string]Offset >>]] 800 left <- String Slice[[string]Source >>, [string]Offset >>, slicepoint, [sliceoffset]-[[string]Offset >>]]
659 right <- String Slice[[string]Source >>, sliceoffset, [[string]Length >>]-[slicepoint], [[string]Byte Length]-[[sliceoffset]-[[string]Offset >>]]] 801 right <- String Slice[[string]Source >>, sliceoffset, [[string]Length >>]-[slicepoint], [[string]Byte Length]-[[sliceoffset]-[[string]Offset >>]]]
660 } 802 }
661 } 803 }
804 }
805
// Substring@String Slice: returns the part of a slice that starts at code point
// offset `start` (relative to the slice). The result is a new slice of the same
// Source.
//   length > 0  : number of code points to take, clamped to the end of the slice
//   length <= 0 : taken relative to the end of the slice (0 means "through the end")
// When `start` is at or past the end of the slice the result is "".
806 Substring@String Slice[string,start,length:out]
807 {
808 If[[start]>=[[string]Length]]
809 { out <- "" }
810 {
811 If[[length]<=[0]]
812 { slength <- [[[string]Length]-[start]]+[length] }
813 {
814 If[[[start]+[length]] > [[string]Length]]
815 { slength <- [[string]Length]-[start] }
816 { slength <- Val[length] }
817 }
818
819 If[[[string]Length] = [[string]Byte Length]]
820 {
// Code point count equals byte count, so code point offsets double as byte offsets.
821 sliceoffset <- [[string]Offset >>]+[start]
822 sbytelen <- Val[slength]
823 }{
824 sliceoffset <- CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, [string]Offset >>, start]
// CPOff to BOff appears to return a byte offset into the source buffer (that is
// how its result is used by the slicing code elsewhere in this file), so the byte
// length of the new slice is that end offset minus its starting byte offset.
825 sbytelen <- [CPOff to BOff[[[string]Source >>]Buffer >>, 0i32, sliceoffset, slength]]-[sliceoffset]
826 }
827 out <- String Slice[[string]Source >>, sliceoffset, slength, sbytelen]
828 }
662 } 829 }
663 830
664 Blueprint String Cat 831 Blueprint String Cat
665 { 832 {
666 Left 833 Left
767 left <- String Cat[[string]Left >>, rleft] 934 left <- String Cat[[string]Left >>, rleft]
768 } 935 }
769 } 936 }
770 } 937 }
771 938
// Substring@String Cat: returns the part of a concatenated string that starts at
// code point offset `start`. `length` follows the same convention as the other
// Substring workers: positive is a count (clamped to the end), zero or negative is
// relative to the end of the string.
939 Substring@String Cat[string,start,length:out]
940 {
941 llen <- [[string]Left >>]Length
942 If[[start] >= [llen]]
943 {
// The requested range lies entirely in the right half.
944 out <- [[string]Right >>]Substring[[start]-[llen],length]
945 }{
946 If[[length] <= [0]]
947 { slength <- [[[string]Length]-[start]]+[length] }
948 {
949 If[[[start]+[length]] > [[string]Length]]
950 { slength <- [[string]Length]-[start] }
951 { slength <- Val[length] }
952 }
953 If[[[start]+[slength]]<=[llen]]
954 {
// The requested range lies entirely in the left half.
955 out <- [[string]Left >>]Substring[start, slength]
956 }{
// The range spans both halves: take the tail of the left half and the head of
// the right half, then join them in a new String Cat.
957 new end <- [start]+[slength]
958 If[[new end]=[[string]Length]]
959 { right <- Right >>[string] }
// new end is before the end of the string here, so this length argument is negative,
// i.e. it is taken relative to the end of the right half.
960 { right <- [[string]Right >>]Substring[0, [new end]-[[string]Length]] }
961 If[[start]=[0]]
962 { left <- Left >>[string] }
// A length of 0 takes everything from start through the end of the left half.
963 { left <- [[string]Left >>]Substring[start, 0] }
964 out <- String Cat[left, right]
965 }
966 }
967 }
968
772 =@String Cat[left,right:out] 969 =@String Cat[left,right:out]
773 { 970 {
774 out <- Eq String[left,right] 971 out <- Eq String[left,right]
775 } 972 }
776 973
795 outindex,after,nomatch <- =Delim[string,delims,~] 992 outindex,after,nomatch <- =Delim[string,delims,~]
796 } 993 }
797 } 994 }
798 } 995 }
799 996
800 Pattern@String[string:out]
801 {
802 out <- string
803 }
804
805 Pattern@String Slice[string:out]
806 {
807 out <- string
808 }
809
810 Pattern@String Cat[string:out]
811 {
812 out <- Flatten[string]
813 }
814
815 Match@String[string,cmp:num,no match,idx] 997 Match@String[string,cmp:num,no match,idx]
816 { 998 {
817 n <- [string]Length 999 n <- [string]Length
818 ,no match <- If[[string]=[[cmp]Slice[n]]] 1000 ,no match <- If[[string]=[[cmp]Substring[0, n]]]
819 { 1001 {
820 num <- Val[n] 1002 num <- Val[n]
821 idx <- 0 1003 idx <- 0
822 } 1004 }
823 } 1005 }
824 1006
825 Match@String Slice[string,cmp:num,no match,idx] 1007 Match@String Slice[string,cmp:num,no match,idx]
826 { 1008 {
827 n <- [string]Length 1009 n <- [string]Length
828 ,no match <- If[[string]=[[cmp]Slice[n]]] 1010 ,no match <- If[[string]=[[cmp]Substring[0, n]]]
829 { 1011 {
830 num <- Val[n] 1012 num <- Val[n]
831 idx <- 0 1013 idx <- 0
832 } 1014 }
833 } 1015 }
834 1016
835 1017 _Partition@String[delim,string:matched,after,not found]
836 _Partition[string,delims:matched,after,not found] 1018 {
837 { 1019 ,not found <- [string]_Find Flat[[delim]Buffer >>, 0, [delim]Byte Length]
838 not found <- If[[string]=[""]] {} 1020 {
839 { 1021 //TODO: Translate byte offset to cp offset when necessary
840 [delims]Match[string] 1022 matched <- delim
841 { 1023 after <- [string]Substring[[~]+[[delim]Length], 0]
842 matched,after <- [string]Slice[~] 1024 }
843 }{ 1025 }
844 [string]Slice[1] {} 1026
845 { matched,after,not found <- _Partition[~,delims] } 1027 _Partition@String Slice[delim,string:matched,after,not found]
846 } 1028 {
847 } 1029 ,not found <- [string]_Find Flat[[[delim]Source >>]Buffer >>, [delim]Offset >>, [delim]Byte Length]
1030 {
1031 //TODO: Translate byte offset to cp offset when necessary
1032 matched <- delim
1033 after <- [string]Substring[[~]+[[delim]Length], 0]
1034 }
1035 }
1036
// Pattern@String: a flat string is returned unchanged as its own pattern.
1037 Pattern@String[string:out]
1038 {
1039 out <- string
1040 }
1041
// Pattern@String Slice: a slice is returned unchanged as its own pattern.
1042 Pattern@String Slice[string:out]
1043 {
1044 out <- string
1045 }
1046
// Pattern@String Cat: a concatenated string is passed through Flatten before
// being used as a pattern.
1047 Pattern@String Cat[string:out]
1048 {
1049 out <- Flatten[string]
848 } 1050 }
849 1051
850 Partition[string,delims:before,matched,after,not found] 1052 Partition[string,delims:before,matched,after,not found]
851 { 1053 {
852 matched,after,not found <- _Partition[string,Pattern[delims]] 1054 matched,after,not found <- _Partition[Pattern[delims],string]
853 { dlen <- Length[~] } 1055 { dlen <- Length[~] }
854 { alen <- Length[~] } 1056 { alen <- Length[~] }
855 before <- [string]Slice[ [[string]Length]-[[dlen]+[alen]] ] 1057 blen <- [[string]Length]-[[dlen]+[alen]]
856 } 1058 If[blen]
857 1059 { before <- [string]Substring[0, blen] }
1060 { before <- "" }
1061 }
858 1062
859 Dict Type ID@String[string:out] 1063 Dict Type ID@String[string:out]
860 { 1064 {
861 out <- ID[String()] 1065 out <- ID[String()]
862 } 1066 }