view lex.rhope @ 131:0a4682be2db2

Modify lexer and new parser to work in compiler
author Mike Pavone <pavone@retrodev.com>
date Fri, 05 Nov 2010 02:43:34 +0000
parents 73e978d590c7
children 1f238280047f


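// A lexed token: its category (Type), the raw source characters it was lexed from (Raw Text),
// and its processed value (Text)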
Blueprint Token
{
	Type
	Raw Text
	Text
}

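// Convenience constructor that builds a Token and fills in all three fields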
Token[type,raw,text:out]
{
	out <- [[[Build[Token()]]Type <<[type]]Raw Text <<[raw]]Text <<[text]
}

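// Fold worker used by Type Match@Token: yields Yes once a candidate type equals the token's type,
// otherwise passes the accumulated result through unchanged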
_Type Match[val, test type, type:out]
{
	If[[test type]=[type]]
	{
		out <- Yes
	}{
		out <- Val[val]
	}
}

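// Wraps val in a single-element list unless its blueprint is already List or List Leaf,
// so callers can pass either one type name or a list of them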
As List[val:out]
{
	[(List(),List Leaf())]Find[=[?, Blueprint Of[val]]]
	{
		out <- val
	}{
		out <- [()]Append[val]
	}
}

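// Matches when the token's Type equals the given type (or any type in the given list)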
Type Match@Token[token,type:match,nomatch]
{
	match,nomatch <- If[Fold[_Type Match[?,?, [token]Type >>], No, As List[type]]]
}

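// Consumes characters up to the closing double quote, translating backslash escapes via the
// escapes dictionary, then emits a String Literal token and resumes _Lex on the remaining text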
String Literal[string, raw string, escapes, text, simple tokens, token list:out]
{
	first,rest <- [text]Slice[1]
	If[[first] = ["\""]]
	{
		out <- _Lex[rest, [first]Slice[0], simple tokens, [token list]Append[Token["String Literal", raw string, string]]]
	}{
		next raw <- [raw string]Append[first]
		If[[first] = ["\\"]]
		{
			second,next text <- [rest]Slice[1]
			char <- [escapes]Index[String[second]] {} 
			{
				char <- Val[second]
			}
			next string <- [string]Append[char]
		}{
			next string <- [string]Append[first]
			next text <- Val[rest]
		}
		out <- String Literal[next string, next raw, escapes, next text, simple tokens, token list]
	}
}

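// Consumes the rest of the line (or the rest of the input if there is no newline)
// and emits a Line Comment token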
Line Comment[start comment, text, simple tokens, token list:out]
{
	comment,,next text <- [text]Partition["\n"] {} {} {}
	{
		next text <- ""
		comment <- Val[text]
	}
	out <- _Lex[next text, [next text]Slice[0], simple tokens, [token list]Append[Token["Line Comment", [start comment]Append[comment], comment]]]
	
}

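// Consumes a /* */ comment, tracking nesting depth; once depth drops back to zero a
// Block Comment token is emitted and lexing resumes. The Print call is a debug trace
// of the current nesting depth.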
Block Comment[comment,raw comment, depth, text, simple tokens, token list:out]
{
	Print[["Block Comment: Depth="]Append[String[depth]]]
	If[[depth] > [0]]
	{
		chunk, delim, next text <- [text]Partition[("/*","*/")] {} {} {}
		{
			next text <- ""
			delim <- ""
			chunk <- Val[text]
		}
		If[[delim] = ["/*"]]
		{
			next depth <- [depth] + [1]
		}{
			next depth <- [depth] - [1]
		}
		If[[next depth] = [0]]
		{
			next comment <- [comment]Append[chunk]
		}{
			next comment <- [[comment]Append[chunk]]Append[delim]
		}
		out <- Block Comment[next comment, [[raw comment]Append[chunk]]Append[delim], next depth, next text, simple tokens, token list]
	}{
		out <- _Lex[text, [raw comment]Slice[0], simple tokens, [token list]Append[Token["Block Comment", raw comment, comment]]]
	}
}

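// Accumulates digit, '.' and 'x' characters and emits a Numeric Literal token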
Numeric Literal[literal, text, simple tokens, token list:out]
{
	first,rest <- [text]Slice[1]
	If[[first] In ["0123456789.x"]]
	{
		out <- Numeric Literal[[literal]Append[first], rest, simple tokens, token list]
	}{
		out <- _Lex[text, [first]Slice[0], simple tokens, [token list]Append[Token["Numeric Literal", literal, literal]]]
	}
}

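// Appends a ready-made token to the token list and resumes lexing with an empty symbol buffer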
Add Token[token, text, simple tokens, token list:out]
{
	out <- _Lex[text, [text]Slice[0], simple tokens, [token list]Append[token]]
}

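// Main lexing loop: dispatches on the next character. Single-character tokens come from the
// simple tokens dictionary; string literals, comments, numbers and <- get dedicated workers.
// Any other characters accumulate in symbol; just before a worker runs, the trimmed symbol
// is flushed as a Symbol token, then the worker is called with the dictionary and token list.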
_Lex[text, symbol, simple tokens,token list:out]
{
	If[[[text]Length] > [0]]
	{
		first,rest <- [text]Slice[1]
		[simple tokens]Index[String[first]]
		{
			token worker <- Val[Add Token[Token[~, first, ""], rest, ?]]
		}{
			If[[first] = ["\""]]
			{
				escapes <- [[[Dictionary[]]Set["n","\n"]]Set["r","\r"]]Set["t","\t"]
				token worker <- Val[String Literal[[first]Slice[0], first, escapes, rest, ?]]
				//out <- String Literal["", first, rest, simple tokens, token list, escapes]
			}{
				second,second rest <- [rest]Slice[1]
				If[[[first] = ["<"]] And [[second]=["-"]]]
				{
					[first]Append[second]
					{
						token worker <- Val[Add Token[Token["Assignment", ~, ~], second rest, ?]]
					}
				}{
					
					If[[[first] = ["/"]] And [[second] = ["*"]]]
					{
						token worker <- Val[Block Comment[[first]Slice[0], [first]Append[second], 1, second rest, ?]]
						//out <- Block Comment[next text, simple tokens, token list, 1]
					}{
						If[[[first] = ["/"]] And [[second] = ["/"]]]
						{
							token worker <- Val[Line Comment[[first]Append[second], second rest, ?]]
							//out <- Line Comment["", [first]Append[second], next text, simple tokens, token list]
						}{
							If[[[first]In["0123456789"]] Or [[[first] = ["-"]] And [[second]In["0123456789"]]]]
							{
								token worker <- Val[Numeric Literal[first, rest, ?]]
								//out <- Numeric Literal[text, simple tokens, token list]
							}{
								out <- _Lex[rest, [symbol]Append[first], simple tokens, token list]
							}
						}
					}
				}
				
			}
		}
		Val[token worker]
		{
			trimmed <- Trim[symbol, " \t\r\n"]
			If[[trimmed] = [""]]
			{
				next list <- Val[token list]
			}{
				next list <- [token list]Append[Token["Symbol", trimmed, trimmed]]
			}
			out <- [token worker]Call[simple tokens, next list]
		}
	}{
		out <- token list
	}
}

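// Entry point: builds the table of single-character tokens and starts _Lex with an empty
// symbol buffer and an empty token list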
Lex[text:out]
{
	simple tokens <- [[[[[[[[[[[Dictionary[]
		]Set["{", "Block Begin"]
		]Set["}", "Block End"]
		]Set["(", "List Begin"]
		]Set[")", "List End"]
		]Set["[", "Args Begin"]
		]Set["]", "Args End"]
		]Set[",", "List Separator"]
		]Set[":", "Name Separator"]
		]Set["@", "Method Separator"]
		]Set["`", "Binary Operation"]
		]Set["\n", "Newline"]
	out <- _Lex[text, [text]Slice[0], simple tokens, ()]
}