57 lines
2.3 KiB
Haskell
57 lines
2.3 KiB
Haskell
module Lexer (tokenize, Token (..), TokenType (..), TrailType (..), TrailRule (..) ) where
|
|
|
|
import Data.List
|
|
import Data.Char (ord)
|
|
|
|
-- | Kind of character run a lexeme is expected to continue with.
--
-- 'tokenize' picks the constructor from the first character of the
-- remaining input.
data TrailType
  = OnlyDigits       -- ^ chosen when the first character is in @'0'..'9'@
  | LettersOrDigits  -- ^ chosen when the first character is an ASCII letter
  | OnlySpecial      -- ^ chosen for any other first character
  deriving (Show, Eq)
|
|
-- | Category attached to every 'Token'.
--
-- The per-constructor notes describe what 'parseSingleToken' emits; the
-- constructors without a note are not produced by that function.
data TokenType
  = Quotes        -- ^ a single quote character @'@
  | Dot           -- ^ @.@
  | Comma         -- ^ @,@
  | Colon         -- ^ @:@
  | EndStatement  -- ^ @;@
  | EOF
  | QString
  | Numeric       -- ^ lexeme whose first character is a digit
  | Literal       -- ^ lexeme whose first character is an ASCII letter
  | Digit
  | Assignment    -- ^ @=@
  | OpenParen     -- ^ @(@
  | CloseParen    -- ^ @)@
  | OpenCurved    -- ^ @{@
  | CloseCurved   -- ^ @}@
  | OpenSquared   -- ^ @[@
  | CloseSquared  -- ^ @]@
  | Arithmetic    -- ^ one of @+ - * / %@
  | Comparison    -- ^ @<@ or @>@
  | Bitwise       -- ^ one of @& | ^@
  | Empty         -- ^ fallback when no other category matches
  deriving (Show, Eq)
|
|
-- | A lexed token: the text it carries together with its category.
data Token = Token
  { value      :: String     -- ^ text of the lexeme
  , token_type :: TokenType  -- ^ category assigned by the lexer
  }
  deriving (Show)
|
|
-- | Lexing state handed from 'tokenize' to 'parseRule': the characters
-- collected so far plus the kind of run that is being read.
data TrailRule = TrailRule
  { rule_val   :: String     -- ^ characters collected for the current lexeme
  , trail_type :: TrailType  -- ^ which kind of run is being read
  }
  deriving (Show)
|
|
|
|
-- | Classify a lexeme by looking at its first character.
--
-- * Punctuation and operator characters produce a one-character token
--   holding just that first character; anything after it is ignored.
-- * A leading digit produces a 'Numeric' token and a leading ASCII letter a
--   'Literal' token; both carry the /whole/ input string as their value.
-- * Any other leading character produces @Token " " Empty@.
-- * The empty string produces @Token "" Empty@ (the previous implementation
--   crashed here because it called 'head' on the input).
parseSingleToken :: [Char] -> Token
parseSingleToken [] = Token "" Empty
parseSingleToken code@(c : _)
  | c == '(' = single OpenParen
  | c == ')' = single CloseParen
  | c == '{' = single OpenCurved
  | c == '}' = single CloseCurved
  | c == '[' = single OpenSquared
  | c == ']' = single CloseSquared
  | c `elem` "+-*/%" = single Arithmetic
  | c `elem` "<>" = single Comparison
  | c `elem` "&|^" = single Bitwise
  | c == '=' = single Assignment
  | c == ';' = single EndStatement
  | c == ':' = single Colon
  | c == '.' = single Dot
  | c == '\'' = single Quotes
  | c == ',' = single Comma
  | c `elem` ['0' .. '9'] = Token code Numeric
  | c `elem` (['a' .. 'z'] ++ ['A' .. 'Z']) = Token code Literal
  | otherwise = Token " " Empty
  where
    -- One-character token made of the first character only.
    single = Token [c]
|
|
|
|
-- | Turn source text into a list of tokens.
--
-- The first character of the input decides which 'TrailRule' is started
-- (digit, ASCII letter, or anything else); the rest of the input is handed
-- to 'parseRule' together with that rule.
--
-- Empty input yields no tokens. This base case is required: 'joinTokens'
-- calls back into 'tokenize' with whatever is left after each token, so
-- without it every run ended in a 'head' call on the empty list.
tokenize :: [Char] -> [Token]
tokenize [] = []
tokenize (f : t)
  | f `elem` ['0' .. '9'] = parseRule t (TrailRule [f] OnlyDigits)
  | f `elem` (['a' .. 'z'] ++ ['A' .. 'Z']) = parseRule t (TrailRule [f] LettersOrDigits)
  | otherwise = parseRule t (TrailRule [f] OnlySpecial)
|
|
|
|
-- | Emit the token of a @(token, remaining input)@ pair and continue
-- lexing the remaining input with 'tokenize'.
joinTokens :: (Token, [Char]) -> [Token]
joinTokens parsed = firstToken : tokenize remaining
  where
    -- A pattern binding in @where@ is lazy, so the pair is only inspected
    -- when the head or the tail of the result is demanded.
    (firstToken, remaining) = parsed
|
|
|
|
-- | Dispatch on the rule's 'trail_type'.
--
-- Only 'OnlyDigits' is handled: the digit run is read by 'parseDigitsRule'
-- and lexing continues through 'joinTokens'. Every other trail type
-- returns the empty list, so the rest of the input is dropped.
-- NOTE(review): that looks like an unfinished branch rather than intended
-- behaviour; confirm before relying on it.
parseRule :: [Char] -> TrailRule -> [Token]
parseRule code rule =
  case trail_type rule of
    OnlyDigits -> joinTokens (parseDigitsRule code rule)
    _ -> []
|
|
|
|
-- | Read the rest of a digit run.
--
-- @rule_val rule@ holds the digits already consumed by the caller, in
-- source order; @code@ is the input that follows them. The result pairs the
-- token built from all digits of the run with the input left after the run.
--
-- Fixes over the previous version:
--
-- * it returned @parseSingleToken rv code@, which applies a one-argument
--   function to two arguments and never built the declared pair;
-- * it used 'head' / 'tail' and crashed when the input ended inside a
--   digit run;
-- * it prepended each new digit, so the collected digits were reversed.
parseDigitsRule :: [Char] -> TrailRule -> (Token, [Char])
parseDigitsRule code rule = (parseSingleToken (rule_val rule ++ digits), rest)
  where
    -- 'span' is total: on empty input both parts are empty.
    (digits, rest) = span (`elem` ['0' .. '9']) code