Porcelain/app/Lexer.hs
2024-10-30 22:16:06 +03:00

57 lines
2.3 KiB
Haskell

module Lexer (tokenize, Token (..), TokenType (..), TrailType (..), TrailRule (..) ) where
import Data.List
import Data.Char (ord)
-- | Which characters may continue a token once its first character is known.
data TrailType
  = OnlyDigits      -- ^ chosen when the first character is a decimal digit
  | LettersOrDigits -- ^ chosen when the first character is an ASCII letter
  | OnlySpecial     -- ^ chosen for every other first character
  deriving (Show, Eq)

-- | Classification attached to every lexed 'Token'.
data TokenType
  = Quotes       -- ^ the single-quote character
  | Dot          -- ^ @.@
  | Comma        -- ^ @,@
  | Colon        -- ^ @:@
  | EndStatement -- ^ @;@
  | EOF          -- ^ not produced by the lexer code in this module
  | QString      -- ^ not produced by the lexer code in this module
  | Numeric      -- ^ text starting with a decimal digit
  | Literal      -- ^ text starting with an ASCII letter
  | Digit        -- ^ not produced by the lexer code in this module
  | Assignment   -- ^ @=@
  | OpenParen    -- ^ @(@
  | CloseParen   -- ^ @)@
  | OpenCurved   -- ^ @{@
  | CloseCurved  -- ^ @}@
  | OpenSquared  -- ^ @[@
  | CloseSquared -- ^ @]@
  | Arithmetic   -- ^ one of @+ - * / %@
  | Comparison   -- ^ @<@ or @>@
  | Bitwise      -- ^ one of @& | ^@
  | Empty        -- ^ any character the lexer does not recognise
  deriving (Show, Eq)

-- | A lexed token: its text together with its classification.
data Token = Token
  { value :: [Char]
  , token_type :: TokenType
  }
  deriving (Show)

-- | Scanning state: the text collected so far for the current token and the
-- rule that decides which characters may extend it.
data TrailRule = TrailRule
  { rule_val :: [Char]
  , trail_type :: TrailType
  }
  deriving (Show)
-- | Classify a piece of source text by its first character.
--
-- Punctuation and operator characters produce a one-character token; the
-- rest of the input is ignored. Text starting with a decimal digit or an
-- ASCII letter is returned whole as a 'Numeric' or 'Literal' token.
-- Anything else, including the empty string, yields @Token " " Empty@.
parseSingleToken :: [Char] -> Token
-- Empty input used to crash on `head`; it is now reported like any other
-- unrecognised input.
parseSingleToken [] = Token " " Empty
parseSingleToken code@(c : _)
  | c == '(' = single OpenParen
  | c == ')' = single CloseParen
  | c == '{' = single OpenCurved
  | c == '}' = single CloseCurved
  | c == '[' = single OpenSquared
  | c == ']' = single CloseSquared
  | c `elem` "+-*/%" = single Arithmetic
  | c `elem` "<>" = single Comparison
  | c `elem` "&|^" = single Bitwise
  | c == '=' = single Assignment
  | c == ';' = single EndStatement
  | c == ':' = single Colon
  | c == '.' = single Dot
  | c == '\'' = single Quotes
  | c == ',' = single Comma
  | c `elem` ['0' .. '9'] = Token code Numeric
  | c `elem` (['a' .. 'z'] ++ ['A' .. 'Z']) = Token code Literal
  | otherwise = Token " " Empty
  where
    -- One-character token built from the first character only.
    single = Token [c]
-- | Split source text into a list of tokens.
--
-- The first character selects the trailing rule ('OnlyDigits' for a decimal
-- digit, 'LettersOrDigits' for an ASCII letter, 'OnlySpecial' otherwise);
-- the remaining input and a 'TrailRule' seeded with that character are
-- handed to 'parseRule'.
tokenize :: [Char] -> [Token]
-- Base case: without it the lexer crashed on empty input, which is also how
-- every recursive call ends once the source is consumed.
tokenize [] = []
tokenize (f : t)
  | f `elem` ['0' .. '9'] = parseRule t (TrailRule [f] OnlyDigits)
  | f `elem` (['a' .. 'z'] ++ ['A' .. 'Z']) = parseRule t (TrailRule [f] LettersOrDigits)
  | otherwise = parseRule t (TrailRule [f] OnlySpecial)
-- | Prepend an already-lexed token to the tokens obtained by running
-- 'tokenize' on the input that follows it.
joinTokens :: (Token, [Char]) -> [Token]
joinTokens scanned = firstToken : tokenize remaining
  where
    -- A where-bound pattern is lazy, so the pair is only inspected on demand.
    (firstToken, remaining) = scanned
-- | Finish the token started in @rule@ and lex the rest of @code@.
--
-- * 'OnlyDigits': delegates to 'parseDigitsRule'.
-- * 'LettersOrDigits': extends the collected text with the leading run of
--   ASCII letters and decimal digits and emits it as a 'Literal'.
-- * 'OnlySpecial': classifies the collected text with 'parseSingleToken'
--   and consumes nothing further.
--
-- The non-digit rules used to return @[]@, which silently discarded the
-- current character and all remaining input.
--
-- NOTE(review): 'OnlySpecial' emits one token per character, so multi-character
-- operators are not merged and unrecognised characters (whitespace included)
-- surface as 'Empty' tokens. Confirm that this is the intended design.
parseRule :: [Char] -> TrailRule -> [Token]
parseRule code rule = resume scanned
  where
    prefix = rule_val rule

    -- The finished token paired with the input that follows it.
    scanned = case trail_type rule of
      OnlyDigits -> parseDigitsRule code rule
      LettersOrDigits -> (Token (prefix ++ word) Literal, afterWord)
      OnlySpecial -> (parseSingleToken prefix, code)

    (word, afterWord) = span isWordChar code
    isWordChar ch = ch `elem` (['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9'])

    -- Stop cleanly at end of input rather than recursing on an empty string.
    resume (tok, rest)
      | null rest = [tok]
      | otherwise = joinTokens (tok, rest)
-- | Complete a numeric token.
--
-- Takes the leading run of decimal digits from @code@, appends it to the
-- text already collected in @rule@, and returns the resulting 'Numeric'
-- token together with the unconsumed input.
--
-- Using 'span' keeps the digits in source order (consing each digit onto the
-- collected text reversed the number) and handles input that ends while
-- still inside the number, where taking the head of the remainder crashed.
parseDigitsRule :: [Char] -> TrailRule -> (Token, [Char])
parseDigitsRule code rule = (Token (rule_val rule ++ digits) Numeric, rest)
  where
    (digits, rest) = span (`elem` ['0' .. '9']) code