Porcelain/app/Lexer.hs
alterdekim b0c91f0579 modified: app/Lexer.hs
modified:   app/Parser.hs
	modified:   as/test.as
2024-11-07 02:45:38 +03:00

106 lines
4.2 KiB
Haskell

module Lexer (tokenize, Token (..), TokenType (..) ) where
import Data.List
import Data.Char (ord)
import Debug.Trace;
-- Category assigned to each token produced by the lexer.
data TokenType
  = Quotes
  | Dot
  | Comma
  | Colon
  | EndStatement
  | Numeric
  | Literal
  | Assignment
  | OpenParen
  | CloseParen
  | OpenCurved
  | CloseCurved
  | OpenSquared
  | CloseSquared
  | Arithmetic
  | Comparison
  | Bitwise
  | Empty -- placeholder for characters the lexer does not recognise
  | SoftComp -- a Comparison immediately followed by an Assignment
  | CompositeAssign -- an Arithmetic or Bitwise immediately followed by an Assignment
  | VarKeyword -- a token whose text is exactly "var"
  deriving (Show, Eq)
-- A single lexed token: the source text it covers and the category it was
-- classified as.
data Token = Token
  { value :: String -- text of the token
  , token_type :: TokenType -- category of the token
  }
  deriving (Show)
-- Classifies one character as a token. Recognised characters keep their own
-- text; every other character (whitespace included) becomes an Empty token
-- whose text is a single space.
parseSingleToken :: Char -> Token
parseSingleToken c = case c of
  '(' -> single OpenParen
  ')' -> single CloseParen
  '{' -> single OpenCurved
  '}' -> single CloseCurved
  '[' -> single OpenSquared
  ']' -> single CloseSquared
  '=' -> single Assignment
  ';' -> single EndStatement
  ':' -> single Colon
  '.' -> single Dot
  '"' -> single Quotes
  ',' -> single Comma
  _
    | c `elem` "+-*/%" -> single Arithmetic
    | c `elem` "<>" -> single Comparison
    | c `elem` "&|^" -> single Bitwise
    | isDecimalDigit -> single Numeric
    | isIdentifierChar -> single Literal
    | otherwise -> Token " " Empty
  where
    -- Builds a token whose text is exactly the inspected character.
    single = Token [c]
    isDecimalDigit = c >= '0' && c <= '9'
    isIdentifierChar =
      (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
-- Turns the source text into one token per character, in source order.
makeTokenFromEveryChar :: [Char] -> [Token]
makeTokenFromEveryChar = map parseSingleToken
-- Lexer entry point. The stages run in this order:
--   1. one token per character (makeTokenFromEveryChar),
--   2. repeated merging of adjacent tokens (checkFor),
--   3. removal of Empty tokens (excludeEmpty),
--   4. tagging of the "var" keyword (keyworder).
-- Empty tokens are removed only after merging, so they still separate their
-- neighbours while merging takes place.
tokenize :: [Char] -> [Token]
tokenize sourceCode =
  keyworder . excludeEmpty . checkFor $ makeTokenFromEveryChar sourceCode
-- Re-tags every token whose text is exactly "var" as a VarKeyword and leaves
-- all other tokens unchanged. Order and length of the list are preserved.
-- Total on the empty list: an input with no tokens yields no tokens instead
-- of failing on head/tail.
keyworder :: [Token] -> [Token]
keyworder t = map promote t
  where
    -- Swaps only the category; the token text is kept as is.
    promote tok
      | value tok == "var" = Token (value tok) VarKeyword
      | otherwise = tok
-- Guarded entry into keyworder: returns [] for an empty list and otherwise
-- delegates to keyworder. Emptiness is decided by pattern matching, so the
-- list is not traversed just to measure its length.
keyworderG :: [Token] -> [Token]
keyworderG [] = []
keyworderG t = keyworder t
-- Drops every token classified as Empty, keeping the others in order.
excludeEmpty :: [Token] -> [Token]
excludeEmpty t = [tok | tok <- t, token_type tok /= Empty]
-- Lists the valid indices of the token list (0 up to length - 1) whose
-- remainder modulo 2 equals n: n = 0 selects even positions, n = 1 selects
-- odd positions.
getByMod :: [Token] -> Int -> [Int]
getByMod t n = [idx | idx <- [0 .. lastIndex], idx `mod` 2 == n]
  where
    lastIndex = length t - 1
-- Extracts the tokens at even (n = 0) or odd (n = 1) positions, in order.
-- Positions are paired with tokens in a single pass, so no token is looked
-- up by index.
getTokensByMod :: [Token] -> Int -> [Token]
getTokensByMod t n = [tok | (idx, tok) <- zip [0 :: Int ..] t, idx `mod` 2 == n]
-- Bounds check in front of reducerItself for position i:
--   * no token left at i          -> nothing more to emit,
--   * exactly one token left at i -> emit it (it is the last token),
--   * two or more tokens left     -> continue with reducerItself.
reducerGuard :: [Token] -> Int -> [Token]
reducerGuard t i =
  case compare (length t) (succ i) of
    LT -> []
    EQ -> [last t]
    GT -> reducerItself t i
-- One left-to-right merging pass over the tokens starting at index i.
-- Adjacent pairs are merged (texts concatenated) when they match a rule:
--   Literal    + Literal/Numeric -> Literal
--   Numeric    + Numeric/Dot     -> Numeric
--   Comparison + Assignment      -> SoftComp
--   Arithmetic/Bitwise + Assignment -> CompositeAssign
-- After a merge the pass resumes behind the merged pair; otherwise the first
-- token is emitted unchanged and the pass advances by one token.
-- The pass is total: when fewer than two tokens remain they are emitted as
-- they are instead of being indexed past the end of the list.
reducerItself :: [Token] -> Int -> [Token]
reducerItself t i = go (drop i t)
  where
    go (e : o : rest)
      | Just merged <- mergedType (token_type e) (token_type o) =
          Token (value e ++ value o) merged : go rest
      | otherwise = e : go (o : rest)
    go remaining = remaining

    -- Category of the merged token, or Nothing when the pair must stay apart.
    mergedType h g
      | h == Literal && (g == Literal || g == Numeric) = Just Literal
      | h == Numeric && (g == Numeric || g == Dot) = Just Numeric
      | h == Comparison && g == Assignment = Just SoftComp
      | (h == Arithmetic || h == Bitwise) && g == Assignment = Just CompositeAssign
      | otherwise = Nothing
-- Runs one merging pass over the whole token list, reducing the number of
-- tokens by joining adjacent ones (e.g. "<" "=" becomes "<=").
-- The pass is started through reducerGuard so that lists with fewer than two
-- tokens are returned unchanged rather than indexed out of bounds.
reduceTokens :: [Token] -> [Token]
reduceTokens t = reducerGuard t 0
-- Bounds check in front of hasItself: a mergeable pair can only start at i
-- when a token exists at i + 1, so shorter lists answer False directly.
hasGuard :: [Token] -> Int -> Bool
hasGuard t i = length t > succ i && hasItself t i
-- Reports whether any pair of adjacent tokens at or after index i can still
-- be merged, using these rules:
--   Literal    followed by Literal or Numeric
--   Numeric    followed by Numeric or Dot
--   Comparison followed by Assignment
--   Arithmetic or Bitwise followed by Assignment
-- Total on short input: with fewer than two tokens from i onwards there is
-- no pair to inspect and the answer is False.
hasItself :: [Token] -> Int -> Bool
hasItself t i = or (zipWith mergeable kinds (drop 1 kinds))
  where
    -- Categories of the tokens from index i onwards.
    kinds = map token_type (drop i t)
    mergeable h g
      | h == Literal = g == Literal || g == Numeric
      | h == Numeric = g == Numeric || g == Dot
      | h == Comparison = g == Assignment
      | h == Arithmetic || h == Bitwise = g == Assignment
      | otherwise = False
-- Repeats the merging pass until no adjacent pair of tokens can be merged
-- any more, then returns the resulting list.
-- The check goes through hasGuard so that empty and single-token lists are
-- returned unchanged instead of being indexed out of bounds.
checkFor :: [Token] -> [Token]
checkFor t
  | hasGuard t 0 = checkFor (reduceTokens t)
  | otherwise = t