-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
463 additions
and
150 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
{-# LANGUAGE TypeApplications #-} | ||
|
||
module Language.X86.Lexer where | ||
|
||
import Data.Void | ||
import Control.Arrow ((&&&), first) | ||
import Data.Data (showConstr, toConstr, dataTypeConstrs, dataTypeOf) | ||
import Data.Char (toUpper, toLower) | ||
import Data.Functor | ||
import Control.Monad (void) | ||
import Control.Applicative ((<|>)) | ||
|
||
import Language.X86.Assembly | ||
|
||
import qualified Text.Megaparsec as Prs | ||
import qualified Text.Megaparsec.Char.Lexer as Lex | ||
import qualified Text.Megaparsec.Char as Prs | ||
|
||
type Parser = Prs.Parsec Void String | ||
|
||
-- | Defining what is considered a space to consume | ||
spaceConsumer :: Parser () | ||
spaceConsumer = Lex.space (Prs.skipSome (void Prs.tab <|> void (Prs.char ' '))) lineCmnt blockCmnt | ||
where | ||
lineCmnt = Lex.skipLineComment ";" | ||
blockCmnt = Lex.skipBlockCommentNested "/*" "*/" | ||
|
||
lexeme :: Parser a -> Parser a | ||
lexeme = Lex.lexeme spaceConsumer | ||
|
||
many1 :: Parser a -> Parser [a] | ||
many1 p = (:) <$> p <*> Prs.many p | ||
|
||
symbol :: String -> Parser String | ||
symbol = lexeme . Lex.symbol spaceConsumer | ||
|
||
-- | 'integer' parses an integer | ||
integer :: Parser Integer | ||
integer = lexeme $ | ||
(((Prs.char '-') $> negate) <|> pure id) | ||
<*> ((Prs.try (Prs.char '0' *> Prs.char 'x') *> Lex.hexadecimal) | ||
<|> Lex.decimal | ||
) | ||
|
||
-- | strings | ||
string :: Parser String | ||
string = lexeme $ Prs.char '"' >> Prs.manyTill Lex.charLiteral (Prs.char '"') | ||
|
||
-- | char | ||
char :: Parser Char | ||
char = lexeme $ Prs.char '\'' *> Lex.charLiteral <* Prs.char '\'' | ||
|
||
rword :: String -> Parser () | ||
rword w = Prs.string w *> Prs.notFollowedBy Prs.alphaNumChar *> spaceConsumer | ||
|
||
-- | list of reserved words | ||
reservedWords :: [String] | ||
reservedWords = | ||
map (map toLower) instructions | ||
++ map (map toUpper) instructions | ||
|
||
instructions :: [String] | ||
instructions = | ||
map tail | ||
. filter (\(c:_) -> c == 'I') | ||
. map showConstr | ||
. dataTypeConstrs | ||
. dataTypeOf @Instruction | ||
$ undefined | ||
|
||
registers :: [(String, Reg)] | ||
registers = | ||
regs ++ map (first (map toLower)) regs | ||
where | ||
regs = | ||
map | ||
(showConstr . toConstr &&& id @Reg) | ||
[minBound..maxBound] | ||
|
||
-- | identifiers | ||
identifier :: Parser String | ||
identifier = lexeme (many1 chara >>= check) | ||
where | ||
check x = if x `elem` reservedWords | ||
then fail $ "instruction " ++ show x ++ " cannot be an identifier" | ||
else pure x | ||
chara = Prs.alphaNumChar <|> Prs.oneOf ("/_-" :: String) | ||
|
||
|
||
|
||
-- | 'parens' parses something between parenthesis | ||
parens :: Parser a -> Parser a | ||
parens = Prs.between (symbol "(") (symbol ")") | ||
|
||
braces, angles, brackets :: Parser a -> Parser a | ||
braces = Prs.between (symbol "{") (symbol "}") | ||
angles = Prs.between (symbol "<") (symbol ">") | ||
brackets = Prs.between (symbol "[") (symbol "]") | ||
|
||
semicolon, comma, colon, dot, equals, arrow, lambda, tilda :: Parser String | ||
semicolon = symbol ";" | ||
comma = symbol "," | ||
colon = symbol ":" | ||
dot = symbol "." | ||
equals = symbol "=" | ||
arrow = symbol "->" | ||
lambda = symbol "\\" | ||
tilda = symbol "~" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,125 @@ | ||
module Language.X86.Parser where | ||
|
||
module Language.X86.Parser | ||
( parsePrint | ||
, parseCode | ||
, parseCodeLine | ||
, parseErrorPretty | ||
) | ||
where | ||
|
||
import Control.Monad | ||
import Control.Applicative | ||
import Data.Void | ||
import Text.Groom | ||
|
||
import qualified Text.Megaparsec as Prs | ||
import qualified Text.Megaparsec.Char as Prs | ||
import qualified Text.Megaparsec.Expr as Prs | ||
|
||
import Language.X86.Assembly | ||
import Language.X86.Lexer | ||
|
||
parseErrorPretty :: (Prs.ParseError (Prs.Token String) Void) -> String | ||
parseErrorPretty = Prs.parseErrorPretty | ||
|
||
parseCode :: String -> String -> Either (Prs.ParseError (Prs.Token String) Void) [Instruction] | ||
parseCode src content = | ||
Prs.parse (concat <$> many1 (parseInstWithLabel <* lexeme Prs.eol)) src (content ++ "\n") | ||
|
||
parseCodeLine :: String -> String -> Either (Prs.ParseError (Prs.Token String) Void) [Instruction] | ||
parseCodeLine src content = Prs.parse parseLine src (content ++ "\n") | ||
|
||
parseLine :: Parser [Instruction] | ||
parseLine = parseInstWithLabel <* lexeme Prs.eol | ||
|
||
parse :: Parser a -> String -> String -> Either (Prs.ParseError (Prs.Token String) Void) a | ||
parse parser srcName content = | ||
Prs.parse (parser <* Prs.eof) srcName content | ||
|
||
parsePrint :: Show a => Parser a -> String -> IO () | ||
parsePrint p = putStrLn . either Prs.parseErrorPretty groom . parse p "test" | ||
|
||
parseLabel :: Parser Instruction | ||
parseLabel = | ||
fmap Label identifier <* colon | ||
|
||
parseArg :: Parser Arg | ||
parseArg = | ||
(Ref <$> brackets parseArg) | ||
<|> (AE <$> parseArithExpr parseReg) | ||
|
||
parseArithExpr :: Parser var -> Parser (ArithExpr var) | ||
parseArithExpr pvar = expr pvar | ||
|
||
parseReg :: Parser Reg | ||
parseReg = | ||
msum (map (\(s,c) -> symbol s *> pure c) registers) | ||
|
||
parseAddress :: Parser Address | ||
parseAddress = parseArithExpr parseAddressVar | ||
|
||
parseAddressVar :: Parser AddressVar | ||
parseAddressVar = | ||
(AR <$> Prs.try parseReg) | ||
<|> fmap AL identifier | ||
|
||
parseBinInstruction :: Parser (Arg -> Arg -> Instruction) | ||
parseBinInstruction = | ||
(IMov <$ (rword "mov" <|> rword "MOV" )) | ||
<|> (IAdd <$ (rword "add" <|> rword "ADD" )) | ||
<|> (ISub <$ (rword "sub" <|> rword "SUB" )) | ||
<|> (ICmp <$ (rword "cmp" <|> rword "CMP" )) | ||
<|> (IXor <$ (rword "xor" <|> rword "XOR" )) | ||
<|> (IAnd <$ (rword "and" <|> rword "AND" )) | ||
<|> (IOr <$ (rword "or" <|> rword "OR" )) | ||
<|> (IShl <$ (rword "shl" <|> rword "SHL" )) | ||
<|> (IShr <$ (rword "shr" <|> rword "SHR" )) | ||
<|> (ISar <$ (rword "sar" <|> rword "SAR" )) | ||
<|> (ISal <$ (rword "sal" <|> rword "SAL" )) | ||
<|> (ITest <$ (rword "test" <|> rword "TEST")) | ||
|
||
parseJmps :: Parser (Address -> Instruction) | ||
parseJmps = | ||
(IJmp <$ (rword "jmp" <|> rword "JMP" )) | ||
<|> (IJe <$ (rword "je" <|> rword "JE" )) | ||
<|> (IJne <$ (rword "jne" <|> rword "JNE" )) | ||
<|> (IJnz <$ (rword "jnz" <|> rword "JNZ" )) | ||
<|> (IJz <$ (rword "jz" <|> rword "JZ" )) | ||
<|> (IJge <$ (rword "jge" <|> rword "JGE" )) | ||
<|> (ICall <$ (rword "call" <|> rword "CALL" )) | ||
|
||
parseInstruction :: Parser Instruction | ||
parseInstruction = | ||
(parseBinInstruction <*> (parseArg <* comma) <*> parseArg) | ||
<|> (parseJmps <*> parseAddress) | ||
<|> (IMul <$ (rword "mul" <|> rword "MUL" ) <*> parseArg) | ||
<|> (IPush <$ (rword "push" <|> rword "PUSH" ) <*> parseArg) | ||
<|> (IPop <$ (rword "pop" <|> rword "POP" ) <*> parseArg) | ||
<|> (IRet <$ (rword "ret" <|> rword "RET" )) | ||
|
||
parseInstWithLabel :: Parser [Instruction] | ||
parseInstWithLabel = do | ||
Prs.try ((:) <$> parseLabel <*> (parseInstWithLabel <|> fmap pure parseInstruction <|> fmap pure (parseLabel <* lexeme Prs.eol))) | ||
<|> (fmap pure parseInstruction <|> fmap pure parseLabel) | ||
|
||
expr :: Parser var -> Parser (ArithExpr var) | ||
expr pvar = Prs.makeExprParser (term pvar) table | ||
|
||
term :: Parser var -> Parser (ArithExpr var) | ||
term pvar = | ||
parens (expr pvar) | ||
<|> fmap (Lit . fromIntegral) integer | ||
<|> fmap Var pvar | ||
|
||
table :: [[Prs.Operator Parser (ArithExpr a)]] | ||
table = | ||
[ [ binary "*" Mul ] | ||
, [ binary "+" Add | ||
, binary "-" Sub | ||
] | ||
] | ||
|
||
binary :: String | ||
-> ((ArithExpr a) -> (ArithExpr a) -> (ArithExpr a)) | ||
-> Prs.Operator Parser (ArithExpr a) | ||
binary name f = Prs.InfixL (f <$ symbol name) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
module Parser where | ||
|
||
import Language.X86 | ||
|
||
import Testing | ||
import Simple | ||
|
||
|
||
tests :: TestTree | ||
tests = | ||
testGroup "Parser" $ | ||
mconcat | ||
[ zipWith (\n t -> testCase ("Simple " ++ show n) t) [1..] $ map ppParsePPparse simple | ||
, zipWith (\n t -> testCase ("Jumps " ++ show n) t) [1..] $ map ppParsePPparse jumps | ||
, zipWith (\n t -> testCase ("Stack " ++ show n) t) [1..] $ map ppParsePPparse stack | ||
, zipWith (\n t -> testCase ("Calls " ++ show n) t) [1..] $ map ppParsePPparse calls | ||
] | ||
|
||
ppParsePPparse (snd -> insts) = | ||
case ppppp of | ||
Right _ -> pure () | ||
Left er -> | ||
errorWithoutStackTrace $ | ||
"Test failed to parse: " ++ parseErrorPretty er | ||
where | ||
ppppp = do | ||
i' <- parseCode "test1" (ppAsm insts) | ||
parseCode "test2" (ppAsm i') | ||
pure () | ||
|
||
|
Oops, something went wrong.