Skip to content

Commit

Permalink
adding an intel syntax parser
Browse files Browse the repository at this point in the history
  • Loading branch information
soupi committed Jan 12, 2018
1 parent 543d88e commit 2010a95
Show file tree
Hide file tree
Showing 9 changed files with 463 additions and 150 deletions.
2 changes: 2 additions & 0 deletions src/Language/X86.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ where

import Language.X86.Assembly as X86
import Language.X86.Interpreter as X86
import Language.X86.Parser as X86
import Language.X86.PP as X86

2 changes: 1 addition & 1 deletion src/Language/X86/Assembly.hs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ data Reg
| EBP
| ESI
| EIP
deriving (Show, Read, Eq, Ord, Generic, NFData, Data, Typeable)
deriving (Show, Read, Eq, Ord, Generic, NFData, Data, Typeable, Bounded, Enum)


-- | x86 flags
Expand Down
108 changes: 108 additions & 0 deletions src/Language/X86/Lexer.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{-# LANGUAGE TypeApplications #-}

module Language.X86.Lexer where

import Data.Void
import Control.Arrow ((&&&), first)
import Data.Data (showConstr, toConstr, dataTypeConstrs, dataTypeOf)
import Data.Char (toUpper, toLower)
import Data.Functor
import Control.Monad (void)
import Control.Applicative ((<|>))

import Language.X86.Assembly

import qualified Text.Megaparsec as Prs
import qualified Text.Megaparsec.Char.Lexer as Lex
import qualified Text.Megaparsec.Char as Prs

type Parser = Prs.Parsec Void String

-- | Defining what is considered a space to consume
spaceConsumer :: Parser ()
spaceConsumer = Lex.space (Prs.skipSome (void Prs.tab <|> void (Prs.char ' '))) lineCmnt blockCmnt
where
lineCmnt = Lex.skipLineComment ";"
blockCmnt = Lex.skipBlockCommentNested "/*" "*/"

lexeme :: Parser a -> Parser a
lexeme = Lex.lexeme spaceConsumer

many1 :: Parser a -> Parser [a]
many1 p = (:) <$> p <*> Prs.many p

symbol :: String -> Parser String
symbol = lexeme . Lex.symbol spaceConsumer

-- | 'integer' parses an integer
integer :: Parser Integer
integer = lexeme $
(((Prs.char '-') $> negate) <|> pure id)
<*> ((Prs.try (Prs.char '0' *> Prs.char 'x') *> Lex.hexadecimal)
<|> Lex.decimal
)

-- | strings
string :: Parser String
string = lexeme $ Prs.char '"' >> Prs.manyTill Lex.charLiteral (Prs.char '"')

-- | char
char :: Parser Char
char = lexeme $ Prs.char '\'' *> Lex.charLiteral <* Prs.char '\''

rword :: String -> Parser ()
rword w = Prs.string w *> Prs.notFollowedBy Prs.alphaNumChar *> spaceConsumer

-- | list of reserved words
reservedWords :: [String]
reservedWords =
map (map toLower) instructions
++ map (map toUpper) instructions

instructions :: [String]
instructions =
map tail
. filter (\(c:_) -> c == 'I')
. map showConstr
. dataTypeConstrs
. dataTypeOf @Instruction
$ undefined

registers :: [(String, Reg)]
registers =
regs ++ map (first (map toLower)) regs
where
regs =
map
(showConstr . toConstr &&& id @Reg)
[minBound..maxBound]

-- | identifiers
identifier :: Parser String
identifier = lexeme (many1 chara >>= check)
where
check x = if x `elem` reservedWords
then fail $ "instruction " ++ show x ++ " cannot be an identifier"
else pure x
chara = Prs.alphaNumChar <|> Prs.oneOf ("/_-" :: String)



-- | 'parens' parses something between parenthesis
parens :: Parser a -> Parser a
parens = Prs.between (symbol "(") (symbol ")")

braces, angles, brackets :: Parser a -> Parser a
braces = Prs.between (symbol "{") (symbol "}")
angles = Prs.between (symbol "<") (symbol ">")
brackets = Prs.between (symbol "[") (symbol "]")

semicolon, comma, colon, dot, equals, arrow, lambda, tilda :: Parser String
semicolon = symbol ";"
comma = symbol ","
colon = symbol ":"
dot = symbol "."
equals = symbol "="
arrow = symbol "->"
lambda = symbol "\\"
tilda = symbol "~"
126 changes: 125 additions & 1 deletion src/Language/X86/Parser.hs
Original file line number Diff line number Diff line change
@@ -1 +1,125 @@
module Language.X86.Parser where

module Language.X86.Parser
( parsePrint
, parseCode
, parseCodeLine
, parseErrorPretty
)
where

import Control.Monad
import Control.Applicative
import Data.Void
import Text.Groom

import qualified Text.Megaparsec as Prs
import qualified Text.Megaparsec.Char as Prs
import qualified Text.Megaparsec.Expr as Prs

import Language.X86.Assembly
import Language.X86.Lexer

parseErrorPretty :: (Prs.ParseError (Prs.Token String) Void) -> String
parseErrorPretty = Prs.parseErrorPretty

parseCode :: String -> String -> Either (Prs.ParseError (Prs.Token String) Void) [Instruction]
parseCode src content =
Prs.parse (concat <$> many1 (parseInstWithLabel <* lexeme Prs.eol)) src (content ++ "\n")

parseCodeLine :: String -> String -> Either (Prs.ParseError (Prs.Token String) Void) [Instruction]
parseCodeLine src content = Prs.parse parseLine src (content ++ "\n")

parseLine :: Parser [Instruction]
parseLine = parseInstWithLabel <* lexeme Prs.eol

parse :: Parser a -> String -> String -> Either (Prs.ParseError (Prs.Token String) Void) a
parse parser srcName content =
Prs.parse (parser <* Prs.eof) srcName content

parsePrint :: Show a => Parser a -> String -> IO ()
parsePrint p = putStrLn . either Prs.parseErrorPretty groom . parse p "test"

parseLabel :: Parser Instruction
parseLabel =
fmap Label identifier <* colon

parseArg :: Parser Arg
parseArg =
(Ref <$> brackets parseArg)
<|> (AE <$> parseArithExpr parseReg)

parseArithExpr :: Parser var -> Parser (ArithExpr var)
parseArithExpr pvar = expr pvar

parseReg :: Parser Reg
parseReg =
msum (map (\(s,c) -> symbol s *> pure c) registers)

parseAddress :: Parser Address
parseAddress = parseArithExpr parseAddressVar

parseAddressVar :: Parser AddressVar
parseAddressVar =
(AR <$> Prs.try parseReg)
<|> fmap AL identifier

parseBinInstruction :: Parser (Arg -> Arg -> Instruction)
parseBinInstruction =
(IMov <$ (rword "mov" <|> rword "MOV" ))
<|> (IAdd <$ (rword "add" <|> rword "ADD" ))
<|> (ISub <$ (rword "sub" <|> rword "SUB" ))
<|> (ICmp <$ (rword "cmp" <|> rword "CMP" ))
<|> (IXor <$ (rword "xor" <|> rword "XOR" ))
<|> (IAnd <$ (rword "and" <|> rword "AND" ))
<|> (IOr <$ (rword "or" <|> rword "OR" ))
<|> (IShl <$ (rword "shl" <|> rword "SHL" ))
<|> (IShr <$ (rword "shr" <|> rword "SHR" ))
<|> (ISar <$ (rword "sar" <|> rword "SAR" ))
<|> (ISal <$ (rword "sal" <|> rword "SAL" ))
<|> (ITest <$ (rword "test" <|> rword "TEST"))

parseJmps :: Parser (Address -> Instruction)
parseJmps =
(IJmp <$ (rword "jmp" <|> rword "JMP" ))
<|> (IJe <$ (rword "je" <|> rword "JE" ))
<|> (IJne <$ (rword "jne" <|> rword "JNE" ))
<|> (IJnz <$ (rword "jnz" <|> rword "JNZ" ))
<|> (IJz <$ (rword "jz" <|> rword "JZ" ))
<|> (IJge <$ (rword "jge" <|> rword "JGE" ))
<|> (ICall <$ (rword "call" <|> rword "CALL" ))

parseInstruction :: Parser Instruction
parseInstruction =
(parseBinInstruction <*> (parseArg <* comma) <*> parseArg)
<|> (parseJmps <*> parseAddress)
<|> (IMul <$ (rword "mul" <|> rword "MUL" ) <*> parseArg)
<|> (IPush <$ (rword "push" <|> rword "PUSH" ) <*> parseArg)
<|> (IPop <$ (rword "pop" <|> rword "POP" ) <*> parseArg)
<|> (IRet <$ (rword "ret" <|> rword "RET" ))

parseInstWithLabel :: Parser [Instruction]
parseInstWithLabel = do
Prs.try ((:) <$> parseLabel <*> (parseInstWithLabel <|> fmap pure parseInstruction <|> fmap pure (parseLabel <* lexeme Prs.eol)))
<|> (fmap pure parseInstruction <|> fmap pure parseLabel)

expr :: Parser var -> Parser (ArithExpr var)
expr pvar = Prs.makeExprParser (term pvar) table

term :: Parser var -> Parser (ArithExpr var)
term pvar =
parens (expr pvar)
<|> fmap (Lit . fromIntegral) integer
<|> fmap Var pvar

table :: [[Prs.Operator Parser (ArithExpr a)]]
table =
[ [ binary "*" Mul ]
, [ binary "+" Add
, binary "-" Sub
]
]

binary :: String
-> ((ArithExpr a) -> (ArithExpr a) -> (ArithExpr a))
-> Prs.Operator Parser (ArithExpr a)
binary name f = Prs.InfixL (f <$ symbol name)
41 changes: 35 additions & 6 deletions src/Language/X86/Run.hs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,37 @@ commands' =
putStrLn $ groom s
pure s
)

, ( "file"
, parseFile
)
]

parseFile :: [FilePath] -> ReplState -> IO ReplState
parseFile args state = do
catch
(do
case args of
[file] -> do
codeStr <- readFile file
case parseCode "repl" codeStr of
Left er -> do
hPutStrLn stderr (parseErrorPretty er)
pure state
Right code -> do
putStrLn "Code parsed successfully. To view it type 'code'."
pure $ ReplMachineState [initMachine $ toCode [] code]
xs -> do
hPutStrLn stderr ("Expecting one filepath argument, but got: " ++ show (length xs))
pure state

)
(\(SomeException e) -> do
hPutStrLn stderr (show e)
pure state
)


initReplMachineState :: ReplState -> IO ReplState
initReplMachineState s = do
putStrLn "Please enter code. To mark you are done write 'done'. "
Expand All @@ -201,19 +230,19 @@ readCode = do
where
go code = do
putStr "...> "
getLine >>= pure . trim >>= \case
getLine >>= \case
"quit" -> do
putStrLn "Bye!"
exitSuccess
"discard" ->
pure []
"done" ->
pure $ reverse code
pure $ concat $ reverse code
line -> do
case readMaybe line of
Just c -> go (c : code)
_ -> do
hPutStrLn stderr "Failed to read line."
case parseCodeLine "repl" line of
Right c -> go (c : code)
Left er -> do
hPutStrLn stderr ("*** Error: Failed to read line.\n" ++ parseErrorPretty er)
go code


Expand Down
2 changes: 2 additions & 0 deletions test/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module Main where

import Testing
import qualified Simple
import qualified Parser

main :: IO ()
main = defaultMain tests
Expand All @@ -11,4 +12,5 @@ tests =
testGroup
"Tests"
[ Simple.tests
, Parser.tests
]
31 changes: 31 additions & 0 deletions test/Parser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
module Parser where

import Language.X86

import Testing
import Simple


tests :: TestTree
tests =
testGroup "Parser" $
mconcat
[ zipWith (\n t -> testCase ("Simple " ++ show n) t) [1..] $ map ppParsePPparse simple
, zipWith (\n t -> testCase ("Jumps " ++ show n) t) [1..] $ map ppParsePPparse jumps
, zipWith (\n t -> testCase ("Stack " ++ show n) t) [1..] $ map ppParsePPparse stack
, zipWith (\n t -> testCase ("Calls " ++ show n) t) [1..] $ map ppParsePPparse calls
]

ppParsePPparse (snd -> insts) =
case ppppp of
Right _ -> pure ()
Left er ->
errorWithoutStackTrace $
"Test failed to parse: " ++ parseErrorPretty er
where
ppppp = do
i' <- parseCode "test1" (ppAsm insts)
parseCode "test2" (ppAsm i')
pure ()


Loading

0 comments on commit 2010a95

Please sign in to comment.