diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..99ec428 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +.idea diff --git a/LICENSE b/LICENSE index e1020a4..89bc5f8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2013-2016 Craig Thomas +Copyright (C) 2013-2019 Craig Thomas Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -16,7 +16,3 @@ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -Except as contained in this notice, the name(s) of the above copyright holders -shall not be used in advertising or otherwise to promote the sale, use or other -dealings in this Software without prior written authorization. diff --git a/README.md b/README.md index e544036..b5801b7 100644 --- a/README.md +++ b/README.md @@ -1,105 +1,111 @@ -# Yet Another Color Computer 3 Assembler / Disassembler +# CoCo Assembler -## What is it? - -This project is a Color Computer 3 assembler and disassembler written in -Python. 
+[![Build Status](https://img.shields.io/travis/craigthomas/CoCo3Assembler?style=flat-square)](https://travis-ci.org/craigthomas/CoCo3Assembler) +[![Codecov](https://img.shields.io/codecov/c/gh/craigthomas/CoCo3Assembler?style=flat-square)](https://codecov.io/gh/craigthomas/CoCo3Assembler) +[![Dependencies](https://img.shields.io/librariesio/github/craigthomas/CoCo3Assembler?style=flat-square)](https://libraries.io/github/craigthomas/CoCo3Assembler) +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square)](https://opensource.org/licenses/MIT) -The Color Computer 3 is the third incarnation of the Tandy Radio Shack -Color Computer line (TRS-80). The CoCo 3 offered several improvements over the -original CoCo 1 and CoCo 2, most notably the introduction of a memory -management unit (MMU) and a new Advanced Color Video Chip (ACVC) - also known -as the Graphics Interrupt Memory Enhancer (GIME). +## Table of Contents -While the official name of the computer was the TRS-80 Color Computer 3, -the Color Computer family was quite different from the line of business -machines such as the TRS-80 Model I, II, III, and 4. While that family -of computers used a Zilog Z80 microprocessor, the Color Computer family used -a Motorola 6809E processor running at 0.89 MHz. +1. [What is it?](#what-is-it) +2. [Requirements](#requirements) +3. [License](#license) +4. [Installing](#installing) +5. [Usage](#usage) -## Current Status - May 27, 2013 +## What is it? -The assembler is in its first phases of development, and is subject to -change. Check the documentation below for more information on how to use -it. +This project is an assembler for the Tandy Color Computer 1, 2 and 3 written in Python 3.6. +It is intended to be statement compatible with any code written for the EDTASM+ assembler. -## Roadmap +## Requirements -Under construction. +In order to run the assembler, you will need Python 3.6 or greater. 
If you +prefer to clone the repository, you will need Git (if you don't want to +install Git, then check the [Releases](https://github.com/craigthomas/CoCo3Assembler/releases) +section for the newest release of the package that you can download). ## License This project makes use of an MIT style license. Please see the file called LICENSE for more information. - ## Installing -Simply copy the source files to a directory of your choice. In addition to -the source, you will need the following required software packages: +To install the source files, download the latest release from the +[Releases](https://github.com/craigthomas/CoCo3Assembler/releases) +section of the repository and unzip the contents in a directory of your +choice. Or, clone the repository in the directory of your choice with: -* [Python 2.7+ or 3](http://www.python.org) + git clone https://github.com/craigthomas/CoCo3Assembler.git -I strongly recommend creating a virtual environment using the -[virtualenv](http://pypi.python.org/pypi/virtualenv) builder as well as the -[virtualenvwrapper](https://bitbucket.org/dhellmann/virtualenvwrapper) tools. +Next, you will need to install the required packages for the file: + pip install -r requirements.txt + ## Usage -The assembler is used to transform assembly language statements into 6809E -machine code. A source file of assembly language statements is broken up into a -number of columns: +To run the assembler: - LABEL OPERATION OPERAND # COMMENT + python assembler.py input_file --output output_file -Where each column contains the following: +This will assemble the instructions found in file `input_file` and will generate +the associated Color Computer machine instructions in binary format in `output_file`. -* `LABEL` - a label to be applied to the operation or declaraion. A label may be -composed of any alphanumeric characters, and can be any length. -* `OPERATION` - the operation to execute. -* `OPERAND` - the data to apply to the operation. 
-* `COMMENT` - a textual comment to be applied to the operation, can be any length -and is terminated by a newline character. +### Input Format -A full example would be: +The input file needs to follow the format below: - BUFFER EQU $6100 # START OF BUFFER - BUFFEND EQU $7FFF # END OF BUFFER - START LDY #$0FF00 # LOAD INPUT PIA ADDRESS - LDX #BUFFER # LOAD BUFFER PNTR ADDRESS - INP000 LDB #10 # SELECT RIGHT,X - JSR $A9A2 # SELECT SUBROUTINE + LABEL MNEMONIC OPERANDS COMMENT -To run the assembler: +Where: + +* `LABEL` is a 15 character label for the statement +* `MNEMONIC` is a Color Computer operation mnemonic from the [Mnemonic Table](#mnemonic-table) below +* `OPERANDS` are registers, values or labels, as described in the [Operands](#operands) section +* `COMMENT` is a 30 character comment describing the statement (must have a `;` preceding it) + +An example file: + + # A comment line that contains nothing - python coco3asm.py input_file -o output_file -This will assemble the instructions found in file `input_file` and will generate the -associated CoCo machine instructions in binary format in `output_file`. 
Additional -options include printing the symbol table: +### Print Symbol Table - python coco3asm.py test.asm -s +To print the symbol table that is generated during assembly, use the `--symbols` +switch: + + python assembler.py test.asm --symbols Which will have the following output: - -- Symbol Table -- - BUFFER $6100 - START $6000 - INP000 $6007 - BUFFEND $7FFF + -- Symbol Table -- + -Print out the assembled version of the input: +### Print Assembled Statements - python chip8asm/chip8asm.py test.asm -p +To print out the assembled version of the program, use the `--print` switch: + + python assembler.py test.asm --print Which will have the following output: -- Assembled Statements -- - 6000 108E FF00 START LDY #$0FF00 # LOAD INPUT PIA ADDRESS - 6004 8E 6100 LDX #BUFFER # LOAD BUFFER PNTR ADDRESS - 6007 C6 0A INP000 LDB #10 # SELECT RIGHT,X - 6009 BD A9A2 JSR $A9A2 # SELECT SUBROUTINE + +With this output, the first column is the offset in hex where the statement starts, +the second column contains the full machine-code operand, the third column is the +user-supplied label for the statement, the fourth column is the mnemonic, the fifth +column is the register values or other numeric or label data the operation will +work on, and the sixth column is the comment string. + +## Mnemonic Table + +### Mnemonics + +### Pseudo Operations + +### Operands ## Further Documentation diff --git a/assembler.py b/assembler.py new file mode 100644 index 0000000..4793556 --- /dev/null +++ b/assembler.py @@ -0,0 +1,57 @@ +""" +Copyright (C) 2019 Craig Thomas + +This project uses an MIT style license - see LICENSE for details. +A Color Computer Assembler - see the README.md file for details. 
+""" +# I M P O R T S ############################################################### + +import argparse + +from cocoasm.program import Program + +# F U N C T I O N S ########################################################### + + +def parse_arguments(): + """ + Parses the command-line arguments passed to the assembler. + """ + parser = argparse.ArgumentParser( + description="Assembler for the Tandy Color Computer 1, 2, and 3. See README.md for more " + "information, and LICENSE for terms of use." + ) + parser.add_argument("filename", help="the input file") + parser.add_argument( + "--symbols", action="store_true", help="print out the symbol table" + ) + parser.add_argument( + "--print", action="store_true", + help="print out the assembled statements when finished" + ) + parser.add_argument( + "--output", metavar="FILE", help="stores the assembled program in FILE") + return parser.parse_args() + + +def main(args): + """ + Runs the assembler with the specified arguments. + + :param args: the command-line arguments + """ + program = Program(args.filename) + + if args.symbols: + program.print_symbol_table() + + if args.print: + program.print_statements() + + if args.output: + program.save_binary_file(args.output) + + +main(parse_arguments()) + +# E N D O F F I L E ####################################################### diff --git a/coco3asm.py b/coco3asm.py deleted file mode 100644 index e56af9b..0000000 --- a/coco3asm.py +++ /dev/null @@ -1,455 +0,0 @@ -""" -Copyright (C) 2012-2016 Craig Thomas -This project uses an MIT style license - see LICENSE for details. - -A Color Computer 3 assembler - see the README.md file for details. 
-""" -# I M P O R T S ############################################################### - -import argparse -import re -import sys - -# C O N S T A N T S ########################################################### - -# Addressing modes -INH = "Inherent" -IMM = "Immediate" -DIR = "Direct" -IND = "Indexed" -EXT = "Extended" - -# Dictionary entries -LABEL = "label" -OP = "op" -OPERANDS = "operands" -COMMENT = "comment" -MODE = "mode" -OPCODE = "opcode" -DATA = "data" -IVLD = 0x01 - -# Illegal addressing mode -ILLEGAL_MODE = 0x00 - -# Opcode translation based on addressing modes -OPERATIONS = { - "ABX": {INH: 0x3A, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ADCA": {INH: 0x00, IMM: 0x89, DIR: 0x99, IND: 0xA9, EXT: 0xB9}, - "ADCB": {INH: 0x00, IMM: 0xC9, DIR: 0xD9, IND: 0xE9, EXT: 0xF9}, - "ADDA": {INH: IVLD, IMM: 0x8B, DIR: 0x9B, IND: 0xAB, EXT: 0xBB}, - "ADDB": {INH: IVLD, IMM: 0xCB, DIR: 0xDB, IND: 0xEB, EXT: 0xFB}, - "ADDD": {INH: IVLD, IMM: 0xC3, DIR: 0xD3, IND: 0xE3, EXT: 0xF3}, - "ANDA": {INH: IVLD, IMM: 0x84, DIR: 0x94, IND: 0xA4, EXT: 0xB4}, - "ANDB": {INH: IVLD, IMM: 0xC4, DIR: 0xD4, IND: 0xE4, EXT: 0xF4}, - "ANDCC": {INH: IVLD, IMM: 0x1C, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ASLA": {INH: 0x48, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ASLB": {INH: 0x58, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ASL": {INH: IVLD, IMM: IVLD, DIR: 0x08, IND: 0x68, EXT: 0x78}, - "ASRA": {INH: 0x47, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ASRB": {INH: 0x57, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ASR": {INH: IVLD, IMM: IVLD, DIR: 0x07, IND: 0x67, EXT: 0x77}, - "BITA": {INH: IVLD, IMM: 0x85, DIR: 0x95, IND: 0xA5, EXT: 0xB5}, - "BITB": {INH: IVLD, IMM: 0xC5, DIR: 0xD5, IND: 0xE5, EXT: 0xF5}, - "CLRA": {INH: 0x4F, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "CLRB": {INH: 0x5F, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "CLR": {INH: IVLD, IMM: IVLD, DIR: 0x0F, IND: 0x6F, EXT: 0x7F}, - "CMPA": {INH: IVLD, IMM: 0x81, DIR: 0x91, IND: 0xA1, EXT: 0xB1}, 
- "CMPB": {INH: IVLD, IMM: 0xC1, DIR: 0xD1, IND: 0xE1, EXT: 0xF1}, - "CMPD": {INH: IVLD, IMM: 0x1083, DIR: 0x1093, IND: 0x10A3, EXT: 0x10B3}, - "CMPS": {INH: IVLD, IMM: 0x118C, DIR: 0x119C, IND: 0x11AC, EXT: 0x11BC}, - "CMPU": {INH: IVLD, IMM: 0x1183, DIR: 0x1193, IND: 0x11A3, EXT: 0x11B3}, - "CMPX": {INH: IVLD, IMM: 0x8C, DIR: 0x9C, IND: 0xAC, EXT: 0xBC}, - "CMPY": {INH: IVLD, IMM: 0x108C, DIR: 0x109C, IND: 0x10AC, EXT: 0x10BC}, - "COMA": {INH: 0x43, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "COMB": {INH: 0x53, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "COM": {INH: IVLD, IMM: IVLD, DIR: 0x03, IND: 0x63, EXT: 0x73}, - "CWAI": {INH: IVLD, IMM: 0x3C, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "DAA": {INH: 0x19, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "DECA": {INH: 0x4A, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "DECB": {INH: 0x5A, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "DEC": {INH: IVLD, IMM: IVLD, DIR: 0x0A, IND: 0x6A, EXT: 0x7A}, - "EORA": {INH: IVLD, IMM: 0x88, DIR: 0x98, IND: 0xA8, EXT: 0xB8}, - "EORB": {INH: IVLD, IMM: 0xC8, DIR: 0xD8, IND: 0xE8, EXT: 0xF8}, - "EXG": {INH: IVLD, IMM: 0x1E, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "INCA": {INH: 0x4C, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "INCB": {INH: 0x5C, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "INC": {INH: IVLD, IMM: IVLD, DIR: 0x0C, IND: 0x6C, EXT: 0x7C}, - "JMP": {INH: IVLD, IMM: IVLD, DIR: 0x0E, IND: 0x6E, EXT: 0x7E}, - "JSR": {INH: IVLD, IMM: IVLD, DIR: 0x9D, IND: 0xAD, EXT: 0xBD}, - "LDA": {INH: IVLD, IMM: 0x86, DIR: 0x96, IND: 0xA6, EXT: 0xB6}, - "LDB": {INH: IVLD, IMM: 0xC6, DIR: 0xD6, IND: 0xE6, EXT: 0xF6}, - "LDD": {INH: IVLD, IMM: 0xCC, DIR: 0xDC, IND: 0xEC, EXT: 0xFC}, - "LDS": {INH: IVLD, IMM: 0x10CE, DIR: 0x10DE, IND: 0x10EE, EXT: 0x10FE}, - "LDU": {INH: IVLD, IMM: 0xCE, DIR: 0xDE, IND: 0xEE, EXT: 0xFE}, - "LDX": {INH: IVLD, IMM: 0x8E, DIR: 0x9E, IND: 0xAE, EXT: 0xBE}, - "LDY": {INH: IVLD, IMM: 0x108E, DIR: 0x109E, IND: 0x10AE, EXT: 0x10BE}, - "LEAS": {INH: IVLD, 
IMM: IVLD, DIR: IVLD, IND: 0x32, EXT: IVLD}, - "LEAU": {INH: IVLD, IMM: IVLD, DIR: IVLD, IND: 0x33, EXT: IVLD}, - "LEAX": {INH: IVLD, IMM: IVLD, DIR: IVLD, IND: 0x30, EXT: IVLD}, - "LEAY": {INH: IVLD, IMM: IVLD, DIR: IVLD, IND: 0x31, EXT: IVLD}, - "LSLA": {INH: 0x48, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "LSLB": {INH: 0x58, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "LSL": {INH: IVLD, IMM: IVLD, DIR: 0x08, IND: 0x68, EXT: 0x78}, - "LSRA": {INH: 0x44, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "LSRB": {INH: 0x54, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "LSR": {INH: IVLD, IMM: IVLD, DIR: 0x04, IND: 0x64, EXT: 0x74}, - "MUL": {INH: 0x3D, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "NEGA": {INH: 0x40, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "NEGB": {INH: 0x50, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "NEG": {INH: IVLD, IMM: IVLD, DIR: 0x00, IND: 0x60, EXT: 0x70}, - "NOP": {INH: 0x12, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ORA": {INH: IVLD, IMM: 0x8A, DIR: 0x9A, IND: 0xAA, EXT: 0xBA}, - "ORB": {INH: IVLD, IMM: 0xCA, DIR: 0xDA, IND: 0xEA, EXT: 0xFA}, - "ORCC": {INH: IVLD, IMM: 0x1A, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "PSHS": {INH: IVLD, IMM: 0x34, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "PSHU": {INH: IVLD, IMM: 0x36, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "PULS": {INH: IVLD, IMM: 0x35, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "PULU": {INH: IVLD, IMM: 0x37, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ROLA": {INH: 0x49, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ROLB": {INH: 0x59, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ROL": {INH: IVLD, IMM: IVLD, DIR: 0x09, IND: 0x69, EXT: 0x79}, - "RORA": {INH: 0x46, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "RORB": {INH: 0x56, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "ROR": {INH: IVLD, IMM: IVLD, DIR: 0x06, IND: 0x66, EXT: 0x76}, - "RTI": {INH: 0x3B, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "RTS": {INH: 0x39, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "SBCA": {INH: IVLD, 
IMM: 0x82, DIR: 0x92, IND: 0xA2, EXT: 0xB2}, - "SBCB": {INH: IVLD, IMM: 0xC2, DIR: 0xD2, IND: 0xE2, EXT: 0xF2}, - "SEX": {INH: 0x1D, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "STA": {INH: IVLD, IMM: IVLD, DIR: 0x97, IND: 0xA7, EXT: 0xB7}, - "STB": {INH: IVLD, IMM: IVLD, DIR: 0xD7, IND: 0xE7, EXT: 0xF7}, - "STD": {INH: IVLD, IMM: IVLD, DIR: 0xDD, IND: 0xED, EXT: 0xFD}, - "STS": {INH: IVLD, IMM: IVLD, DIR: 0x10DF, IND: 0x10EF, EXT: 0x10FF}, - "STU": {INH: IVLD, IMM: IVLD, DIR: 0xDF, IND: 0xEF, EXT: 0xFF}, - "STX": {INH: IVLD, IMM: IVLD, DIR: 0x9F, IND: 0xAF, EXT: 0xBF}, - "STY": {INH: IVLD, IMM: IVLD, DIR: 0x109F, IND: 0x10AF, EXT: 0x10BF}, - "SUBA": {INH: IVLD, IMM: 0x80, DIR: 0x90, IND: 0xA0, EXT: 0xB0}, - "SUBB": {INH: IVLD, IMM: 0xC0, DIR: 0xD0, IND: 0xE0, EXT: 0xF0}, - "SUBD": {INH: IVLD, IMM: 0x83, DIR: 0x93, IND: 0xA3, EXT: 0xB3}, - "SWI": {INH: 0x3F, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "SWI2": {INH: 0x103F, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "SWI3": {INH: 0x113F, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "SYNC": {INH: 0x13, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "TFR": {INH: IVLD, IMM: 0x1F, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "TSTA": {INH: 0x4D, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "TSTB": {INH: 0x5D, IMM: IVLD, DIR: IVLD, IND: IVLD, EXT: IVLD}, - "TST": {INH: IVLD, IMM: IVLD, DIR: 0x0D, IND: 0x6D, EXT: 0x7D}, -} - -# Pseudo operations -END = "END" -ORG = "ORG" -EQU = "EQU" -SET = "SET" -RMB = "RMB" -FCB = "FCB" -FDB = "FDB" -FCC = "FCC" -INCLUDE = "INCLUDE" -PSEUDO_OPERATIONS = [END, ORG, EQU, SET, RMB, FCB, FDB, FCC, INCLUDE] - -# Pattern to parse a single line -ASM_LINE_REGEX = re.compile( - "(?P<{0}>\\w*)\\s+(?P<{1}>\\w*)\\s+(?P<{2}>[\\w#\\$,\\+-]*)\\s+# (?P<{3}>.*)".format(LABEL, OP, OPERANDS, COMMENT)) - -# G L O B A L S ############################################################### - -# Stores each of the symbols and their values -symbol_table = dict() - - -# C L A S S E S 
############################################################## - - -class TranslationError(Exception): - """ - Translation errors occur when the translate function is called from - within the Statement class. Translation errors usually refer to the fact - that an invalid mnemonic or invalid register was specified. - """ - - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) - - -class Statement: - """ - The Statement class represents a single line of assembly language. Each - statement is constructed from a single line that has the following format: - - LABEL MNEMONIC OPERANDS COMMENT - - The statement can be parsed and translated to its COCO machine code - equivalent. - """ - def __init__(self): - self.label = "" - self.op_code = 0 - self.comments = "" - self.size = 0 - self.address = 0 - self.data = None - self.op = "" - self.operands = None - self.comment = "" - self.mode = "" - - def __str__(self): - if self.mode != INH and not self.is_pseudo_op(): - format_string = "{:04X} {:4X} {:02X} {} {} {} # {}" \ - if self.data < 255 else "{:04X} {:4X} {:04X} {} {} {} # {}" - return format_string.format( - self.address, - self.op_code, - self.data, - self.label.rjust(15, ' '), - self.op.rjust(5, ' '), - self.operands.rjust(15, ' '), - self.comment.ljust(40, ' ')) - - if not self.is_pseudo_op(): - return "{:04X} {:04X} {} {} {} {} # {}".format( - self.address, - self.op_code, - "".rjust(4, ' '), - self.label.rjust(15, ' '), - self.op.rjust(5, ' '), - self.operands.rjust(15, ' '), - self.comment.ljust(40, ' ')) - return "" - - def parse_line(self, line): - """ - Parse a line of assembly language text. - - :param line: the line of assembly language from the source file. 
- """ - data = ASM_LINE_REGEX.match(line) - if data: - self.label = data.group(LABEL) - self.op = data.group(OP) - self.operands = data.group(OPERANDS) - self.comment = data.group(COMMENT) - - def translate(self, symbol_table): - """ - Translate the text into an actual op code. - - :param symbol_table: the current symbol table - """ - if self.op in PSEUDO_OPERATIONS: - return - - if self.op not in OPERATIONS: - error = "Invalid mnemonic '{}'".format(self.op) - raise TranslationError(error) - - self.set_addressing_mode() - self.set_op_code() - self.set_data(symbol_table) - - def get_operation(self): - """ - Returns the operation dictionary item based upon the mnemonic. - """ - return self.op if self.is_pseudo_op() else OPERATIONS[self.op] - - def is_pseudo_op(self): - """ - Returns True if the operation is a pseudo operation, False otherwise. - """ - return self.op in PSEUDO_OPERATIONS - - def set_addressing_mode(self): - """ - Determines the correct addressing mode based on the operand. - """ - self.mode = EXT - if not self.operands: - self.mode = INH - if self.operands.startswith("#"): - self.mode = IMM - if "," in self.operands: - self.mode = IND - if self.operands.startswith("<"): - self.mode = DIR - if self.operands.startswith(">"): - self.mode = EXT - - def set_op_code(self): - """ - Sets the op code for the statement based on the addressing mode - and the translated op mnemonic. Will raise a TranslationError - if the specified addressing mode does not exist. - """ - self.op_code = self.get_operation()[self.mode] - if self.op_code == IVLD: - error = "Invalid addressing mode ({}) for operation {}".format( - self.mode, self.op) - raise TranslationError(error) - self.size = 1 if self.op_code < 255 else 2 - - def set_data(self, symbol_table): - """ - Sets the data component of the statement. Translates the operands - into useful data bytes. 
- """ - self.data, size = self.get_hex_value(symbol_table) - self.size += size - - def get_hex_value(self, symbol_table): - """ - Returns the hex value of the operand, and the number of bytes used to - represent the hex value. If the operand contains a symbol reference, - attempts to look up that symbol in the symbol_table. - """ - stripped_operand = self.operands - stripped_operand = stripped_operand.replace("#", "") - stripped_operand = stripped_operand.replace("$", "") - if stripped_operand in symbol_table: - hex_value = symbol_table[stripped_operand] - return hex_value, 1 if hex_value < 256 else 2 - else: - operand = self.operands - if operand.startswith("#"): - operand = operand.replace("#", "") - - if operand.startswith("$"): - value = operand.replace("$", "") - hex_value = int(value, 16) - return hex_value, 1 if hex_value < 256 else 2 - else: - value = operand - hex_value = int(value, 10) - return hex_value, 1 if hex_value < 256 else 2 - - def is_empty(self): - """ - Returns True if there is no label that is contained within the - statement. - """ - return self.op == "" - - def get_label(self): - """ - Returns the label associated with this statement. - """ - return self.label - - def set_address(self, address): - """ - Sets the address for the statement. - :param address: the new address for the statement - """ - self.address = address - -# F U N C T I O N S ########################################################### - - -def parse_arguments(): - """ - Parses the command-line arguments passed to the assembler. - """ - parser = argparse.ArgumentParser( - description="Assemble or disassemble machine language code for " - "the COCO3. 
See README.md for more information, and LICENSE for " - "terms of use.") - parser.add_argument("filename", help="the name of the file to examine") - parser.add_argument( - "-s", action="store_true", help="print out the symbol table") - parser.add_argument( - "-p", action="store_true", help="print out the assembled " - "statements when finished") - parser.add_argument( - "-o", metavar="FILE", help="stores the assembled program " - "in FILE") - return parser.parse_args() - - -def throw_error(error, statement): - """ - Prints out an error message. - - @param error: the error message to throw - @type error: Exception - - @param statement: the assembly statement that caused the error - @type statement: Statement - """ - print(error.value) - print("Line: " + str(statement)) - sys.exit(1) - - -def main(args): - """ - Runs the assembler with the specified arguments. - - @param args: the arguments to the main function - @type: namedtuple - """ - global symbol_table - symbol_table = dict() - statements = [] - address = 0x6000 - - # Pass 1: parse all of the statements in the file, but do not attempt - # to resolve any of the labels or locations - with open(args.filename) as infile: - for line in infile: - statement = Statement() - statement.parse_line(line) - if not statement.is_empty(): - statements.append(statement) - - # Pass 2: translate the statements into their respective opcodes - for index in xrange(len(statements)): - statement = statements[index] - try: - statement.translate(symbol_table) - except TranslationError as error: - throw_error(error, statement) - if statement.is_pseudo_op(): - if statement.op == EQU: - data, size = statement.get_hex_value(symbol_table) - symbol_table[statement.get_label()] = data - else: - label = statement.get_label() - if label: - if label in symbol_table: - error = {"value": "label [" + label + "] redefined"} - throw_error(error, statement) - symbol_table[label] = index - - # Pass 3: set the address for each operation - for statement 
in statements: - if not statement.is_pseudo_op(): - label = statement.get_label() - if label: - symbol_table[label] = address - statement.set_address(address) - address += statement.size - - # Check to see if the user wanted to print the symbol table - if args.s: - print("-- Symbol Table --") - for symbol, value in symbol_table.iteritems(): - print("{} ${:4X}".format(symbol.ljust(15, ' '), value)) - - # Check to see if the user wanted a print out of the assembly - if args.p: - print("-- Assembled Statements --") - for statement in statements: - if str(statement) != "": - print(statement) - -# M A I N ##################################################################### - -if __name__ == "__main__": - main(parse_arguments()) - -# E N D O F F I L E ####################################################### diff --git a/cocoasm/__init__.py b/cocoasm/__init__.py new file mode 100644 index 0000000..139597f --- /dev/null +++ b/cocoasm/__init__.py @@ -0,0 +1,2 @@ + + diff --git a/cocoasm/assembler_state.py b/cocoasm/assembler_state.py new file mode 100644 index 0000000..da308e4 --- /dev/null +++ b/cocoasm/assembler_state.py @@ -0,0 +1,18 @@ +""" +Copyright (C) 2019 Craig Thomas + +This project uses an MIT style license - see LICENSE for details. +A Color Computer Assembler - see the README.md file for details. +""" +# I M P O R T S ############################################################### + +from typing import NamedTuple + +# C L A S S E S ############################################################### + + +class AssemblerState(NamedTuple): + origin: int = 0x0 + direct_page: int = 0x0 + +# E N D O F F I L E ######################################################### diff --git a/cocoasm/exceptions.py b/cocoasm/exceptions.py new file mode 100644 index 0000000..f6a3ec0 --- /dev/null +++ b/cocoasm/exceptions.py @@ -0,0 +1,39 @@ +""" +Copyright (C) 2019 Craig Thomas + +This project uses an MIT style license - see LICENSE for details. 
+This file contains Exceptions for the CoCo Assembler. +""" +# C L A S S E S ############################################################### + + +class TranslationError(Exception): + """ + Translation errors occur when the translate function is called from + within the Statement class. Translation errors usually refer to the fact + that an invalid mnemonic or invalid register was specified. + """ + def __init__(self, value, statement): + super().__init__() + self.value = value + self.statement = statement + + def __str__(self): + return repr(self.value) + + +class ParseError(Exception): + """ + Parse errors occur when the parse function is called from + within the Statement class. Parse errors usually refer to the fact + that an invalid line of assembly code was encountered. + """ + def __init__(self, value, statement): + super().__init__() + self.value = value + self.statement = statement + + def __str__(self): + return repr(self.value) + +# E N D O F F I L E ####################################################### diff --git a/cocoasm/helpers.py b/cocoasm/helpers.py new file mode 100644 index 0000000..0054a5c --- /dev/null +++ b/cocoasm/helpers.py @@ -0,0 +1,51 @@ +""" +Copyright (C) 2019 Craig Thomas + +This project uses an MIT style license - see LICENSE for details. +A Color Computer Assembler - see the README.md file for details. 
+""" +# I M P O R T S ############################################################### + +import re + +# C O N S T A N T S ########################################################### + +# Pattern to recognize a hex value +HEX_REGEX = re.compile( + r"^\$(?P<value>[0-9a-fA-F]+)" +) + +# Pattern to recognize an integer value +INT_REGEX = re.compile( + r"^(?P<value>[\d]+)" +) + +# F U N C T I O N S ########################################################### + + +def hex_value(value): + return value_with_base_to_hex(value, 16) + + +def decimal_value(value): + return value_with_base_to_hex(value, 10) + + +def value_with_base_to_hex(value, base): + if not value: + return "" + + if type(value) is int: + return "{:0<2X}".format(value) + + data = HEX_REGEX.match(value) + if data: + return "{:0<2X}".format(int(data.group("value"), 16)) + + data = INT_REGEX.match(value) + if data: + return "{:0<2X}".format(int(data.group("value"), 10)) + + return "{:0<2X}".format(int(value, base)) + +# E N D O F F I L E ####################################################### diff --git a/cocoasm/instruction.py b/cocoasm/instruction.py new file mode 100644 index 0000000..4971e14 --- /dev/null +++ b/cocoasm/instruction.py @@ -0,0 +1,291 @@ +""" +Copyright (C) 2019 Craig Thomas + +This project uses an MIT style license - see LICENSE for details. +A Color Computer Assembler - see the README.md file for details. +""" +# I M P O R T S ############################################################### + +from typing import NamedTuple, Callable + +# C O N S T A N T S ########################################################### + +# Invalid operation +IVLD = 0x01 + +# Illegal addressing mode +ILLEGAL_MODE = 0x00 + + +# C L A S S E S ############################################################### + +class Mode(NamedTuple): + """ + The Mode class represents a set of addressing modes. 
Modes supported by the + Color Computer are Inherent (inh), Immediate (imm), Direct (dir), + Indexed (ind), Extended (ext), and Relative (rel). Each instruction may have + one or more addressing modes (see Instruction class). + """ + inh: int = IVLD + imm: int = IVLD + dir: int = IVLD + ind: int = IVLD + ext: int = IVLD + rel: int = IVLD + + def supports_inherent(self): + """ + Returns whether the addressing mode is an inherent mode. + :return: True if the mode is inherent, false otherwise + """ + return self.inh is not IVLD + + def supports_immediate(self): + """ + Returns whether the addressing mode is immediate. + :return: True if the mode is immediate, false otherwise + """ + return self.imm is not IVLD + + def supports_direct(self): + """ + Returns whether the addressing mode is direct. + :return: True if the mode is direct, false otherwise + """ + return self.dir is not IVLD + + def supports_indexed(self): + """ + Returns whether the addressing mode is indexed. + :return: True if the mode is indexed, false otherwise + """ + return self.ind is not IVLD + + def supports_extended(self): + """ + Returns whether the addressing mode is extended. + :return: True if the mode is extended, false otherwise + """ + return self.ext is not IVLD + + def supports_relative(self): + """ + Returns whether the addressing mode is relative. + + :return: True if the mode is relative, false otherwise + """ + return self.rel is not IVLD + + +class Instruction(NamedTuple): + """ + The Instruction class represents an operation supported by the Color + Computer. Each operation has a mnemonic that is the human + understandable code for the operation, a set of addressing modes + that the operation supports, whether the mnemonic is a pseudo + operation (i.e. only used by the assembler for special directives), + is a branch instruction, and a function to assist with operation + translation by the assembler. 
class Instruction(NamedTuple):
    """
    An Instruction describes a single operation known to the assembler.
    It records the human readable mnemonic, the set of addressing modes
    (and their opcodes) the operation supports, a flag marking pseudo
    operations (directives handled by the assembler itself), a flag
    marking branch instructions, and an optional function to assist
    with translating the operation.
    """
    mnemonic: str = ""
    mode: Mode = Mode()
    pseudo: bool = False
    is_branch: bool = False
    func: Callable[..., str] = None

    def is_branch_operation(self):
        """
        Reports the value of the is_branch flag.

        :return: True if the instruction is a branch instruction, False otherwise
        """
        return self.is_branch

    def is_include(self):
        """
        Reports whether the mnemonic is the INCLUDE directive.

        :return: True if the operation is an INCLUDE operation, False otherwise
        """
        return self.mnemonic == "INCLUDE"

    def translate_pseudo(self, label, operand, symbol_table):
        """
        Translates a pseudo operation.

        FCB and FDB yield whatever operand.get_hex_value() returns. EQU
        updates the address of the symbol stored under the label and yields
        nothing. Every other mnemonic yields nothing.

        :param label: the label attached to the pseudo operation
        :param operand: the operand value of the pseudo operation
        :param symbol_table: the current symbol table
        :return: the value of the pseudo operation, or None
        """
        name = self.mnemonic

        if name in ("FCB", "FDB"):
            return operand.get_hex_value()

        if name == "EQU":
            symbol_table[label].set_address(operand.get_string_value())

        return None
# Table of every operation the assembler understands. Each entry maps a
# mnemonic to the opcode used for each addressing mode it supports; modes
# that are not listed keep the Mode default and are treated as unsupported.
INSTRUCTIONS = [
    Instruction(mnemonic="ABX", mode=Mode(inh=0x3A)),
    # ADCA / ADCB have no inherent form, so no inh opcode is listed for them
    Instruction(mnemonic="ADCA", mode=Mode(imm=0x89, dir=0x99, ind=0xA9, ext=0xB9)),
    Instruction(mnemonic="ADCB", mode=Mode(imm=0xC9, dir=0xD9, ind=0xE9, ext=0xF9)),
    Instruction(mnemonic="ADDA", mode=Mode(imm=0x8B, dir=0x9B, ind=0xAB, ext=0xBB)),
    Instruction(mnemonic="ADDB", mode=Mode(imm=0xCB, dir=0xDB, ind=0xEB, ext=0xFB)),
    Instruction(mnemonic="ADDD", mode=Mode(imm=0xC3, dir=0xD3, ind=0xE3, ext=0xF3)),
    Instruction(mnemonic="ANDA", mode=Mode(imm=0x84, dir=0x94, ind=0xA4, ext=0xB4)),
    Instruction(mnemonic="ANDB", mode=Mode(imm=0xC4, dir=0xD4, ind=0xE4, ext=0xF4)),
    Instruction(mnemonic="ANDCC", mode=Mode(imm=0x1C)),
    Instruction(mnemonic="ASLA", mode=Mode(inh=0x48)),
    Instruction(mnemonic="ASLB", mode=Mode(inh=0x58)),
    Instruction(mnemonic="ASL", mode=Mode(dir=0x08, ind=0x68, ext=0x78)),
    Instruction(mnemonic="ASRA", mode=Mode(inh=0x47)),
    Instruction(mnemonic="ASRB", mode=Mode(inh=0x57)),
    Instruction(mnemonic="ASR", mode=Mode(dir=0x07, ind=0x67, ext=0x77)),
    Instruction(mnemonic="BITA", mode=Mode(imm=0x85, dir=0x95, ind=0xA5, ext=0xB5)),
    Instruction(mnemonic="BITB", mode=Mode(imm=0xC5, dir=0xD5, ind=0xE5, ext=0xF5)),
    Instruction(mnemonic="CLRA", mode=Mode(inh=0x4F)),
    Instruction(mnemonic="CLRB", mode=Mode(inh=0x5F)),
    Instruction(mnemonic="CLR", mode=Mode(dir=0x0F, ind=0x6F, ext=0x7F)),
    Instruction(mnemonic="CMPA", mode=Mode(imm=0x81, dir=0x91, ind=0xA1, ext=0xB1)),
    Instruction(mnemonic="CMPB", mode=Mode(imm=0xC1, dir=0xD1, ind=0xE1, ext=0xF1)),
    Instruction(mnemonic="CMPX", mode=Mode(imm=0x8C, dir=0x9C, ind=0xAC, ext=0xBC)),
    Instruction(mnemonic="COMA", mode=Mode(inh=0x43)),
    Instruction(mnemonic="COMB", mode=Mode(inh=0x53)),
    Instruction(mnemonic="COM", mode=Mode(dir=0x03, ind=0x63, ext=0x73)),
    Instruction(mnemonic="CWAI", mode=Mode(imm=0x3C)),
    Instruction(mnemonic="DAA", mode=Mode(inh=0x19)),
    Instruction(mnemonic="DECA", mode=Mode(inh=0x4A)),
    Instruction(mnemonic="DECB", mode=Mode(inh=0x5A)),
    Instruction(mnemonic="DEC", mode=Mode(dir=0x0A, ind=0x6A, ext=0x7A)),
    Instruction(mnemonic="EORA", mode=Mode(imm=0x88, dir=0x98, ind=0xA8, ext=0xB8)),
    Instruction(mnemonic="EORB", mode=Mode(imm=0xC8, dir=0xD8, ind=0xE8, ext=0xF8)),
    Instruction(mnemonic="EXG", mode=Mode(imm=0x1E)),
    Instruction(mnemonic="INCA", mode=Mode(inh=0x4C)),
    Instruction(mnemonic="INCB", mode=Mode(inh=0x5C)),
    Instruction(mnemonic="INC", mode=Mode(dir=0x0C, ind=0x6C, ext=0x7C)),
    Instruction(mnemonic="JMP", mode=Mode(dir=0x0E, ind=0x6E, ext=0x7E)),
    Instruction(mnemonic="JSR", mode=Mode(dir=0x9D, ind=0xAD, ext=0xBD)),
    Instruction(mnemonic="LDA", mode=Mode(imm=0x86, dir=0x96, ind=0xA6, ext=0xB6)),
    Instruction(mnemonic="LDB", mode=Mode(imm=0xC6, dir=0xD6, ind=0xE6, ext=0xF6)),
    Instruction(mnemonic="LDD", mode=Mode(imm=0xCC, dir=0xDC, ind=0xEC, ext=0xFC)),
    Instruction(mnemonic="LDU", mode=Mode(imm=0xCE, dir=0xDE, ind=0xEE, ext=0xFE)),
    Instruction(mnemonic="LDX", mode=Mode(imm=0x8E, dir=0x9E, ind=0xAE, ext=0xBE)),
    Instruction(mnemonic="LEAS", mode=Mode(ind=0x32)),
    Instruction(mnemonic="LEAU", mode=Mode(ind=0x33)),
    Instruction(mnemonic="LEAX", mode=Mode(ind=0x30)),
    Instruction(mnemonic="LEAY", mode=Mode(ind=0x31)),
    Instruction(mnemonic="LSLA", mode=Mode(inh=0x48)),
    Instruction(mnemonic="LSLB", mode=Mode(inh=0x58)),
    Instruction(mnemonic="LSL", mode=Mode(dir=0x08, ind=0x68, ext=0x78)),
    Instruction(mnemonic="LSRA", mode=Mode(inh=0x44)),
    Instruction(mnemonic="LSRB", mode=Mode(inh=0x54)),
    Instruction(mnemonic="LSR", mode=Mode(dir=0x04, ind=0x64, ext=0x74)),
    Instruction(mnemonic="MUL", mode=Mode(inh=0x3D)),
    Instruction(mnemonic="NEGA", mode=Mode(inh=0x40)),
    Instruction(mnemonic="NEGB", mode=Mode(inh=0x50)),
    Instruction(mnemonic="NEG", mode=Mode(dir=0x00, ind=0x60, ext=0x70)),
    Instruction(mnemonic="NOP", mode=Mode(inh=0x12)),
    Instruction(mnemonic="ORA", mode=Mode(imm=0x8A, dir=0x9A, ind=0xAA, ext=0xBA)),
    Instruction(mnemonic="ORB", mode=Mode(imm=0xCA, dir=0xDA, ind=0xEA, ext=0xFA)),
    Instruction(mnemonic="ORCC", mode=Mode(imm=0x1A)),
    Instruction(mnemonic="PSHS", mode=Mode(imm=0x34)),
    Instruction(mnemonic="PSHU", mode=Mode(imm=0x36)),
    Instruction(mnemonic="PULS", mode=Mode(imm=0x35)),
    Instruction(mnemonic="PULU", mode=Mode(imm=0x37)),
    Instruction(mnemonic="ROLA", mode=Mode(inh=0x49)),
    Instruction(mnemonic="ROLB", mode=Mode(inh=0x59)),
    Instruction(mnemonic="ROL", mode=Mode(dir=0x09, ind=0x69, ext=0x79)),
    Instruction(mnemonic="RORA", mode=Mode(inh=0x46)),
    Instruction(mnemonic="RORB", mode=Mode(inh=0x56)),
    Instruction(mnemonic="ROR", mode=Mode(dir=0x06, ind=0x66, ext=0x76)),
    Instruction(mnemonic="RTI", mode=Mode(inh=0x3B)),
    Instruction(mnemonic="RTS", mode=Mode(inh=0x39)),
    Instruction(mnemonic="SBCA", mode=Mode(imm=0x82, dir=0x92, ind=0xA2, ext=0xB2)),
    Instruction(mnemonic="SBCB", mode=Mode(imm=0xC2, dir=0xD2, ind=0xE2, ext=0xF2)),
    Instruction(mnemonic="SEX", mode=Mode(inh=0x1D)),
    Instruction(mnemonic="STA", mode=Mode(dir=0x97, ind=0xA7, ext=0xB7)),
    Instruction(mnemonic="STB", mode=Mode(dir=0xD7, ind=0xE7, ext=0xF7)),
    Instruction(mnemonic="STD", mode=Mode(dir=0xDD, ind=0xED, ext=0xFD)),
    Instruction(mnemonic="STU", mode=Mode(dir=0xDF, ind=0xEF, ext=0xFF)),
    Instruction(mnemonic="STX", mode=Mode(dir=0x9F, ind=0xAF, ext=0xBF)),
    Instruction(mnemonic="SUBA", mode=Mode(imm=0x80, dir=0x90, ind=0xA0, ext=0xB0)),
    Instruction(mnemonic="SUBB", mode=Mode(imm=0xC0, dir=0xD0, ind=0xE0, ext=0xF0)),
    Instruction(mnemonic="SUBD", mode=Mode(imm=0x83, dir=0x93, ind=0xA3, ext=0xB3)),
    Instruction(mnemonic="SWI", mode=Mode(inh=0x3F)),
    Instruction(mnemonic="SYNC", mode=Mode(inh=0x13)),
    Instruction(mnemonic="TFR", mode=Mode(imm=0x1F)),
    Instruction(mnemonic="TSTA", mode=Mode(inh=0x4D)),
    Instruction(mnemonic="TSTB", mode=Mode(inh=0x5D)),
    Instruction(mnemonic="TST", mode=Mode(dir=0x0D, ind=0x6D, ext=0x7D)),

    # Extended operations (two-byte opcodes)
    Instruction(mnemonic="CMPD", mode=Mode(imm=0x1083, dir=0x1093, ind=0x10A3, ext=0x10B3)),
    Instruction(mnemonic="CMPS", mode=Mode(imm=0x118C, dir=0x119C, ind=0x11AC, ext=0x11BC)),
    Instruction(mnemonic="CMPU", mode=Mode(imm=0x1183, dir=0x1193, ind=0x11A3, ext=0x11B3)),
    Instruction(mnemonic="LDS", mode=Mode(imm=0x10CE, dir=0x10DE, ind=0x10EE, ext=0x10FE)),
    Instruction(mnemonic="CMPY", mode=Mode(imm=0x108C, dir=0x109C, ind=0x10AC, ext=0x10BC)),
    Instruction(mnemonic="LDY", mode=Mode(imm=0x108E, dir=0x109E, ind=0x10AE, ext=0x10BE)),
    Instruction(mnemonic="STS", mode=Mode(dir=0x10DF, ind=0x10EF, ext=0x10FF)),
    Instruction(mnemonic="STY", mode=Mode(dir=0x109F, ind=0x10AF, ext=0x10BF)),
    Instruction(mnemonic="SWI2", mode=Mode(inh=0x103F)),
    Instruction(mnemonic="SWI3", mode=Mode(inh=0x113F)),

    # Short branches
    Instruction(mnemonic="BCC", mode=Mode(rel=0x24), is_branch=True),
    Instruction(mnemonic="BCS", mode=Mode(rel=0x25), is_branch=True),
    Instruction(mnemonic="BEQ", mode=Mode(rel=0x27), is_branch=True),
    Instruction(mnemonic="BGE", mode=Mode(rel=0x2C), is_branch=True),
    Instruction(mnemonic="BGT", mode=Mode(rel=0x2E), is_branch=True),
    Instruction(mnemonic="BHI", mode=Mode(rel=0x22), is_branch=True),
    Instruction(mnemonic="BHS", mode=Mode(rel=0x24), is_branch=True),
    Instruction(mnemonic="BLE", mode=Mode(rel=0x2F), is_branch=True),
    Instruction(mnemonic="BLO", mode=Mode(rel=0x25), is_branch=True),
    Instruction(mnemonic="BLS", mode=Mode(rel=0x23), is_branch=True),
    Instruction(mnemonic="BLT", mode=Mode(rel=0x2D), is_branch=True),
    Instruction(mnemonic="BMI", mode=Mode(rel=0x2B), is_branch=True),
    Instruction(mnemonic="BNE", mode=Mode(rel=0x26), is_branch=True),
    Instruction(mnemonic="BPL", mode=Mode(rel=0x2A), is_branch=True),
    Instruction(mnemonic="BRA", mode=Mode(rel=0x20), is_branch=True),
    Instruction(mnemonic="BRN", mode=Mode(rel=0x21), is_branch=True),
    Instruction(mnemonic="BSR", mode=Mode(rel=0x8D), is_branch=True),
    Instruction(mnemonic="BVC", mode=Mode(rel=0x28), is_branch=True),
    Instruction(mnemonic="BVS", mode=Mode(rel=0x29), is_branch=True),

    # Long branches
    Instruction(mnemonic="LBCC", mode=Mode(rel=0x1024), is_branch=True),
    Instruction(mnemonic="LBCS", mode=Mode(rel=0x1025), is_branch=True),
    Instruction(mnemonic="LBEQ", mode=Mode(rel=0x1027), is_branch=True),
    Instruction(mnemonic="LBGE", mode=Mode(rel=0x102C), is_branch=True),
    Instruction(mnemonic="LBGT", mode=Mode(rel=0x102E), is_branch=True),
    Instruction(mnemonic="LBHI", mode=Mode(rel=0x1022), is_branch=True),
    Instruction(mnemonic="LBHS", mode=Mode(rel=0x1024), is_branch=True),
    Instruction(mnemonic="LBLE", mode=Mode(rel=0x102F), is_branch=True),
    Instruction(mnemonic="LBLO", mode=Mode(rel=0x1025), is_branch=True),
    Instruction(mnemonic="LBLS", mode=Mode(rel=0x1023), is_branch=True),
    Instruction(mnemonic="LBLT", mode=Mode(rel=0x102D), is_branch=True),
    Instruction(mnemonic="LBMI", mode=Mode(rel=0x102B), is_branch=True),
    Instruction(mnemonic="LBNE", mode=Mode(rel=0x1026), is_branch=True),
    Instruction(mnemonic="LBPL", mode=Mode(rel=0x102A), is_branch=True),
    # LBRA and LBSR are the two long branches with single-byte opcodes
    Instruction(mnemonic="LBRA", mode=Mode(rel=0x16), is_branch=True),
    Instruction(mnemonic="LBRN", mode=Mode(rel=0x1021), is_branch=True),
    Instruction(mnemonic="LBSR", mode=Mode(rel=0x17), is_branch=True),
    Instruction(mnemonic="LBVC", mode=Mode(rel=0x1028), is_branch=True),
    Instruction(mnemonic="LBVS", mode=Mode(rel=0x1029), is_branch=True),

    # Pseudo operations
    Instruction(mnemonic="END", pseudo=True),
    Instruction(mnemonic="ORG", pseudo=True),
    Instruction(mnemonic="EQU", pseudo=True),
    Instruction(mnemonic="SET", pseudo=True),
    Instruction(mnemonic="RMB", pseudo=True),
    Instruction(mnemonic="FCB", pseudo=True),
    Instruction(mnemonic="FDB", pseudo=True),
    Instruction(mnemonic="FCC", pseudo=True),
    Instruction(mnemonic="SETDP", pseudo=True),
    Instruction(mnemonic="INCLUDE", pseudo=True)
]
# NOTE(review): the named groups below are written as "value". That name is
# assumed for IMM_REGEX and EXTENDED_INDIRECT_REGEX - confirm against any
# caller that reads the group by name.

# Pattern to recognize an immediate value ("#" followed by the value)
IMM_REGEX = re.compile(
    r"^#(?P<value>.*)"
)

# Pattern to recognize an extended indirect value (a value wrapped in "[" "]")
EXTENDED_INDIRECT_REGEX = re.compile(
    r"^\[(?P<value>.*)\]"
)

# Pattern to recognize a hex value ("$" followed by hex digits)
HEX_REGEX = re.compile(
    r"^\$(?P<value>[0-9a-fA-F]+)"
)

# Pattern to recognize an integer value (leading decimal digits)
INT_REGEX = re.compile(
    r"^(?P<value>[\d]+)"
)

# C L A S S E S ##############################################################


class OperandType(Enum):
    """
    The kinds of operand the assembler distinguishes.
    """
    UNKNOWN = 0
    INHERENT = 1
    IMMEDIATE = 2
    INDIRECT = 3
    EXTENDED_INDIRECT = 4
    EXTENDED = 5
    DIRECT = 6
    RELATIVE = 7
    SYMBOL = 8
    EXPRESSION = 9


class Operand(object):
    """
    The Operand class wraps the operand text of a statement and classifies
    it into an OperandType when it is constructed.
    """
    def __init__(self, operand):
        """
        Stores the operand text and determines its type.

        :param operand: the operand text; None or "" means no operand
        """
        self.operand = operand or ""
        self.variables = []
        self.operand_type = OperandType.UNKNOWN
        self.determine_operand_type()

    def determine_operand_type(self):
        """
        Sets operand_type from the operand text. The checks are made in
        order: empty text is INHERENT, a "#" prefix is IMMEDIATE, a
        bracketed value is EXTENDED_INDIRECT, and a "$" hex value is
        EXTENDED. If nothing matches, operand_type is left unchanged.
        """
        if self.operand == "":
            self.operand_type = OperandType.INHERENT
            return

        if self.get_immediate() != "":
            self.operand_type = OperandType.IMMEDIATE
            return

        if self.get_extended_indirect() != "":
            self.operand_type = OperandType.EXTENDED_INDIRECT
            return

        if self.get_hex_value() != "":
            self.operand_type = OperandType.EXTENDED
            return

    def get_string_value(self):
        """
        Returns the operand text as a string.

        :return: the operand text
        """
        return str(self.operand)

    def get_operand_type(self):
        """
        Returns the type determined for this operand.

        :return: the OperandType of the operand
        """
        return self.operand_type

    def is_inherent(self):
        """
        Returns whether the operand type is INHERENT.

        :return: True if the operand is inherent, False otherwise
        """
        return self.get_operand_type() == OperandType.INHERENT

    def is_immediate(self):
        """
        Returns whether the operand type is IMMEDIATE.

        :return: True if the operand is immediate, False otherwise
        """
        return self.get_operand_type() == OperandType.IMMEDIATE

    def get_immediate(self):
        """
        Matches the operand text against the immediate pattern.

        :return: the match object if the operand is immediate, "" otherwise
        """
        return IMM_REGEX.match(self.operand) or ""

    def get_extended_indirect(self):
        """
        Matches the operand text against the extended indirect pattern.

        :return: the match object if the operand matches, "" otherwise
        """
        return EXTENDED_INDIRECT_REGEX.match(self.operand) or ""

    def get_hex_value(self):
        """
        Matches the operand text against the hex value pattern.

        :return: the match object if the operand is a hex value, "" otherwise
        """
        return HEX_REGEX.match(self.operand) or ""
class Program(object):
    """
    The Program class represents an actual Color Computer program. Each Program
    contains a list of statements. Additionally, a Program keeps track of all
    the user-defined symbols in the program.
    """
    def __init__(self, filename):
        """
        Creates the program and immediately processes the given file.

        :param filename: the name of the file to process
        """
        self.symbol_table = dict()
        self.statements = []
        self.address = 0x0
        self.state = AssemblerState()
        self.process(filename)

    def process(self, filename):
        """
        Processes a filename for assembly. A TranslationError or ParseError
        raised while parsing or translating is reported through throw_error,
        which exits the process.

        :param filename: the name of the file to process
        """
        try:
            self.parse(filename)
            self.translate_statements()
        except (TranslationError, ParseError) as error:
            self.throw_error(error)

    def parse(self, filename):
        """
        Parses a single file and saves the set of statements.

        :param filename: the name of the file to process
        """
        self.statements = self.parse_file(filename)

    @staticmethod
    def parse_file(filename):
        """
        Parses all of the lines in a file, and transforms each line into
        a Statement. Empty and comment-only statements are dropped.

        :param filename: the name of the file to parse; a falsy name
                         yields an empty list without touching the disk
        :return: a list of all the statements in the file
        """
        statements = []
        if not filename:
            return statements

        with open(filename) as infile:
            for line in infile:
                statement = Statement(line)
                if not statement.is_empty() and not statement.is_comment_only():
                    statements.append(statement)

        return statements

    def process_mnemonics(self, statements):
        """
        Given a list of statements, processes the mnemonics on each statement.
        If a statement names an include file, the statements parsed from that
        file (processed recursively) take its place; otherwise the statement
        itself is kept. A statement whose include file yields no statements
        is also kept as-is.

        :param statements: the list of statements to process
        :return: a list of processed statements
        """
        processed_statements = []
        for statement in statements:
            statement.match_mnemonic()
            # parse_file returns [] when there is no include filename
            include = self.process_mnemonics(self.parse_file(statement.get_include_filename()))
            processed_statements.extend(include if include else [statement])
        return processed_statements

    def save_symbol(self, index, statement):
        """
        Checks a statement for a label and saves it to the symbol table, along with
        the index into the list of statements where the label occurs.

        :param index: the index into the list of statements where the label occurs
        :param statement: the statement with the label
        :raises TranslationError: if the label already exists in the symbol table
        """
        label = statement.get_label()
        if label:
            if label in self.symbol_table:
                raise TranslationError("Label [" + label + "] redefined", statement)
            self.symbol_table[label] = Symbol(label, index)

    def translate_statements(self):
        """
        Translates all the parsed statements into their respective
        opcodes. Each statement's label is saved to the symbol table just
        before that statement is translated.
        """
        self.statements = self.process_mnemonics(self.statements)
        for index, statement in enumerate(self.statements):
            self.save_symbol(index, statement)
            statement.translate(self.symbol_table)

    def save_binary_file(self, filename):
        """
        Writes out the assembled statements to the specified file
        name. Each statement's op_code is read as a string of hex digits
        and written out two digits (one byte) at a time.

        :param filename: the name of the file to save statements
        """
        machine_codes = []
        for statement in self.statements:
            # Use the same accessor that parse_file uses to filter statements
            if statement.is_empty() or statement.is_comment_only():
                continue
            op_code = statement.op_code
            machine_codes.extend(
                int(op_code[index:index + 2], 16)
                for index in range(0, len(op_code), 2)
            )
        with open(filename, "wb") as outfile:
            outfile.write(bytearray(machine_codes))

    def print_symbol_table(self):
        """
        Prints out the symbol table and any values contained within it.
        """
        print("-- Symbol Table --")
        for value in self.symbol_table.values():
            print(value)

    def print_statements(self):
        """
        Prints out the assembled statements.
        """
        print("-- Assembled Statements --")
        for statement in self.statements:
            print(statement)

    @staticmethod
    def throw_error(error):
        """
        Prints out an error message and the offending statement, then exits
        the process with status 1.

        :param error: the error to report; its value and statement are printed
        """
        print(error.value)
        print("line: {}".format(str(error.statement)))
        sys.exit(1)
+ + :param error: the error message to throw + """ + print(error.value) + print("line: {}".format(str(error.statement))) + sys.exit(1) + +# E N D O F F I L E ####################################################### diff --git a/cocoasm/statement.py b/cocoasm/statement.py new file mode 100644 index 0000000..d0eddf3 --- /dev/null +++ b/cocoasm/statement.py @@ -0,0 +1,263 @@ +""" +Copyright (C) 2019 Craig Thomas + +This project uses an MIT style license - see LICENSE for details. +A Color Computer Assembler - see the README.md file for details. +""" +# I M P O R T S ############################################################### + +import re + +from copy import copy + +from cocoasm.exceptions import ParseError, TranslationError +from cocoasm.instruction import INSTRUCTIONS +from cocoasm.operand import Operand +from cocoasm.helpers import hex_value + +# C O N S T A N T S ########################################################### + +# Pattern to recognize a blank line +BLANK_LINE_REGEX = re.compile(r"^\s*$") + +# Pattern to parse a comment line +COMMENT_LINE_REGEX = re.compile(r"^\s*;\s*(?P.*)$") + +# Pattern to parse a single line +ASM_LINE_REGEX = re.compile( + r"^(?P