| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407 |
- #!/usr/bin/python3
- import re
- import math
- import traceback
# Token-classification patterns. Each one is anchored, so it must match the
# whole string it is given.

# Characters accepted in a label-like token: word characters plus $ # @ ~ . ?
label_re = re.compile(r"^[\w\$\#\@\~\.\?]+$", re.IGNORECASE)
# One or more hexadecimal digits, upper or lower case.
hex_re = re.compile(r"^[0-9a-f]+$", re.IGNORECASE)
# Binary digits; underscores are accepted as visual separators.
bin_re = re.compile(r"^[0-1_]+$", re.IGNORECASE)
# Octal digits are 0-7 only. Accepting '8' here would let a token through
# that int(text, 8) then rejects with ValueError.
oct_re = re.compile(r"^[0-7]+$", re.IGNORECASE)
def match(regex, s):
    """Return True when the compiled pattern ``regex`` matches ``s`` from its start."""
    found = regex.match(s)
    return found is not None
def decode_bytes(val: str):
    """Decode a literal into a list of byte values (ints in 0..255).

    The forms are tried in this order:

    * ``<hex>h``  - hex digits followed by ``h``, two digits per byte
    * ``0x<hex>`` - hex digits after a ``0x`` prefix, two digits per byte
    * ``b<bits>`` - binary digits after ``b``, underscores ignored, eight
      bits per byte
    * unsigned decimal in 0..255
    * decimal with an explicit ``+``/``-`` sign in -128..127; negative
      values are returned in two's-complement form
    * one character wrapped in matching single or double quotes

    A trailing group shorter than a full byte (odd hex digit count, bit
    count not a multiple of eight) is decoded as its own byte.

    :param val: the literal text
    :return: list of ints
    :raises ValueError: if the text matches no form or is out of range
    """

    def _chunks(digits, size, base):
        # Split the digit string into fixed-size groups, one byte per group.
        return [int(digits[i:i + size], base)
                for i in range(0, len(digits), size)]

    try:
        if val.endswith('h'):
            # Drop the suffix *before* chunking; otherwise an odd digit count
            # pulls the 'h' into the last group and the literal is rejected.
            return _chunks(val[:-1], 2, 16)
        if val.startswith('0x'):
            return _chunks(val[2:], 2, 16)
        if val.startswith('b'):
            return _chunks(val.replace('_', '')[1:], 8, 2)
    except ValueError:
        raise ValueError(f"Invalid binary '{val}'") from None

    # isdecimal() only accepts characters that int() can parse.
    if val.isdecimal():
        number = int(val)
        if number > 255:
            raise ValueError(f"Invalid binary '{val}', unsigned int out of bounds")
        return [number]

    if val.startswith(('+', '-')) and val[1:].isdecimal():
        number = int(val)
        if number > 127 or number < -128:
            raise ValueError(f"Invalid binary '{val}', signed int out of bounds")
        if number < 0:  # convert to unsigned
            number += 2 ** 8
        return [number]

    if len(val) == 3 and val[0] == val[2] and val[0] in ("'", '"'):
        return [ord(val[1])]

    raise ValueError(f"Invalid binary '{val}'")
def is_reg(r):
    """Tell whether ``r`` names a register.

    An optional leading ``$`` is ignored. What remains is accepted when it is
    a decimal number in 0..3, or a two-character name made of ``r`` followed
    by one of ``0 1 2 3 a b c e``. The check is case-sensitive.

    :param r: candidate register text
    :return: True for a register name, False otherwise
    """
    name = r[1:] if r.startswith('$') else r
    # isdecimal() rather than isnumeric(): the latter is also true for
    # characters such as '½' that int() cannot parse, which would raise
    # instead of answering False.
    if name.isdecimal() and 0 <= int(name) <= 3:
        return True
    return (len(name) == 2
            and name[0] == 'r'
            and name[1] in {'0', '1', '2', '3', 'a', 'b', 'c', 'e'})
def decode_reg(r):
    """Convert a register reference into its index (0..3).

    Accepted inputs:

    * an int already in 0..3
    * ``$<n>`` where ``<n>`` is a decimal number in 0..3
    * a name, case-insensitive and with an optional ``$`` prefix:
      ``ra``/``r0`` -> 0, ``rb``/``r1`` -> 1, ``rc``/``r2`` -> 2,
      ``re``/``r3`` -> 3

    :param r: register reference (str or int)
    :return: the register index
    :raises ValueError: if the value is out of range or the name is unknown
    """
    # Only strings have startswith(); guarding on the type lets plain ints
    # reach the integer branch below instead of failing with AttributeError.
    if isinstance(r, str) and r.startswith('$') and r[1:].isdecimal():
        r = int(r[1:])

    if isinstance(r, int):
        if 0 <= r <= 3:
            return r
        raise ValueError(f"Invalid register value {r}")

    indices = {
        'ra': 0, 'r0': 0,
        'rb': 1, 'r1': 1,
        'rc': 2, 'r2': 2,
        're': 3, 'r3': 3,
    }
    key = r.lower()
    if key.startswith('$'):
        key = key[1:]
    try:
        return indices[key]
    except KeyError:
        raise ValueError(f"Invalid register name '{r}'") from None
class CompilingError(Exception):
    """Error raised while assembling source; ``message`` holds the text."""

    def __init__(self, message):
        # Forward the message to Exception so str(err) and err.args carry it
        # (they are empty when the base initialiser is skipped).
        super().__init__(message)
        self.message = message
class InstructionError(Exception):
    """Error raised for an invalid or duplicate instruction definition."""

    def __init__(self, message):
        # Forward the message to Exception so str(err) and err.args carry it
        # (they are empty when the base initialiser is skipped).
        super().__init__(message)
        self.message = message
class Instruction:
    """One instruction definition: mnemonic, opcode byte and operand layout.

    ``compiler`` is left as None here and is assigned when the instruction is
    registered with a compiler; ``_gen_instr`` and ``compile`` need it.
    """

    def __init__(self, name: str, opcode: str, operands=0, alias=None):
        """Create an instruction definition.

        :param name: mnemonic; stripped, lower-cased, must be alphanumeric
        :param opcode: opcode literal understood by the module-level
            ``decode_bytes``. ``?`` characters are placeholders and are
            replaced by ``0`` to get the base opcode. When the literal is
            exactly 10 characters long, ``??`` at positions 6-7 and ``??`` at
            positions 8-9 each declare one register operand.
        :param operands: number of immediate bytes following the opcode
        :param alias: optional list of alternative mnemonics
        :raises InstructionError: if the name is empty or not alphanumeric
        """
        name = name.strip().lower()
        if not name or not name.isalnum():
            raise InstructionError(f"Invalid instruction name '{name}'")
        self.name = name
        self.alias = alias or []
        self.opcode = decode_bytes(opcode.replace('?', '0'))[0]
        self.reg_operands = 0
        if len(opcode) == 10:
            if opcode[6:8] == '??':
                self.reg_operands += 1
            if opcode[8:10] == '??':
                self.reg_operands += 1
        self.imm_operands = operands
        self.compiler = None

    @property
    def length(self):
        """Encoded size: the opcode byte plus the immediate bytes."""
        return self.imm_operands + 1

    def __len__(self):
        return self.length

    def _gen_instr(self, regs, imm):
        """Build the opcode byte with the register fields filled in.

        :param regs: decoded register indices, one per register operand
        :param imm: immediate operands; each is either a bytes-like value or
            a str naming a label
        :return: the opcode as an int
        :raises CompilingError: if the register count or the total immediate
            size does not match this instruction, or a register is None
        """
        if len(regs) != self.reg_operands:
            raise CompilingError(f"Invalid number of registers: set {len(regs)}, required: {self.reg_operands}")

        # Total size of the immediates in bytes. A known label contributes
        # the length of its stored value; an unknown name is counted as one
        # address.
        limm = 0
        for item in imm:
            if isinstance(item, str):
                if item in self.compiler.labels:
                    limm += len(self.compiler.labels[item])
                else:
                    limm += self.compiler.address_size
            else:
                limm += len(item)
        if limm != self.imm_operands:
            # The requirement being checked is the immediate count, so that
            # is the number to report.
            raise CompilingError(f"Invalid number of immediate: set {limm}, required: {self.imm_operands}")

        for reg in regs:
            if reg is None:
                raise CompilingError(f"Unable to decode register name {reg}")

        instr = self.opcode
        if len(regs) == 2:
            # Second operand goes into bits 3..2, first into bits 1..0.
            instr |= regs[1] << 2 | regs[0]
        elif len(regs) == 1:
            # A lone register always goes into bits 3..2.
            instr |= int(regs[0]) << 2
        return instr

    def compile(self, operands):
        """Encode this instruction for the given operand strings.

        The first ``reg_operands`` entries are decoded as registers, the rest
        as immediates, both through the attached compiler.

        :param operands: list of operand strings in source order
        :return: list starting with the opcode int, followed by the decoded
            immediates
        :raises CompilingError: see ``_gen_instr``
        """
        regs = []
        imm = []
        for position, arg in enumerate(operands):
            if position < self.reg_operands:
                regs.append(self.compiler.decode_reg(arg))
            else:
                imm.append(self.compiler.decode_bytes(arg))
        return [self._gen_instr(regs, imm)] + imm
class CompObject:
    """A parsed source line: an instruction, its operands and its line number."""

    def __init__(self, instr, operands, line_num):
        """Store the parts of the line; nothing is encoded yet."""
        self.instr = instr
        self.operands = operands
        self.line_num = line_num
        # Filled in by compile().
        self.code = []
        self.code_ref = 0

    def compile(self):
        """Encode the line through its instruction, remember and return the result."""
        encoded = self.instr.compile(self.operands)
        self.code = encoded
        return encoded
- class Compiler:
- def __init__(self, address_size=2, byte_order='little'):
- self.instr_db = {}
- self.data = []
- self.caddress = 0
- self.labels = {}
- self.order = byte_order
- self.regnames = {}
- self.address_size = address_size
- def decode_reg(self, s: str):
- s = s.strip()
- # if s in self.labels:
- # b = self.labels[s]
- if s in self.regnames:
- b = self.regnames[s]
- else:
- b = self.decode_bytes(s)
- if isinstance(b, bytes):
- i = int.from_bytes(b, byteorder=self.order)
- elif isinstance(b, int):
- i = b
- else:
- raise CompilingError(f"Unrecognised register name: {s}")
- if i not in self.regnames.values():
- raise CompilingError(f"Invalid register: {s}")
- return i
- def decode_bytes(self, s: str):
- s = s.strip()
- typ = ""
- # Decimal numbers
- if s.isnumeric():
- typ = 'int'
- elif s.endswith('d') and s[:-1].isnumeric():
- s = s[:-1]
- typ = 'int'
- elif s.startswith('0d') and s[2:].isnumeric():
- s = s[2:]
- typ = 'int'
- # Hexadecimal numbers
- elif s.startswith('0') and s.endswith('h') and match(hex_re, s[1:-1]):
- s = s[1:-1]
- typ = 'hex'
- elif (s.startswith('$0') or s.startswith('0x') or s.startswith('$0')) and match(hex_re, s[2:]):
- s = s[2:]
- typ = 'hex'
- # Octal numbers
- elif (s.endswith('q') or s.endswith('o')) and match(oct_re, s[:-1]):
- s = s[:-1]
- typ = 'oct'
- elif (s.startswith('0q') or s.startswith('0o')) and match(oct_re, s[2:]):
- s = s[2:]
- typ = 'oct'
- # Binary number
- elif (s.endswith('b') or s.endswith('y')) and match(bin_re, s[:-1]):
- s = s[:-1].replace('_', '')
- typ = 'bin'
- elif (s.startswith('0b') or s.startswith('0y')) and match(bin_re, s[2:]):
- s = s[2:].replace('_', '')
- typ = 'bin'
- # ASCII
- elif s.startswith("'") and s.endswith("'") and len(s) == 3:
- s = ord(s[1:-1]).to_bytes(1, self.order)
- typ = 'ascii'
- # Convert with limits
- if typ == 'int':
- numb = int(s)
- for i in range(1, 9):
- if -2 ** (i * 7) < i < 2 ** (i * 8):
- return numb.to_bytes(i, self.order)
- elif typ == 'hex':
- numb = int(s, 16)
- return numb.to_bytes(int(len(s) / 2) + len(s) % 2, self.order)
- elif typ == 'oct':
- numb = int(s, 8)
- for i in range(1, 9):
- if -2 ** (i * 7) < i < 2 ** (i * 8):
- return numb.to_bytes(i, self.order)
- elif typ == 'bin':
- numb = int(s, 2)
- return numb.to_bytes(int(len(s) / 8) + len(s) % 8, self.order)
- else:
- return s
- @staticmethod
- def _hash_instr(name, operands):
- return hash(name) + hash(operands)
- def add_reg(self, name, val):
- self.regnames[name] = val
- self.regnames['$' + name] = val
- def add_instr(self, instr: Instruction):
- instr.compiler = self
- operands = instr.reg_operands + instr.imm_operands
- # ihash = self._hash_instr(instr.name, operands)
- if instr.name in self.instr_db:
- raise InstructionError(f"Instruction {instr.name} operands={operands} duplicate!")
- self.instr_db[instr.name] = instr
- for alias in instr.alias:
- # ahash = self._hash_instr(alias, operands)
- if alias.lower() in self.instr_db:
- raise InstructionError(f"Instruction alias {alias} operands={operands} duplicate!")
- self.instr_db[alias.lower()] = instr
- def __func(self, f, args):
- for arg in args:
- if arg == '|':
- pass
- if arg == '^':
- pass
- if arg == '&':
- pass
- if arg == '<<':
- pass
- if arg == '>>':
- pass
- if arg == '+':
- pass
- if arg == '-':
- pass
- if arg == '*':
- pass
- if arg == '/' or arg == '//':
- pass
- if arg == '%' or arg == '%%':
- pass
- def __precompile(self, line):
- line = line.split(';', 1)[0]
- if ':' in line:
- linespl = line.split(':', 1)
- line = linespl[1]
- label = linespl[0]
- if label in self.labels:
- raise CompilingError(f"Label {label} duplicate")
- self.labels[label] = (self.caddress).to_bytes(self.address_size, self.order)
- if line.startswith('%define'):
- sp = list(filter(None, line.split(' ', 3)))
- if len(sp) != 3:
- raise CompilingError(f"Invalid %define")
- if '(' in sp[1] and ')' in sp[1]: # Function
- raise CompilingError(f"%define functions not implemented")
- self.labels[sp[1]] = self.decode_bytes(sp[2])
- return
- instr0 = list(filter(None, line.strip().split(' ', 1)))
- if len(instr0) == 0:
- return
- instr = instr0[0]
- if len(instr0) == 1:
- instr0.append('')
- operands = list(filter(None, map(lambda x: x.strip(), instr0[1].split(','))))
- if instr.lower() not in self.instr_db:
- raise CompilingError(f"Instruction {instr} operands={operands} is not recognised!")
- co = CompObject(self.instr_db[instr.lower()], operands, 0)
- return co
- def compile(self, file, code):
- failure = False
- instr = []
- binary = []
- for lnum, line in enumerate(code):
- lnum += 1
- try:
- co = self.__precompile(line)
- if co is not None:
- co.line_num = lnum
- self.caddress += co.instr.length
- instr.append(co)
- except CompilingError as e:
- failure = True
- print(f"ERROR {file}:{lnum}: {e.message}")
- for co in instr:
- try:
- binary += co.compile()
- except CompilingError as e:
- failure = True
- print(f"ERROR {file}:{co.line_num}: {e.message}")
- except Exception:
- failure = True
- print(f"ERROR {file}:{co.line_num}: Unexpected error:")
- traceback.print_exc()
- nbin = bytearray()
- for b in binary:
- if isinstance(b, int):
- nbin += b.to_bytes(1, self.order)
- elif isinstance(b, bytes):
- nbin += b
- elif isinstance(b, str):
- if b in self.labels:
- nbin += self.labels[b]
- else:
- failure = True
- print(f"ERROR {file}: Unable to find label '{b}'")
- if failure:
- return None
- return nbin
def convert_to_binary(data):
    """Render each value as an 8-digit binary string, one per line, as bytes.

    Lines are joined with ``\\n``; there is no trailing newline.
    """
    rows = (f'{value:08b}' for value in data)
    return '\n'.join(rows).encode()
def convert_to_mem(data):
    """Render ``data`` as hex text, eight values per line.

    Each line holds up to eight two-digit hex values separated by spaces,
    followed by `` // `` and the hex address of the line's first value.

    :param data: sequence of ints (for example a ``bytearray``)
    :return: the text as ``bytes``; empty input gives ``b''``
    """
    if len(data) == 0:
        # log2(0) is undefined, and there is nothing to emit anyway.
        return b''

    # Minimum address width in hex digits, derived from the data length.
    addr_format = f'0{math.ceil(int(math.log2(len(data))) / 4)}x'
    cells = [format(value, '02x') for value in data]
    rows = [
        ' '.join(cells[start:start + 8]) + ' // ' + format(start, addr_format) + '\n'
        for start in range(0, len(cells), 8)
    ]
    return ''.join(rows).encode()
def convert_to_mif(data, depth=32, width=8):
    """Render ``data`` as a memory initialisation file (MIF) text.

    The output is a header declaring ``DEPTH``, ``WIDTH`` and hex radices,
    then one ``<address> : <up to eight hex values>;`` line per eight values,
    then ``END;``.

    :param data: sequence of ints (for example a ``bytearray``)
    :param depth: value written to the ``DEPTH`` header field
    :param width: value written to the ``WIDTH`` header field
    :return: the file content as ``bytes``; empty input gives a file with no
        content lines
    """
    header = (
        '-- auto-generated memory initialisation file\n'
        f'DEPTH = {depth};\n'
        f'WIDTH = {width};\n'
        'ADDRESS_RADIX = HEX;\n'
        'DATA_RADIX = HEX;\n'
        'CONTENT\n'
        'BEGIN\n'
    )

    rows = []
    if len(data) > 0:  # log2(0) is undefined; skip the body for empty data
        # Minimum address width in hex digits, derived from the data length.
        addr_format = f'0{math.ceil(int(math.log2(len(data))) / 4)}x'
        cells = [format(value, '02x') for value in data]
        rows = [
            format(start, addr_format) + ' : ' + ' '.join(cells[start:start + 8]) + ';\n'
            for start in range(0, len(cells), 8)
        ]
    return (header + ''.join(rows) + 'END;').encode()
|