asm_compiler.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. #!/usr/bin/python3
  2. import sys
  3. import argparse
  4. from os import path
  5. def decode_byte(val: str):
  6. try:
  7. if val.endswith('h'):
  8. return int(val[:-1], 16)
  9. if val.startswith('0x'):
  10. return int(val[2:], 16)
  11. if val.startswith('b'):
  12. return int(val.replace('_', '')[1:], 2)
  13. except ValueError:
  14. raise ValueError(f"Invalid binary '{val}'")
  15. if val.isdigit():
  16. i = int(val)
  17. if i > 255 or i < 0:
  18. raise ValueError(f"Invalid binary '{val}', unsigned int out of bounds")
  19. return i
  20. if (val.startswith('+') or val.startswith('-')) and val[1:].isdigit():
  21. i = int(val)
  22. if i > 127 or i < -128:
  23. raise ValueError(f"Invalid binary '{val}', signed int out of bounds")
  24. if i < 0: # convert to unsigned
  25. i += 2 ** 8
  26. return i
  27. if len(val) == 3 and ((val[0] == "'" and val[2] == "'") or (val[0] == '"' and val[2] == '"')):
  28. return ord(val[1])
  29. raise ValueError(f"Invalid binary '{val}'")
  30. def is_reg(r):
  31. if r.startswith('$'):
  32. r = r[1:]
  33. return len(r) == 2 and (r == 'ra' or r == 'rb' or r == 'rc' or r == 're')
  34. def decode_reg(r):
  35. if isinstance(r, int):
  36. if 0 <= r <= 3:
  37. return r
  38. raise ValueError(f"Invalid register value {r}")
  39. rl = r.lower()
  40. if rl.startswith('$'):
  41. rl = rl[1:]
  42. if rl == 'ra':
  43. return 0
  44. if rl == 'rb':
  45. return 1
  46. if rl == 'rc':
  47. return 2
  48. if rl == 're':
  49. return 3
  50. raise ValueError(f"Invalid register name '{r}'")
  51. def assemble(file):
  52. odata = []
  53. afile = open(file, 'r')
  54. failed = False
  55. refs = dict()
  56. for lnum, line in enumerate(afile.readlines()):
  57. lnum += 1 # Line numbers start from 1, not 0
  58. if '//' in line:
  59. line = line[:line.index('//')]
  60. if ':' in line:
  61. rsplit = line.split(':', 1)
  62. ref = rsplit[0]
  63. if not ref.isalnum():
  64. print(f"{file}:{lnum}: Invalid pointer reference '{ref}'")
  65. failed = True
  66. continue
  67. if ref in refs:
  68. if refs[ref][1] is not None:
  69. print(f"{file}:{lnum}: Pointer reference '{ref}' is duplicated with {file}:{refs[ref][0]}")
  70. failed = True
  71. continue
  72. refs[ref] = [lnum, len(odata)]
  73. line = rsplit[1]
  74. line = line.replace('\n', '').replace('\r', '').replace('\t', '')
  75. line = line.strip(' ')
  76. if line == '':
  77. continue
  78. ops = line.split()
  79. instr = ops[0].upper()
  80. rops = 3
  81. if instr == 'CPY' or instr == 'COPY':
  82. iname = 'COPY'
  83. inibb = 0
  84. elif instr == 'ADD':
  85. iname = 'ADD'
  86. inibb = 1
  87. elif instr == 'SUB':
  88. iname = 'SUB'
  89. inibb = 2
  90. elif instr == 'AND':
  91. iname = 'AND'
  92. inibb = 3
  93. elif instr == 'OR':
  94. iname = 'OR'
  95. inibb = 4
  96. elif instr == 'XOR':
  97. iname = 'XOR'
  98. inibb = 5
  99. elif instr == 'GT' or instr == 'GRT':
  100. iname = 'GT'
  101. inibb = 6
  102. elif instr == 'EX' or instr == 'EXT':
  103. iname = 'EXT'
  104. inibb = 7
  105. elif instr == 'SHFL':
  106. iname = 'SHTL'
  107. inibb = 7
  108. ops.append(0)
  109. elif instr == 'SHFR':
  110. iname = 'SHTR'
  111. inibb = 7
  112. ops.append(1)
  113. elif instr == 'ROTR':
  114. iname = 'ROTR'
  115. inibb = 7
  116. ops.append(2)
  117. elif instr == 'LW':
  118. iname = 'LW'
  119. inibb = 8
  120. elif instr == 'SW':
  121. iname = 'SW'
  122. inibb = 9
  123. elif instr == 'JEQ':
  124. iname = 'JEQ'
  125. rops = 4
  126. inibb = 10
  127. elif instr == 'JMP' or instr == 'JUMP':
  128. iname = 'JUMP'
  129. rops = 2
  130. inibb = 11
  131. elif instr == 'PUSH':
  132. iname = 'PUSH'
  133. rops = 2
  134. inibb = 14
  135. elif instr == 'POP':
  136. iname = 'POP'
  137. rops = 2
  138. inibb = 15
  139. else:
  140. if len(ops) == 1:
  141. try:
  142. odata.append(decode_byte(ops[0]))
  143. continue
  144. except ValueError:
  145. pass
  146. print(f"{file}:{lnum}: Instruction '{ops[0]}' not recognised")
  147. failed = True
  148. continue
  149. if len(ops) != rops:
  150. print(f"{file}:{lnum}: {iname} instruction requires {rops - 1} arguments")
  151. failed = True
  152. continue
  153. try:
  154. if iname == 'JUMP':
  155. odata.append(inibb << 4)
  156. try:
  157. odata.append(decode_byte(ops[1]))
  158. except ValueError:
  159. if not ops[1].isalnum():
  160. print(f"{file}:{lnum}: Invalid pointer reference '{ops[1]}'")
  161. failed = True
  162. continue
  163. if ops[1] in refs:
  164. odata.append(refs[ops[1]][1])
  165. else:
  166. refs[ops[1]] = [lnum, None]
  167. odata.append(ops[1])
  168. continue
  169. rd = decode_reg(ops[1])
  170. if iname == 'COPY' and not is_reg(ops[2]):
  171. imm = decode_byte(ops[2])
  172. odata.append((inibb << 4) | (rd << 2) | rd)
  173. odata.append(int(imm))
  174. continue
  175. if iname == 'PUSH' or iname == 'POP':
  176. odata.append((inibb << 4) | (rd << 2) | rd)
  177. continue
  178. rs = decode_reg(ops[2])
  179. if iname == 'COPY' and rd == rs:
  180. print(f"{file}:{lnum}: {iname} cannot copy register to itself")
  181. failed = True
  182. continue
  183. odata.append((inibb << 4) | (rd << 2) | rs)
  184. if iname == 'JEQ':
  185. try:
  186. odata.append(decode_byte(ops[3]))
  187. except ValueError:
  188. if not ops[3].isalnum():
  189. print(f"{file}:{lnum}: Invalid pointer reference '{ops[3]}'")
  190. failed = True
  191. continue
  192. if ops[3] in refs:
  193. odata.append(refs[ops[3]][1])
  194. else:
  195. refs[ops[3]] = [lnum, None]
  196. odata.append(ops[3])
  197. continue
  198. except ValueError as e:
  199. print(f"{file}:{lnum}: {e}")
  200. failed = True
  201. continue
  202. afile.close()
  203. # Convert jumps
  204. for i, l in enumerate(odata):
  205. if isinstance(l, str):
  206. if refs[l][1] is None:
  207. print(f"{file}:{refs[l][0]}: Pointer reference '{l}' does not exist!")
  208. failed = True
  209. continue
  210. odata[i] = refs[l][1]
  211. return not failed, odata
  212. def readable_size(num, disp_bytes=True):
  213. num = abs(num)
  214. if num < 1024 and disp_bytes:
  215. return "[%3.0fB]" % num
  216. if num < 1024 and not disp_bytes:
  217. return ""
  218. num /= 1024.0
  219. for unit in ['Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
  220. if abs(num) < 1024.0:
  221. return "[%3.1f%sB]" % (num, unit)
  222. num /= 1024.0
  223. return "[%.1f%sB]" % (num, 'Yi')
  224. if __name__ == '__main__':
  225. parser = argparse.ArgumentParser(description='Assembly compiler', add_help=True)
  226. parser.add_argument('file', help='Files to compile')
  227. parser.add_argument('-t', '--output_type', choices=['bin', 'mem', 'binary'], default='mem', help='Output type')
  228. parser.add_argument('-o', '--output', help='Output file')
  229. parser.add_argument('-f', '--force', action='store_true', help='Force override output file')
  230. args = parser.parse_args(sys.argv[1:])
  231. if not path.isfile(args.file):
  232. print(f'No file {args.file}!')
  233. sys.exit(1)
  234. output = args.output
  235. if not output:
  236. opath = path.dirname(args.file)
  237. bname = path.basename(args.file).rsplit('.', 1)[0]
  238. ext = '.out'
  239. if args.output_type == 'mem':
  240. ext = '.mem'
  241. elif args.output_type == 'bin':
  242. ext = '.bin'
  243. output = path.join(opath, bname + ext)
  244. if not args.force and path.isfile(output):
  245. print(f'Output file already exists {output}!')
  246. sys.exit(1)
  247. success, data = assemble(args.file)
  248. if success:
  249. print(f"Saving {args.output_type} data to {output}")
  250. print(f"Program size: {len(data)}B {readable_size(len(data), False)}")
  251. with open(output, 'wb') as of:
  252. if args.output_type == 'binary':
  253. a = '\n'.join([format(i, '08b') for i in data])
  254. of.write(a.encode())
  255. elif args.output_type == 'mem':
  256. a = [format(i, '02x') for i in data]
  257. for i in range(int(len(a) / 8) + 1):
  258. of.write((' '.join(a[i * 8:(i + 1) * 8]) + '\n').encode())
  259. elif args.output_type == 'bin':
  260. of.write(bytes(data))
  261. else:
  262. print(f'Failed to compile {args.file}!')
  263. sys.exit(1)
  264. sys.exit(0)