A basic compiler based off of thejameskyle's super-tiny-compiler

compiler.js 6.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. const fs = require('fs')
  2. var tokenizer = function (input) {
  3. let pos = 0
  4. let tokens = []
  5. tokens.push(input)
  6. while (pos < input.length) {
  7. let char = input[pos]
  8. let parens = /[()]/
  9. if (parens.test(char)) {
  10. tokens.push({
  11. type: 'paren',
  12. value: char
  13. })
  14. pos++
  15. continue
  16. }
  17. let whitespace = /[;\s]/
  18. if (whitespace.test(char)) {
  19. if (char === ';') {
  20. comment = ''
  21. while (char !== '\n') {
  22. comment += char
  23. char = input[++pos]
  24. }
  25. } else {
  26. pos++
  27. }
  28. continue
  29. }
  30. let numbers = /[0-9]/
  31. if (numbers.test(char)) {
  32. let numberString = ''
  33. while (numbers.test(char)) {
  34. numberString += char
  35. char = input[++pos]
  36. }
  37. tokens.push({
  38. type: 'number',
  39. value: numberString
  40. })
  41. continue
  42. }
  43. let characters = /[a-zA-Z_]/
  44. if (characters.test(char)) {
  45. let name = ''
  46. while (characters.test(char)) {
  47. name += char
  48. char = input[++pos]
  49. }
  50. tokens.push({
  51. type: 'name',
  52. value: name
  53. })
  54. continue
  55. }
  56. let dollar = /[$]/
  57. if (dollar.test(char)) {
  58. let name = ''
  59. char = input[++pos]
  60. if (numbers.test(char)) {
  61. while (numbers.test(char)) {
  62. name += char
  63. char = input[++pos]
  64. }
  65. } else {
  66. console.error("Compiler Error: $ must be followed by a digit [0-9]")
  67. process.exit(1);
  68. }
  69. tokens.push({
  70. type: 'dollar',
  71. value: name
  72. })
  73. continue
  74. }
  75. throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.")
  76. }
  77. return tokens
  78. }
  79. var parser = function (input) {
  80. let pos = 1
  81. function walk() {
  82. let token = input[pos]
  83. if (token.type === 'number') {
  84. pos++
  85. return {
  86. type: 'NumberLiteral',
  87. value: token.value
  88. }
  89. }
  90. if (token.type === 'name') {
  91. pos++
  92. return {
  93. type: 'VariableReference',
  94. value: token.value
  95. }
  96. }
  97. if (token.type === 'dollar') {
  98. pos++
  99. return {
  100. type: 'DollarVar',
  101. value: token.value
  102. }
  103. }
  104. if (token.type === 'paren' && token.value == '(') {
  105. token = input[++pos]
  106. if (token.type !== 'name') {
  107. throw {
  108. name: 'Compiler Error',
  109. message: 'FunctionCall may only be type "name" not "' + token.type + '".'
  110. }
  111. }
  112. let node = {
  113. type: 'FunctionCall',
  114. value: token.value,
  115. params: []
  116. }
  117. token = input[++pos]
  118. while ((token.type !== 'paren') || (token.type === 'paren' && token.value !== ')')) {
  119. node.params.push(walk())
  120. token = input[pos]
  121. }
  122. pos++
  123. return node
  124. }
  125. throw new TypeError(token.type)
  126. }
  127. let ast = {
  128. type: 'Prog',
  129. body: []
  130. }
  131. while (pos < input.length) {
  132. ast.body.push(walk())
  133. }
  134. return ast
  135. }
  136. var traverser = function (ast, visitor) {
  137. function traverseArray(array, parent) {
  138. array.forEach(function (child) {
  139. traverseNode(child, parent)
  140. })
  141. }
  142. function traverseNode(node, parent) {
  143. const method = visitor[node.type]
  144. if (method) {
  145. method(node, parent)
  146. }
  147. switch (node.type) {
  148. case 'Prog':
  149. traverseArray(node.body, node)
  150. break
  151. case 'FunctionCall':
  152. traverseArray(node.params, node)
  153. break
  154. case 'VariableReference':
  155. break
  156. case 'NumberLiteral':
  157. break
  158. case 'DollarVar':
  159. break
  160. default:
  161. throw {
  162. name: 'Compiler Error',
  163. message: 'Unknown leaf in AST: ' + node.type
  164. }
  165. }
  166. }
  167. traverseNode(ast, null)
  168. }
  169. var transformer = function (ast) {
  170. let newAst = {
  171. type: 'Prog',
  172. body: []
  173. }
  174. ast._context = newAst.body
  175. traverser(ast, {
  176. NumberLiteral: function (node, parent) {
  177. parent._context.push({
  178. type: 'NumberLiteral',
  179. value: node.value
  180. })
  181. },
  182. VariableReference: function (node, parent) {
  183. parent._context.push({
  184. type: 'VariableReference',
  185. value: node.value
  186. })
  187. },
  188. DollarVar: function (node, parent) {
  189. parent._context.push({
  190. type: 'DollarVar',
  191. value: node.value
  192. })
  193. },
  194. FunctionCall: function (node, parent) {
  195. let expression = {
  196. type: 'FunctionCall',
  197. callee: {
  198. type: 'FunctionName',
  199. name: node.value
  200. },
  201. args: []
  202. }
  203. node._context = expression.args
  204. if (parent.type !== 'FunctionCall') {
  205. expression = {
  206. type: 'Statement',
  207. expr: expression
  208. }
  209. }
  210. parent._context.push(expression)
  211. }
  212. })
  213. return newAst
  214. }
  215. var generator = function (node) {
  216. switch (node.type) {
  217. case 'Prog':
  218. let program = node.body.map(generator)
  219. program.unshift('var _ = require("./stdlib.js")')
  220. return program.join('\n')
  221. break
  222. case 'Statement':
  223. return (generator(node.expr) + ';')
  224. break
  225. case 'FunctionCall':
  226. if (node.callee.name !== 'def') {
  227. return (generator(node.callee) + '(' + node.args.map(generator).join(', ') + ')')
  228. } else {
  229. return (generator(node.callee) + '(' + node.args.map((v, i) => {
  230. if (i === 0) {
  231. return generator(v) + ', '
  232. } else {
  233. if (i === 1) {
  234. return "'" + generator(v) + '; '
  235. } else {
  236. return generator(v) + '; '
  237. }
  238. }
  239. }).join('') + "')")
  240. }
  241. break;
  242. case 'DollarVar':
  243. return '$' + node.value
  244. break
  245. case 'FunctionName':
  246. return '_.' + node.name
  247. break
  248. case 'VariableReference':
  249. return '_.ref("' + node.value + '")'
  250. break
  251. case 'NumberLiteral':
  252. return '{value: ' + node.value + '}'
  253. break
  254. default:
  255. throw {
  256. name: 'Compiler Error',
  257. message: 'Unexpected leaf in transformed AST: ' + node.type
  258. }
  259. break
  260. }
  261. }
  262. // const myInput = '(assign twelve 12) (assign myvar (add twelve (subtract 6 2))) (log myvar)'
  263. const myInput = fs.readFileSync(process.argv[2], { encoding: 'utf-8' })
  264. const myTokens = tokenizer(myInput)
  265. const parsedTree = parser(myTokens)
  266. const transformedTree = transformer(parsedTree)
  267. //console.log(JSON.stringify(transformedTree,null,2))
  268. const output = generator(transformedTree)
  269. fs.writeFileSync('output.js', output)