A basic compiler based on thejameskyle's super-tiny-compiler.

compiler.js 5.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. const fs = require('fs')
  /**
   * Splits source text into a flat list of tokens.
   *
   * The first element of the returned array is the raw input string itself;
   * real tokens start at index 1 (the parser in this file starts reading at 1).
   * Each token is `{ type, value }` where `type` is 'paren', 'number' or 'name'
   * and `value` is the matched text.
   *
   * Whitespace is skipped. A '#' starts a comment that runs up to (but not
   * including) the next newline or the end of input; the comment text is
   * logged to the console and otherwise ignored.
   *
   * @param {string} input - Source text to tokenize.
   * @returns {Array} The input string followed by the token objects.
   * @throws {TypeError} When a character matches none of the token classes.
   */
  function tokenizer(input) {
    const parens = /[()]/;
    const whitespace = /\s/;
    const digits = /[0-9]/;
    const nameChars = /[a-zA-Z_]/;

    const tokens = [input];
    let pos = 0;

    // Consumes the longest run of characters matching `pattern` starting at
    // `pos`. The explicit bounds check matters: RegExp#test coerces
    // `undefined` to the string "undefined", which matches the name pattern
    // and would otherwise loop forever at end of input.
    const takeWhile = (pattern) => {
      const start = pos;
      while (pos < input.length && pattern.test(input[pos])) {
        pos++;
      }
      return input.slice(start, pos);
    };

    while (pos < input.length) {
      const char = input[pos];

      if (parens.test(char)) {
        tokens.push({ type: 'paren', value: char });
        pos++;
        continue;
      }

      if (char === '#') {
        // A comment on the last line may have no trailing newline; stop at
        // end of input in that case instead of scanning past it.
        const newlineAt = input.indexOf('\n', pos);
        const end = newlineAt === -1 ? input.length : newlineAt;
        console.log("Ignoring comment: " + input.slice(pos, end));
        pos = end;
        continue;
      }

      if (whitespace.test(char)) {
        pos++;
        continue;
      }

      if (digits.test(char)) {
        tokens.push({ type: 'number', value: takeWhile(digits) });
        continue;
      }

      if (nameChars.test(char)) {
        tokens.push({ type: 'name', value: takeWhile(nameChars) });
        continue;
      }

      throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.");
    }

    return tokens;
  }
  /**
   * Builds an AST from the token list produced by the tokenizer.
   *
   * Index 0 of `input` is skipped (the tokenizer stores the raw source string
   * there); tokens are read from index 1 onwards.
   *
   * Node shapes produced:
   *   - number token        -> { type: 'NumberLiteral', value }
   *   - name token          -> { type: 'VariableReference', value }
   *   - '(' name args... ')' -> { type: 'FunctionCall', value: name, params: [...] }
   *
   * @param {Array} input - Token list; element 0 is ignored.
   * @returns {{type: string, body: Array}} Root node of type 'Prog'.
   * @throws {Error} With name 'Compiler Error' when a call does not start with
   *   a name token, or when the input ends in the middle of an expression.
   * @throws {TypeError} When a token cannot start an expression (for example a
   *   stray ')'); the message is the token's type.
   */
  function parser(input) {
    let pos = 1;

    const compilerError = (message) => {
      const err = new Error(message);
      err.name = 'Compiler Error';
      return err;
    };

    // Returns the token at `pos`, failing loudly when the token stream ran out
    // (e.g. an unclosed '(') instead of dereferencing `undefined`.
    const current = () => {
      if (pos >= input.length) {
        throw compilerError('Unexpected end of input.');
      }
      return input[pos];
    };

    const isClosingParen = (token) => token.type === 'paren' && token.value === ')';

    function walk() {
      let token = current();

      if (token.type === 'number') {
        pos++;
        return { type: 'NumberLiteral', value: token.value };
      }

      if (token.type === 'name') {
        pos++;
        return { type: 'VariableReference', value: token.value };
      }

      if (token.type === 'paren' && token.value === '(') {
        // The token right after '(' names the function being called.
        pos++;
        token = current();
        if (token.type !== 'name') {
          throw compilerError('FunctionCall may only be type "name" not "' + token.type + '".');
        }

        const node = {
          type: 'FunctionCall',
          value: token.value,
          params: [],
        };

        // Every expression up to the matching ')' is a parameter; nested
        // calls are handled by the recursive walk().
        pos++;
        token = current();
        while (!isClosingParen(token)) {
          node.params.push(walk());
          token = current();
        }

        pos++; // step over the closing ')'
        return node;
      }

      throw new TypeError(token.type);
    }

    const ast = {
      type: 'Prog',
      body: [],
    };

    while (pos < input.length) {
      ast.body.push(walk());
    }

    return ast;
  }
  /**
   * Walks an AST depth-first, invoking visitor callbacks along the way.
   *
   * For each node, `visitor[node.type]` (when present) is called with
   * `(node, parent)` before the node's children are visited. 'Prog' nodes
   * descend into `body`, 'FunctionCall' nodes into `params`;
   * 'VariableReference' and 'NumberLiteral' are leaves. The root is visited
   * with a `null` parent.
   *
   * @param {object} ast - Root node to start from.
   * @param {object} visitor - Map from node type to `(node, parent)` callback.
   * @throws {{name: string, message: string}} A plain 'Compiler Error' object
   *   when a node has a type other than the four listed above.
   */
  function traverser(ast, visitor) {
    // Known node types mapped to the property holding their children
    // (null marks a leaf with nothing to descend into).
    const childListOf = new Map([
      ['Prog', 'body'],
      ['FunctionCall', 'params'],
      ['VariableReference', null],
      ['NumberLiteral', null],
    ]);

    const visit = (current, owner) => {
      const callback = visitor[current.type];
      if (callback) {
        callback(current, owner);
      }

      if (!childListOf.has(current.type)) {
        throw {
          name: 'Compiler Error',
          message: 'Unknown leaf in AST: ' + current.type
        };
      }

      const key = childListOf.get(current.type);
      if (key !== null) {
        current[key].forEach((child) => {
          visit(child, current);
        });
      }
    };

    visit(ast, null);
  }
  /**
   * Converts the parser's AST into the tree shape the generator consumes.
   *
   * Each visited node appends its translated form to `parent._context`, an
   * array attached to the source nodes as the walk proceeds: the root's
   * `_context` is the new program body, and every FunctionCall's `_context`
   * is the `args` array of its translated call. Note that this writes
   * `_context` properties onto the nodes of the AST passed in.
   *
   * Translation rules:
   *   - NumberLiteral / VariableReference -> copied as { type, value }
   *   - FunctionCall -> { type: 'FunctionCall', callee: { type: 'FunctionName',
   *     name }, args }, wrapped in { type: 'Statement', expr } unless the
   *     parent is itself a FunctionCall
   *
   * @param {object} ast - Root 'Prog' node from the parser.
   * @returns {{type: string, body: Array}} New root node of type 'Prog'.
   */
  function transformer(ast) {
    const newAst = { type: 'Prog', body: [] };

    // Children of the root append straight into the new program body.
    ast._context = newAst.body;

    // Leaves carry over unchanged; the visitor key equals node.type.
    const copyLeaf = (node, parent) => {
      parent._context.push({
        type: node.type,
        value: node.value
      });
    };

    const translateCall = (node, parent) => {
      const call = {
        type: 'FunctionCall',
        callee: {
          type: 'FunctionName',
          name: node.value
        },
        args: []
      };

      // Parameters visited later land in this call's argument list.
      node._context = call.args;

      const isNested = parent.type === 'FunctionCall';
      parent._context.push(isNested ? call : { type: 'Statement', expr: call });
    };

    traverser(ast, {
      NumberLiteral: copyLeaf,
      VariableReference: copyLeaf,
      FunctionCall: translateCall
    });

    return newAst;
  }
  /**
   * Recursively renders a transformed AST node as JavaScript source text.
   *
   * Output per node type:
   *   - Prog              -> a `var _ = require("./stdlib.js")` line followed by
   *                          each body node's output, joined with newlines
   *   - Statement         -> the inner expression followed by ';'
   *   - FunctionCall      -> `<callee>(<arg>, <arg>, ...)`
   *   - FunctionName      -> `_.<name>`
   *   - VariableReference -> `_.ref("<value>")`
   *   - NumberLiteral     -> `{value: <digits>}`
   *
   * Number literals have redundant leading zeros removed before being
   * emitted: JavaScript would read `010` as a legacy octal literal (8), or
   * reject it outright in strict mode, instead of the decimal ten.
   *
   * @param {object} node - Node of the transformed AST.
   * @returns {string} Generated JavaScript source for `node`.
   * @throws {Error} With name 'Compiler Error' for an unrecognised node type.
   */
  function generator(node) {
    switch (node.type) {
      case 'Prog': {
        const lines = node.body.map(generator);
        lines.unshift('var _ = require("./stdlib.js")');
        return lines.join('\n');
      }
      case 'Statement':
        return `${generator(node.expr)};`;
      case 'FunctionCall':
        return `${generator(node.callee)}(${node.args.map(generator).join(', ')})`;
      case 'FunctionName':
        return `_.${node.name}`;
      case 'VariableReference':
        return `_.ref("${node.value}")`;
      case 'NumberLiteral': {
        // Keep at least one digit so "0" and "000" both become "0".
        const digits = String(node.value).replace(/^0+(?=[0-9])/, '');
        return `{value: ${digits}}`;
      }
      default: {
        const err = new Error('Unexpected leaf in transformed AST: ' + node.type);
        err.name = 'Compiler Error';
        throw err;
      }
    }
  }
  // Example program in the source language:
  //   (assign twelve 12) (assign myvar (add twelve (subtract 6 2))) (log myvar)

  // Command-line driver: reads the source file named by the first CLI
  // argument, runs it through tokenizer -> parser -> transformer -> generator,
  // and writes the generated JavaScript to output.js in the working directory.
  const inputPath = process.argv[2];
  if (inputPath === undefined) {
    // Without this guard fs.readFileSync(undefined) fails with an opaque
    // argument-type error that does not tell the user what to pass.
    console.error('Usage: node compiler.js <source-file>');
    process.exit(1);
  }
  const myInput = fs.readFileSync(inputPath, { encoding: 'utf-8' });
  const myTokens = tokenizer(myInput);
  const parsedTree = parser(myTokens);
  const transformedTree = transformer(parsedTree);
  const output = generator(transformedTree);
  fs.writeFileSync('output.js', output);