A basic compiler based on thejameskyle's super-tiny-compiler

compiler.js 5.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. const fs = require('fs')
  /**
   * Splits raw source text into a flat list of tokens.
   *
   * The first element of the returned array is the unmodified `input` string;
   * the parser in this file starts reading at index 1 to skip it. Every
   * following element is a `{ type, value }` object whose `type` is 'paren',
   * 'number' or 'name'. Whitespace separates tokens and is otherwise dropped.
   *
   * @param {string} input - Source text to tokenize.
   * @returns {Array} The raw input followed by the token objects.
   * @throws {TypeError} If a character belongs to none of the token classes.
   */
  function tokenizer(input) {
    // Non-global patterns, so `test` keeps no state between calls.
    const parens = /[()]/
    const whitespace = /\s/
    const numbers = /[0-9]/
    const characters = /[a-zA-Z_]/

    const tokens = [input]
    let pos = 0

    // Consumes the run of consecutive characters matching `pattern`, starting
    // at `pos`, and returns it. The bounds check is essential: past the end of
    // the input `input[pos]` is `undefined`, which `test` coerces to the string
    // "undefined" -- that matches the name pattern, so without the check a
    // source ending in a name would never terminate.
    function readRun(pattern) {
      let run = ''
      while (pos < input.length && pattern.test(input[pos])) {
        run += input[pos]
        pos++
      }
      return run
    }

    while (pos < input.length) {
      const char = input[pos]

      if (parens.test(char)) {
        tokens.push({
          type: 'paren',
          value: char
        })
        pos++
        continue
      }

      if (whitespace.test(char)) {
        pos++
        continue
      }

      if (numbers.test(char)) {
        tokens.push({
          type: 'number',
          value: readRun(numbers)
        })
        continue
      }

      if (characters.test(char)) {
        tokens.push({
          type: 'name',
          value: readRun(characters)
        })
        continue
      }

      throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.")
    }

    return tokens
  }
  /**
   * Builds an AST from the token list produced by the tokenizer.
   *
   * Index 0 of `input` is skipped (the tokenizer stores the raw source string
   * there); parsing starts at index 1. Numbers become `NumberLiteral` nodes,
   * names become `VariableReference` nodes, and `( name args... )` becomes a
   * `FunctionCall` node whose `params` are parsed recursively.
   *
   * @param {Array} input - Raw source at index 0 followed by `{ type, value }` tokens.
   * @returns {{type: string, body: Array}} Root node of type 'Prog'.
   * @throws {Error} With `name` 'Compiler Error' when a call does not start with
   *   a name, or when the tokens end before a call is closed.
   * @throws {TypeError} When a token cannot start an expression (for example a
   *   stray ")"); the message is the token's type.
   */
  function parser(input) {
    let pos = 1

    // Builds an Error carrying the same `name`/`message` fields the compiler's
    // error objects use, plus a stack trace.
    function compilerError(message) {
      const error = new Error(message)
      error.name = 'Compiler Error'
      return error
    }

    // Parses one expression starting at `pos` and leaves `pos` just past it.
    function walk() {
      let token = input[pos]

      if (token.type === 'number') {
        pos++
        return {
          type: 'NumberLiteral',
          value: token.value
        }
      }

      if (token.type === 'name') {
        pos++
        return {
          type: 'VariableReference',
          value: token.value
        }
      }

      if (token.type === 'paren' && token.value === '(') {
        token = input[++pos]
        // The token list may end right after "("; report that instead of
        // failing on a property access of undefined.
        if (token === undefined) {
          throw compilerError('Unexpected end of input after "(".')
        }
        if (token.type !== 'name') {
          throw compilerError('FunctionCall may only be type "name" not "' + token.type + '".')
        }

        const node = {
          type: 'FunctionCall',
          value: token.value,
          params: []
        }

        token = input[++pos]
        // Collect arguments until the matching ")" or until the tokens run out.
        while (token !== undefined && !(token.type === 'paren' && token.value === ')')) {
          node.params.push(walk())
          token = input[pos]
        }
        if (token === undefined) {
          throw compilerError('Unexpected end of input: missing ")" for call to "' + node.value + '".')
        }

        pos++ // step over the closing ")"
        return node
      }

      throw new TypeError(token.type)
    }

    const ast = {
      type: 'Prog',
      body: []
    }

    while (pos < input.length) {
      ast.body.push(walk())
    }

    return ast
  }
  /**
   * Walks an AST depth-first, calling the matching visitor function on each
   * node before descending into that node's children.
   *
   * @param {object} ast - Root node to start from; visited with a `null` parent.
   * @param {object} visitor - Maps a node `type` to a `(node, parent)` callback.
   *   Types without an entry are traversed without a callback.
   * @throws {{name: string, message: string}} A plain 'Compiler Error' object
   *   when a node's type is not one of the four known types.
   */
  var traverser = function (ast, visitor) {
    // For every known node type: the property holding its child nodes, or null
    // when the node is a leaf.
    const childListKey = new Map([
      ['Prog', 'body'],
      ['FunctionCall', 'params'],
      ['VariableReference', null],
      ['NumberLiteral', null]
    ])

    const visit = (node, parent) => {
      const handler = visitor[node.type]
      if (handler) {
        handler(node, parent)
      }

      // The type is validated only after the callback has run, so a visitor
      // registered for an unknown type still fires before the error.
      const kind = node.type
      if (!childListKey.has(kind)) {
        throw {
          name: 'Compiler Error',
          message: 'Unknown leaf in AST: ' + node.type
        }
      }

      const key = childListKey.get(kind)
      if (key !== null) {
        node[key].forEach((child) => {
          visit(child, node)
        })
      }
    }

    visit(ast, null)
  }
  /**
   * Converts the parser's AST into the tree shape consumed by the generator.
   *
   * Each source node is given a `_context` property pointing at the array in
   * the new tree that should receive its transformed children, so the input
   * AST is mutated as a side effect. Function calls whose parent is not itself
   * a function call are wrapped in a 'Statement' node.
   *
   * @param {object} ast - Root 'Prog' node as produced by the parser.
   * @returns {{type: string, body: Array}} A new 'Prog' node.
   */
  var transformer = function (ast) {
    const result = {
      type: 'Prog',
      body: []
    }

    // Children of the root are written straight into the new program body.
    ast._context = result.body

    // Returns a visitor that copies a leaf node of the given type into the
    // parent's output array.
    const copyLeafAs = (type) => (node, parent) => {
      parent._context.push({
        type,
        value: node.value
      })
    }

    const visitCall = (node, parent) => {
      const call = {
        type: 'FunctionCall',
        callee: {
          type: 'FunctionName',
          name: node.value
        },
        args: []
      }

      // Arguments visited later are appended to this call's `args`.
      node._context = call.args

      const isNested = parent.type === 'FunctionCall'
      parent._context.push(isNested ? call : {
        type: 'Statement',
        expr: call
      })
    }

    traverser(ast, {
      NumberLiteral: copyLeafAs('NumberLiteral'),
      VariableReference: copyLeafAs('VariableReference'),
      FunctionCall: visitCall
    })

    return result
  }
  /**
   * Recursively renders a transformed AST node as JavaScript source text.
   *
   * A 'Prog' node yields the whole program: a line that loads `./stdlib.js`
   * into `_`, followed by one line per body entry. Function names are emitted
   * as members of `_`, variable references as `_.ref("name")` calls and
   * numbers as `{value: n}` object literals.
   *
   * @param {object} node - Node of the transformed AST.
   * @returns {string} Generated source for `node`.
   * @throws {{name: string, message: string}} A plain 'Compiler Error' object
   *   when the node's type is not recognised.
   */
  var generator = function (node) {
    const kind = node.type

    if (kind === 'Prog') {
      const header = 'var _ = require("./stdlib.js")'
      const lines = [header].concat(node.body.map(generator))
      return lines.join('\n')
    }

    if (kind === 'Statement') {
      return generator(node.expr) + ';'
    }

    if (kind === 'FunctionCall') {
      const callee = generator(node.callee)
      const argList = node.args.map(generator).join(', ')
      return callee + '(' + argList + ')'
    }

    if (kind === 'FunctionName') {
      return '_.' + node.name
    }

    if (kind === 'VariableReference') {
      return '_.ref("' + node.value + '")'
    }

    if (kind === 'NumberLiteral') {
      return '{value: ' + node.value + '}'
    }

    throw {
      name: 'Compiler Error',
      message: 'Unexpected leaf in transformed AST: ' + node.type
    }
  }
  // Command-line driver: reads the source file named by the first CLI argument,
  // runs it through tokenizer -> parser -> transformer -> generator, and writes
  // the generated JavaScript to `output.js`.
  //
  // Example source program:
  //   (assign twelve 12) (assign myvar (add twelve (subtract 6 2))) (log myvar)
  const inputPath = process.argv[2]
  if (inputPath === undefined) {
    // Without this check the missing argument reaches readFileSync and fails
    // with an argument-type error that never mentions the command line.
    console.error('Usage: node compiler.js <source-file>')
    process.exit(1)
  }
  const myInput = fs.readFileSync(inputPath, { encoding: 'utf-8' })
  const myTokens = tokenizer(myInput)
  const parsedTree = parser(myTokens)
  const transformedTree = transformer(parsedTree)
  const output = generator(transformedTree)
  fs.writeFileSync('output.js', output)