A basic compiler based on thejameskyle's super-tiny-compiler

compiler.js 7.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. const fs = require('fs')
  2. var preprocess = function (input) {
  3. let inputArr = input.split('\n')
  4. for (i = 0; i < inputArr.length; i++) {
  5. line = inputArr[i]
  6. if (line.startsWith('`source')) {
  7. line = fs.readFileSync(line.split(' ')[1], { encoding: 'utf-8' })
  8. inputArr[i] = preprocess(line)
  9. }
  10. }
  11. return inputArr.join('\n')
  12. }
  13. var tokenizer = function (input) {
  14. let pos = 0
  15. let tokens = []
  16. tokens.push(input)
  17. while (pos < input.length) {
  18. let char = input[pos]
  19. let parens = /[()]/
  20. if (parens.test(char)) {
  21. tokens.push({
  22. type: 'paren',
  23. value: char
  24. })
  25. pos++
  26. continue
  27. }
  28. let whitespace = /[;\s]/
  29. if (whitespace.test(char)) {
  30. if (char === ';') {
  31. comment = ''
  32. while (char !== '\n') {
  33. comment += char
  34. char = input[++pos]
  35. }
  36. } else {
  37. pos++
  38. }
  39. continue
  40. }
  41. let stringChars = /['"]/
  42. if (stringChars.test(char)) {
  43. let myDelim = char
  44. let stringString = ''
  45. char = input[++pos]
  46. while (char !== myDelim) {
  47. if (char !== '\n') {
  48. stringString += char
  49. }
  50. char = input[++pos]
  51. }
  52. pos++
  53. tokens.push({
  54. type: 'string',
  55. value: stringString
  56. })
  57. continue
  58. }
  59. let numbers = /[0-9]/
  60. if (numbers.test(char)) {
  61. let numberString = ''
  62. while (numbers.test(char)) {
  63. numberString += char
  64. char = input[++pos]
  65. }
  66. tokens.push({
  67. type: 'number',
  68. value: numberString
  69. })
  70. continue
  71. }
  72. let characters = /[a-zA-Z_]/
  73. if (characters.test(char)) {
  74. let name = ''
  75. while (characters.test(char)) {
  76. name += char
  77. char = input[++pos]
  78. }
  79. tokens.push({
  80. type: 'name',
  81. value: name
  82. })
  83. continue
  84. }
  85. let dollar = /[$]/
  86. if (dollar.test(char)) {
  87. let name = ''
  88. char = input[++pos]
  89. if (numbers.test(char)) {
  90. while (numbers.test(char)) {
  91. name += char
  92. char = input[++pos]
  93. }
  94. } else {
  95. console.error("Compiler Error: $ must be followed by a digit [0-9]")
  96. process.exit(1);
  97. }
  98. tokens.push({
  99. type: 'dollar',
  100. value: name
  101. })
  102. continue
  103. }
  104. throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.")
  105. }
  106. return tokens
  107. }
  108. var parser = function (input) {
  109. let pos = 1
  110. function walk() {
  111. let token = input[pos]
  112. if (token.type === 'number') {
  113. pos++
  114. return {
  115. type: 'NumberLiteral',
  116. value: token.value
  117. }
  118. }
  119. if (token.type === 'name') {
  120. pos++
  121. return {
  122. type: 'VariableReference',
  123. value: token.value
  124. }
  125. }
  126. if (token.type === 'dollar') {
  127. pos++
  128. return {
  129. type: 'DollarVar',
  130. value: token.value
  131. }
  132. }
  133. if (token.type === 'string') {
  134. pos++
  135. return {
  136. type: 'StringLiteral',
  137. value: token.value
  138. }
  139. }
  140. if (token.type === 'paren' && token.value == '(') {
  141. token = input[++pos]
  142. if (token.type !== 'name') {
  143. throw {
  144. name: 'Compiler Error',
  145. message: 'FunctionCall may only be type "name" not "' + token.type + '".'
  146. }
  147. }
  148. let node = {
  149. type: 'FunctionCall',
  150. value: token.value,
  151. params: []
  152. }
  153. token = input[++pos]
  154. while ((token.type !== 'paren') || (token.type === 'paren' && token.value !== ')')) {
  155. node.params.push(walk())
  156. token = input[pos]
  157. }
  158. pos++
  159. return node
  160. }
  161. throw new TypeError(token.type)
  162. }
  163. let ast = {
  164. type: 'Prog',
  165. body: []
  166. }
  167. while (pos < input.length) {
  168. ast.body.push(walk())
  169. }
  170. return ast
  171. }
  172. var traverser = function (ast, visitor) {
  173. function traverseArray(array, parent) {
  174. array.forEach(function (child) {
  175. traverseNode(child, parent)
  176. })
  177. }
  178. function traverseNode(node, parent) {
  179. const method = visitor[node.type]
  180. if (method) {
  181. method(node, parent)
  182. }
  183. switch (node.type) {
  184. case 'Prog':
  185. traverseArray(node.body, node)
  186. break
  187. case 'FunctionCall':
  188. traverseArray(node.params, node)
  189. break
  190. case 'VariableReference':
  191. break
  192. case 'NumberLiteral':
  193. break
  194. case 'StringLiteral':
  195. break
  196. case 'DollarVar':
  197. break
  198. default:
  199. throw {
  200. name: 'Compiler Error',
  201. message: 'Unknown leaf in AST: ' + node.type
  202. }
  203. }
  204. }
  205. traverseNode(ast, null)
  206. }
  207. var transformer = function (ast) {
  208. let newAst = {
  209. type: 'Prog',
  210. body: []
  211. }
  212. ast._context = newAst.body
  213. traverser(ast, {
  214. NumberLiteral: function (node, parent) {
  215. parent._context.push({
  216. type: 'NumberLiteral',
  217. value: node.value
  218. })
  219. },
  220. StringLiteral: function (node, parent) {
  221. parent._context.push({
  222. type: 'StringLiteral',
  223. value: node.value
  224. })
  225. },
  226. VariableReference: function (node, parent) {
  227. parent._context.push({
  228. type: 'VariableReference',
  229. value: node.value
  230. })
  231. },
  232. DollarVar: function (node, parent) {
  233. parent._context.push({
  234. type: 'DollarVar',
  235. value: node.value
  236. })
  237. },
  238. FunctionCall: function (node, parent) {
  239. let expression = {
  240. type: 'FunctionCall',
  241. callee: {
  242. type: 'FunctionName',
  243. name: node.value
  244. },
  245. args: []
  246. }
  247. node._context = expression.args
  248. if (parent.type !== 'FunctionCall') {
  249. expression = {
  250. type: 'Statement',
  251. expr: expression
  252. }
  253. }
  254. parent._context.push(expression)
  255. }
  256. })
  257. return newAst
  258. }
  259. var generator = function (node) {
  260. switch (node.type) {
  261. case 'Prog':
  262. let program = node.body.map(generator)
  263. program.unshift('var _ = require("./stdlib.js")')
  264. return program.join('\n')
  265. break
  266. case 'Statement':
  267. return (generator(node.expr) + ';')
  268. break
  269. case 'FunctionCall':
  270. if (node.callee.name !== 'def') {
  271. return (generator(node.callee) + '(' + node.args.map(generator).join(', ') + ')')
  272. } else {
  273. return (generator(node.callee) + '(' + node.args.map((v, i) => {
  274. if (i === 0) {
  275. return generator(v) + ', '
  276. } else {
  277. if (i === 1) {
  278. return "'" + generator(v) + '; '
  279. } else {
  280. return generator(v) + '; '
  281. }
  282. }
  283. }).join('') + "')")
  284. }
  285. break;
  286. case 'DollarVar':
  287. return '$' + node.value
  288. break
  289. case 'FunctionName':
  290. return '_.' + node.name
  291. break
  292. case 'VariableReference':
  293. return '_.ref("' + node.value + '")'
  294. break
  295. case 'NumberLiteral':
  296. return '{value: ' + node.value + '}'
  297. break
  298. case 'StringLiteral':
  299. return '{ value: \'' + node.value + '\' }'
  300. break
  301. default:
  302. throw {
  303. name: 'Compiler Error',
  304. message: 'Unexpected leaf in transformed AST: ' + node.type
  305. }
  306. break
  307. }
  308. }
  309. // const myInput = '(assign twelve 12) (assign myvar (add twelve (subtract 6 2))) (log myvar)'
  310. const myInput = fs.readFileSync(process.argv[2], { encoding: 'utf-8' })
  311. const preProcessedInput = preprocess(myInput)
  312. const myTokens = tokenizer(preProcessedInput)
  313. const parsedTree = parser(myTokens)
  314. const transformedTree = transformer(parsedTree)
  315. //console.log(JSON.stringify(transformedTree,null,2))
  316. const output = generator(transformedTree)
  317. fs.writeFileSync('output.js', output)