A basic compiler based on thejameskyle's super-tiny-compiler

compiler.js 8.8KB


  1. const fs = require('fs')
  /**
   * Expands "`source <path>" directives before tokenizing.
   *
   * Every line that starts with "`source" is replaced by the contents of the
   * named file, which is itself preprocessed so nested directives are expanded.
   * The path is handed to fs.readFileSync unchanged.
   *
   * @param {string} input - Raw source text.
   * @returns {string} The source text with all directives expanded.
   * @throws {Error} If a directive has no path; fs errors propagate unchanged.
   */
  function preprocess(input) {
    const lines = input.split('\n');
    // The loop index must be local: this function recurses, and a shared
    // (undeclared, hence global) index would be clobbered by the inner call,
    // making the outer loop skip lines after an include.
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      if (!line.startsWith('`source')) {
        continue;
      }
      // trim() drops a trailing "\r" from CRLF files; /\s+/ tolerates
      // several spaces between the directive and the path.
      const includePath = line.trim().split(/\s+/)[1];
      if (!includePath) {
        throw new Error('Compiler Error: `source directive is missing a file path');
      }
      const included = fs.readFileSync(includePath, { encoding: 'utf-8' });
      lines[i] = preprocess(included);
    }
    return lines.join('\n');
  }
  /**
   * Splits source text into a flat token list.
   *
   * The returned array starts with the raw input string itself (index 0),
   * followed by token objects of the form { type, value } where type is one of
   * 'paren', 'bar', 'string', 'number', 'name' or 'dollar'.
   *
   * Lexical rules:
   *  - "(" and ")" are 'paren' tokens, "|" is a 'bar' token.
   *  - ";" starts a comment that runs to the end of the line (or of the input).
   *  - Strings are delimited by matching ' or "; newlines inside a string are
   *    dropped and no escape sequences are interpreted.
   *  - A run of [0-9] is a 'number'; a run of [a-zA-Z_:] is a 'name'.
   *  - "$" followed by digits is a 'dollar' token whose value is the digits.
   *
   * @param {string} input - Preprocessed source text.
   * @returns {Array} The raw input followed by the token objects.
   * @throws {SyntaxError} On a string literal that is never closed.
   * @throws {TypeError} On a character that starts no known token.
   */
  function tokenizer(input) {
    // Hoisted so the patterns are not rebuilt on every character.
    const PAREN = /[()]/;
    const WHITESPACE = /\s/;
    const QUOTE = /['"]/;
    const DIGIT = /[0-9]/;
    const NAME_CHAR = /[a-zA-Z_:]/;

    // Slot 0 carries the raw source; the parser in this file starts reading
    // at index 1, so this element must stay.
    const tokens = [input];
    let pos = 0;

    while (pos < input.length) {
      const char = input[pos];

      if (PAREN.test(char)) {
        tokens.push({ type: 'paren', value: char });
        pos++;
        continue;
      }

      if (char === '|') {
        tokens.push({ type: 'bar', value: char });
        pos++;
        continue;
      }

      if (char === ';') {
        // Skip the comment. The bounds check matters: a comment on the last
        // line has no terminating newline.
        while (pos < input.length && input[pos] !== '\n') {
          pos++;
        }
        continue;
      }

      if (WHITESPACE.test(char)) {
        pos++;
        continue;
      }

      if (QUOTE.test(char)) {
        const delimiter = char;
        let text = '';
        pos++;
        while (pos < input.length && input[pos] !== delimiter) {
          if (input[pos] !== '\n') {
            text += input[pos];
          }
          pos++;
        }
        if (pos >= input.length) {
          throw new SyntaxError(
            'Compiler Error: unterminated string literal (missing closing ' + delimiter + ')'
          );
        }
        pos++; // step over the closing delimiter
        tokens.push({ type: 'string', value: text });
        continue;
      }

      if (DIGIT.test(char)) {
        let digits = '';
        while (pos < input.length && DIGIT.test(input[pos])) {
          digits += input[pos];
          pos++;
        }
        tokens.push({ type: 'number', value: digits });
        continue;
      }

      if (NAME_CHAR.test(char)) {
        let name = '';
        // Without the bounds check, reading past the end yields undefined,
        // which RegExp#test stringifies to "undefined" and happily matches.
        while (pos < input.length && NAME_CHAR.test(input[pos])) {
          name += input[pos];
          pos++;
        }
        tokens.push({ type: 'name', value: name });
        continue;
      }

      if (char === '$') {
        let index = '';
        pos++;
        if (pos < input.length && DIGIT.test(input[pos])) {
          while (pos < input.length && DIGIT.test(input[pos])) {
            index += input[pos];
            pos++;
          }
        } else {
          console.error("Compiler Error: $ must be followed by a digit [0-9]");
          process.exit(1);
        }
        tokens.push({ type: 'dollar', value: index });
        continue;
      }

      throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.");
    }

    return tokens;
  }
  /**
   * Builds the first-stage AST from a token list.
   *
   * Reading starts at index 1: the tokenizer in this file stores the raw source
   * string in slot 0. Leaf tokens map to NumberLiteral, VariableReference,
   * DollarVar, BarLiteral and StringLiteral nodes ({ type, value }). A "("
   * must be followed by a 'name' token and produces a FunctionCall node
   * { type, value, params } whose params are parsed recursively up to the
   * matching ")".
   *
   * @param {Array} input - Token list as produced by the tokenizer.
   * @returns {{type: string, body: Array}} A 'Prog' node holding the top-level nodes.
   * @throws {Error} With name 'Compiler Error' when a call does not start with
   *   a name or the input ends inside a call.
   * @throws {TypeError} On a token that cannot start an expression (for
   *   example a stray ")"); the message is the token type.
   */
  function parser(input) {
    const LEAF_NODE_TYPES = {
      number: 'NumberLiteral',
      name: 'VariableReference',
      dollar: 'DollarVar',
      bar: 'BarLiteral',
      string: 'StringLiteral',
    };

    let pos = 1;

    // Real Error instances keep a stack trace while still exposing the
    // `name` / `message` fields of the previously thrown plain objects.
    function compilerError(message) {
      const err = new Error(message);
      err.name = 'Compiler Error';
      return err;
    }

    function isClosingParen(token) {
      return token.type === 'paren' && token.value === ')';
    }

    function walk() {
      const token = input[pos];

      if (Object.prototype.hasOwnProperty.call(LEAF_NODE_TYPES, token.type)) {
        pos++;
        return { type: LEAF_NODE_TYPES[token.type], value: token.value };
      }

      if (token.type === 'paren' && token.value === '(') {
        const nameToken = input[++pos];
        if (nameToken === undefined) {
          throw compilerError('Unexpected end of input after "(".');
        }
        if (nameToken.type !== 'name') {
          throw compilerError('FunctionCall may only be type "name" not "' + nameToken.type + '".');
        }

        const node = {
          type: 'FunctionCall',
          value: nameToken.value,
          params: [],
        };
        pos++;

        for (;;) {
          const next = input[pos];
          if (next === undefined) {
            throw compilerError('Unexpected end of input: missing ")" for call to "' + node.value + '".');
          }
          if (isClosingParen(next)) {
            break;
          }
          node.params.push(walk());
        }

        pos++; // step over ")"
        return node;
      }

      throw new TypeError(token.type);
    }

    const ast = {
      type: 'Prog',
      body: [],
    };
    while (pos < input.length) {
      ast.body.push(walk());
    }
    return ast;
  }
  /**
   * Walks an AST depth-first, calling visitor callbacks on the way down.
   *
   * For every node, `visitor[node.type]` (if present) is invoked with
   * (node, parent) before the node's children are visited. 'Prog' nodes
   * descend into `body`, 'FunctionCall' nodes into `params`; the literal,
   * reference, dollar and bar node types have no children.
   *
   * @param {object} ast - Root node; it is visited with a null parent.
   * @param {object} visitor - Map from node type to callback(node, parent).
   * @throws {Error} With name 'Compiler Error' on an unrecognised node type
   *   (thrown after that node's visitor callback, if any, has run).
   */
  function traverser(ast, visitor) {
    function traverseArray(array, parent) {
      array.forEach((child) => {
        traverseNode(child, parent);
      });
    }

    function traverseNode(node, parent) {
      // Called detached on purpose: callbacks do not receive the visitor as `this`.
      const method = visitor[node.type];
      if (method) {
        method(node, parent);
      }

      switch (node.type) {
        case 'Prog':
          traverseArray(node.body, node);
          break;
        case 'FunctionCall':
          traverseArray(node.params, node);
          break;
        case 'VariableReference':
        case 'NumberLiteral':
        case 'StringLiteral':
        case 'DollarVar':
        case 'BarLiteral':
          // Leaf nodes: nothing to descend into.
          break;
        default: {
          // A real Error keeps the stack trace; `name` and `message` match
          // the fields of the plain object that used to be thrown here.
          const err = new Error('Unknown leaf in AST: ' + node.type);
          err.name = 'Compiler Error';
          throw err;
        }
      }
    }

    traverseNode(ast, null);
  }
  /**
   * Converts the parser's AST into the tree consumed by the code generator.
   *
   * Leaf nodes are copied as { type, value }. A FunctionCall node becomes
   * { type: 'FunctionCall', callee: { type: 'FunctionName', name }, args } and
   * is wrapped in { type: 'Statement', expr } unless its parent is itself a
   * FunctionCall. Output lists are reached through a `_context` property that
   * this function attaches to the input nodes, so the input AST is mutated.
   *
   * @param {object} ast - 'Prog' node produced by the parser.
   * @returns {{type: string, body: Array}} The new 'Prog' tree.
   */
  var transformer = function (ast) {
    const result = { type: 'Prog', body: [] };

    // Builds a visitor callback that appends a copy of a leaf node to the
    // output list of its parent.
    const copyLeafAs = (kind) => (node, parent) => {
      parent._context.push({ type: kind, value: node.value });
    };

    const visitor = {
      FunctionCall(node, parent) {
        const call = {
          type: 'FunctionCall',
          callee: { type: 'FunctionName', name: node.value },
          args: [],
        };
        // Children of this call will push into its argument list.
        node._context = call.args;

        const isNested = parent.type === 'FunctionCall';
        parent._context.push(isNested ? call : { type: 'Statement', expr: call });
      },
    };

    ['NumberLiteral', 'StringLiteral', 'BarLiteral', 'VariableReference', 'DollarVar']
      .forEach((kind) => {
        visitor[kind] = copyLeafAs(kind);
      });

    // Top-level nodes land directly in the new program body.
    ast._context = result.body;
    traverser(ast, visitor);
    return result;
  }
  // Not read or written by any code visible in this file; kept in case other
  // code depends on it.
  var escapeDepth = 0;

  /**
   * Emits JavaScript source text for a node of the transformed AST.
   *
   * Output per node type:
   *  - Prog: the stdlib require line followed by one line per body entry.
   *  - Statement: the wrapped expression plus ";".
   *  - FunctionCall: `callee(arg, arg, ...)`, with two exceptions:
   *      * names matching def/if/repeat emit the first argument followed by
   *        the remaining arguments as statements inside `function() { ... }`;
   *      * names matching include emit a `var _<name> = require(...)` line.
   *  - DollarVar: `arguments[n - 1]`.
   *  - BarLiteral: `}, function() {` (closes one callback, opens the next).
   *  - FunctionName: `_.name`, or `_ns.name` for `ns::name`.
   *  - VariableReference: `_.ref('name')`.
   *  - NumberLiteral / StringLiteral: an object literal with a `value` field.
   *
   * NOTE(review): the def/if/repeat and include checks are unanchored pattern
   * matches, so any name merely containing one of those words takes the
   * special path as well. Behaviour kept as-is; confirm whether that is intended.
   *
   * @param {object} node - Node of the transformed AST.
   * @returns {string} Generated JavaScript.
   * @throws {Error} With name 'Compiler Error' on an unknown node type.
   */
  function generator(node) {
    // Escapes single quotes that are not already escaped, so the value can sit
    // inside a single-quoted literal. Backslashes are left alone on purpose:
    // the tokenizer keeps them verbatim, and they act as escapes in the output.
    function escapeSingleQuotes(text) {
      return String(text).replace(/(\\*)'/g, (match, slashes) => (
        slashes.length % 2 === 0 ? slashes + "\\'" : match
      ));
    }

    switch (node.type) {
      case 'Prog': {
        const program = node.body.map((child) => generator(child));
        program.unshift('var _ = require("./lib/stdlib.js")(this)');
        return program.join('\n');
      }

      case 'Statement':
        return generator(node.expr) + ';';

      case 'FunctionCall': {
        const calleeName = node.callee.name;

        if (calleeName.match('(def|if|repeat)')) {
          // Block form: first argument stays a plain argument, everything
          // after it becomes the body of a callback.
          const pieces = node.args.map((arg, i) => {
            if (i === 0) {
              return generator(arg) + ', ';
            }
            if (i === 1) {
              return 'function() { \n' + generator(arg) + ';\n';
            }
            return generator(arg) + ';\n';
          });
          return generator(node.callee) + '(' + pieces.join('') + '})';
        }

        if (calleeName.match('include')) {
          // Include is a special function and we write the generation ourselves.
          // Exactly one argument is required: zero would crash on args[0] below.
          if (node.args.length !== 1) {
            console.error("Compiler Error: (include) takes exactly 1 argument!");
            process.exit(1);
            return undefined;
          }
          const libName = node.args[0].value;
          const lib = './lib/' + libName + '.js';
          return 'var _' + libName + ' = require("' + lib + '")(this)';
        }

        return generator(node.callee) + '(' + node.args.map((arg) => generator(arg)).join(', ') + ')';
      }

      case 'DollarVar':
        // $1 is the first argument, hence the shift to a zero-based index.
        return 'arguments[' + (+node.value - 1) + ']';

      case 'BarLiteral':
        return '}, function() {';

      case 'FunctionName': {
        if (node.name.match("::")) {
          const [namespace, func] = node.name.split("::");
          return "_" + namespace + "." + func;
        }
        return '_.' + node.name;
      }

      case 'VariableReference':
        return '_.ref(\'' + node.value + '\')';

      case 'NumberLiteral':
        return '{value: ' + node.value + '}';

      case 'StringLiteral':
        return '{ value: \'' + escapeSingleQuotes(node.value) + '\' }';

      default: {
        const err = new Error('Unexpected leaf in transformed AST: ' + node.type);
        err.name = 'Compiler Error';
        throw err;
      }
    }
  }
  // Command-line driver: node compiler.js <source-file>
  // Reads the source file, runs it through preprocess -> tokenizer -> parser ->
  // transformer -> generator, and writes the result next to the input as
  // "<source-file>.js".
  //
  // Example source:
  //   (assign twelve 12) (assign myvar (add twelve (subtract 6 2))) (log myvar)
  const fileNameIn = process.argv[2];
  if (!fileNameIn) {
    // Without this check fs.readFileSync(undefined) fails with an unhelpful
    // argument-type error.
    console.error('Usage: node compiler.js <source-file>');
    process.exit(1);
  }
  const fileNameOut = fileNameIn + '.js';
  const myInput = fs.readFileSync(fileNameIn, { encoding: 'utf-8' });
  const preProcessedInput = preprocess(myInput);
  const myTokens = tokenizer(preProcessedInput);
  const parsedTree = parser(myTokens);
  const transformedTree = transformer(parsedTree);
  // Debug aid: console.log(JSON.stringify(transformedTree, null, 2))
  const output = generator(transformedTree);
  fs.writeFileSync(fileNameOut, output);