A basic compiler based on thejameskyle's super-tiny-compiler.

compiler.js 10KB


  1. const fs = require('fs')
  // Names of every file handed to preprocess() so far; used to make sure a
  // file is only pulled in once, however many times it is `source'd.
  const sourcedFiles = []

  /**
   * Expands `source directives in a piece of source text.
   *
   * Every line that starts with "`source" is replaced by the (recursively
   * preprocessed) contents of the file named by the second space-separated
   * word on that line. A file that has already been sourced is not read
   * again: its directive line is replaced by '\n' and a notice is logged.
   *
   * @param {string} fileNameIn - Name of the file `input` came from; recorded
   *   in `sourcedFiles` so it is not sourced again later.
   * @param {string} input - The text to preprocess.
   * @returns {string} The text with all `source directives expanded.
   * @throws Whatever fs.readFileSync throws when a sourced file cannot be read.
   */
  var preprocess = function (fileNameIn, input) {
    const inputArr = input.split('\n')
    sourcedFiles.push(fileNameIn)
    // The counter and the current line MUST be local: this function calls
    // itself, and a shared (implicitly global) counter would be overwritten
    // by the nested call, making the outer loop skip the rest of its lines.
    for (let i = 0; i < inputArr.length; i++) {
      const line = inputArr[i]
      if (!line.startsWith('`source')) {
        continue
      }
      const fileName = line.split(' ')[1]
      if (sourcedFiles.indexOf(fileName) === -1) {
        const contents = fs.readFileSync(fileName, { encoding: 'utf-8' })
        inputArr[i] = preprocess(fileName, contents)
      } else {
        inputArr[i] = '\n'
        console.log('Already sourced file: ' + fileName + '; Skipping!')
      }
    }
    return inputArr.join('\n')
  }
  /**
   * Splits source text into a flat list of tokens.
   *
   * The FIRST element of the returned array is the raw input string itself;
   * the parser in this file starts reading at index 1 for that reason. Every
   * later element is a `{ type, value }` object whose type is one of
   * 'paren', 'bar', 'string', 'number', 'name', 'dollar' or 'argv'.
   *
   * Whitespace is skipped, and so is everything from a ';' up to (but not
   * including) the end of its line. Newlines inside string literals are
   * dropped from the string's value.
   *
   * @param {string} input - Preprocessed source text.
   * @returns {Array} The raw input followed by the tokens, in source order.
   * @throws {TypeError} On a character that starts no known token, or on a
   *   string literal that is never closed.
   * Note: a '$' not followed by a digit prints an error and exits the process
   * with status 1 instead of throwing.
   */
  var tokenizer = function (input) {
    // Character classes, built once instead of on every loop iteration.
    const parens = /[()]/
    const whitespace = /[;\s]/ // comments count as whitespace: they are ignored
    const stringChars = /['"]/
    const numbers = /[0-9]/
    const characters = /[a-zA-Z_:]/
    const argv = /[{}]/

    const tokens = [input]
    let pos = 0

    while (pos < input.length) {
      let char = input[pos]

      if (parens.test(char)) {
        tokens.push({ type: 'paren', value: char })
        pos++
        continue
      }

      if (char === '|') {
        tokens.push({ type: 'bar', value: char })
        pos++
        continue
      }

      if (whitespace.test(char)) {
        if (char === ';') {
          // Skip the rest of the line. The bounds check matters: a comment on
          // the last line may not be followed by a newline at all.
          while (pos < input.length && input[pos] !== '\n') {
            pos++
          }
        } else {
          pos++
        }
        continue
      }

      if (stringChars.test(char)) {
        const myDelim = char
        let stringString = ''
        char = input[++pos]
        while (char !== myDelim) {
          if (char === undefined) {
            // Ran off the end of the input without seeing the closing quote.
            throw new TypeError('Unterminated string literal: missing closing ' + myDelim)
          }
          if (char !== '\n') {
            stringString += char
          }
          char = input[++pos]
        }
        pos++ // step over the closing delimiter
        tokens.push({ type: 'string', value: stringString })
        continue
      }

      if (numbers.test(char)) {
        let numberString = ''
        while (char !== undefined && numbers.test(char)) {
          numberString += char
          char = input[++pos]
        }
        tokens.push({ type: 'number', value: numberString })
        continue
      }

      if (characters.test(char)) {
        let name = ''
        // `char` becomes undefined at end of input; RegExp#test would coerce
        // it to the string "undefined", which matches, so check explicitly.
        while (char !== undefined && characters.test(char)) {
          name += char
          char = input[++pos]
        }
        tokens.push({ type: 'name', value: name })
        continue
      }

      if (char === '$') {
        let name = ''
        char = input[++pos]
        if (char === undefined || !numbers.test(char)) {
          console.error("Compiler Error: $ must be followed by a digit [0-9]")
          process.exit(1)
        }
        while (char !== undefined && numbers.test(char)) {
          name += char
          char = input[++pos]
        }
        tokens.push({ type: 'dollar', value: name })
        continue
      }

      if (argv.test(char)) {
        tokens.push({ type: 'argv', value: char })
        pos++
        continue
      }

      throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.")
    }

    return tokens
  }
  /**
   * Turns the tokenizer's output into a syntax tree.
   *
   * @param {Array} input - Token list as produced by the tokenizer: index 0
   *   holds the raw source string and is skipped; real tokens start at 1.
   * @returns {{type: string, body: Array}} A 'Prog' node whose body holds one
   *   node per top-level expression. Node types produced: NumberLiteral,
   *   VariableReference, DollarVar, BarLiteral, StringLiteral, ArgvLiteral
   *   (each `{ type, value }`) and FunctionCall (`{ type, value, params }`).
   * @throws {Error} With `name === 'Compiler Error'` when a call does not
   *   start with a name, an argv literal is malformed, or the input ends in
   *   the middle of a call / argv literal.
   * @throws {TypeError} With the token type as message when a token cannot
   *   start an expression (for example a stray ')').
   */
  var parser = function (input) {
    // Token types that map one-to-one onto a leaf node type.
    const LEAF_NODE_TYPES = {
      number: 'NumberLiteral',
      name: 'VariableReference',
      dollar: 'DollarVar',
      bar: 'BarLiteral',
      string: 'StringLiteral'
    }

    let pos = 1 // index 0 is the raw source string, not a token

    // Builds a real Error (so a stack trace is available) that still exposes
    // the `name` / `message` pair used for compiler errors in this file.
    function compilerError(message) {
      const err = new Error(message)
      err.name = 'Compiler Error'
      return err
    }

    // Moves to the next token and returns it, failing with a compiler error
    // instead of returning undefined when the input is exhausted.
    function advance(expected) {
      const token = input[++pos]
      if (token === undefined) {
        throw compilerError('Unexpected end of input: expected ' + expected + '.')
      }
      return token
    }

    function walk() {
      let token = input[pos]

      if (Object.prototype.hasOwnProperty.call(LEAF_NODE_TYPES, token.type)) {
        pos++
        return { type: LEAF_NODE_TYPES[token.type], value: token.value }
      }

      if (token.type === 'paren' && token.value === '(') {
        token = advance('a function name')
        if (token.type !== 'name') {
          throw compilerError('FunctionCall may only be type "name" not "' + token.type + '".')
        }
        const node = {
          type: 'FunctionCall',
          value: token.value,
          params: []
        }
        pos++
        // Collect parameters until the matching ')' is reached.
        while (true) {
          token = input[pos]
          if (token === undefined) {
            throw compilerError('Unexpected end of input: missing ")" for call to "' + node.value + '".')
          }
          if (token.type === 'paren' && token.value === ')') {
            break
          }
          node.params.push(walk())
        }
        pos++ // step over the closing ')'
        return node
      }

      if (token.type === 'argv' && token.value === '{') {
        token = advance('an integer')
        if (token.type !== 'number') {
          throw compilerError('argv may only take integer values.')
        }
        const node = {
          type: 'ArgvLiteral',
          value: token.value
        }
        token = advance('"}"')
        if (token.type !== 'argv' || token.value !== '}') {
          throw compilerError('argv literals take one integer value and nothing else.')
        }
        pos++ // step over the closing '}'
        return node
      }

      throw new TypeError(token.type)
    }

    const ast = {
      type: 'Prog',
      body: []
    }
    while (pos < input.length) {
      ast.body.push(walk())
    }
    return ast
  }
  /**
   * Walks a syntax tree depth-first, calling visitor callbacks on the way.
   *
   * For every node, `visitor[node.type]` (when present) is called with
   * `(node, parent)` BEFORE the node's children are visited. Only 'Prog'
   * (children in `body`) and 'FunctionCall' (children in `params`) have
   * children; all other known node types are leaves.
   *
   * @param {object} ast - Root node of the tree; visited with parent `null`.
   * @param {object} visitor - Map from node type to `function (node, parent)`.
   * @throws {Error} With `name === 'Compiler Error'` when a node has a type
   *   this function does not know. The visitor callback for that type, if
   *   any, has already run by then.
   */
  var traverser = function (ast, visitor) {
    function traverseArray(array, parent) {
      array.forEach(function (child) {
        traverseNode(child, parent)
      })
    }

    function traverseNode(node, parent) {
      const method = visitor[node.type]
      if (method) {
        method(node, parent)
      }

      switch (node.type) {
        case 'Prog':
          traverseArray(node.body, node)
          break
        case 'FunctionCall':
          traverseArray(node.params, node)
          break
        // Leaf nodes: nothing below them to visit.
        case 'VariableReference':
        case 'NumberLiteral':
        case 'StringLiteral':
        case 'DollarVar':
        case 'BarLiteral':
        case 'ArgvLiteral':
          break
        default: {
          // A real Error keeps the stack trace; `name` and `message` carry
          // the same values the compiler errors in this file use.
          const err = new Error('Unknown leaf in AST: ' + node.type)
          err.name = 'Compiler Error'
          throw err
        }
      }
    }

    traverseNode(ast, null)
  }
  /**
   * Builds the generator-facing tree from the parser's tree.
   *
   * Leaf nodes are copied as `{ type, value }`. A FunctionCall becomes
   * `{ type: 'FunctionCall', callee: { type: 'FunctionName', name }, args }`,
   * and is additionally wrapped in `{ type: 'Statement', expr }` unless its
   * parent is itself a FunctionCall.
   *
   * Side effect: a `_context` property is attached to the input `ast` and to
   * each FunctionCall node in it; it points at the output array that node's
   * children are appended to.
   *
   * @param {object} ast - 'Prog' node produced by the parser.
   * @returns {{type: string, body: Array}} The new 'Prog' node.
   */
  var transformer = function (ast) {
    const result = { type: 'Prog', body: [] }

    // Children of the root are appended straight to the new program body.
    ast._context = result.body

    // Every leaf is carried over unchanged into its parent's output array.
    const copyLeaf = function (node, parent) {
      parent._context.push({ type: node.type, value: node.value })
    }

    const convertCall = function (node, parent) {
      const call = {
        type: 'FunctionCall',
        callee: { type: 'FunctionName', name: node.value },
        args: []
      }
      // Parameters of this call will be collected into `call.args`.
      node._context = call.args

      const isNested = parent.type === 'FunctionCall'
      parent._context.push(isNested ? call : { type: 'Statement', expr: call })
    }

    traverser(ast, {
      NumberLiteral: copyLeaf,
      StringLiteral: copyLeaf,
      BarLiteral: copyLeaf,
      VariableReference: copyLeaf,
      DollarVar: copyLeaf,
      ArgvLiteral: copyLeaf,
      FunctionCall: convertCall
    })

    return result
  }
  // Not read or written anywhere in the visible part of this file; kept in
  // case code outside this view relies on it.
  var escapeDepth = 0

  // Makes `text` safe to embed between single quotes in generated JS by
  // escaping bare single quotes. Existing backslash sequences (\n, \', \\ ...)
  // are left exactly as written, so they keep reaching the generated code.
  function escapeSingleQuotes(text) {
    return text.replace(/\\[\s\S]|'/g, (match) => (match === "'" ? "\\'" : match))
  }

  /**
   * Generates JavaScript source text for a node of the transformed tree.
   *
   * - Prog: one line per body node, preceded by a line that requires
   *   `<libjsPath>/stdlib.js` into `_`.
   * - Statement: the wrapped expression followed by ';'.
   * - FunctionCall: `callee(arg, arg, ...)`, with two special forms:
   *   names matching def/if/repeat emit the first argument as-is and wrap
   *   the remaining arguments in a `function() { ... }` body; names matching
   *   include emit one `var _<name> = require(...)` line per argument.
   * - FunctionName: `_.name`, or `_ns.name` for `ns::name`.
   * - DollarVar `$n`: `arguments[n-1]`.
   * - BarLiteral: closes the current function body and opens a new one.
   * - VariableReference / NumberLiteral / StringLiteral / ArgvLiteral:
   *   the corresponding runtime expression.
   *
   * Reads the module-level `libjsPath` for Prog and include nodes.
   *
   * @param {object} node - Node of the tree built by the transformer.
   * @returns {string} Generated JavaScript.
   * @throws {Error} With `name === 'Compiler Error'` on an unknown node type.
   */
  var generator = function (node) {
    switch (node.type) {
      case 'Prog': {
        const program = node.body.map(generator)
        program.unshift('var _ = require("' + libjsPath + '/stdlib.js")(this)')
        return program.join('\n')
      }

      case 'Statement':
        return generator(node.expr) + ';'

      case 'FunctionCall': {
        const calleeName = node.callee.name

        // NOTE(review): String#match with a string builds an unanchored
        // RegExp, so this is a substring test - a function named e.g. "diff"
        // or "undefine" also takes the block form. Left as-is because
        // existing programs may depend on it; confirm before anchoring.
        if (calleeName.match('(def|if|repeat)')) {
          // Block form: first argument verbatim, the rest become statements
          // inside a function body that is closed after the last argument.
          const pieces = node.args.map((arg, index) => {
            if (index === 0) {
              return generator(arg) + ', '
            }
            const opener = index === 1 ? 'function() { \n' : ''
            return opener + generator(arg) + ';\n'
          })
          return generator(node.callee) + '(' + pieces.join('') + '})'
        }

        // NOTE(review): also a substring test (see above).
        if (calleeName.match('include')) {
          // Include is a special function and we write the generation ourselves
          return node.args.map((arg) => {
            const lib = libjsPath + '/' + arg.value + '.js'
            return 'var _' + arg.value + ' = require("' + lib + '")(this)'
          }).join('\n')
        }

        return generator(node.callee) + '(' + node.args.map(generator).join(', ') + ')'
      }

      case 'DollarVar':
        // $1 is the first argument, i.e. arguments[0]
        return 'arguments[' + (+node.value - 1) + ']'

      case 'BarLiteral':
        return '}, function() {'

      case 'FunctionName': {
        if (node.name.match('::')) {
          const [namespace, func] = node.name.split('::')
          return '_' + namespace + '.' + func
        }
        return '_.' + node.name
      }

      case 'VariableReference':
        return '_.ref(\'' + node.value + '\')'

      case 'NumberLiteral':
        return '{value: ' + node.value + '}'

      case 'StringLiteral':
        // A "double-quoted" source string may contain single quotes, which
        // would otherwise end the generated literal early.
        return '{ value: \'' + escapeSingleQuotes(node.value) + '\' }'

      case 'ArgvLiteral':
        if (node.value === '0') {
          return '{ value: process.argv.slice(2).join(\' \') }'
        }
        return '_.__get_arg(' + (+node.value + 1) + ')'

      default: {
        const err = new Error('Unexpected leaf in transformed AST: ' + node.type)
        err.name = 'Compiler Error'
        throw err
      }
    }
  }
  // ---- Command-line driver ----
  // Compiles the file named by the first command-line argument and writes the
  // generated JavaScript next to it as "<input>.js".

  // Directory the generated code requires its runtime libraries from.
  // An unset environment variable reads as undefined (not ''), so fall back
  // to the default for any empty value.
  let libjsPath = process.env['LIBJS_PATH']
  if (!libjsPath) {
    libjsPath = './libjs'
  }

  const fileNameIn = process.argv[2]
  if (!fileNameIn) {
    console.error('Usage: node compiler.js <source-file>')
    process.exit(1)
  }
  const fileNameOut = fileNameIn + '.js'
  const myInput = fs.readFileSync(fileNameIn, { encoding: 'utf-8' })

  const preProcessedInput = preprocess(fileNameIn, myInput) // Run the preprocessor to evaluate any `source's
  const myTokens = tokenizer(preProcessedInput) // Convert our input into individual tokens
  const parsedTree = parser(myTokens) // Convert these tokens into a syntax tree
  const transformedTree = transformer(parsedTree) // Now put the tree into an easily traversable format for our generator
  const output = generator(transformedTree) // Generate the final JS code
  fs.writeFileSync(fileNameOut, output)