A basic compiler based off of thejameskyle's super-tiny-compiler

compiler.js 9.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. const fs = require('fs')
  /**
   * Expands `source directives before tokenizing.
   *
   * Every line that starts with "`source" is replaced by the contents of the
   * file named by the second space-separated word on that line. Included files
   * are preprocessed recursively, so they may contain `source lines themselves.
   *
   * @param {string} input - Raw program text.
   * @returns {string} Program text with every `source line expanded in place.
   * @throws Whatever fs.readFileSync throws when an included file cannot be read.
   */
  var preprocess = function (input) {
    const lines = input.split('\n')
    // The counter and current line must be block-scoped: this function calls
    // itself, and a shared (implicitly global) counter would be overwritten by
    // the nested call, making the outer loop skip the remaining lines.
    for (let index = 0; index < lines.length; index++) {
      const line = lines[index]
      if (line.startsWith('`source')) {
        const included = fs.readFileSync(line.split(' ')[1], { encoding: 'utf-8' })
        lines[index] = preprocess(included)
      }
    }
    return lines.join('\n')
  }
  /**
   * Splits program text into a flat token list.
   *
   * The returned array starts with the raw input string at index 0, followed
   * by token objects of the form { type, value } where type is one of
   * 'paren', 'bar', 'string', 'number', 'name', 'dollar' or 'argv'.
   * Whitespace and ';' comments (which run to the end of the line) produce no
   * tokens. Newlines inside string literals are dropped from the value.
   *
   * @param {string} input - Preprocessed program text.
   * @returns {Array} [input, ...tokens]
   * @throws {SyntaxError} When a string literal is never closed.
   * @throws {TypeError} When an unrecognised character is encountered.
   * Exits the process with status 1 when '$' is not followed by a digit.
   */
  var tokenizer = function (input) {
    const parens = /[()]/
    const blank = /\s/
    const stringChars = /['"]/
    const numbers = /[0-9]/
    const characters = /[a-zA-Z_:]/
    const argv = /[{}]/

    let pos = 0
    // Index 0 carries the raw source text; the real tokens start at index 1.
    const tokens = [input]

    // Consumes characters while they match `pattern` and returns them.
    // The bounds check matters: testing a regex against `undefined` tests the
    // string "undefined", which matches the identifier pattern and would
    // otherwise loop forever at the end of the input.
    const takeWhile = (pattern) => {
      const start = pos
      while (pos < input.length && pattern.test(input[pos])) {
        pos++
      }
      return input.slice(start, pos)
    }

    while (pos < input.length) {
      const char = input[pos]

      if (parens.test(char)) {
        tokens.push({ type: 'paren', value: char })
        pos++
        continue
      }

      if (char === '|') {
        tokens.push({ type: 'bar', value: char })
        pos++
        continue
      }

      if (char === ';') {
        // Comment: skip to the end of the line, or to the end of the input
        // when the last line has no trailing newline.
        while (pos < input.length && input[pos] !== '\n') {
          pos++
        }
        continue
      }

      if (blank.test(char)) {
        pos++
        continue
      }

      if (stringChars.test(char)) {
        const delimiter = char
        let text = ''
        pos++
        while (pos < input.length && input[pos] !== delimiter) {
          if (input[pos] !== '\n') {
            text += input[pos]
          }
          pos++
        }
        if (pos >= input.length) {
          throw new SyntaxError('Unterminated string literal: missing closing ' + delimiter)
        }
        pos++ // step over the closing delimiter
        tokens.push({ type: 'string', value: text })
        continue
      }

      if (numbers.test(char)) {
        tokens.push({ type: 'number', value: takeWhile(numbers) })
        continue
      }

      if (characters.test(char)) {
        tokens.push({ type: 'name', value: takeWhile(characters) })
        continue
      }

      if (char === '$') {
        pos++
        const digits = takeWhile(numbers)
        if (digits === '') {
          console.error("Compiler Error: $ must be followed by a digit [0-9]")
          process.exit(1)
        }
        tokens.push({ type: 'dollar', value: digits })
        continue
      }

      if (argv.test(char)) {
        tokens.push({ type: 'argv', value: char })
        pos++
        continue
      }

      throw new TypeError("I'm not sure what you are telling me :( Ask my creator to teach me what a: " + char + " is.")
    }

    return tokens
  }
  /**
   * Builds a syntax tree from the tokenizer's output.
   *
   * Index 0 of `input` is skipped (the tokenizer stores the raw source text
   * there); parsing starts at index 1.
   *
   * Produced node types: NumberLiteral, VariableReference, DollarVar,
   * BarLiteral, StringLiteral, ArgvLiteral (all { type, value }) and
   * FunctionCall ({ type, value, params }).
   *
   * @param {Array} input - Token list as returned by the tokenizer.
   * @returns {{type: 'Prog', body: Array}} Root node of the syntax tree.
   * @throws {Error} With name 'Compiler Error' for a malformed call or argv
   *   literal, or when the tokens run out in the middle of a construct.
   * @throws {TypeError} For a token that cannot start an expression; the
   *   message is the token type.
   */
  var parser = function (input) {
    let pos = 1

    // Tokens that map one-to-one onto a leaf node of the tree.
    const leafNodeTypes = new Map([
      ['number', 'NumberLiteral'],
      ['name', 'VariableReference'],
      ['dollar', 'DollarVar'],
      ['bar', 'BarLiteral'],
      ['string', 'StringLiteral']
    ])

    // Keeps the { name, message } shape used for compiler errors while also
    // providing a stack trace.
    function compilerError(message) {
      const error = new Error(message)
      error.name = 'Compiler Error'
      return error
    }

    // Returns the current token, failing clearly instead of dereferencing
    // undefined when the input ends early (for example a missing ")").
    function current(expectation) {
      if (pos >= input.length) {
        throw compilerError('Unexpected end of input: ' + expectation)
      }
      return input[pos]
    }

    function walk() {
      let token = current('expected an expression.')

      if (leafNodeTypes.has(token.type)) {
        pos++
        return {
          type: leafNodeTypes.get(token.type),
          value: token.value
        }
      }

      if (token.type === 'paren' && token.value === '(') {
        pos++
        token = current('expected a function name after "(".')
        if (token.type !== 'name') {
          throw compilerError('FunctionCall may only be type "name" not "' + token.type + '".')
        }
        const node = {
          type: 'FunctionCall',
          value: token.value,
          params: []
        }
        pos++
        token = current('missing ")" for call to "' + node.value + '".')
        // Everything up to the matching ")" is a parameter; nested calls are
        // consumed by the recursive walk().
        while (!(token.type === 'paren' && token.value === ')')) {
          node.params.push(walk())
          token = current('missing ")" for call to "' + node.value + '".')
        }
        pos++
        return node
      }

      if (token.type === 'argv' && token.value === '{') {
        pos++
        token = current('expected an integer after "{".')
        if (token.type !== 'number') {
          throw compilerError('argv may only take integer values.')
        }
        const node = {
          type: 'ArgvLiteral',
          value: token.value
        }
        pos++
        token = current('missing "}" for argv literal.')
        if (token.type !== 'argv' || token.value !== '}') {
          throw compilerError('argv literals take one integer value and nothing else.')
        }
        pos++
        return node
      }

      throw new TypeError(token.type)
    }

    const ast = {
      type: 'Prog',
      body: []
    }
    while (pos < input.length) {
      ast.body.push(walk())
    }
    return ast
  }
  /**
   * Walks the syntax tree depth-first, parents before children.
   *
   * For each node, the visitor function stored under the node's type (if any)
   * is called with (node, parent) before the node's children are visited.
   * 'Prog' nodes descend into `body`, 'FunctionCall' nodes into `params`; the
   * remaining known node types have no children.
   *
   * @param {object} ast - Root node; it is visited with a null parent.
   * @param {object} visitor - Map from node type to a (node, parent) callback.
   * @throws {{name: string, message: string}} For a node type that is not known.
   */
  var traverser = function (ast, visitor) {
    const childless = [
      'VariableReference',
      'NumberLiteral',
      'StringLiteral',
      'DollarVar',
      'BarLiteral',
      'ArgvLiteral'
    ]

    const visit = (node, parent) => {
      const handler = visitor[node.type]
      if (handler) {
        handler(node, parent)
      }

      if (node.type === 'Prog') {
        visitEach(node.body, node)
        return
      }
      if (node.type === 'FunctionCall') {
        visitEach(node.params, node)
        return
      }
      if (childless.includes(node.type)) {
        return
      }
      throw {
        name: 'Compiler Error',
        message: 'Unknown leaf in AST: ' + node.type
      }
    }

    const visitEach = (children, owner) => {
      children.forEach((child) => {
        visit(child, owner)
      })
    }

    visit(ast, null)
  }
  /**
   * Converts the parser's tree into the tree consumed by the generator.
   *
   * Leaf nodes are copied as { type, value }. A FunctionCall becomes
   * { type: 'FunctionCall', callee: { type: 'FunctionName', name }, args },
   * and is wrapped in { type: 'Statement', expr } unless its parent is itself
   * a FunctionCall.
   *
   * Side effect: a `_context` array is attached to the input root and to each
   * FunctionCall node of the input tree; children push their output into the
   * parent's `_context`.
   *
   * @param {object} ast - Tree produced by the parser.
   * @returns {{type: 'Prog', body: Array}} The transformed tree.
   */
  var transformer = function (ast) {
    const newAst = { type: 'Prog', body: [] }
    ast._context = newAst.body

    // All leaf kinds are copied the same way, so they share one handler.
    const copyLeaf = (node, parent) => {
      parent._context.push({
        type: node.type,
        value: node.value
      })
    }

    const leafTypes = [
      'NumberLiteral',
      'StringLiteral',
      'BarLiteral',
      'VariableReference',
      'DollarVar',
      'ArgvLiteral'
    ]
    const visitor = {}
    for (const leafType of leafTypes) {
      visitor[leafType] = copyLeaf
    }

    visitor.FunctionCall = (node, parent) => {
      const call = {
        type: 'FunctionCall',
        callee: {
          type: 'FunctionName',
          name: node.value
        },
        args: []
      }
      // Parameters of this call will be pushed into its args array.
      node._context = call.args

      const isNested = parent.type === 'FunctionCall'
      parent._context.push(isNested ? call : { type: 'Statement', expr: call })
    }

    traverser(ast, visitor)
    return newAst
  }
  // Not referenced anywhere in the visible source; kept so that any code
  // outside this view that reads it keeps working.
  var escapeDepth = 0

  /**
   * Emits JavaScript source text for a node of the transformed tree.
   *
   * - Prog: one line per body node, preceded by the stdlib require line.
   * - Statement: the wrapped expression followed by ';'.
   * - FunctionCall: a normal call `callee(arg, arg)`, except that
   *   - a callee name matching "include" emits one require line per argument;
   *   - a callee name matching "def", "if" or "repeat" emits the first
   *     argument as-is and wraps the remaining ones as statements in a
   *     `function() { ... }` body.
   * - DollarVar `$n`: `arguments[n-1]`.
   * - BarLiteral: closes the current function body and opens another one.
   * - FunctionName: `_.name`, or `_namespace.func` for `namespace::func`.
   * - VariableReference, NumberLiteral, StringLiteral: runtime value wrappers.
   * - ArgvLiteral `{n}`: all CLI arguments joined by spaces for n = 0,
   *   otherwise `_.__get_arg(n+1)`.
   *
   * @param {object} node - Node produced by the transformer.
   * @returns {string} Generated JavaScript.
   * @throws {Error} With name 'Compiler Error' for an unknown node type.
   */
  var generator = function (node) {
    // NOTE(review): both patterns match anywhere inside the name (so e.g. a
    // function called "notify" is treated like "if"). Left unchanged because
    // the names defined by the runtime library are not visible here - confirm
    // whether exact matching is intended.
    const blockForms = /(def|if|repeat)/
    const includeForm = /include/

    // Backslash-escapes bare single quotes so the text can sit inside a
    // single-quoted literal. Existing backslash pairs (\n, \', \\) are left
    // untouched so escapes written in the source keep their meaning.
    const escapeSingleQuotes = (text) =>
      String(text).replace(/\\[\s\S]|'/g, (match) => (match === "'" ? "\\'" : match))

    switch (node.type) {
      case 'Prog': {
        const program = node.body.map(generator)
        program.unshift('var _ = require("./libjs/stdlib.js")(this)')
        return program.join('\n')
      }

      case 'Statement':
        return (generator(node.expr) + ';')

      case 'FunctionCall': {
        const name = node.callee.name

        if (blockForms.test(name)) {
          // First argument is passed as-is; the remaining ones become the
          // statements of a callback body.
          const pieces = node.args.map((arg, index) => {
            if (index === 0) {
              return generator(arg) + ', '
            }
            if (index === 1) {
              return 'function() { \n' + generator(arg) + ';\n'
            }
            return generator(arg) + ';\n'
          })
          return (generator(node.callee) + '(' + pieces.join('') + '})')
        }

        if (includeForm.test(name)) {
          // Include is a special function and we will write the generation ourselves
          return node.args.map((arg) => {
            const lib = './libjs/' + arg.value + '.js'
            return ('var _' + arg.value + ' = require("' + lib + '")(this)')
          }).join("\n")
        }

        return (generator(node.callee) + '(' + node.args.map(generator).join(', ') + ')')
      }

      case 'DollarVar':
        // $1 is the first argument, hence the shift to a zero-based index.
        return 'arguments[' + (+node.value - 1) + ']'

      case 'BarLiteral':
        return '}, function() {'

      case 'FunctionName':
        if (node.name.includes('::')) {
          const [namespace, func] = node.name.split('::')
          return '_' + namespace + '.' + func
        }
        return '_.' + node.name

      case 'VariableReference':
        return '_.ref(\'' + node.value + '\')'

      case 'NumberLiteral':
        return '{value: ' + node.value + '}'

      case 'StringLiteral':
        return '{ value: \'' + escapeSingleQuotes(node.value) + '\' }'

      case 'ArgvLiteral':
        // The value arrives as a string of digits, so it has to be converted
        // before comparing; a strict comparison against the number 0 would
        // never succeed.
        if (Number(node.value) === 0) {
          return '{ value: process.argv.slice(2).join(\' \') }'
        }
        return '_.__get_arg(' + (+node.value + 1) + ')'

      default: {
        const error = new Error('Unexpected leaf in transformed AST: ' + node.type)
        error.name = 'Compiler Error'
        throw error
      }
    }
  }
  // Command-line driver: compiles the file named by the first CLI argument and
  // writes the generated JavaScript next to it as "<input>.js".
  const fileNameIn = process.argv[2]
  if (!fileNameIn) {
    // Without this guard the missing argument reaches readFileSync and fails
    // with an error that does not tell the user what went wrong.
    console.error('Usage: node ' + process.argv[1] + ' <source-file>')
    process.exit(1)
  }
  const fileNameOut = fileNameIn + '.js'
  const myInput = fs.readFileSync(fileNameIn, { encoding: 'utf-8' })
  const preProcessedInput = preprocess(myInput) // Run the preprocessor to evaluate any `source's
  const myTokens = tokenizer(preProcessedInput) // Convert our input into individual tokens
  const parsedTree = parser(myTokens) // Convert these tokens into a syntax tree
  const transformedTree = transformer(parsedTree) // Now put the tree into an easily traversable format for our generator
  const output = generator(transformedTree) // Generate the final JS code
  fs.writeFileSync(fileNameOut, output)