实现简易编译器和解释器
大约 2 分钟
学习并尝试使用ts重构实现简易编译器和解释器
目录
最终效果
解释器测试
解释器测试:test_intrepreter.ts
import { intrepreter } from "../index";
async function test_intrepreter() {
intrepreter(
`(
print
(
add
"hello world!!!" "\n"
"1+1=" (add 1 1) "\n"
"2-2=" (sub 2 2) "\n"
"3*3=" (mut 3 3) "\n"
"4/4=" (div 4 4) "\n"
)
)`
);
}
async function main() {
test_intrepreter();
}
main();
测试结果:test_intrepreter_output.txt
hello world!!!
1+1=2
2-2=0
3*3=9
4/4=1
编译器测试
编译器测试:test_compiler.ts
import { compiler } from "../index";
import { visualizeEscapes } from "../utils";
async function test_compiler() {
let result = compiler(
`(
print
(
add
"hello world!!!" "\n"
"1+1=" (add 1 1) "\n"
"2-2=" (sub 2 2) "\n"
"3*3=" (mut 3 3) "\n"
"4/4=" (div 4 4) "\n"
)
)`
);
console.log(visualizeEscapes(result));
}
async function main() {
test_compiler();
}
main();
测试结果:test_compiler_output.txt
print(add("hello world!!!","\n","1+1=",add(1,1),"\n","2-2=",sub(2,2),"\n","3*3=",mut(3,3),"\n","4/4=",div(4,4),"\n"));
具体实现
词法分析器、语法分析器、转换器、代码生成器
词法分析器
tokenizer.ts
export type Token = { type: TokenType; value: string };
export enum TokenType {
paren = "paren",
number = "number",
string = "string",
name = "name",
}
export function tokenizer(input: string) {
let current = 0;
let tokens: Array<Token> = [];
while (current < input.length) {
let char = input[current];
if (char === "(") {
tokens.push({ type: TokenType.paren, value: "(" });
current++;
continue;
}
if (char === ")") {
tokens.push({ type: TokenType.paren, value: ")" });
current++;
continue;
}
let WHITESPACE = /\s/;
if (WHITESPACE.test(char)) {
current++;
continue;
}
let NUMBERS = /[0-9]/;
if (NUMBERS.test(char)) {
let value = "";
while (NUMBERS.test(char)) {
value += char;
char = input[++current];
}
tokens.push({ type: TokenType.number, value });
continue;
}
if (char === '"') {
let value = "";
char = input[++current];
while (char !== '"') {
value += char;
char = input[++current];
}
char = input[++current];
tokens.push({ type: TokenType.string, value });
continue;
}
let LETTERS = /[a-z]/i; // /i 表示不区分大小写
if (LETTERS.test(char)) {
let value = "";
while (LETTERS.test(char)) {
value += char;
char = input[++current];
}
tokens.push({ type: TokenType.name, value });
continue;
}
throw new TypeError("I dont know what this character is: " + char);
}
return tokens;
}
语法分析器
parser.ts
import { Token, TokenType } from "./tokenizer";
export type AST_NODE = AST_ROOT_NODE | AST_VALE_NODE | AST_Call_NODE;
export enum AST_NODE_TYPE {
Program = "Program",
NumberLiteral = "NumberLiteral",
StringLiteral = "StringLiteral",
CallExpression = "CallExpression",
}
export type AST_ROOT_NODE = {
type: AST_NODE_TYPE.Program;
body: Array<AST_NODE>;
};
export type AST_Call_NODE = {
type: AST_NODE_TYPE.CallExpression;
name: string;
params: Array<AST_VALE_NODE | AST_Call_NODE>;
};
export type AST_VALE_NODE = {
type: AST_NODE_TYPE.NumberLiteral | AST_NODE_TYPE.StringLiteral;
value: string;
};
export function parser(tokens: Array<Token>) {
let current = 0;
function walk() {
let token = tokens[current];
let node: AST_NODE;
if (token.type === TokenType.number) {
current++;
node = { type: AST_NODE_TYPE.NumberLiteral, value: token.value };
return node;
}
if (token.type === TokenType.string) {
current++;
node = { type: AST_NODE_TYPE.StringLiteral, value: token.value };
return node;
}
if (token.type === TokenType.paren && token.value === "(") {
token = tokens[++current];
node = {
type: AST_NODE_TYPE.CallExpression,
name: token.value,
params: [],
};
token = tokens[++current];
while (
token.type !== TokenType.paren ||
(token.type === TokenType.paren && token.value !== ")")
) {
node.params.push(walk());
token = tokens[current];
}
current++;
return node;
}
throw new TypeError(TokenType[token.type]);
}
let ast: AST_ROOT_NODE = {
type: AST_NODE_TYPE.Program,
body: [],
};
while (current < tokens.length) {
ast.body.push(walk());
}
return ast;
}
转换器
transformer.ts
import {
AST_NODE,
AST_ROOT_NODE,
AST_NODE_TYPE,
AST_Call_NODE,
AST_VALE_NODE,
} from "./parser";
export type NEW_AST_NODE =
| NEW_AST_ROOT_NODE
| NEW_AST_IDNT_NODE
| NEW_AST_CALL_NODE
| NEW_AST_NUMB_NODE
| NEW_AST_EXPR_NODE
| NEW_AST_STRI_NODE;
export enum NEW_AST_NODE_TYPE {
Program = "Program",
NumberLiteral = "NumberLiteral",
StringLiteral = "StringLiteral",
Identifier = "Identifier",
CallExpression = "CallExpression",
ExpressionStatement = "ExpressionStatement",
}
export type NEW_AST_ROOT_NODE = {
type: NEW_AST_NODE_TYPE.Program;
body: Array<NEW_AST_NODE>;
};
export type NEW_AST_NUMB_NODE = {
type: NEW_AST_NODE_TYPE.NumberLiteral;
value: string;
};
export type NEW_AST_STRI_NODE = {
type: NEW_AST_NODE_TYPE.StringLiteral;
value: string;
};
export type NEW_AST_IDNT_NODE = {
type: NEW_AST_NODE_TYPE.Identifier;
name: string;
};
export type NEW_AST_CALL_NODE = {
type: NEW_AST_NODE_TYPE.CallExpression;
callee: NEW_AST_IDNT_NODE;
arguments: Array<NEW_AST_NODE>;
};
export type NEW_AST_EXPR_NODE = {
type: NEW_AST_NODE_TYPE.ExpressionStatement;
expression: NEW_AST_CALL_NODE;
};
let ctx_map: Map<AST_NODE, Array<NEW_AST_NODE>> = new Map();
export function transformer(ast: AST_ROOT_NODE) {
let newAst: NEW_AST_ROOT_NODE;
let visitor: Visitor = {};
visitor[AST_NODE_TYPE.Program] = {
enter(node: AST_ROOT_NODE, parent: null) {
newAst = {
type: NEW_AST_NODE_TYPE.Program,
body: [],
};
ctx_map.set(node, newAst.body);
},
exit(node: AST_Call_NODE, parent: AST_NODE) {
ctx_map.delete(node);
},
};
visitor[AST_NODE_TYPE.CallExpression] = {
enter(node: AST_Call_NODE, parent: AST_NODE) {
let expression: NEW_AST_CALL_NODE | NEW_AST_EXPR_NODE = {
type: NEW_AST_NODE_TYPE.CallExpression,
callee: {
type: NEW_AST_NODE_TYPE.Identifier,
name: node.name,
},
arguments: [],
};
ctx_map.set(node, expression.arguments);
if (parent.type !== AST_NODE_TYPE.CallExpression) {
expression = {
type: NEW_AST_NODE_TYPE.ExpressionStatement,
expression: expression,
};
}
ctx_map.get(parent)!.push(expression);
},
exit(node: AST_Call_NODE, parent: AST_NODE) {
ctx_map.delete(node);
},
};
visitor[AST_NODE_TYPE.NumberLiteral] = {
enter(node: AST_VALE_NODE, parent: AST_NODE) {
ctx_map.get(parent)!.push({
type: NEW_AST_NODE_TYPE.NumberLiteral,
value: node.value,
});
},
};
visitor[AST_NODE_TYPE.StringLiteral] = {
enter(node: AST_VALE_NODE, parent: AST_NODE) {
ctx_map.get(parent)!.push({
type: NEW_AST_NODE_TYPE.StringLiteral,
value: node.value,
});
},
};
traverser(ast, visitor);
return newAst!;
}
type Visitor = {
[key in AST_NODE_TYPE]?: {
enter(node: AST_NODE, parent: AST_NODE | null): void;
exit?(node: AST_NODE, parent: AST_NODE | null): void;
};
};
function traverser(ast: AST_ROOT_NODE, visitor: Visitor) {
function traverseArray(
array: Array<AST_NODE>,
parent: AST_ROOT_NODE | AST_Call_NODE
) {
array.forEach((child) => {
traverseNode(child, parent);
});
}
function traverseNode(
node: AST_NODE,
parent: AST_ROOT_NODE | AST_Call_NODE | null
) {
let methods = visitor[node.type];
if (methods && methods.enter) {
methods.enter(node, parent);
}
switch (node.type) {
case AST_NODE_TYPE.Program:
traverseArray(node.body, node);
break;
case AST_NODE_TYPE.CallExpression:
traverseArray(node.params, node);
break;
case AST_NODE_TYPE.NumberLiteral:
case AST_NODE_TYPE.StringLiteral:
break;
default:
// @ts-ignore
throw new TypeError(AST_NODE_TYPE[node.type]);
}
if (methods && methods.exit) {
methods.exit(node, parent);
}
}
traverseNode(ast, null);
}
代码生成器
codeGenerator.ts
import { NEW_AST_NODE, NEW_AST_NODE_TYPE } from "./transformer";
export function codeGenerator(node: NEW_AST_NODE): string {
switch (node.type) {
case NEW_AST_NODE_TYPE.Program:
let statements = node.body.map(codeGenerator);
return statements.join("\n");
case NEW_AST_NODE_TYPE.ExpressionStatement:
let expression = codeGenerator(node.expression);
return expression + ";";
case NEW_AST_NODE_TYPE.CallExpression:
let funName = codeGenerator(node.callee);
let args = node.arguments.map(codeGenerator);
return funName + "(" + args.join(",") + ")";
case NEW_AST_NODE_TYPE.Identifier:
return node.name;
case NEW_AST_NODE_TYPE.NumberLiteral:
return node.value;
case NEW_AST_NODE_TYPE.StringLiteral:
return '"' + node.value + '"';
default:
// @ts-ignore
throw new TypeError(NEW_AST_NODE_TYPE[node.type]);
}
}
编译器、解释器
编译器:
compiler.ts
import { tokenizer } from "./tokenizer";
import { parser } from "./parser";
import { transformer } from "./transformer";
import { codeGenerator } from "./codeGenerator";
export function compiler(input: string) {
let tokens = tokenizer(input);
let ast = parser(tokens);
let newAst = transformer(ast);
let output = codeGenerator(newAst);
return output;
}
解释器:
intrepreter.ts
import { NEW_AST_NODE, NEW_AST_NODE_TYPE } from "./transformer";
export function executer(node: NEW_AST_NODE): any {
switch (node.type) {
case NEW_AST_NODE_TYPE.Program:
let statements = node.body;
return statements.forEach((statement) => executer(statement));
case NEW_AST_NODE_TYPE.ExpressionStatement:
let expression = node.expression;
return executer(expression);
case NEW_AST_NODE_TYPE.CallExpression:
let func = executer(node.callee);
let args = node.arguments.map(executer);
switch (func) {
case "add":
return args.reduce((prev, cur) => prev + cur);
case "sub":
return args.reduce((prev, cur) => prev - cur);
case "mut":
return args.reduce((prev, cur) => prev * cur);
case "div":
return args.reduce((prev, cur) => prev / cur);
case "print":
let str = args.join(",");
return console.log(str);
default:
throw new SyntaxError(`unknow function: ${func}`);
}
case NEW_AST_NODE_TYPE.Identifier:
let id = node.name;
return id;
case NEW_AST_NODE_TYPE.NumberLiteral:
let num = new Number(node.value);
return num;
case NEW_AST_NODE_TYPE.StringLiteral:
let str = node.value;
return str;
default:
throw new SyntaxError(`unknow syntax: ${node}`);
}
}
import { tokenizer } from "./tokenizer";
import { parser } from "./parser";
import { transformer } from "./transformer";
export function intrepreter(input: string) {
let tokens = tokenizer(input);
let ast = parser(tokens);
let newAst = transformer(ast);
let output = executer(newAst);
return output;
}
index.ts
export { compiler } from "./compiler";
export { intrepreter } from "./intrepreter";
终端工具
Terminal工具:编译器和解释器的实现
编译器终端工具:
cli_compiler.ts
import { compiler } from "./index";
import { createInterface } from "readline/promises";
let readline = createInterface({
input: process.stdin,
output: process.stdout,
});
async function test_compiler() {
while (true) {
let ans = await readline.question(">");
let result = compiler(ans);
console.log(result);
}
}
async function main() {
test_compiler();
}
main();
解释器终端工具:
cli_intrepreter.ts
import { intrepreter } from "./index";
import { createInterface } from "readline/promises";
let readline = createInterface({
input: process.stdin,
output: process.stdout,
});
async function test_intrepreter() {
while (true) {
let ans = await readline.question(">");
intrepreter(ans);
}
}
async function main() {
test_intrepreter();
}
main();