使用递归下降算法实现简易解释器
尝试使用递归下降算法实现简易词法分析器和文法分析器以及解释器
目录
缘由
2021年08月的时候,在bilibili上看到过一个讲解如何用700行手写实现一个编译器的视频,
源代码在这里,cpc
当时虽然调侃的评论了一句c--
,但其实内心对写出这种东西的人还是感到由衷的钦佩的,
视频的内容我那时只大概的浏览过一些,当时没有时间仔细去看。
再那后来不久的时间里,我又看到过另外一个编译器项目,the-super-tiny-compiler
,
这个项目仅用几百行的代码用比较贴近真实编译器的流程实现了将某种作者定义的代码翻译成为类似于c语言的代码,
这个项目的代码我大概看过,并用ts尝试着将它重新实现了一遍,最近,就是上周的时候,我另外尝试着为其实现了一个解释器,使得可以将其生成的抽象语法树翻译执行。
还有一个项目,Recursive Descent Parser 一个递归下降Parser的实现,用的是js实现的。
这个项目中的代码提供了一种比较完美编程范式,但是在文法的定义部分有些让人感到难以理解,我那时刚看到这个项目的时候,完全看不懂,过于沮丧,放弃了。
后来还看到过一个项目,名字忘了,大概就是在前端中实现了一个虚拟机,使得可以加载并执行那种语言编写的游戏,
还有iPhone上的iSH,也是开源项目,32bit虚拟机,解释执行字节码,可以运行Linux。
还有就是UTM,一款可以在手机上运行的开源的虚拟机,能在平板电脑上运行win7系统。
总之,这些都是我之前接触过、看过、用过、让我感到Amazing的项目,这些项目或多或少都影响了我,使我产生了也要写一个类似东西的想法。
然后,最近在学游戏设计模式,其中作者在讲解行为模式的部分时,提到了解释器模式和如何实现一个虚拟机并执行字节码的内容,用于使得玩家可以用另外一种和游戏本身的编程语言无关的语言来二次开发游戏角色的行为。
这部分内容中作者虽然讲了很多这种解释器模式的缺点和实现虚拟机执行字节码的优点,但却没有提及如何生成字节码,作者只给了一种通过图形界面拖拽生成指令序列的方式来生成字节码,优点自然是对用户友好,但缺点也是显然的,图形化的界面感觉就不如直接编写程序那样能对游戏做到非常精细化的控制。
总之,但这让我回想起了这之前看到过的上述几个项目,让我突然也想完整的实现一个编译器,编译出字节码,另外再实现一个虚拟机,用来执行字节码。有这个想法后我便抑制不住内心的想要实现它的冲动。
这几天我重新阅读了上面几个项目的源代码,甚至去读了js的标准文档,去看了它的文法定义,另外还大概看了另一个项目 tiny-js,这个项目用大概2000行的代码实现了一款可嵌入至c++程序的js解释器,虽然没有完全看完,但给我的感觉还是挺震撼的,就是太牛了。
最近几天,终于尝试着把词法分析和文法分析的内容搞懂了,特别是如何定义文法的那部分。
昨天下午(2023/12/20)五点的时候,感觉已经差不多完整理解了,然后通宵敲了一晚上的代码,一直到今天早上,完整的从头实现了Tokenizer Parser 以及,解释器,支持的语法包括:变量、函数的定义和调用,循环语句,块作用域变量和全局作用域变量,甚至可以注入预定义函数,某种程度上讲,我认为这已经图灵完备了。感觉就像是玩游戏终于通过了某一关 Exciting!
目前支持的语法
- 语句:
- 空语句
;
- 块语句
{}
- if语句
if(){}else{}
- where语句
where(){}
- 标识符定义语句
- 变量定义
def a=1,b=2,c;
- 函数定义
def main(def a){};
- 变量定义
- 空语句
- 表达式:
- 逗号表达式
,
- 赋值表达式:
- 基本赋值表达式
=
- 复合赋值表达式
+= -= *= /=
- 基本赋值表达式
- 逻辑或表达式
||
- 逻辑与表达试
&&
- 等值判断表达式
== !=
- 条件表达式
< > <= >=
- 加减运算表达式
+ -
- 乘除取余运算表达式
* / %
- 括号表达式:
(表达式)
- 标识符表达式
a
- 函数调用表达式
main(1,2,3);
- 逗号表达式
实现效果
解释器测试
test/index.ts
import { Interpreater } from '../Interpreater';
import { Parser } from '../Parser';
import { Tokenizer } from '../Tokenizer';
import { regFuncs } from '../functions';
function testForInterpreater(code: string) {
let tokenizer = new Tokenizer();
let parser = new Parser();
let interpreater = new Interpreater();
regFuncs(interpreater);
let tokens = tokenizer.getTokens(code);
let ast = parser.parse(tokens);
console.log('源代码:');
console.log(code);
console.log('解释器执行结果:');
interpreater.exec(ast);
console.log('源代码词法分析结果:');
console.log(tokens);
console.log('源代码文法分析结果:');
console.dir(ast, { depth: null });
}
function main() {
// 测试解释器
testForInterpreater(
`
def fbnq(def max){
def a=1,b=1,c;
while(a<max){
print(a);
c = b;
b = a + b;
a = c;
}
};
def main(){
fbnq(200); // 输出200以内斐波那契数列
};
main();
`
);
}
main();
output.txt
源代码:
def fbnq(def max){
def a=1,b=1,c;
while(a<max){
print(a);
c = b;
b = a + b;
a = c;
}
};
def main(){
fbnq(200); // 输出200以内斐波那契数列
};
main();
解释器执行结果:
1
1
2
3
5
8
13
21
34
55
89
144
源代码词法分析结果:
[
{ type: 'def', value: 'def' },
{ type: 'Identifier', value: 'fbnq' },
{ type: '(', value: '(' },
{ type: 'def', value: 'def' },
{ type: 'Identifier', value: 'max' },
{ type: ')', value: ')' },
{ type: '{', value: '{' },
{ type: 'def', value: 'def' },
{ type: 'Identifier', value: 'a' },
{ type: 'AssignOpSimple', value: '=' },
{ type: 'Number', value: '1' },
{ type: ',', value: ',' },
{ type: 'Identifier', value: 'b' },
{ type: 'AssignOpSimple', value: '=' },
{ type: 'Number', value: '1' },
{ type: ',', value: ',' },
{ type: 'Identifier', value: 'c' },
{ type: ';', value: ';' },
{ type: 'while', value: 'while' },
{ type: '(', value: '(' },
{ type: 'Identifier', value: 'a' },
{ type: 'EqualityOp', value: '<' },
{ type: 'Identifier', value: 'max' },
{ type: ')', value: ')' },
{ type: '{', value: '{' },
{ type: 'Identifier', value: 'print' },
{ type: '(', value: '(' },
{ type: 'Identifier', value: 'a' },
{ type: ')', value: ')' },
{ type: ';', value: ';' },
{ type: 'Identifier', value: 'c' },
{ type: 'AssignOpSimple', value: '=' },
{ type: 'Identifier', value: 'b' },
{ type: ';', value: ';' },
{ type: 'Identifier', value: 'b' },
{ type: 'AssignOpSimple', value: '=' },
{ type: 'Identifier', value: 'a' },
{ type: 'AddOp', value: '+' },
{ type: 'Identifier', value: 'b' },
{ type: ';', value: ';' },
{ type: 'Identifier', value: 'a' },
{ type: 'AssignOpSimple', value: '=' },
{ type: 'Identifier', value: 'c' },
{ type: ';', value: ';' },
{ type: '}', value: '}' },
{ type: '}', value: '}' },
{ type: ';', value: ';' },
{ type: 'def', value: 'def' },
{ type: 'Identifier', value: 'main' },
{ type: '(', value: '(' },
{ type: ')', value: ')' },
{ type: '{', value: '{' },
{ type: 'Identifier', value: 'fbnq' },
{ type: '(', value: '(' },
{ type: 'Number', value: '200' },
{ type: ')', value: ')' },
{ type: ';', value: ';' },
{ type: '}', value: '}' },
{ type: ';', value: ';' },
{ type: 'Identifier', value: 'main' },
{ type: '(', value: '(' },
{ type: ')', value: ')' },
{ type: ';', value: ';' }
]
源代码文法分析结果:
{
type: 'Program',
statements: [
{
type: 'IdentifierDeclarationsStatement',
declarations: [
{
type: 'FunctionDeclaration',
name: 'fbnq',
FormedArguments: [
{
type: 'VariableDeclaration',
name: 'max',
value: undefined
}
],
statements: {
type: 'BlockStatement',
statements: [
{
type: 'IdentifierDeclarationsStatement',
declarations: [
{
type: 'VariableDeclaration',
name: 'a',
value: { type: 'NumberLiteral', value: '1' }
},
{
type: 'VariableDeclaration',
name: 'b',
value: { type: 'NumberLiteral', value: '1' }
},
{
type: 'VariableDeclaration',
name: 'c',
value: undefined
}
]
},
{
type: 'WhileStatement',
condition: {
type: 'BineryExpression',
operator: '<',
left: { type: 'Identifier', name: 'a' },
right: { type: 'Identifier', name: 'max' }
},
statement: {
type: 'BlockStatement',
statements: [
{
type: 'ExpressionStatement',
expression: {
type: 'FunctionCall',
calee: { type: 'Identifier', name: 'print' },
args: [ { type: 'Identifier', name: 'a' } ]
}
},
{
type: 'ExpressionStatement',
expression: {
type: 'AssignExpression',
operator: '=',
left: { type: 'Identifier', name: 'c' },
right: { type: 'Identifier', name: 'b' }
}
},
{
type: 'ExpressionStatement',
expression: {
type: 'AssignExpression',
operator: '=',
left: { type: 'Identifier', name: 'b' },
right: {
type: 'BineryExpression',
operator: '+',
left: { type: 'Identifier', name: 'a' },
right: { type: 'Identifier', name: 'b' }
}
}
},
{
type: 'ExpressionStatement',
expression: {
type: 'AssignExpression',
operator: '=',
left: { type: 'Identifier', name: 'a' },
right: { type: 'Identifier', name: 'c' }
}
}
]
}
}
]
}
}
]
},
{
type: 'IdentifierDeclarationsStatement',
declarations: [
{
type: 'FunctionDeclaration',
name: 'main',
FormedArguments: [],
statements: {
type: 'BlockStatement',
statements: [
{
type: 'ExpressionStatement',
expression: {
type: 'FunctionCall',
calee: { type: 'Identifier', name: 'fbnq' },
args: [ { type: 'NumberLiteral', value: '200' } ]
}
}
]
}
}
]
},
{
type: 'ExpressionStatement',
expression: {
type: 'FunctionCall',
calee: { type: 'Identifier', name: 'main' },
args: []
}
}
]
}
词法分析的实现
Tokenizer.ts
export type TokenType =
| 'Space' //空白符
| 'Comment' // 注释
| ';' // 分号
| 'if' // if语句
| 'else' // else语句
| 'while'
| 'do'
| 'def' // 声明
| 'AssignOpSimple' // 简单赋值操作符
| 'AssignOpComplex' // 复杂赋值操作符
| 'TernaryOp?' // 三目运算符
| 'TernaryOp:' // 三目运算符
| 'LogicalOr' // 逻辑或运算符
| 'LogicalAnd' // 逻辑与运算符
| 'EqualityOp' // 等值运算符
| 'RelationalOp' // 关系运算符
| 'AddOp' // 算数加减运算符
| 'MutOp' // 算数乘除取余运算符
| 'Number' // 字面量
| 'Boolean' // 字面量
| 'String' // 字面量
| 'null' // 字面量
| '('
| ')'
| '['
| ']'
| '{'
| '}'
| '.'
| ','
| 'this'
| 'super'
| 'Identifier';
export type TokenDef = [TokenType, RegExp];
export type Token = { type: TokenType; value: string };
export const TokenDefs: Array<TokenDef> = [
['Space', /^\s+/],
['Comment', /^\/\/[^\n]*/],
['Comment', /^\/\*(\s|.)*?\*\//],
[';', /^;/],
['if', /^\bif\b/],
['else', /^\belse\b/],
['while', /^\bwhile\b/],
['do', /^\bdo\b/],
['def', /^\bdef\b/],
['EqualityOp', /^[><][=]?/],
['RelationalOp', /^[=!]=/],
['AssignOpSimple', /^=/],
['AssignOpComplex', /^[\+\-\*\/\%]=/],
['TernaryOp?', /^\?/],
['TernaryOp:', /^[:]/],
['LogicalOr', /^\|\|/],
['LogicalAnd', /^&&/],
['AddOp', /^[+-]/],
['MutOp', /^[\*\/\%]/],
['Number', /^\d+\.\d+/], // 0.123
['Number', /^\d+/], // 123
['Boolean', /^\btrue\b/],
['Boolean', /^\bfalse\b/],
['null', /^\bnull\b/],
['String', /^\'[^\']*\'/],
['String', /^\"[^\"]*\"/],
['(', /^\(/],
[')', /^\)/],
['[', /^\[/],
[']', /^\]/],
['{', /^\{/],
['}', /^\}/],
['.', /^\./],
[',', /^\,/],
['this', /^\bthis\b/],
['super', /^\bsuper\b/],
['Identifier', /^\w+/],
];
export class Tokenizer {
idx!: number;
code!: string;
tokens!: Array<Token>;
init(code: string) {
this.idx = 0;
this.code = code;
this.tokens = [];
}
getTokens(code: string) {
this.init(code);
while (this.hasMore()) {
let token = this.getNextToken();
if (token.type == 'Space') continue;
if (token.type == 'Comment') continue;
this.tokens.push(token);
}
return this.tokens;
}
private hasMore() {
return this.idx < this.code.length;
}
private getNextToken(): Token {
if (!this.hasMore()) throw new SyntaxError('no more token.');
const rest = this.code.slice(this.idx);
for (const [tokenType, reg] of TokenDefs) {
let tokenValue = this.match(reg, rest);
if (tokenValue == null) continue;
else return { type: tokenType, value: tokenValue };
}
throw new SyntaxError(`unknow token: ${rest[0]}`);
}
private match(reg: RegExp, str: string) {
let matchs = reg.exec(str);
if (matchs == null) return null; // 没有匹配到
let matched = matchs[0]; // 匹配到了
this.idx += matched.length; // 移动指针
return matched;
}
}
文法定义
ast.ts
import { TokenType } from './Tokenizer';
/**
* Program
* : Statements
* ;
*/
export interface Program extends AST_BASE_NODE {
type: 'Program';
statements: Statements;
}
/**
* Statements
* : Statement
* | Statements Statement
* ;
*/
export type Statements = Array<Statement>;
/**
* 语句
* Statement
* : EmptyStatement
* | IdentifierDeclarationsStatement
* | ExpressionStatement
* ;
*/
export type Statement =
| EmptyStatement
| BlockStatement
| IfStatement
| WhileStatement
| IdentifierDeclarationsStatement
| ExpressionStatement;
/**
* 空语句
* EmptyStatement
* : ';'
* ;
*/
export interface EmptyStatement extends AST_BASE_NODE {
type: 'EmptyStatement';
}
/**
* 代码块
* BlockStatement
* | '{' Statements '}'
* ;
*/
export interface BlockStatement extends AST_BASE_NODE {
type: 'BlockStatement';
statements: Statements;
}
/**
* if语句:
* IfStatement
* : if(Expression) [空语句,块语句]
* | IfStatement ['else' [空语句,块语句,if语句]]*
* ;
*
* 简化
* 由于 `Statement:=空语句|块语句|if语句|...;` 所以可以直接简化:
* IfStatement
* : if(Expression) Statement
* | IfStatement [else Statement]?
* ;
*/
export interface IfStatement extends AST_BASE_NODE {
type: 'IfStatement';
condition: Expression;
case1: Statement;
case2?: Statement;
}
/**
* WhileStatement
* : while(表达式) 空语句|单语句|块语句|....
* ;
*
* 简化
* WhileStatement
* : while(表达式) 语句
* ;
*/
export interface WhileStatement extends AST_BASE_NODE {
type: 'WhileStatement';
condition: Expression;
statement: Statement;
}
/**
* 标识符声明语句可以用来声明多个变量或函数
*
* 标识符声明语句
* | 'def' [变量或函数声明]+ ';'
* ;
*/
export interface IdentifierDeclarationsStatement extends AST_BASE_NODE {
type: 'IdentifierDeclarationsStatement';
declarations: Array<VariableOrFunctionDeclaration>;
}
/**
* 变量或函数声明,
* 函数声明的写法是 :标识符 + [(]
* 变量声明的写法是 :标识符 + [= 表达式]?
* 区别就在于有没有括号
*
* 变量或函数声明:
* | 函数声明
* : 变量声明
* ;
*/
export type VariableOrFunctionDeclaration =
| VariableDeclaration
| FunctionDeclaration;
/**
* 变量声明由一个标识符和一个可选的初始化语句组成,初始化语句由'='符号开头。
*
* 变量声明
* : 标识符 ['=' 逻辑或表达式]?
* : 标识符 变量声明初始化?
* ;
*/
export interface VariableDeclaration extends AST_BASE_NODE {
type: 'VariableDeclaration';
name: string;
value?: VariableDeclarationInit;
}
/**
* 变量声明初始化
* : ['=' 逻辑或表达式]?
* ;
*/
export type VariableDeclarationInit = LogicalOrExpression | undefined;
/**
* 函数声明
* : 标识符 函数形式参数列表 块语句组成
* ;
*
* FunctionDeclaration
* | Identifier FormedArguments BlockStatement
* ;
*/
export interface FunctionDeclaration extends AST_BASE_NODE {
type: 'FunctionDeclaration';
name: string;
FormedArguments: FormedArguments;
statements: BlockStatement;
}
/**
* 函数形式参数列表
* : ( [ [def 变量或函数声明] [,def 变量或函数声明]* ]* )
* ;
*
* FormedArguments
* : ( [ [def VariableOrFunctionDeclaration] [,def VariableOrFunctionDeclaration]* ]* )
* ;
*/
export type FormedArguments = Array<VariableOrFunctionDeclaration>;
/**
* 表达式语句
* ExpressionStatement
* : Expression ';'
* ;
*/
export interface ExpressionStatement extends AST_BASE_NODE {
type: 'ExpressionStatement';
expression: Expression;
}
/**
* 表达式
* Expression
* : CommaExpression
* ;
*/
export type Expression = CommaExpression;
/**
* 逗号表达式
* CommaExpression
* | AssignExpression [CommaOp AssignExpression]*
* ;
*/
export type CommaExpression = AssignExpression | _CommaExpression;
export interface _CommaExpression extends AST_BASE_NODE {
type: 'CommaExpression';
expressions: Array<AssignExpression>;
}
/**
* 赋值表达式
* AssignExpression
* : TernaryExpression
* | MumberAccessExpression [AssignOp AssignExpression]*
* ;
*/
export type AssignExpression = TernaryExpression | _AssignExpression;
export interface _AssignExpression extends AST_BASE_NODE {
type: 'AssignExpression';
operator: string;
left: MumberAccessExpression | Identifier;
right: AssignExpression;
}
/**
* 三目表达式
* TernaryExpression
* : LogicalOrExpression [? TernaryExpression : TernaryExpression]
* ;
*/
export type TernaryExpression = LogicalOrExpression | _TernaryExpression;
export interface _TernaryExpression extends AST_BASE_NODE {
type: 'TernaryExpression';
condition: LogicalOrExpression;
case1: TernaryExpression;
case2: TernaryExpression;
}
/**
* 双目运算表达式
* BineryExpression<Child,OpType>
* | Child OpType Child
* ;
*/
export interface BineryExpression<Child, OpType extends TokenType>
extends AST_BASE_NODE {
type: 'BineryExpression';
operatorType: Extract<TokenType, OpType>;
operator: string;
left: Child;
right: Child;
}
/**
* 逻辑或表达式
* LogicalOrExpression
* : LogicalAndExpression [LogicalOr LogicalAndExpression]*
* ;
*/
export type LogicalOrExpression =
| LogicalAndExpression
| BineryExpression<LogicalAndExpression, 'LogicalOr'>;
/**
* 逻辑或表达式
* LogicalAndExpression
* : EqualityExpression [LogicalAnd EqualityExpression]*
* ;
*/
export type LogicalAndExpression =
| EqualityExpression
| BineryExpression<EqualityExpression, 'LogicalAnd'>;
/**
* 等值表达式
* EqualityExpression
* : RelationalExpression [EqualityOp RelationalExpression]*
* ;
*/
export type EqualityExpression =
| RelationalExpression
| BineryExpression<RelationalExpression, 'EqualityOp'>;
/**
* 关系表达式
* EqualityExpression
* : AdditiveExpression [RelationalOp AdditiveExpression]*
* ;
*/
export type RelationalExpression =
| AdditiveExpression
| BineryExpression<AdditiveExpression, 'RelationalOp'>;
/**
* 加减表达式
* AdditiveExpression
* : MultiplicativeExpression
* | AdditiveExpression AddOp MultiplicativeExpression
* ;
*/
export type AdditiveExpression =
| MultiplicativeExpression
| BineryExpression<MultiplicativeExpression, 'AddOp'>;
/**
* 乘除取余表达式
* MultiplicativeExpression
* : PrimaryExpression
* | PrimaryExpression MutOp PrimaryExpression
* ;
*/
export type MultiplicativeExpression =
| PrimaryExpression
| BineryExpression<PrimaryExpression, 'MutOp'>;
/**
* 成员属性访问表达式和函数调用都以标识符开头,这是字面量所没有的特征
* 所以在主表达式中可以根据该特征来做区分
*
* PrimaryExpression
* : MumberAccessExpressionOrFunctionCallExpression
* | BracketedExpression
* | Literal
* ;
*/
export type PrimaryExpression =
| MumberAccessExpressionOrFunctionCallExpression
| BracketedExpression
| Literal;
/**
* 成员属性访问表达式和函数调用的区分就在于后续有没有括号
* 所以可以先假设这里是一个成员属性访问表达式,如果后面接着一个括号,则假设错误,说明这里是一个函数调用
*
* MumberAccessExpressionOrFunctionCallExpression
* : MumberAccessExpression
* | FunctionCall
* ;
*/
export type MumberAccessExpressionOrFunctionCallExpression =
| MumberAccessExpression
| FunctionCall;
/**
* 成员属性访问表达式
* 把this,super,标识符,都归到这类,因为他们都有相同的特征
*
* MumberAccessExpression
* : This
* | Super
* | Identifier
* | MumberAccessExpression '.' Identifier
* | MumberAccessExpression '[' Expression ']'
* ;
*/
export type MumberAccessExpression =
| This
| Super
| Identifier
| _MumberAccessExpression;
export interface _MumberAccessExpression extends AST_BASE_NODE {
type: 'MumberAccessExpression';
object: MumberAccessExpression;
property: Identifier | Expression;
isIndex: boolean;
}
/**
* FunctionCall
* : Calee Arguments
* ;
*/
export interface FunctionCall extends AST_BASE_NODE {
type: 'FunctionCall';
calee: Calee;
args: Arguments;
}
/**
* Calee
* : MumberAccessExpression
* ;
*/
export type Calee = MumberAccessExpression;
/**
* Arguments
* : '(' AssignExpression ')'
* | '(' ')'
* ;
*/
export type Arguments = Array<AssignExpression>;
/**
* This
* : 'this'
* ;
*/
export interface This extends AST_BASE_NODE {
type: 'this';
name: string;
}
/**
* Super
* : 'super'
* ;
*/
export interface Super extends AST_BASE_NODE {
type: 'super';
name: string;
}
/**
* Identifier
* : [\w]+
* ;
*/
export interface Identifier extends AST_BASE_NODE {
type: 'Identifier';
name: string;
}
/**
* 括号表达式
* BracketedExpression
* : '(' Expression ')'
* ;
* 这里原本可以直接写 BracketedExpression = Expression; 但是这会导致类型循环定义。
*/
export interface BracketedExpression extends AST_BASE_NODE {
type: 'BracketedExpression';
expression: Expression;
}
/**
* Literal
* : NumberLiteral
* | BooleanLiteral
* | StringLiteral
* | NullLiteral
* ;
*/
export type Literal =
| NumberLiteral
| BooleanLiteral
| StringLiteral
| NullLiteral;
export interface NumberLiteral extends AST_BASE_NODE {
type: 'NumberLiteral';
value: string;
}
export interface BooleanLiteral extends AST_BASE_NODE {
type: 'BooleanLiteral';
value: string;
}
export interface StringLiteral extends AST_BASE_NODE {
type: 'StringLiteral';
value: string;
}
export interface NullLiteral extends AST_BASE_NODE {
type: 'NullLiteral';
value: string;
}
export type AST_NODE_TYPE =
| 'Program'
| 'Statement'
| 'EmptyStatement'
| 'BlockStatement'
| 'IfStatement'
| 'WhileStatement'
| 'IdentifierDeclarationsStatement'
| 'VariableDeclaration'
| 'FunctionDeclaration'
| 'ExpressionStatement'
| 'AssignExpression'
| 'CommaExpression'
| 'TernaryExpression'
| 'BineryExpression'
| 'MumberAccessExpression'
| 'this'
| 'super'
| 'Identifier'
| 'FunctionCall'
| 'NumberLiteral'
| 'BooleanLiteral'
| 'StringLiteral'
| 'NullLiteral'
| 'BracketedExpression';
export type AST_NODE =
| Program
| Statement
| EmptyStatement
| BlockStatement
| IfStatement
| WhileStatement
| IdentifierDeclarationsStatement
| VariableDeclaration
| FunctionDeclaration
| ExpressionStatement
| Expression /* Expression中以递归下降的方式定义了其他所有表达式 */;
export interface AST_BASE_NODE {
type: AST_NODE_TYPE;
}
文法分析的实现
Parser.ts
import { TokenType, Token } from './Tokenizer';
import {
Program,
Statements,
Statement,
EmptyStatement,
BlockStatement,
IfStatement,
WhileStatement,
ExpressionStatement,
IdentifierDeclarationsStatement,
VariableOrFunctionDeclaration,
VariableDeclaration,
VariableDeclarationInit,
FunctionDeclaration,
FormedArguments,
Expression,
CommaExpression,
AssignExpression,
TernaryExpression,
LogicalOrExpression,
LogicalAndExpression,
EqualityExpression,
RelationalExpression,
AdditiveExpression,
MultiplicativeExpression,
PrimaryExpression,
MumberAccessExpressionOrFunctionCallExpression,
MumberAccessExpression,
Identifier,
This,
Super,
Arguments,
BracketedExpression,
Literal,
NumberLiteral,
BooleanLiteral,
StringLiteral,
NullLiteral,
BineryExpression,
} from './ast';
export class Parser {
curToken!: Token | null;
tk_idx = 0;
tokens!: Token[];
parse(tokens: Token[]) {
this.tokens = tokens;
this.curToken = this.tokens[(this.tk_idx = 0)];
return this.Program();
}
Program(): Program {
return {
type: 'Program',
statements: this.Statements(),
};
}
Statements(stopAt?: TokenType): Statements {
let statements = [this.Statement()];
while (this.curToken != null && this.curToken?.type != stopAt) {
statements.push(this.Statement());
}
return statements;
}
Statement(): Statement {
switch (this.curToken?.type) {
case ';':
return this.EmptyStatement();
case '{':
return this.BlockStatement();
case 'if':
return this.IfStatement();
case 'while':
return this.WhileStatement();
case 'def':
return this.IdentifierDeclarationsStatement();
default:
return this.ExpressionStatement();
}
}
EmptyStatement(): EmptyStatement {
this.eat(';');
return {
type: 'EmptyStatement',
};
}
BlockStatement(): BlockStatement {
this.eat('{');
let statements = this.curToken?.type != '}' ? this.Statements('}') : [];
this.eat('}');
return {
type: 'BlockStatement',
statements,
};
}
IfStatement(): IfStatement {
this.eat('if');
this.eat('(');
let condition = this.Expression();
this.eat(')');
let case1 = this.Statement();
let case2: Statement | undefined;
if (this.curToken?.type == 'else') {
this.eat('else');
case2 = this.Statement();
}
return {
type: 'IfStatement',
condition,
case1,
case2,
};
}
WhileStatement(): WhileStatement {
this.eat('while');
this.eat('(');
let condition = this.Expression();
this.eat(')');
let statement = this.Statement();
return {
type: 'WhileStatement',
condition,
statement,
};
}
ExpressionStatement(): ExpressionStatement {
let expression = this.Expression();
this.eat(';');
return {
type: 'ExpressionStatement',
expression,
};
}
IdentifierDeclarationsStatement(): IdentifierDeclarationsStatement {
this.eat('def');
let declarations = [this.VariableOrFunctionDeclaration()];
while (this.curToken?.type == ',') {
this.eat(',');
declarations.push(this.VariableOrFunctionDeclaration());
}
this.eat(';');
return {
type: 'IdentifierDeclarationsStatement',
declarations,
};
}
VariableOrFunctionDeclaration(): VariableOrFunctionDeclaration {
let identifier = this.eat('Identifier').value;
switch (this.curToken?.type) {
case '(':
return this.FunctionDeclaration(identifier);
default:
return this.VariableDeclaration(identifier);
}
}
VariableDeclaration(variableName: string): VariableDeclaration {
return {
type: 'VariableDeclaration',
name: variableName,
value: this.VariableDeclarationInit(),
};
}
VariableDeclarationInit(): VariableDeclarationInit {
if (this.curToken?.type == 'AssignOpSimple') {
this.eat('AssignOpSimple');
return this.LogicalOrExpression();
}
return undefined;
}
FunctionDeclaration(functionName: string): FunctionDeclaration {
return {
type: 'FunctionDeclaration',
name: functionName,
FormedArguments: this.FormedArguments(),
statements: this.BlockStatement(),
};
}
FormedArguments(): FormedArguments {
this.eat('(');
let args: VariableOrFunctionDeclaration[] = [];
while (this.curToken?.type == 'def') {
this.eat('def');
args.push(this.VariableOrFunctionDeclaration());
// @ts-ignore
if (this.curToken.type == ',') this.eat(',');
else break;
}
this.eat(')');
return args;
}
Expression(): Expression {
return this.CommaExpression();
}
CommaExpression(): CommaExpression {
let exp = this.AssignExpression();
if (this.curToken?.type == ',') {
let exps = [exp];
while (this.curToken?.type == ',') {
this.eat(',');
exps.push(this.AssignExpression());
}
return {
type: 'CommaExpression',
expressions: exps,
};
}
return exp;
}
AssignExpression(): AssignExpression {
let left: AssignExpression = this.TernaryExpression();
while (
(left.type == 'MumberAccessExpression' || left.type == 'Identifier') &&
(this.curToken?.type == 'AssignOpSimple' ||
this.curToken?.type == 'AssignOpComplex')
) {
let operator = this.eat(this.curToken.type).value;
let right = this.AssignExpression();
left = {
type: 'AssignExpression',
operator,
left,
right,
};
}
return left;
}
TernaryExpression(): TernaryExpression {
let condition: TernaryExpression = this.LogicalOrExpression();
if (this.curToken?.type == 'TernaryOp?') {
this.eat('TernaryOp?');
let case1 = this.TernaryExpression();
this.eat('TernaryOp:');
let case2 = this.TernaryExpression();
return {
type: 'TernaryExpression',
condition,
case1,
case2,
};
}
return condition;
}
LogicalOrExpression(): LogicalOrExpression {
return this.BineryExpression(this.LogicalAndExpression, 'LogicalOr');
}
LogicalAndExpression(): LogicalAndExpression {
return this.BineryExpression(this.EqualityExpression, 'LogicalAnd');
}
EqualityExpression(): EqualityExpression {
return this.BineryExpression(this.RelationalExpression, 'EqualityOp');
}
RelationalExpression(): RelationalExpression {
return this.BineryExpression(this.AdditiveExpression, 'RelationalOp');
}
AdditiveExpression(): AdditiveExpression {
return this.BineryExpression(this.MultiplicativeExpression, 'AddOp');
}
MultiplicativeExpression(): MultiplicativeExpression {
return this.BineryExpression(this.PrimaryExpression, 'MutOp');
}
PrimaryExpression(): PrimaryExpression {
switch (this.curToken?.type) {
case 'Identifier':
case 'this':
case 'super':
return this.MumberAccessExpressionOrFunctionCallExpression();
case '(':
return this.BracketedExpression();
default:
return this.Literal();
}
}
MumberAccessExpressionOrFunctionCallExpression(): MumberAccessExpressionOrFunctionCallExpression {
let mumberExp = this.MumberAccessExpression();
if (this.curToken?.type == '(') {
return {
type: 'FunctionCall',
calee: mumberExp,
args: this.Arguments(),
};
}
return mumberExp;
}
MumberAccessExpression(): MumberAccessExpression {
let object!: MumberAccessExpression;
switch (this.curToken?.type) {
case 'this':
object = this.This();
break;
case 'super':
object = this.Super();
break;
case 'Identifier':
object = this.Identifier();
break;
default:
throw new SyntaxError(`unexpectted token:${this.curToken?.value}`);
}
// @ts-ignore
while (this.curToken?.type == '.' || this.curToken.type == '[') {
let property!: Identifier | Expression;
let isIndex = false;
if (this.curToken.type == '.') {
this.eat('.');
property = this.Identifier();
isIndex = false;
} else if (this.curToken.type == '[') {
this.eat('[');
property = this.Expression();
this.eat(']');
isIndex = true;
}
object = {
type: 'MumberAccessExpression',
object: object,
property: property,
isIndex,
};
}
return object;
}
This(): This {
let ts = this.eat('this').value;
return {
type: 'this',
name: ts,
};
}
Super(): Super {
let sp = this.eat('super').value;
return {
type: 'super',
name: sp,
};
}
Identifier(): Identifier {
let id = this.eat('Identifier').value;
return {
type: 'Identifier',
name: id,
};
}
Arguments(): Arguments {
this.eat('(');
let args: Array<AssignExpression> = [];
if (this.curToken?.type == ')') {
this.eat(')');
return args;
}
args.push(this.AssignExpression());
if (this.curToken?.type == ',') {
this.eat(',');
args.push(this.AssignExpression());
}
this.eat(')');
return args;
}
BracketedExpression(): BracketedExpression {
this.eat('(');
let expression = this.Expression();
this.eat(')');
return {
type: 'BracketedExpression',
expression,
};
}
Literal(): Literal {
switch (this.curToken?.type) {
case 'Number':
return this.NumberLiteral();
case 'String':
return this.StringLiteral();
case 'null':
return this.NullLiteral();
case 'Boolean':
return this.BooleanLiteral();
default:
throw new SyntaxError(`unknow Literal: ${this.curToken?.value}`);
}
}
NumberLiteral(): NumberLiteral {
let val = this.eat('Number').value;
return {
type: 'NumberLiteral',
value: val,
};
}
BooleanLiteral(): BooleanLiteral {
let val = this.eat('Boolean').value;
return {
type: 'BooleanLiteral',
value: val,
};
}
StringLiteral(): StringLiteral {
let val = this.eat('String').value;
return {
type: 'StringLiteral',
value: val,
};
}
NullLiteral(): NullLiteral {
let val = this.eat('null').value;
return {
type: 'NullLiteral',
value: val,
};
}
BineryExpression<ChildNode, operatorType extends TokenType>(
childBuilder: () => ChildNode,
operatorType: operatorType
): ChildNode | BineryExpression<ChildNode, operatorType> {
let left: ChildNode | BineryExpression<ChildNode, operatorType> =
childBuilder.apply(this);
while (this.curToken?.type == operatorType) {
let operator = this.eat(operatorType).value;
let right = childBuilder.apply(this);
left = {
type: 'BineryExpression',
operator,
left,
right,
} as BineryExpression<ChildNode, operatorType>;
}
return left;
}
eat(tkt: TokenType) {
if (this.curToken?.type != tkt) {
throw new SyntaxError(
`expected:'${tkt}';but actually get:'${this.curToken?.value}'`
);
}
let token = this.tokens[this.tk_idx];
this.curToken =
this.tk_idx + 1 < this.tokens.length ? this.tokens[++this.tk_idx] : null;
return token;
}
}
解释器的实现
Interpreater.ts
import { AST_NODE } from './ast';
export class Interpreater {
/**
* 暂时的想法是,进入一个块作用域,就创建一个上下文对象,压入栈顶,
* 当要在该作用域定义变量时,把变量存储到栈顶的上下文对象中。
* 当要寻找一个变量的值时,则从栈顶往栈底遍历。
* 当离开一个作用域时,弹出栈顶上下文对象,并将其销毁
*/
ctxIdx = 0; // 上下文栈指针,第0个栈为系统栈
ctxStack: Array<Map<string, any>> = [new Map()]; // 上下文对象栈
constructor() {}
exec(node?: AST_NODE): any {
if (!node) return undefined;
switch (node.type) {
case 'Program':
case 'BlockStatement':
// 执行代码块,返回最后一条语句的值
this.ctxIdx++;
let res = node.statements.map((stm) => this.exec(stm)).pop();
this.ctxIdx--;
return res;
case 'EmptyStatement':
return undefined;
case 'IfStatement':
// TODO: 需要考虑 return 和 break
if (this.exec(node.condition)) this.exec(node.case1);
else if (node.case2) this.exec(node.case2);
return undefined;
case 'WhileStatement':
// TODO: 需要考虑return 和 break
while (this.exec(node.condition)) this.exec(node.statement);
// 返回最后执行的一条语句
return undefined;
case 'IdentifierDeclarationsStatement':
// 返回标识符列表
return node.declarations.map((dec) => this.exec(dec));
case 'VariableDeclaration':
// 变量声明以及初始化
return this.declear(node.name, this.exec(node.value));
case 'FunctionDeclaration':
// 返回函数名
return this.declear(node.name, node);
case 'ExpressionStatement':
this.exec(node.expression);
return undefined;
case 'CommaExpression':
// 计算并返回逗号表达式的最后一个值
return node.expressions.map((exp) => this.exec(exp)).pop();
case 'AssignExpression':
switch (node.left.type) {
case 'Identifier':
return this.assign(
node.left.name,
node.operator,
this.exec(node.right)
);
default:
throw new SyntaxError('unimplement functions.');
}
case 'TernaryExpression':
return this.exec(node.condition)
? this.exec(node.case1)
: this.exec(node.case2);
case 'BineryExpression':
switch (node.operator) {
case '||':
return this.exec(node.left) || this.exec(node.right);
case '&&':
return this.exec(node.left) && this.exec(node.right);
case '==':
return this.exec(node.left) == this.exec(node.right);
case '!=':
return this.exec(node.left) != this.exec(node.right);
case '<':
return this.exec(node.left) < this.exec(node.right);
case '>':
return this.exec(node.left) > this.exec(node.right);
case '<=':
return this.exec(node.left) <= this.exec(node.right);
case '>=':
return this.exec(node.left) >= this.exec(node.right);
case '+':
return this.exec(node.left) + this.exec(node.right);
case '-':
return this.exec(node.left) - this.exec(node.right);
case '*':
return this.exec(node.left) * this.exec(node.right);
case '/':
return this.exec(node.left) / this.exec(node.right);
case '%':
return this.exec(node.left) % this.exec(node.right);
}
throw new SyntaxError(`unknow operator:${node.operator}`);
case 'FunctionCall':
switch (node.calee.type) {
case 'Identifier':
let symbol = node.calee.name;
let fun: Function | AST_NODE = this.getVal(symbol);
if (fun) {
if (fun instanceof Function) {
let args = node.args.map((arg) => this.exec(arg));
return fun(args);
} else if (fun.type == 'FunctionDeclaration') {
// 计算实参
let argValus = node.args.map((arg) => this.exec(arg));
// 进入函数调用
this.ctxIdx++;
// 声明初始化形式参数
let argNames = fun.FormedArguments.map((arg) => this.exec(arg));
// 把实参传给形式参数
argNames.forEach((name, idx) =>
this.assign(name, '=', argValus[idx])
);
let result = this.exec(fun.statements);
this.ctxIdx--;
return result;
}
} else {
throw new SyntaxError(`unimplement function ${symbol}.`);
}
default:
throw new SyntaxError('unimplement functions.');
}
case 'BracketedExpression':
return this.exec(node.expression);
case 'Identifier':
return this.getVal(node.name);
case 'NumberLiteral':
return Number(node.value);
case 'BooleanLiteral':
return node.value == 'true' ? true : false;
case 'StringLiteral':
return node.value;
case 'NullLiteral':
return null;
case 'MumberAccessExpression':
case 'super':
case 'this':
throw new SyntaxError('unimplement functions.');
}
}
/**
* 获取当前作用域的上下文对象
* 如果不存在,则要负责创建它
*/
getCurrentContext() {
this.ctxStack.length = this.ctxIdx + 1;
let ctx = this.ctxStack[this.ctxIdx];
if (!ctx) {
ctx = this.ctxStack[this.ctxIdx] = new Map<string, any>();
}
return ctx;
}
/**
* 自上而下的到上下文对象栈中去寻找定义了某个符号的上下文对象。
*/
findContextWithSymbol(symbol: string) {
this.ctxStack.length = this.ctxIdx + 1;
let stack = this.ctxStack;
for (let idx = this.ctxIdx; 0 <= idx; idx--) {
const ctx = stack[idx];
if (ctx && ctx.has(symbol)) return ctx;
}
return null;
}
registerFun(funName: string, fun: Function) {
// 把预定义函数定义在系统栈中
this.ctxStack[0].set(funName, fun);
}
/**
* 在当前的作用域中声明变量
*/
declear(symbol: string, val: any | undefined) {
let ctx = this.getCurrentContext();
if (ctx.has(symbol)) throw new SyntaxError(`redefined symbol: ${symbol}`);
ctx.set(symbol, val);
return symbol;
}
/**
* 对一个已经定义的变量赋值
*/
assign(symbol: string, operator: string, val: number) {
let ctx = this.findContextWithSymbol(symbol);
if (!ctx) throw new SyntaxError(`undefined symbol: ${symbol}`);
let newVal = ctx.get(symbol);
switch (operator) {
case '=':
newVal = val;
break;
case '+=':
newVal += val;
break;
case '-=':
newVal -= val;
break;
case '*=':
newVal *= val;
break;
case '/=':
newVal /= val;
break;
case '%=':
newVal %= val;
break;
default:
throw new SyntaxError(`unknow operator:${operator}`);
}
ctx.set(symbol, newVal);
return newVal;
}
/**
* 获取一个已经定义的变量的值
*/
getVal(symbol: string) {
let ctx = this.findContextWithSymbol(symbol);
if (!ctx) throw new SyntaxError(`undefined symbol: ${symbol}`);
return ctx.get(symbol);
}
}