教你使用swift写编译器玩具(7)

前言

本章对应官方教程第7章。本章的目的是支持变量var和`=运算符,我们需要在堆栈分配内存给变量,详细说明请查看官方教程第7章。

教程如下:

教你使用swift写编译器玩具(0)

教你使用swift写编译器玩具(1)

教你使用swift写编译器玩具(2)

教你使用swift写编译器玩具(3)

教你使用swift写编译器玩具(4)

教你使用swift写编译器玩具(5)

教你使用swift写编译器玩具(6)

教你使用swift写编译器玩具(7)

教你使用swift写编译器玩具(8)

仓库在这

开始

调整现有变量

首先我们需要改变namedValues的value类型。

1
var namedValues: [String: IRInstruction] = [:]

此外,我们需要一个辅助函数来创建Alloca

1
2
3
4
func createEntryBlockAlloca(function: Function, name: String) -> IRInstruction {
let instruction = builder.buildAlloca(type: FloatType.double, count: 0, name: name)
return instruction
}

在本章中,变量改为了存于堆栈中,因此变量的代码生成也需要从堆栈中加载。我们修改VariableExprASTcodeGen()方法。

1
2
3
4
5
6
7
func codeGen() -> IRValue? {
let value = namedValues[name]
guard value != nil else {
fatalError("unknow variable name.")
}
return builder.buildLoad(value!, name: name)
}

现在我们需要更新定义变量的代码来设置Alloca。我们从ForExprASTcodeGen()方法开始。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
func codeGen() -> IRValue? {
let startVal = start.codeGen()
guard startVal != nil else {
return nil
}

//for循环,插在当前的block之后
let theFunction = builder.insertBlock?.parent
guard theFunction != nil else {
return nil
}
//在entry block中为变量创建alloca
let alloca = createEntryBlockAlloca(function: theFunction!, name: name)
把变量存储在alloca中
builder.buildStore(startVal!, to: alloca)

let loopBB = theFunction!.appendBasicBlock(named: "loop")
builder.buildBr(loopBB)
builder.positionAtEnd(of: loopBB)

let oldVal = namedValues[name]
namedValues[name] = alloca

guard body.codeGen() != nil else {
return nil
}

let stepVal: IRValue?
if step != nil {
stepVal = step!.codeGen()
guard stepVal != nil else {
return nil
}
} else {
stepVal = FloatType.double.constant(1)
}

//循环终止条件
var endCond = end.codeGen()
guard endCond != nil else {
return nil
}
//build条件时候要使用int类型
endCond = builder.buildICmp(endCond!, IntType.int1.zero(), .notEqual, name: "loopCond")

//加载当前变量
let curVal = builder.buildLoad(alloca)
//让下一个变量为当前变量+步长,即增长了
let nextVal = builder.buildAdd(curVal, stepVal!, name: "nextVal")
//再重新存储到alloca中
builder.buildStore(nextVal, to: alloca)

//循环后的代码basic block
let afterBB = theFunction?.appendBasicBlock(named: "afterLoop")
builder.buildCondBr(condition: endCond!, then: loopBB, else: afterBB!)
builder.positionAtEnd(of: afterBB!)

if oldVal != nil {
namedValues[name] = oldVal!
} else {
namedValues[name] = nil
}

//for循环解析总是返回0
return FloatType.double.constant(0)
}

这其中最大的变化就是我们不再使用phi操作而是使用load/store来根据需要访问变量。

我们在FunctionAST中也需要改变codeGen()方法。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
func codeGen() -> Function? {
functionProtos[proto.name] = proto
let theFunction = getFunction(named: proto.name)
guard theFunction != nil else {
return nil
}

//如果是操作符,把他放在全局的操作符表中
if proto.isOperator {
BinOpPrecedence[proto.operatorName!] = proto.precedence
}

let entry = theFunction!.appendBasicBlock(named: "entry")
builder.positionAtEnd(of: entry)

namedValues.removeAll()
var arg = theFunction!.firstParameter
while arg != nil {
//为参数创建alloca
let alloca = createEntryBlockAlloca(function: theFunction!, name: arg!.name)
//把变量存到alloca中
builder.buildStore(arg!, to: alloca)
//把变量放到符号表里
namedValues[arg!.name] = alloca
arg = arg?.next()
}

if let retValue = body.codeGen() {
builder.buildRet(retValue)
do {
try theModule.verify()
return theFunction
} catch {
print("\(error)")
}
}
//函数体出现问题,移除函数
theFunction!.eraseFromParent()
if proto.isOperator {
BinOpPrecedence[proto.operatorName!] = nil
}
return nil
}

添加新的运算符

我们需要在全局操作符表中加入=

1
var BinOpPrecedence: [String: UInt] = ["=": 2, "<": 10, "+": 20, "-": 20, "*": 40]

接下来我想大家都能想到,那就是去修改BinaryExprASTcodeGen()方法。

我们只需要在codeGen()方法最开始判断一下=即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
if op == "=" {
let lhse = lhs as? VariableExprAST
guard lhse != nil else {
fatalError("Destination of '=' must be a variable.")
}
let val = lhse?.codeGen()
guard val != nil else {
return nil
}
//获取符号表中的变量
let variable = namedValues[lhse!.name]
guard variable != nil else {
fatalError("Unknow variable name.")
}
//为变量赋值
builder.buildStore(val!, to: variable!)
return val
}

用户定义的局部变量

就像之前所做过的扩展一样,我们先要在TokenLexerParser

1
2
3
4
5
6
7
8
enum Token {
...
case `var`
...
}
else if identifierStr == "var" {
currentToken = CurrentToken(token: .var, val: "var")
}

接着我们构造VarExprASTAST Node。

1
2
3
4
5
6
7
8
9
10
11
12
class VarExprAST: ExprAST {

let varNames: [(String, ExprAST?)]

let body: ExprAST

init(_ varNames: [(String, ExprAST?)], _ body: ExprAST) {
self.varNames = varNames
self.body = body
}

}

我们允许通过var/in一次定义多个变量以及其初始化的值,并且我们允许在body中访问var/in定义的变量。

之后我们需要定义Parser的方法。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/// 解析Var变量
///
/// - Returns: AST
private func parseVarExpr() -> ExprAST? {
lexer.nextToken()
var varNames: [(String, ExprAST?)] = []
guard lexer.currentToken!.token == .identifier else {
fatalError("Expected identifier after val.")
}
while true {
let name = lexer.currentToken!.val
lexer.nextToken()

let expr: ExprAST? = nil
if lexer.currentToken!.val == "=" {
lexer.nextToken()
//解析"="右边
let expr = parseExpression()
guard expr != nil else {
return nil
}
}

varNames.append((name, expr))

//看看还有没有下一个
if lexer.currentToken!.val != "," {
break
}
lexer.nextToken()
if lexer.currentToken!.token != .identifier {
fatalError("Expected identifier list after var.")
}
}
if lexer.currentToken!.token != .in {
fatalError("Expected 'in' keyword after 'var'.")
}
lexer.nextToken()
//解析body
let body = parseExpression()
guard body != nil else {
return nil
}
return VarExprAST(varNames, body!)
}

最后需要添加的是我们VarExprAST中的codeGen()方法。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
func codeGen() -> IRValue? {
var oldBindings: [IRInstruction?] = []
let theFunction = builder.insertBlock?.parent
guard theFunction != nil else {
return nil
}
//注册所有变量,并让他们初始化
for v in varNames {
let initVal: IRValue?
if v.1 != nil {
initVal = v.1?.codeGen()
guard initVal != nil else {
return nil
}
} else {
//没有的话就默认0
initVal = FloatType.double.constant(0)
}

let alloca = createEntryBlockAlloca(function: theFunction!, name: v.0)
//初始化变量,把initVal存到alloca中
builder.buildStore(initVal!, to: alloca)
//记录的目的是防止丢失外部变量名相同的变量,比如说外部有变量a,body里也有变量a
oldBindings.append(namedValues[v.0])
namedValues[v.0] = alloca
}

let bodyVal = body.codeGen()
guard bodyVal != nil else {
return nil
}
//恢复之前的变量绑定
for i in 0..<varNames.count {
namedValues[varNames[i].0] = oldBindings[i]
}
return bodyVal
}

测试

我们输入

1
2
3
4
5
6
7
8
9
10
11
def binary : 1 (x y) y;

def fibi(x)
var a = 1, b = 1, c in
(for i = 3, i < x in
c = a + b :
a = b :
b = c) :
b;

fibi(10);

输出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
Read function definition:

define double @"binary:"(double %x, double %y) {
entry:
%x1 = alloca double, i64 0
store double %x, double* %x1
%y2 = alloca double, i64 0
store double %y, double* %y2
%y3 = load double, double* %y2
ret double %y3
}
Read function definition:

define double @fibi(double %x) {
entry:
%x1 = alloca double, i64 0
store double %x, double* %x1
%a = alloca double, i64 0
store double 0.000000e+00, double* %a
%b = alloca double, i64 0
store double 0.000000e+00, double* %b
%c = alloca double, i64 0
store double 0.000000e+00, double* %c
%i = alloca double, i64 0
store double 3.000000e+00, double* %i
br label %loop

loop: ; preds = %loop, %entry
%c2 = load double, double* %c
store double %c2, double* %c
%a3 = load double, double* %a
store double %a3, double* %a
%binaryOp = call double @"binary:"(double %c2, double %a3)
%b4 = load double, double* %b
store double %b4, double* %b
%binaryOp5 = call double @"binary:"(double %binaryOp, double %b4)
%i6 = load double, double* %i
%x7 = load double, double* %x1
%boolCmp = fcmp olt double %i6, %x7
%loopCond = icmp ne i1 %boolCmp, false
%0 = load double, double* %i
%nextVal = fadd double %0, 1.000000e+00
store double %nextVal, double* %i
br i1 %loopCond, label %loop, label %afterLoop

afterLoop: ; preds = %loop
%b8 = load double, double* %b
%binaryOp9 = call double @"binary:"(double 0.000000e+00, double %b8)
ret double %binaryOp9
}
Read top-level expression:

define double @__anon_expr() {
entry:
%call = call double @fibi(double 1.000000e+01)
ret double %call
}
0.0