教你使用swift写编译器玩具(5)

前言

本章对应官方教程第5章,本章介绍如何扩展Kaleidoscope以使用if / then / else表达式和一个简单的for循环。

教程如下:

教你使用swift写编译器玩具(0)

教你使用swift写编译器玩具(1)

教你使用swift写编译器玩具(2)

教你使用swift写编译器玩具(3)

教你使用swift写编译器玩具(4)

教你使用swift写编译器玩具(5)

教你使用swift写编译器玩具(6)

教你使用swift写编译器玩具(7)

教你使用swift写编译器玩具(8)

仓库在这

开始

if / then / else

if / then / else也是一种表达式,我们需要把它计算为int1类型,0是假,1是真。如果if表达式计算为真返回then表达式,否则返回else表达式。

首先我们需要做的第一件事情是扩展我们的Token枚举

1
2
3
4
5
...
case `if`
case then
case `else`
...

接着我们在LexernextToken()方法中补充token的解析

1
2
3
4
5
6
7
} else if identifierStr == "if" {
currentToken = CurrentToken(token: .if, val: "if")
} else if identifierStr == "then" {
currentToken = CurrentToken(token: .then, val: "then")
} else if identifierStr == "else" {
currentToken = CurrentToken(token: .else, val: "else")
}

if / then / else的AST扩展

为了解析新的表达式我们需要添加新的AST Node。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class IfExprAST: ExprAST {

let cond: ExprAST

let then: ExprAST

let `else`: ExprAST

init(_ cond: ExprAST, _ then: ExprAST, _ `else`: ExprAST) {
self.cond = cond
self.then = then
self.else = `else`
}

}

if / then / else的Parser扩展

有了AST之后我们要做的事情那就是扩展Parser了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/// 解析条件语句
///
/// - Returns: AST
private func parseIfExpr() -> ExprAST? {
lexer.nextToken()
//解析if表达式
let cond = parseExpression()
guard cond != nil else {
return nil
}
//if表达式后面不是then就报错
guard lexer.currentToken!.token == .then else {
fatalError("expected then.")
}
lexer.nextToken()
//解析then表达式
let then = parseExpression()
guard then != nil else {
return nil
}
//then表达式后面不是else表达式就报错
guard lexer.currentToken!.token == .else else {
fatalError("expected else.")
}
lexer.nextToken()
let `else` = parseExpression()
guard `else` != nil else {
return nil
}
return IfExprAST(cond!, then!, `else`!)
}

接下来我们把它放在parsePrimary中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/// 解析基本表达式的入口
///
/// - Returns: AST
private func parsePrimary() -> ExprAST? {
guard lexer.currentToken != nil else {
return nil
}
if lexer.currentToken!.val == "(" {
return parseParenExpr()
}
switch lexer.currentToken!.token {
case .identifier:
return parseIdentifierExpr()
case .number:
return parseNumberExpr()
case .if:
return parseIfExpr()
default:
fatalError("unknow token when expecting an expression")
}
}

if / then / else的代码生成

我们需要在IfExprAST中实现方法codeGen()。这里我们需要使用的是一个SSA操作:Phi操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
func codeGen() -> IRValue? {
var condV = cond.codeGen()
guard condV != nil else {
return nil
}
//这里有个神坑就是build条件时候要使用int1类型
condV = builder.buildICmp(condV!, IntType.int1.zero(), .equal, name: "ifCond")

let theFunction = builder.insertBlock?.parent
guard theFunction != nil else {
return nil
}

//为then else merge创建basic block并放在函数里
let thenBB = theFunction!.appendBasicBlock(named: "then")
let elseBB = theFunction!.appendBasicBlock(named: "else")
let mergeBB = theFunction!.appendBasicBlock(named: "merge")

//构建控制流表达式
builder.buildCondBr(condition: condV!, then: thenBB, else: elseBB)

//让builder移动到then的基本块里
builder.positionAtEnd(of: thenBB)
//插入then
let thenVal = then.codeGen()
guard thenVal != nil else {
return nil
}
builder.buildBr(mergeBB)
//让builer移动到else的基本块里
builder.positionAtEnd(of: elseBB)
let elseVal = `else`.codeGen()
guard elseVal != nil else {
return nil
}
builder.buildBr(mergeBB)
//让builder移动到merge的基本块里
builder.positionAtEnd(of: mergeBB)
let phi = builder.buildPhi(FloatType.double, name: "phi")
phi.addIncoming([(thenVal!, thenBB), (elseVal!, elseBB)])

return phi
}

for循环表达式

Kaleidoscope的for循环长下面这样,1.0是可选的步长,默认即为1.0。

1
for i = 1, i < n, 1.0 in

for循环表达式的处理会复杂一些,但还是同样运用了Phi操作来处理。

同控制流语句的扩展,我们还是先要扩展TokenLexer

1
2
3
4
5
case `for`

else if identifierStr == "for" {
currentToken = CurrentToken(token: .for, val: "for")
}

接着我们扩展for循环的AST NodeForExprAST

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
class ForExprAST: ExprAST {

let name: String

let start: ExprAST

let end: ExprAST

let step: ExprAST?

let body: ExprAST

init(_ name: String, _ start: ExprAST, _ end: ExprAST, _ step: ExprAST?, _ body: ExprAST) {
self.name = name
self.start = start
self.end = end
self.step = step
self.body = body
}

}

step用来表示for循环的步长,即每次变量的增长值。编译器通过检查第二个逗号是否存在来判断,如果不存在我们把它设为nil

for循环的Parser扩展

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/// 解析For表达式
///
/// - Returns: AST
private func parseForExpr() -> ExprAST? {
lexer.nextToken()
//第一个得是变量,比如说`i`
guard lexer.currentToken!.token == .identifier else {
fatalError("expected identifier after for.")
}
let idName = lexer.currentToken!.val
lexer.nextToken()
guard lexer.currentToken!.val == "=" else {
fatalError("expected '=' after for.")
}

lexer.nextToken()
//循环开始值
let start = parseExpression()
guard start != nil else {
return nil
}
guard lexer.currentToken!.val == "," else {
fatalError("expected ',' after start value.")
}

lexer.nextToken()
//循环结束值
let end = parseExpression()
guard end != nil else {
return nil
}

//步长
var step: ExprAST!
if lexer.currentToken!.val == "," {
lexer.nextToken()
step = parseExpression()
guard step != nil else {
return nil
}
}
//in作为for循环的关键字不可缺少
guard lexer.currentToken!.token == .in else {
fatalError("expected 'in' after for.")
}
lexer.nextToken()
//for循环的循环体解析
let body = parseExpression()
guard body != nil else {
return nil
}

return ForExprAST(idName, start!, end!, step, body!)
}

我们在parsePrimary()方法中补充调用。

1
2
case .for:
return parseForExpr()

for循环的代码生成

话不多说直接看代码,过程都会体现在注释中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
func codeGen() -> IRValue? {
let startVal = start.codeGen()
guard startVal != nil else {
return nil
}

//for循环,插在当前的block之后
let theFunction = builder.insertBlock?.parent
guard theFunction != nil else {
return nil
}
let preHeaderBB = builder.insertBlock
//循环体的基本块
let loopBB = theFunction!.appendBasicBlock(named: "loop")
builder.buildBr(loopBB)
//让builder移动到
builder.positionAtEnd(of: loopBB)

//这里控制循环或退出
let phi = builder.buildPhi(FloatType.double, name: name)
phi.addIncoming([(startVal!, preHeaderBB!)])

//防止for循环作用域与外部产生变量命名冲突,所以先记录一下,是nil也无所谓
let oldVal = namedValues[name]
namedValues[name] = phi

guard body.codeGen() != nil else {
return nil
}

let stepVal: IRValue?
if step != nil {
//有步长就要解析
stepVal = step!.codeGen()
guard stepVal != nil else {
return nil
}
} else {
//默认步长为1.0
stepVal = FloatType.double.constant(1)
}
//步长的增长指令
let nextVar = builder.buildAdd(phi, stepVal!, name: "nextVar")

//循环终止条件
var endCond = end.codeGen()
guard endCond != nil else {
return nil
}
endCond = builder.buildICmp(endCond!, IntType.int1.zero(), .equal, name: "loopCond")

//循环后的代码basic block
let loopEndBB = builder.insertBlock
let afterBB = theFunction?.appendBasicBlock(named: "afterLoop")
builder.buildCondBr(condition: endCond!, then: loopBB, else: afterBB!)
builder.positionAtEnd(of: afterBB!)

phi.addIncoming([(nextVar, loopEndBB!)])

if oldVal != nil {
namedValues[name] = oldVal!
} else {
namedValues[name] = nil
}

//for循环解析总是返回0
return FloatType.double.constant(0)
}

测试

控制流语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
extern foo();
Read extern:

declare i64 @foo()
extern bar();
Read extern:

declare i64 @bar()
def baz(x) if x then foo() else bar();
Read function definition:

define i64 @baz(i64 %x) {
entry:
%ifCond = icmp eq i64 %x, 0
br i1 %ifCond, label %then, label %else

then: ; preds = %entry
%call = call i64 @foo()
br label %merge

else: ; preds = %entry
%call1 = call i64 @bar()
br label %merge

merge: ; preds = %else, %then
%phi = phi i64 [ %call, %then ], [ %call1, %else ]
ret i64 %phi
}

for循环语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
extern putchard(char);
Read extern:

declare i64 @putchard(i64 %char)
def printstar(n) for i = 1, i < n, 1 in putchard(42);
Read function definition:

define i64 @printstar(i64 %n) {
entry:
br label %loop

loop: ; preds = %loop, %entry
%i = phi i64 [ 1, %entry ], [ %nextVar, %loop ]
%call = call i64 @putchard(i64 42)
%nextVar = add i64 %i, 1
%boolCmp = icmp slt i64 %i, %n
%0 = sext i1 %boolCmp to i64
%loopCond = icmp eq i64 %0, 0
br i1 %loopCond, label %loop, label %afterLoop

afterLoop: ; preds = %loop
ret i64 0
}