ファイルを分割する
– 9cc.h
– main.c
– parse.c
– codegen.c
Makefile
CFLAGS=-std=c11 -g -static SRCS=$(wildcard *.c) OBJS=$(SRCS:.c=.o) 9cc: $(OBJS) $(CC) -o 9cc $(OBJS) $(LDFLAGS) $(OBJS): 9cc.h clean: rm -f 9cc *.o *~ tmp* .PHONY: test clean
随机应变 ABCD: Always Be Coding and … : хороший
ファイルを分割する
– 9cc.h
– main.c
– parse.c
– codegen.c
Makefile
CFLAGS=-std=c11 -g -static SRCS=$(wildcard *.c) OBJS=$(SRCS:.c=.o) 9cc: $(OBJS) $(CC) -o 9cc $(OBJS) $(LDFLAGS) $(OBJS): 9cc.h clean: rm -f 9cc *.o *~ tmp* .PHONY: test clean
トークナイザ : tokenのlenの追加
// token
struct Token {
TokenKind kind; // token type
Token *next; // next token
int val; // TK_NUM value
char *str; // token char
int len; // length of token
};
bool consume(char *op) {
if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len))
return false;
token = token->next;
return true;
}
演算子の優先順位
1. == !=
2. < <= > >=
3. +, –
4. * /
5. 単項+, –
6. ()
従って
expr = equality
equality = relational (“==” relational | “!=” relational)*
relational = add(“<" add | "<=" add | ">” add | “>=” add)*
add = mul (“+” mul | “-” mul)*
mul = primary (“*” primary | “/” unary)*
unary = (“+” | “-“)? primary
primary = num | “(” expr “)”
### 比較のアセンブリコード
pop rdi
pop rax
cmp rax, rdi // 比較結果はフラグレジスタに保存される
sete al // sete ALで更新すると、自動的にRAXも更新される
movzb rax, al
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// token type 構造体
typedef enum {
TK_RESERVED, // op
TK_NUM,
TK_EOF, // end of input
} TokenKind;
typedef struct Token Token;
// token
struct Token {
TokenKind kind; // token type
Token *next; // next token
int val; // TK_NUM value
char *str; // token char
int len; // length of token
};
// now focus token
Token *token;
// input program
char *user_input;
// report error
void error_at(char *loc, char *fmt, ...){
va_list ap;
va_start(ap, fmt);
int pos = loc - user_input;
fprintf(stderr, "%s\n", user_input);
fprintf(stderr, "%*s", pos, " "); // output pos blank
fprintf(stderr, "^ ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
void error(char *fmt, ...){
va_list ap;
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
// if the next token is expected symbol, read forward one token and
// return true.
bool consume(char *op) {
if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len))
return false;
token = token->next;
return true;
}
// if the next token is expected symbol, read forward one token and
// otherwise report an error.
void expect(char *op) {
if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len))
error_at(token->str, "expected \"%s\"", op);
token = token->next;
}
// if the next token is expected number, read forward one token and
// otherwise report an error.
int expect_number() {
if(token->kind != TK_NUM)
error("数ではありません");
int val = token->val;
token = token->next;
return val;
}
bool at_eof() {
return token->kind == TK_EOF;
}
// create a new token and connect it to curl.
Token *new_token(TokenKind kind, Token *cur, char *str, int len){
Token *tok = calloc(1, sizeof(token));
tok->kind = kind;
tok->str = str;
tok->len = len;
cur->next = tok;
return tok;
}
bool startswitch(char *p, char *q) {
return memcmp(p, q, strlen(q)) == 0;
}
// Tokenize the input string p and return it
Token *tokenize() {
char *p = user_input;
Token head;
head.next = NULL;
Token *cur = &head;
while (*p) {
// skip space
if(isspace(*p)){
p++;
continue;
}
if (startswith(p, "==") || startswith(p, "!=") ||
startswith(p, "<=") || startswith(p, ">=")) {
cur = new_token(TK_RESERVED, cur, p, 2);
p += 2;
continue;
}
if(strchr("+-*/()<>", *p)){
cur = new_token(TK_RESERVED, cur, p++, 1);
continue;
}
if(isdigit(*p)) {
cur = new_token(TK_NUM, cur, p, 0);
char *q = p;
cur->val = strtol(p, &p, 10);
cur->len = p - q;
continue;
}
error_at(p, "invalid token");
}
new_token(TK_EOF, cur, p, 0);
return head.next;
}
typedef enum {
ND_ADD, // +
ND_SUB, // -
ND_MUL, // *
ND_DIV, // /
ND_EQ, // ==
ND_NE, // !=
ND_LT, // <
ND_LE, // <=
ND_NUM, // num
} NodeKind;
typedef struct Node Node;
struct Node {
NodeKind kind; // node type
Node *lhs; // left
Node *rhs; // right
int val;
};
Node *new_node(NodeKind kind){
Node *node = calloc(1, sizeof(Node));
node->kind = kind;
return node;
}
Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) {
Node *node = new_node(kind);
node->lhs = lhs;
node->rhs = rhs;
return node;
}
Node *new_num(int val){
Node *node = new_node(ND_NUM);
node->val = val;
return node;
}
Node *expr();
Node *equality();
Node *relational();
Node *add();
Node *mul();
Node *unary();
Node *primary();
Node *expr() {
return equality();
}
Node *equality(){
Node *node = relational();
for(;;){
if(consume("=="))
node == new_binary(ND_EQ, node, relational());
else if (consume("!="))
node = new_binary(ND_NE, node, relational());
else
return node;
}
}
Node *relational(){
Node *node = add();
for(;;) {
if(consume("<"))
node = new_binary(ND_LT, node, add());
else if (consume("<="))
node = new_binary(ND_LE, node, add());
else if (consume(">"))
node = new_binary(ND_LT, add(), node);
else if (consume(">="))
node = new_binary(ND_LE, add(), node);
else
return node;
}
}
Node *add() {
Node *node = mul();
for(;;) {
if(consume('+'))
node = new_binary(ND_ADD, node, mul());
else if(consume('-'))
node = new_binary(ND_DIV, node, mul());
else
return node;
}
}
Node *mul() {
Node *node = unary();
for(;;) {
if(consume('*'))
node = new_binary(ND_MUL, node, unary());
else if(consume('/'))
node = new_binary(ND_DIV, node, unary());
else
return node;
}
}
Node *unary() {
if (consume('+'))
return unary();
if (consume('-'))
return new_binary(ND_SUB, new_node_num(0), unary());
return primary();
}
Node *primary() {
if(consume('(')){
Node *node = expr();
expect(')');
return node;
}
return new_node_num(expect_number());
}
void gen(Node *node) {
if(node->kind == ND_NUM) {
printf(" push %d\n", node->val);
return;
}
gen(node->lhs);
gen(node->rhs);
printf(" pop rdi\n");
printf(" pop rax\n");
switch(node->kind) {
case ND_ADD:
printf(" addd rax, rdi\n");
break;
case ND_SUB:
printf(" sub rax, rdi\n");
break;
case ND_MUL:
printf(" imul rax, rdi\n");
break;
case ND_DIV:
printf(" cqo\n");
printf(" idiv rdi\n");
break;
case ND_EQ:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
case ND_NE:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
case ND_LT:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
case ND_LE:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
}
printf(" push rax\n");
}
int main(int argc, char **argv){
if(argc != 2) {
error("引数の個数が正しくありません\n");
return 1;
}
user_input = argv[1];
token = tokenize(user_input);
Node *node = expr();
printf(".intel_syntax noprefix\n");
printf(".global main\n");
printf("main:\n");
gen(node);
printf(" pop rax\n");
printf(" ret\n");
return 0;
}
expr = mul (“+” mul | “-” mul)*
mul = primary (“*” primary | “/” unary)*
unary = (“+” | “-“)? primary
primary = num | “(” expr “)”
Node *mul() {
Node *node = primary();
for(;;) {
if(consume('*'))
node = new_node(ND_MUL, node, unary());
else if(consume('/'))
node = new_node(ND_DIV, node, unary());
else
return node;
}
}
Node *unary() {
if (consume('+'))
return primary();
if (consume('-'))
return new_node(ND_SUB, new_node_num(0), primary());
return primary();
}
+x の場合はそのまま、-xの場合は、0-xに置き換える。
下方再帰の順番は上記に示した通り
以下をアセンブラに落としていく
PUSH 2
PUSH 3
PUSH 4
MUL
ADD
スタックへのpushとpop
// 2*3を計算して結果をスタックにプッシュ
push 2
push 3
pop rdi
pop rax
mul rax, rdi
push rax
// 4*5を計算して結果をスタックにプッシュ
push 4
push 5
pop rdi
pop rax
mul rax, rdi
push rax
// スタックトップの2つの値を足す
// つまり2*3+4*5を計算する
pop rdi
pop rax
add rax, rdi
push rax
インタプリターでは構文解析の実行のタイミングで実行するがコンパイラはスタックに値を入れて、opに応じてアッセンブラを実行する
RAXはアキュムレータの「A」
RSIはソースインデックスレジスタ
cqo命令を使うと、RAXに入っている64ビットの値を128ビットに伸ばしてRDXとRAXにセットすることができる
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// token type 構造体
typedef enum {
TK_RESERVED, // op
TK_NUM,
TK_EOF, // end of input
} TokenKind;
typedef struct Token Token;
// token
struct Token {
TokenKind kind; // token type
Token *next; // next token
int val; // TK_NUM value
char *str; // token char
};
// now focus token
Token *token;
// input program
char *user_input;
// report error
void error_at(char *loc, char *fmt, ...){
va_list ap;
va_start(ap, fmt);
int pos = loc - user_input;
fprintf(stderr, "%s\n", user_input);
fprintf(stderr, "%*s", pos, " "); // output pos blank
fprintf(stderr, "^ ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
void error(char *fmt, ...){
va_list ap;
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
// if the next token is expected symbol, read forward one token and
// return true.
bool consume(char op) {
if(token->kind != TK_RESERVED | token->str[0] != op)
return false;
token = token->next;
return true;
}
// if the next token is expected symbol, read forward one token and
// otherwise report an error.
void expect(char op) {
if(token->kind != TK_RESERVED | token->str[0] != op)
error("'%c'ではありません。", op);
token = token->next;
}
// if the next token is expected number, read forward one token and
// otherwise report an error.
int expect_number() {
if(token->kind != TK_NUM)
error("数ではありません");
int val = token->val;
token = token->next;
return val;
}
bool at_eof() {
return token->kind == TK_EOF;
}
// create a new token and connect it to curl.
Token *new_token(TokenKind kind, Token *cur, char *str){
Token *tok = calloc(1, sizeof(token));
tok->kind = kind;
tok->str = str;
cur->next = tok;
return tok;
}
// Tokenize the input string p and return it
Token *tokenize(char *p) {
Token head;
head.next = NULL;
Token *cur = &head;
while (*p) {
// skip space
if(isspace(*p)){
p++;
continue;
}
if(*p == '+' || *p == '-') {
cur = new_token(TK_RESERVED, cur, p++);
continue;
}
if(isdigit(*p)) {
cur = new_token(TK_NUM, cur, p);
cur->val = strtol(p, &p, 10);
continue;
}
error("トークナイズできません");
}
new_token(TK_EOF, cur, p);
return head.next;
}
typedef enum {
ND_ADD, // +
ND_SUB, // -
ND_MUL, // *
ND_DIV, // /
ND_NUM, // num
} NodeKind;
typedef struct Node Node;
struct Node {
NodeKind kind; // node type
Node *lhs; // left
Node *rhs; // right
int val;
};
Node *new_node(NodeKind kind, Node *lhs, Node *rhs){
Node *node = calloc(1, sizeof(Node));
node->kind = kind;
node->lhs = lhs;
node->rhs = rhs;
return node;
}
Node *new_node_num(int val) {
Node *node = calloc(1, sizeof(Node));
node->kind = ND_NUM;
node->val = val;
return node;
}
Node *expr();
Node *mul();
Node *primary();
Node *expr() {
Node *node = mul();
for(;;) {
if(consume('+'))
node = new_node(ND_ADD, node, mul());
else if (consume('-'))
node = new_node(ND_SUB, node, mul());
else
return node;
}
}
Node *mul() {
Node *node = primary();
for(;;) {
if(consume('*'))
node = new_node(ND_MUL, node, primary());
else if(consume('/'))
node = new_node(ND_DIV, node, primary());
else
return node;
}
}
Node *primary() {
if(consume('(')){
Node *node = expr();
expect(')');
return node;
}
return new_node_num(expect_number());
}
void gen(Node *node) {
if(node->kind == ND_NUM) {
printf(" push %d\n", node->val);
return;
}
gen(node->lhs);
gen(node->rhs);
printf(" pop rdi\n");
printf(" pop rax\n");
switch(node->kind) {
case ND_ADD:
printf(" addd rax, rdi\n");
break;
case ND_SUB:
printf(" sub rax, rdi\n");
break;
case ND_MUL:
printf(" imul rax, rdi\n");
break;
case ND_DIV:
printf(" cqo\n");
printf(" idiv rdi\n");
break;
}
printf(" push rax\n");
}
int main(int argc, char **argv){
if(argc != 2) {
error("引数の個数が正しくありません\n");
return 1;
}
user_input = argv[1];
token = tokenize(user_input);
Node *node = expr();
printf(".intel_syntax noprefix\n");
printf(".global main\n");
printf("main:\n");
gen(node);
printf(" pop rax\n");
printf(" ret\n");
return 0;
}
typedef enum {
ND_ADD, // +
ND_SUB, // -
ND_MUL, // *
ND_DIV, // /
ND_NUM, // num
} NodeKind;
typedef struct Node Node;
struct Node {
NodeKind kind; // node type
Node *lhs; // left
Node *rhs; // right
int val;
};
Node *new_node(NodeKind kind, Node *lhs, Node *rhs){
Node *node = calloc(1, sizeof(Node));
node->kind = kind;
node->lhs = lhs;
node->rhs = rhs;
return node;
}
Node *new_node_num(int val) {
Node *node = calloc(1, sizeof(Node));
node->kind = ND_NUM;
node->val = val;
return node;
}
Node *expr() {
Node *node = mul();
for(;;) {
if(consume('+'))
node = new_node(ND_ADD, node, mul());
else if (consume('-'))
node = new_node(ND_SUB, node, mul());
else
return node;
}
}
Node *mul() {
Node *node = primary();
for(;;) {
if(consume('*'))
node = new_node(ND_MUL, node, primary());
else if(consume('/'))
node = new_node(DN_DIV, node, primary());
else
return node;
}
}
Node *primary() {
if(consume('(')){
Node *node = expr();
expect(')');
return node;
}
return new_node_num(expect_number());
}
calloc関数は、大きさがsizeであるオブジェクトのnmemb個の配列の領域を割りあて
確保した先頭のメモリのポインタを返却
#include <stdio.h>
#include <stdlib.h>
void memdump(void *p);
int main(int argc, char*argv[]){
void *p = NULL;
p = calloc((size_t)10, (size_t)32);
if(p == NULL) {
fprintf(stdout, "calloc 10 * 32 byte error!\n");
return -1;
}
memdump(p);
free(p);
p = calloc((size_t)10, (size_t)32*1024);
if(p == NULL) {
fprintf(stdout, "calloc 10 * 32kbyte error!\n");
return -1;
}
memdump(p);
free(p);
p = calloc((size_t)10, (size_t)32*1024*1024);
if(p == NULL) {
fprintf(stdout, "calloc 10 * 32Mbyte error!\n");
return -1;
}
memdump(p);
free(p);
}
void memdump(void *p){
fprintf(stdout, "==== memdump start ====\n");
unsigned char *cp = NULL;
int i = 0;
for(cp = (char*)p, i = 0; i < 4; cp++, i++){
fprintf(stdout, "cp[%p]:%02x\n", cp, *cp);
}
fprintf( stdout, "==== memdump end ====\n\n" );
}
個数とバイトサイズを指定して確保する
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// token type 構造体
typedef enum {
TK_RESERVED, // op
TK_NUM,
TK_EOF, // end of input
} TokenKind;
typedef struct Token Token;
// token
struct Token {
TokenKind kind; // token type
Token *next; // next token
int val; // TK_NUM value
char *str; // token char
};
// now focus token
Token *token;
void error(char *fmt, ...){
va_list ap;
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
// if the next token is expected symbol, read forward one token and
// return true.
bool consume(char op) {
if(token->kind != TK_RESERVED | token->str[0] != op)
return false;
token = token->next;
return true;
}
// if the next token is expected symbol, read forward one token and
// otherwise report an error.
void expect(char op) {
if(token->kind != TK_RESERVED | token->str[0] != op)
error("'%c'ではありません。", op);
token = token->next;
}
// if the next token is expected number, read forward one token and
// otherwise report an error.
int expect_number() {
if(token->kind != TK_NUM)
error("数ではありません");
int val = token->val;
token = token->next;
return val;
}
bool at_eof() {
return token->kind == TK_EOF;
}
// create a new token and connect it to curl.
Token *new_token(TokenKind kind, Token *cur, char *str){
Token *tok = calloc(1, sizeof(token));
tok->kind = kind;
tok->str = str;
cur->next = tok;
return tok;
}
// Tokenize the input string p and return it
Token *tokenize(char *p) {
Token head;
head.next = NULL;
Token *cur = &head;
while (*p) {
// skip space
if(isspace(*p)){
p++;
continue;
}
if(*p == '+' || *p == '-') {
cur = new_token(TK_RESERVED, cur, p++);
continue;
}
if(isdigit(*p)) {
cur = new_token(TK_NUM, cur, p);
cur->val = strtol(p, &p, 10);
continue;
}
error("トークナイズできません");
}
new_token(TK_EOF, cur, p);
return head.next;
}
int main(int argc, char **argv){
if(argc != 2) {
fprintf(stderr, "引数の個数が正しくありません\n");
return 1;
}
token = tokenize(argv[1]);
printf(".intel_syntax noprefix\n");
printf(".global main\n");
printf("main:\n");
printf(" mov rax, %ld\n", expect_number());
while(!at_eof()){
if(consume('+')){
printf(" add rax, %d\n", expect_number());
}
expect('-');
printf(" sub rax, %d\n", expect_number());
}
printf(" ret\n");
return 0;
}
コンパイラはまず構文解析を行なってトークン列を抽象構文木に変換し、その構文木をアセンブリに変換
### 再帰下降構文解析
生成規則による演算子の優先順位
expr = mul (“+” mul | “-” mul)*
mul = num (“*” num | “/” num)*
// 足し算よりも掛け算が常に下に現れる
expr = mul (“+” mul | “-” mul)*
mul = primary (“*” primary | “/” primary)*
primary = num | “(” expr “)”
まずassemblerから
.intel_syntax noprefix
.globl main
main:
mov rax, 5
add rax, 20
sub rax, 4
ret
c
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv){
if(argc != 2) {
fprintf(stderr, "引数の個数が正しくありません\n");
return 1;
}
char *p = argv[1];
printf(".intel_syntax noprefix\n");
printf(".global main\n");
printf("main:\n");
printf(" mov rax, %ld\n", strtol(p, &p, 10));
while(*p){
if(*p == '+'){
p++;
printf(" add rax, %ld\n", strtol(p, &p, 10));
continue;
}
if(*p == '-') {
p++;
printf(" sub rax, %ld\n", strtol(p, &p, 10));
continue;
}
fprintf(stderr, "予期しない文字です: '%c'\n", *p);
return 1;
}
printf(" ret\n");
return 0;
}
$ make
cc -std=c11 -g -static 9cc.c -o 9cc
$ ./9cc ‘5+20-4’
.intel_syntax noprefix
.global main
main:
mov rax, 5
add rax, 20
sub rax, 4
ret
main:
mov rbp, rsp
mov %rsp, %rbp
mov rax, 8
mov $8, %rax
mov [rbp + rcx * 4 - 8], rax
mov %rax, -8(rbp, rcx, 4)
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv){
if(argc != 2) {
fprintf(stderr, "引数の個数が正しくありません\n");
return 1;
}
printf(".intel_syntax noprefix\n");
printf(".global main\n");
printf("main:\n");
printf(" mov rax, %d\n", atoi(argv[1]));
printf(" ret\n");
return 0;
}
$ cc -o 9cc 9cc.c
$ ./9cc 123 > tmp.s
.intel_syntax noprefix
.global main
main:
mov rax, 123
ret
テストスクリプト
#!/bin/bash
assert(){
expected="$1"
input="$2"
./9cc "$input" > tmp.s
cc -o tmp tmp.s
./tmp
actual="$?"
if ["$actual" = "$expected"]; then
echo "input => $actual"
else
echo "input => $expected expected, but got $actual"
exit 1
fi
}
assert 0 0
assert 42 42
echo OK
Makefile
CFLAGS=-std=c11 -g static 9cc: 9cc.c11 test: 9cc ./test.sh clean: rm -f 9cc *.o *~ tmp* .PHONY: test clean
EB4E9048454C4C4F49504C0002010100
02E000400BF009001200020000000000
400B0000000029FFFFFFFF48454C4C4F
2D4F5320202046415431322020200000
00000000000000000000000000000000
B800008ED0BC007C8ED88EC0BE747C8A
0483C6013C007409B40EBB0F00CD10EB
EEF4EBFD0A0A68656C6C6F2C20776F72
6C640A00000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
00000000000000000000000000000000
·N·HELLOIPL·····
···@············
@·····)····HELLO
-OS···FAT12·····
················
·······|·····t|·
····<·t·········
······hello,·wor
ld··············
上記バイナリでPCを起動するとhello worldが表示される。
最も簡単なOS
バイナリエディタも使えると幅が広がるとのこと