トークナイザ : tokenのlenの追加
// token
struct Token {
TokenKind kind; // token type
Token *next; // next token
int val; // TK_NUM value
char *str; // token char
int len; // length of token
};
bool consume(char *op) {
if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len))
return false;
token = token->next;
return true;
}
演算子の優先順位
1. == !=
2. < <= > >=
3. +, –
4. * /
5. 単項+, –
6. ()
従って
expr = equality
equality = relational (“==” relational | “!=” relational)*
relational = add(“<" add | "<=" add | ">” add | “>=” add)*
add = mul (“+” mul | “-” mul)*
mul = primary (“*” primary | “/” unary)*
unary = (“+” | “-“)? primary
primary = num | “(” expr “)”
### 比較のアセンブリコード
pop rdi
pop rax
cmp rax, rdi // 比較結果はフラグレジスタに保存される
sete al // sete ALで更新すると、自動的にRAXも更新される
movzb rax, al
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// token type 構造体
typedef enum {
TK_RESERVED, // op
TK_NUM,
TK_EOF, // end of input
} TokenKind;
typedef struct Token Token;
// token
struct Token {
TokenKind kind; // token type
Token *next; // next token
int val; // TK_NUM value
char *str; // token char
int len; // length of token
};
// now focus token
Token *token;
// input program
char *user_input;
// report error
void error_at(char *loc, char *fmt, ...){
va_list ap;
va_start(ap, fmt);
int pos = loc - user_input;
fprintf(stderr, "%s\n", user_input);
fprintf(stderr, "%*s", pos, " "); // output pos blank
fprintf(stderr, "^ ");
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
void error(char *fmt, ...){
va_list ap;
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
exit(1);
}
// if the next token is expected symbol, read forward one token and
// return true.
bool consume(char *op) {
if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len))
return false;
token = token->next;
return true;
}
// if the next token is expected symbol, read forward one token and
// otherwise report an error.
void expect(char *op) {
if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len))
error_at(token->str, "expected \"%s\"", op);
token = token->next;
}
// if the next token is expected number, read forward one token and
// otherwise report an error.
int expect_number() {
if(token->kind != TK_NUM)
error("数ではありません");
int val = token->val;
token = token->next;
return val;
}
bool at_eof() {
return token->kind == TK_EOF;
}
// create a new token and connect it to curl.
Token *new_token(TokenKind kind, Token *cur, char *str, int len){
Token *tok = calloc(1, sizeof(token));
tok->kind = kind;
tok->str = str;
tok->len = len;
cur->next = tok;
return tok;
}
bool startswitch(char *p, char *q) {
return memcmp(p, q, strlen(q)) == 0;
}
// Tokenize the input string p and return it
Token *tokenize() {
char *p = user_input;
Token head;
head.next = NULL;
Token *cur = &head;
while (*p) {
// skip space
if(isspace(*p)){
p++;
continue;
}
if (startswith(p, "==") || startswith(p, "!=") ||
startswith(p, "<=") || startswith(p, ">=")) {
cur = new_token(TK_RESERVED, cur, p, 2);
p += 2;
continue;
}
if(strchr("+-*/()<>", *p)){
cur = new_token(TK_RESERVED, cur, p++, 1);
continue;
}
if(isdigit(*p)) {
cur = new_token(TK_NUM, cur, p, 0);
char *q = p;
cur->val = strtol(p, &p, 10);
cur->len = p - q;
continue;
}
error_at(p, "invalid token");
}
new_token(TK_EOF, cur, p, 0);
return head.next;
}
typedef enum {
ND_ADD, // +
ND_SUB, // -
ND_MUL, // *
ND_DIV, // /
ND_EQ, // ==
ND_NE, // !=
ND_LT, // <
ND_LE, // <=
ND_NUM, // num
} NodeKind;
typedef struct Node Node;
struct Node {
NodeKind kind; // node type
Node *lhs; // left
Node *rhs; // right
int val;
};
Node *new_node(NodeKind kind){
Node *node = calloc(1, sizeof(Node));
node->kind = kind;
return node;
}
Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) {
Node *node = new_node(kind);
node->lhs = lhs;
node->rhs = rhs;
return node;
}
Node *new_num(int val){
Node *node = new_node(ND_NUM);
node->val = val;
return node;
}
Node *expr();
Node *equality();
Node *relational();
Node *add();
Node *mul();
Node *unary();
Node *primary();
Node *expr() {
return equality();
}
Node *equality(){
Node *node = relational();
for(;;){
if(consume("=="))
node == new_binary(ND_EQ, node, relational());
else if (consume("!="))
node = new_binary(ND_NE, node, relational());
else
return node;
}
}
Node *relational(){
Node *node = add();
for(;;) {
if(consume("<"))
node = new_binary(ND_LT, node, add());
else if (consume("<="))
node = new_binary(ND_LE, node, add());
else if (consume(">"))
node = new_binary(ND_LT, add(), node);
else if (consume(">="))
node = new_binary(ND_LE, add(), node);
else
return node;
}
}
Node *add() {
Node *node = mul();
for(;;) {
if(consume('+'))
node = new_binary(ND_ADD, node, mul());
else if(consume('-'))
node = new_binary(ND_DIV, node, mul());
else
return node;
}
}
Node *mul() {
Node *node = unary();
for(;;) {
if(consume('*'))
node = new_binary(ND_MUL, node, unary());
else if(consume('/'))
node = new_binary(ND_DIV, node, unary());
else
return node;
}
}
Node *unary() {
if (consume('+'))
return unary();
if (consume('-'))
return new_binary(ND_SUB, new_node_num(0), unary());
return primary();
}
Node *primary() {
if(consume('(')){
Node *node = expr();
expect(')');
return node;
}
return new_node_num(expect_number());
}
void gen(Node *node) {
if(node->kind == ND_NUM) {
printf(" push %d\n", node->val);
return;
}
gen(node->lhs);
gen(node->rhs);
printf(" pop rdi\n");
printf(" pop rax\n");
switch(node->kind) {
case ND_ADD:
printf(" addd rax, rdi\n");
break;
case ND_SUB:
printf(" sub rax, rdi\n");
break;
case ND_MUL:
printf(" imul rax, rdi\n");
break;
case ND_DIV:
printf(" cqo\n");
printf(" idiv rdi\n");
break;
case ND_EQ:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
case ND_NE:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
case ND_LT:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
case ND_LE:
printf(" cmp rax, rdi\n");
printf(" sete al\n");
printf(" movzb rax, al\n");
break;
}
printf(" push rax\n");
}
int main(int argc, char **argv){
if(argc != 2) {
error("引数の個数が正しくありません\n");
return 1;
}
user_input = argv[1];
token = tokenize(user_input);
Node *node = expr();
printf(".intel_syntax noprefix\n");
printf(".global main\n");
printf("main:\n");
gen(node);
printf(" pop rax\n");
printf(" ret\n");
return 0;
}