トークナイザ : tokenのlenの追加
// token struct Token { TokenKind kind; // token type Token *next; // next token int val; // TK_NUM value char *str; // token char int len; // length of token };
bool consume(char *op) { if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len)) return false; token = token->next; return true; }
演算子の優先順位
1. == !=
2. < <= > >=
3. +, –
4. * /
5. 単項+, –
6. ()
従って
expr = equality
equality = relational (“==” relational | “!=” relational)*
relational = add(“<" add | "<=" add | ">” add | “>=” add)*
add = mul (“+” mul | “-” mul)*
mul = primary (“*” primary | “/” unary)*
unary = (“+” | “-“)? primary
primary = num | “(” expr “)”
### 比較のアセンブリコード
pop rdi
pop rax
cmp rax, rdi // 比較結果はフラグレジスタに保存される
sete al // sete ALで更新すると、自動的にRAXも更新される
movzb rax, al
#include <ctype.h> #include <stdarg.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> // token type 構造体 typedef enum { TK_RESERVED, // op TK_NUM, TK_EOF, // end of input } TokenKind; typedef struct Token Token; // token struct Token { TokenKind kind; // token type Token *next; // next token int val; // TK_NUM value char *str; // token char int len; // length of token }; // now focus token Token *token; // input program char *user_input; // report error void error_at(char *loc, char *fmt, ...){ va_list ap; va_start(ap, fmt); int pos = loc - user_input; fprintf(stderr, "%s\n", user_input); fprintf(stderr, "%*s", pos, " "); // output pos blank fprintf(stderr, "^ "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); exit(1); } void error(char *fmt, ...){ va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); exit(1); } // if the next token is expected symbol, read forward one token and // return true. bool consume(char *op) { if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len)) return false; token = token->next; return true; } // if the next token is expected symbol, read forward one token and // otherwise report an error. void expect(char *op) { if(token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len)) error_at(token->str, "expected \"%s\"", op); token = token->next; } // if the next token is expected number, read forward one token and // otherwise report an error. int expect_number() { if(token->kind != TK_NUM) error("数ではありません"); int val = token->val; token = token->next; return val; } bool at_eof() { return token->kind == TK_EOF; } // create a new token and connect it to curl. Token *new_token(TokenKind kind, Token *cur, char *str, int len){ Token *tok = calloc(1, sizeof(token)); tok->kind = kind; tok->str = str; tok->len = len; cur->next = tok; return tok; } bool startswitch(char *p, char *q) { return memcmp(p, q, strlen(q)) == 0; } // Tokenize the input string p and return it Token *tokenize() { char *p = user_input; Token head; head.next = NULL; Token *cur = &head; while (*p) { // skip space if(isspace(*p)){ p++; continue; } if (startswith(p, "==") || startswith(p, "!=") || startswith(p, "<=") || startswith(p, ">=")) { cur = new_token(TK_RESERVED, cur, p, 2); p += 2; continue; } if(strchr("+-*/()<>", *p)){ cur = new_token(TK_RESERVED, cur, p++, 1); continue; } if(isdigit(*p)) { cur = new_token(TK_NUM, cur, p, 0); char *q = p; cur->val = strtol(p, &p, 10); cur->len = p - q; continue; } error_at(p, "invalid token"); } new_token(TK_EOF, cur, p, 0); return head.next; } typedef enum { ND_ADD, // + ND_SUB, // - ND_MUL, // * ND_DIV, // / ND_EQ, // == ND_NE, // != ND_LT, // < ND_LE, // <= ND_NUM, // num } NodeKind; typedef struct Node Node; struct Node { NodeKind kind; // node type Node *lhs; // left Node *rhs; // right int val; }; Node *new_node(NodeKind kind){ Node *node = calloc(1, sizeof(Node)); node->kind = kind; return node; } Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) { Node *node = new_node(kind); node->lhs = lhs; node->rhs = rhs; return node; } Node *new_num(int val){ Node *node = new_node(ND_NUM); node->val = val; return node; } Node *expr(); Node *equality(); Node *relational(); Node *add(); Node *mul(); Node *unary(); Node *primary(); Node *expr() { return equality(); } Node *equality(){ Node *node = relational(); for(;;){ if(consume("==")) node == new_binary(ND_EQ, node, relational()); else if (consume("!=")) node = new_binary(ND_NE, node, relational()); else return node; } } Node *relational(){ Node *node = add(); for(;;) { if(consume("<")) node = new_binary(ND_LT, node, add()); else if (consume("<=")) node = new_binary(ND_LE, node, add()); else if (consume(">")) node = new_binary(ND_LT, add(), node); else if (consume(">=")) node = new_binary(ND_LE, add(), node); else return node; } } Node *add() { Node *node = mul(); for(;;) { if(consume('+')) node = new_binary(ND_ADD, node, mul()); else if(consume('-')) node = new_binary(ND_DIV, node, mul()); else return node; } } Node *mul() { Node *node = unary(); for(;;) { if(consume('*')) node = new_binary(ND_MUL, node, unary()); else if(consume('/')) node = new_binary(ND_DIV, node, unary()); else return node; } } Node *unary() { if (consume('+')) return unary(); if (consume('-')) return new_binary(ND_SUB, new_node_num(0), unary()); return primary(); } Node *primary() { if(consume('(')){ Node *node = expr(); expect(')'); return node; } return new_node_num(expect_number()); } void gen(Node *node) { if(node->kind == ND_NUM) { printf(" push %d\n", node->val); return; } gen(node->lhs); gen(node->rhs); printf(" pop rdi\n"); printf(" pop rax\n"); switch(node->kind) { case ND_ADD: printf(" addd rax, rdi\n"); break; case ND_SUB: printf(" sub rax, rdi\n"); break; case ND_MUL: printf(" imul rax, rdi\n"); break; case ND_DIV: printf(" cqo\n"); printf(" idiv rdi\n"); break; case ND_EQ: printf(" cmp rax, rdi\n"); printf(" sete al\n"); printf(" movzb rax, al\n"); break; case ND_NE: printf(" cmp rax, rdi\n"); printf(" sete al\n"); printf(" movzb rax, al\n"); break; case ND_LT: printf(" cmp rax, rdi\n"); printf(" sete al\n"); printf(" movzb rax, al\n"); break; case ND_LE: printf(" cmp rax, rdi\n"); printf(" sete al\n"); printf(" movzb rax, al\n"); break; } printf(" push rax\n"); } int main(int argc, char **argv){ if(argc != 2) { error("引数の個数が正しくありません\n"); return 1; } user_input = argv[1]; token = tokenize(user_input); Node *node = expr(); printf(".intel_syntax noprefix\n"); printf(".global main\n"); printf("main:\n"); gen(node); printf(" pop rax\n"); printf(" ret\n"); return 0; }