compilerのトークナイズ

#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// token type 構造体
typedef enum {
    TK_RESERVED, // op
    TK_NUM,
    TK_EOF, // end of input
} TokenKind;

typedef struct Token Token;

// token 
struct Token {
    TokenKind kind; // token type
    Token *next; // next token
    int val; // TK_NUM value
    char *str; // token char
};

// now focus token
Token *token;

void error(char *fmt, ...){
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    exit(1);
}

// if the next token is expected symbol, read forward one token and 
// return true.
bool consume(char op) {
    if(token->kind != TK_RESERVED | token->str[0] != op)
        return false;
    token = token->next;
    return true;
}

// if the next token is expected symbol, read forward one token and 
// otherwise report an error.
void expect(char op) {
    if(token->kind != TK_RESERVED | token->str[0] != op)
        error("'%c'ではありません。", op);
    token = token->next;
}

// if the next token is expected number, read forward one token and 
// otherwise report an error.
int expect_number() {
    if(token->kind != TK_NUM)
        error("数ではありません");
    int val = token->val;
    token = token->next;
    return val;
}

bool at_eof() {
    return token->kind == TK_EOF;
}

// create a new token and connect it to curl.
Token *new_token(TokenKind kind, Token *cur, char *str){
    Token *tok = calloc(1, sizeof(token));
    tok->kind = kind;
    tok->str = str;
    cur->next = tok;
    return tok;
}

// Tokenize the input string p and return it
Token *tokenize(char *p) {
    Token head;
    head.next = NULL;
    Token *cur = &head;

    while (*p) {
        // skip space
        if(isspace(*p)){
            p++;
            continue;
        }

        if(*p == '+' || *p == '-') {
            cur = new_token(TK_RESERVED, cur, p++);
            continue;
        }

        if(isdigit(*p)) {
            cur = new_token(TK_NUM, cur, p);
            cur->val = strtol(p, &p, 10);
            continue;
        }
        error("トークナイズできません");
    }

    new_token(TK_EOF, cur, p);
    return head.next;
}



int main(int argc, char **argv){
    if(argc != 2) {
        fprintf(stderr, "引数の個数が正しくありません\n");
        return 1;
    }

    token = tokenize(argv[1]);

    printf(".intel_syntax noprefix\n");
    printf(".global main\n");
    printf("main:\n");
    printf("    mov rax, %ld\n", expect_number());

    while(!at_eof()){
        if(consume('+')){
            printf("    add rax, %d\n", expect_number());
        }

        expect('-');
        printf("    sub rax, %d\n", expect_number());
    }

    printf("    ret\n");
    return 0;
}

コンパイラはまず構文解析を行なってトークン列を抽象構文木に変換し、その構文木をアセンブリに変換

### 再帰下降構文解析
生成規則による演算子の優先順位
expr = mul (“+” mul | “-” mul)*
mul = num (“*” num | “/” num)*
// 足し算よりも掛け算が常に下に現れる

expr = mul (“+” mul | “-” mul)*
mul = primary (“*” primary | “/” primary)*
primary = num | “(” expr “)”