字句解析

トークンごとに、その属性を表示します。文字種表も使います。識別子には英数字と下線を使用でき、先頭は英字か下線です。エスケープ文字や漢字、コメント機能はないものとします。

/*--------------------*/
/* 字句解析 token_p.cpp */
/*--------------------*/
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <cstdlib>
#include <cctype>
using namespace std;

/* Token kinds and character classes. Numbering starts at 1 and is contiguous. */
enum TknKind {
    /* single-character symbols */
    Lparen   = 1,
    Rparen   = 2,
    Plus     = 3,
    Minus    = 4,
    Multi    = 5,
    Divi     = 6,
    Assign   = 7,
    Comma    = 8,
    DblQ     = 9,   /* double-quote character class */
    /* comparison operators */
    Equal    = 10,
    NotEq    = 11,
    Less     = 12,
    LessEq   = 13,
    Great    = 14,
    GreatEq  = 15,
    /* keywords */
    If       = 16,
    Else     = 17,
    End      = 18,
    Print    = 19,
    /* identifiers and literals */
    Ident    = 20,
    IntNum   = 21,
    String   = 22,
    /* character classes stored in the character-type table */
    Letter   = 23,
    Digit    = 24,
    /* special markers */
    EofTkn   = 25,  /* end of input */
    Others   = 26,  /* anything not classified above */
    END_list = 27   /* sentinel terminating the keyword table */
};

struct Token {
    TknKind kind;
    string text;
    int intVal;
    Token() { kind = Others; text = ""; intVal = 0; }
    Token(TknKind k, const string& s, int d=0){
        kind = k; text = s; intVal = d;
    }
};

/* Forward declarations of the lexer routines defined below. */
void initChTyp();                      /* fill the character-type table */
Token nextTkn();                       /* read the next token from the input */
int nextCh();                          /* read the next character from fin */
bool is_ope2(int c1, int c2);          /* do c1,c2 form a two-character operator? */
TknKind get_kind(const string& s);     /* map token text to its kind */

TknKind ctyp[256];   /* character-type table indexed by character code; filled by initChTyp() */
Token token;         /* token currently being displayed by main() */
ifstream fin;        /* input file stream; opened in main(), read by nextCh() */

struct KeyWord {
    const char *keyName;
    TknKind keyKind;
};

KeyWord KeyWdTbl[] = {
    {"if", If }, {"else", Else },
    {"end", End},{"print", Print },
    {"(", Lparen}, {")", Rparen},
    {"+", Plus }, {"-", Minus },
    {"*", Multi },{"/", Divi },
    {"=", Assign}, {",", Comma },
    {"==", Equal }, {"!=", NotEq },
    {"<", Less }, {"<=", LessEq },
    {"", END_list},
};

int main(int argc, char *argv&#91;&#93;)
{
    if (argc == 1) exit(1);
    fin.open(argv&#91;1&#93;); if (!fin) exit(1);
    
    cout << "text kind intVal\n";
    initChTyp();
    for (token = nextTkn(); token.kind != EofTkn; token = nextTkn()){
        cout << left << setw(10) << token.text
        << right << setw(3) << token.kind
        << " " << token.intVal << endl;
    }
    return 0;
}

void initChTyp()
{
    int i;
    
    for(i=0; i<256; i++) { ctyp&#91;i&#93; = Others; }
    for(i='0'; i<='9'; i++){ ctyp&#91;i&#93; = Digit; }
    for(i='A'; i<='Z'; i++){ ctyp&#91;i&#93;=Letter; }
    for(i='a'; i<='z'; i++){ ctyp&#91;i&#93;=Letter; }
    ctyp&#91;'('&#93;= Lparen; ctyp&#91;')'&#93; = Rparen;
    ctyp&#91;'<'&#93;=Less; ctyp&#91;'>'] = Great;
    ctyp['+']= Plus; ctyp['-'] = Minus;
    ctyp['*']= Multi; ctyp['/'] = Divi;
    ctyp['_']= Letter; ctyp['=']= Assign;
    ctyp[',']= Comma; ctyp['"']= DblQ;
}

/*
 * Read and return the next token from the input.
 *
 * Skips leading whitespace, then dispatches on the character class of the
 * first character:
 *   Letter - collects letters/digits; the kind is decided by get_kind().
 *   Digit  - accumulates a decimal value and returns IntNum; the token text
 *            is left empty and the value is carried in intVal.
 *   DblQ   - collects characters up to the closing quote on the same line
 *            and returns String (quotes excluded).
 *   other  - takes one character, or two when is_ope2() says they form a
 *            two-character operator; the kind is decided by get_kind().
 * Returns an EofTkn token at end of input. Prints a message and exits with
 * status 1 on an unterminated string literal or an unrecognised token.
 *
 * One look-ahead character is kept in a function-local static between calls.
 */
Token nextTkn()
{
    static int ch = ' ';   // look-ahead character, persists across calls
    TknKind kd;
    int num = 0;
    string txt = "";

    while (isspace(ch)) { ch = nextCh(); }
    if (ch == EOF) return Token(EofTkn, txt);

    switch (ctyp[ch]) {
    case Letter:
        // ch may become EOF (-1) inside the loop; never use it as a table index.
        for ( ; ch != EOF && (ctyp[ch] == Letter || ctyp[ch] == Digit); ch = nextCh()) {
            txt += static_cast<char>(ch);
        }
        break;
    case Digit:
        for (num = 0; ch != EOF && ctyp[ch] == Digit; ch = nextCh()) {
            num = num*10 + (ch - '0');
        }
        return Token(IntNum, txt, num);
    case DblQ:
        for (ch = nextCh(); ch != EOF && ch != '\n' && ch != '"'; ch = nextCh()) {
            txt += static_cast<char>(ch);
        }
        if (ch != '"') { cout << "文字列リテラルが閉じていない\n"; exit(1); }
        ch = nextCh();     // step past the closing quote
        return Token(String, txt);
    default: {
        const int first = ch;
        txt += static_cast<char>(ch);
        ch = nextCh();
        if (is_ope2(first, ch)) { txt += static_cast<char>(ch); ch = nextCh(); }
        break;
    }
    }

    kd = get_kind(txt);
    if (kd == Others) {
        cout << "不正なトークンです:" << txt << endl; exit(1);
    }
    return Token(kd, txt);
}
    
    /*
     * Return the next character from fin, or EOF.
     * On first reaching EOF the stream is closed; every later call returns
     * EOF again without touching the stream.
     */
    int nextCh()
    {
        static int last = 0;   // most recently read character, kept between calls

        if (last != EOF) {
            last = fin.get();
            if (last == EOF) {
                fin.close();
            }
        }
        return last;
    }
    
    /*
     * Report whether the characters c1 and c2, in that order, form one of the
     * two-character operators "<=", ">=", "==" or "!=".
     * Any other pair (including a NUL or EOF in either position) yields false.
     */
    bool is_ope2(int c1, int c2)
    {
        // Every two-character operator ends in '='; only the first character varies.
        if (c2 != '=') return false;
        return c1 == '<' || c1 == '>' || c1 == '=' || c1 == '!';
    }
    
    /*
     * Map token text to its kind.
     * The text is first matched exactly against KeyWdTbl (keywords and
     * operator symbols). Otherwise the class of its first character decides:
     * Letter -> Ident, Digit -> IntNum, anything else (or empty text) -> Others.
     */
    TknKind get_kind(const string& s)
    {
        for (int i = 0; KeyWdTbl[i].keyKind != END_list; i++){
            if (s == KeyWdTbl[i].keyName) return KeyWdTbl[i].keyKind;
        }
        if (s.empty()) return Others;
        // char may be signed: convert through unsigned char so bytes >= 0x80
        // index inside ctyp[0..255] instead of before the start of the table.
        const unsigned char head = static_cast<unsigned char>(s[0]);
        if (ctyp[head] == Letter) return Ident;
        if (ctyp[head] == Digit) return IntNum;
        return Others;
    }