トークンごとに、その属性を表示します。文字種表も使います。識別子には英数字と下線を利用でき、先頭は英字か下線です。エスケープ文字や漢字、コメント機能はないものとします。
/*-----------------------------------------*/
/*  Lexical analysis          token_p.cpp  */
/*-----------------------------------------*/
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <cstdio>
#include <cstdlib>
#include <cctype>
using namespace std;

// Token kinds. The same values are also stored in ctyp[] as the class of a
// single character (Letter, Digit, Others, or the operator kind itself).
enum TknKind {
    Lparen=1, Rparen, Plus, Minus, Multi, Divi, Assign, Comma, DblQ,
    Equal, NotEq, Less, LessEq, Great, GreatEq,
    If, Else, End, Print, Ident, IntNum,
    String, Letter, Digit, EofTkn, Others, END_list
};

// One lexical token: its kind, its spelling, and (for IntNum) its value.
struct Token {
    TknKind kind;
    string  text;
    int     intVal;
    Token() : kind(Others), text(""), intVal(0) {}
    Token(TknKind k, const string& s, int d = 0) : kind(k), text(s), intVal(d) {}
};

void    initChTyp();
Token   nextTkn();
int     nextCh();
bool    is_ope2(int c1, int c2);
TknKind get_kind(const string& s);
TknKind chKind(int c);

TknKind  ctyp[256];   // character class table, filled by initChTyp()
Token    token;       // token currently being reported by main()
ifstream fin;         // input source file

// Spelling -> kind table for keywords and operators, terminated by END_list.
struct KeyWord {
    const char *keyName;
    TknKind     keyKind;
};

KeyWord KeyWdTbl[] = {
    {"if",    If    }, {"else", Else   },
    {"end",   End   }, {"print", Print },
    {"(",     Lparen}, {")",    Rparen },
    {"+",     Plus  }, {"-",    Minus  },
    {"*",     Multi }, {"/",    Divi   },
    {"=",     Assign}, {",",    Comma  },
    {"==",    Equal }, {"!=",   NotEq  },
    {"<",     Less  }, {"<=",   LessEq },
    // '>' and '>=' are classified by initChTyp()/is_ope2(), so they need
    // table entries too; without them get_kind() rejects both operators.
    {">",     Great }, {">=",   GreatEq},
    {"",      END_list},
};

// Reads the file named by argv[1] and prints one line per token:
// text, numeric kind, and integer value. Exits with status 1 when no file
// name is given or the file cannot be opened.
int main(int argc, char *argv[])
{
    if (argc == 1) exit(1);
    fin.open(argv[1]);
    if (!fin) exit(1);

    cout << "text kind intVal\n";
    initChTyp();
    for (token = nextTkn(); token.kind != EofTkn; token = nextTkn()) {
        cout << left << setw(10) << token.text
             << right << setw(3) << token.kind
             << " " << token.intVal << '\n';
    }
    return 0;
}

// Fills ctyp[]: everything is Others except digits, letters, '_' (treated as
// a letter), and the single-character operators / double quote.
void initChTyp()
{
    int i;
    for (i = 0;   i < 256;  i++) { ctyp[i] = Others; }
    for (i = '0'; i <= '9'; i++) { ctyp[i] = Digit;  }
    for (i = 'A'; i <= 'Z'; i++) { ctyp[i] = Letter; }
    for (i = 'a'; i <= 'z'; i++) { ctyp[i] = Letter; }
    ctyp['('] = Lparen; ctyp[')'] = Rparen;
    ctyp['<'] = Less;   ctyp['>'] = Great;
    ctyp['+'] = Plus;   ctyp['-'] = Minus;
    ctyp['*'] = Multi;  ctyp['/'] = Divi;
    ctyp['_'] = Letter; ctyp['='] = Assign;
    ctyp[','] = Comma;  ctyp['"'] = DblQ;
}

// Bounds-checked lookup into ctyp[]. Anything outside 0..255 (in particular
// EOF, which is negative) is classified as Others instead of indexing
// outside the table.
TknKind chKind(int c)
{
    return (c >= 0 && c < 256) ? ctyp[c] : Others;
}

// Returns the next token from fin, or a token of kind EofTkn at end of input.
// Prints a message and exits with status 1 on an unterminated string literal
// or an unrecognized token.
Token nextTkn()
{
    static int ch = ' ';   // one-character lookahead kept between calls
    string txt;

    while (isspace(ch)) { ch = nextCh(); }
    if (ch == EOF) return Token(EofTkn, txt);

    switch (chKind(ch)) {
    case Letter:           // identifier or keyword
        for ( ; chKind(ch) == Letter || chKind(ch) == Digit; ch = nextCh()) {
            txt += static_cast<char>(ch);
        }
        break;
    case Digit: {          // integer literal
        // Only intVal carries the value; text is left empty for numbers.
        // NOTE(review): there is no overflow check on num.
        int num = 0;
        for ( ; chKind(ch) == Digit; ch = nextCh()) {
            num = num * 10 + (ch - '0');
        }
        return Token(IntNum, txt, num);
    }
    case DblQ:             // string literal; must close on the same line
        for (ch = nextCh(); ch != EOF && ch != '\n' && ch != '"'; ch = nextCh()) {
            txt += static_cast<char>(ch);
        }
        if (ch != '"') { cout << "文字列リテラルが閉じていない\n"; exit(1); }
        ch = nextCh();
        return Token(String, txt);
    default: {             // operator or other symbol, one or two characters
        const int first = ch;
        txt += static_cast<char>(first);
        ch = nextCh();
        if (is_ope2(first, ch)) {
            txt += static_cast<char>(ch);
            ch = nextCh();
        }
    }
    }

    const TknKind kd = get_kind(txt);
    if (kd == Others) {
        cout << "不正なトークンです:" << txt << endl;
        exit(1);
    }
    return Token(kd, txt);
}

// Returns the next character of fin, or EOF. Once EOF has been seen the file
// is closed and every later call keeps returning EOF.
int nextCh()
{
    static int c = 0;
    if (c == EOF) return c;
    if ((c = fin.get()) == EOF) fin.close();
    return c;
}

// True when c1 followed by c2 spells one of the two-character operators
// "<=", ">=", "==" or "!=".
bool is_ope2(int c1, int c2)
{
    if (c1 == '\0' || c2 == '\0') return false;
    return c2 == '=' && (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '!');
}

// Maps a token spelling to its kind: a KeyWdTbl entry if one matches,
// otherwise Ident / IntNum according to the class of the first character,
// otherwise Others.
TknKind get_kind(const string& s)
{
    for (int i = 0; KeyWdTbl[i].keyKind != END_list; i++) {
        if (s == KeyWdTbl[i].keyName) return KeyWdTbl[i].keyKind;
    }
    if (s.empty()) return Others;
    // Index through unsigned char so bytes >= 0x80 cannot become negative.
    const TknKind first = ctyp[static_cast<unsigned char>(s[0])];
    if (first == Letter) return Ident;
    if (first == Digit)  return IntNum;
    return Others;
}