# Train a random-forest classifier on mushroom.csv.
#
# Every attribute value is turned into a number with ord(), so each cell of
# the CSV must be a single character. The first column is used as the class
# label and all remaining columns as features.
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier

try:
    # scikit-learn >= 0.18; sklearn.cross_validation was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import train_test_split

# The file has no header row, so columns are labelled 0..N-1.
mr = pd.read_csv("mushroom.csv", header=None)

# Split each row into its label (column 0) and its numeric feature vector.
label = []
data = []
for _, row in mr.iterrows():
    # .iloc replaces the removed .ix indexer; with integer column labels
    # starting at 0 the positional lookup selects the same cells.
    label.append(row.iloc[0])
    data.append([ord(v) for v in row.iloc[1:]])

# Hold out part of the data for evaluation (library default split ratio).
data_train, data_test, label_train, label_test = train_test_split(data, label)

# Fit the model on the training portion and predict the held-out portion.
clf = RandomForestClassifier()
clf.fit(data_train, label_train)
predict = clf.predict(data_test)

# Report accuracy and the per-class precision/recall table.
ac_score = metrics.accuracy_score(label_test, predict)
cl_report = metrics.classification_report(label_test, predict)
print("正解率=", ac_score)
print("レポート=\n", cl_report)
[vagrant@localhost python]$ python3 app.py
正解率= 1.0
レポート=
precision recall f1-score support
e 1.00 1.00 1.00 1031
p 1.00 1.00 1.00 1000
avg / total 1.00 1.00 1.00 2031
# Train a random-forest classifier on mushroom.csv using one-hot features.
#
# The first column of each row is the class label. Every other column is a
# categorical attribute that is expanded into a fixed-width 0/1 vector with a
# single 1 marking which distinct value the cell holds.
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier

try:
    # scikit-learn >= 0.18; sklearn.cross_validation was removed in 0.20.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import train_test_split

# Slots reserved per attribute in the one-hot vector. An attribute with more
# distinct values than this raises IndexError while encoding.
ONE_HOT_WIDTH = 12

# The file has no header row, so columns are labelled 0..N-1.
mr = pd.read_csv("mushroom.csv", header=None)

label = []
data = []
# One lookup table per attribute column: "dic" maps a value to its slot
# index, "cnt" is the next unused slot index for that attribute.
attr_list = [{"dic": {}, "cnt": 0} for _ in range(mr.shape[1] - 1)]

for _, row in mr.iterrows():
    # .iloc replaces the removed .ix indexer; with integer column labels
    # starting at 0 the positional lookup selects the same cells.
    label.append(row.iloc[0])
    exdata = []
    for col, v in enumerate(row.iloc[1:]):
        attr = attr_list[col]
        # Assign slots in order of first appearance of each value.
        if v not in attr["dic"]:
            attr["dic"][v] = attr["cnt"]
            attr["cnt"] += 1
        d = [0] * ONE_HOT_WIDTH
        d[attr["dic"][v]] = 1
        exdata += d
    data.append(exdata)

# Hold out part of the data for evaluation (library default split ratio).
data_train, data_test, label_train, label_test = train_test_split(data, label)

# Fit the model on the training portion and predict the held-out portion.
clf = RandomForestClassifier()
clf.fit(data_train, label_train)
predict = clf.predict(data_test)

# Report accuracy on the held-out portion.
ac_score = metrics.accuracy_score(label_test, predict)
print("正解率=", ac_score)