import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
# sklearn.cross_validation was removed from scikit-learn; train_test_split
# now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Load the data set. The CSV has no header row, so pandas assigns the
# integer column labels 0..N-1.
mr = pd.read_csv("mushroom.csv", header=None)

# Column 0 holds the class label; every remaining column is turned into a
# number with ord(), which requires each value to be a one-character string.
label = []
data = []
for _, row in mr.iterrows():
    # .iloc replaces the removed Series.ix accessor; with integer column
    # labels, position and label coincide, so the selection is unchanged.
    label.append(row.iloc[0])
    data.append([ord(v) for v in row.iloc[1:]])

# Split into training and test sets using train_test_split's default ratio.
data_train, data_test, label_train, label_test = train_test_split(data, label)

# Train the classifier and predict the held-out labels.
clf = RandomForestClassifier()
clf.fit(data_train, label_train)
predict = clf.predict(data_test)

# Report the accuracy and the per-class precision / recall / F1 table.
ac_score = metrics.accuracy_score(label_test, predict)
cl_report = metrics.classification_report(label_test, predict)
print("正解率=", ac_score)
print("レポート=\n", cl_report)
[vagrant@localhost python]$ python3 app.py
正解率= 1.0
レポート=
             precision    recall  f1-score   support
          e       1.00      1.00      1.00      1031
          p       1.00      1.00      1.00      1000
avg / total       1.00      1.00      1.00      2031
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
# sklearn.cross_validation was removed from scikit-learn; train_test_split
# now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Number of one-hot slots reserved for every feature column. Each column may
# therefore hold at most this many distinct values.
ONE_HOT_WIDTH = 12

# Load the data set. The CSV has no header row, so pandas assigns the
# integer column labels 0..N-1 and a row index starting at 0.
mr = pd.read_csv("mushroom.csv", header=None)

label = []
data = []
# One entry per feature column: "dic" maps each value seen so far to its
# one-hot slot, and "cnt" is the next free slot for that column.
attr_list = []

for row_index, row in mr.iterrows():
    # Column 0 is the class label. .iloc replaces the removed Series.ix
    # accessor; with integer column labels the selection is unchanged.
    label.append(row.iloc[0])
    exdata = []
    for col, v in enumerate(row.iloc[1:]):
        if row_index == 0:
            # First row: create the bookkeeping entry for this column.
            attr = {"dic": {}, "cnt": 0}
            attr_list.append(attr)
        else:
            attr = attr_list[col]

        if v in attr["dic"]:
            idx = attr["dic"][v]
        else:
            # A value not seen before in this column takes the next slot.
            idx = attr["cnt"]
            if idx >= ONE_HOT_WIDTH:
                raise ValueError(
                    "feature column %d has more than %d distinct values"
                    % (col + 1, ONE_HOT_WIDTH)
                )
            attr["dic"][v] = idx
            attr["cnt"] += 1

        # Fixed-width one-hot vector for this value, appended to the row.
        d = [0] * ONE_HOT_WIDTH
        d[idx] = 1
        exdata += d
    data.append(exdata)

# Split into training and test sets using train_test_split's default ratio.
data_train, data_test, label_train, label_test = train_test_split(data, label)

# Train the classifier, predict the held-out labels and report the accuracy.
clf = RandomForestClassifier()
clf.fit(data_train, label_train)
predict = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, predict)
print("正解率=", ac_score)

