sklearn import pandas

import pandas as pd
from sklearn import svm, metrics

# Truth table for XOR: columns 0 and 1 are the inputs, column 2 is the label.
xor_input = [
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 0],
]

xor_df = pd.DataFrame(xor_input)
# DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0.
# .iloc selects by position; the end of the slice is exclusive, so 0:2
# picks the same two input columns that the old label-based .ix[:, 0:1] did.
xor_data = xor_df.iloc[:, 0:2]
xor_label = xor_df.iloc[:, 2]

# Train a support vector classifier on the table and predict the same rows.
clf = svm.SVC()
clf.fit(xor_data, xor_label)
pre = clf.predict(xor_data)

# Fraction of predictions that match the known labels.
ac_score = metrics.accuracy_score(xor_label, pre)
print(" 正解率=", ac_score)

[vagrant@localhost python]$ python3 app.py
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
正解率= 1.0

こりゃあかん、プログラミングやり始めた時と全く同じだ。
何故こう動いているか理解できん。

scikit-learnのfit()メソッドを使う

いきなりコードから始めます。

from sklearn import svm

# Truth table for XOR: two input bits followed by the expected output bit.
xor_data = [
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 0],
]

# Separate the input pairs from the expected outputs.
data = [[row[0], row[1]] for row in xor_data]
label = [row[2] for row in xor_data]

# Fit the classifier on the table, then predict the very same inputs.
clf = svm.SVC()
clf.fit(data, label)

pre = clf.predict(data)
print("予測結果:", pre)

# Count how many predictions agree with the expected labels.
total = len(label)
ok = sum(1 for guess, answer in zip(pre, label) if guess == answer)
print("正解率:", ok, "/", total, "=", ok/total)

続いてコマンドライン
[vagrant@localhost python]$ python3 app.py
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
return f(*args, **kwds)
予測結果: [0 1 1 0]
正解率: 4 / 4 = 1.0

なんだこりゃ、いきなり一気に難易度が上がった?!
scikit-learn -> Pythonの機械学習ライブラリ

scikit-learnを入れる

いよいよ来ました、pythonで機械学習 ^^
やっとここまできましたね(祝!) ワクワクします。

とりあえず、scikit-learnを入れます。
[vagrant@localhost python]$ pip3 install -U scikit-learn scipy matplotlib scikit-image

それから、pandasも入れておきましょう。
[vagrant@localhost python]$ pip3 install pandas
Collecting pandas
Downloading https://files.pythonhosted.org/packages/5d/d4/6e9c56a561f1d27407bf29318ca43f36ccaa289271b805a30034eb3a8ec4/pandas-0.23.4-cp35-cp35m-manylinux1_x86_64.whl (8.7MB)
100% |████████████████████████████████| 8.7MB 735kB/s

pythonでtinydbを使う

まず、pip3でtinydbを入れる
[vagrant@localhost python]$ pip3 install tinydb
Collecting tinydb
Downloading https://files.pythonhosted.org/packages/97/6c/fb150f2c09d8b6f23b8f080396673794f970fa7ca0d50900fbe5fe07b8b9/tinydb-3.10.0-py2.py3-none-any.whl
Installing collected packages: tinydb
Successfully installed tinydb-3.10.0

続いて、python

from tinydb import TinyDB, Query

# Open (or create) the JSON file that backs the database.
filepath = "test-tynydb.json"
db = TinyDB(filepath)

# Start from an empty 'fruits' table on every run.
db.purge_table('fruits')
table = db.table('fruits')

# Insert the sample records one document at a time.
for record in (
    {'name': 'Banana', 'price': 600},
    {'name': 'Orange', 'price': 1200},
    {'name': 'Mango', 'price': 840},
):
    table.insert(record)

print(table.all())

# Query objects build search conditions from field comparisons.
Item = Query()

oranges = table.search(Item.name == 'Orange')
print('Orange is', oranges[0]['price'])

print("800円以上のもの:")
expensive = table.search(Item.price >= 800)
for fruit in expensive:
    print("-", fruit['name'])

うーん、 jsonですね。phpからmongodbのinsert, fetchよりもコード量が少ないので、書きやすいかもしれませんね。
[vagrant@localhost python]$ python3 app.py
[{'name': 'Banana', 'price': 600}, {'name': 'Orange', 'price': 1200}, {'name': 'Mango', 'price': 840}]
Orange is 1200
800円以上のもの:
- Orange
- Mango

pythonでmysqlを使う

import MySQLdb

# Connect to the local MySQL server and use the `test` database.
conn = MySQLdb.connect(
    user='root',
    passwd='',
    host='localhost',
    db='test')

try:
    cur = conn.cursor()

    # Uncomment to drop the table before re-running the script:
    # cur.execute("DROP TABLE items")
    cur.execute('''
        CREATE TABLE items(
            item_id INTEGER PRIMARY KEY AUTO_INCREMENT,
            name TEXT,
            price INTEGER
        )
        ''')

    # Parameterized inserts: the driver escapes each value itself.
    data = [('Banana', 300), ('Mango', 640), ('Kiwi', 280)]
    for i in data:
        cur.execute("INSERT INTO items(name,price) VALUES(%s,%s)", i)

    # MySQLdb does not autocommit; without this the inserted rows are
    # rolled back on transactional engines when the connection goes away.
    conn.commit()

    cur.execute("SELECT * FROM items")
    for row in cur.fetchall():
        print(row)
finally:
    # Always release the server connection, even if a statement failed.
    conn.close()

[vagrant@localhost python]$ python3 app.py
Traceback (most recent call last):
  File "app.py", line 1, in <module>
    import MySQLdb
ImportError: No module named 'MySQLdb'

なに?
mysqlclientを入れる
[vagrant@localhost python]$ pip3 install mysqlclient
Collecting mysqlclient
Downloading https://files.pythonhosted.org/packages/ec/fd/83329b9d3e14f7344d1cb31f128e6dbba70c5975c9e57896815dbb1988ad/mysqlclient-1.3.13.tar.gz (90kB)
100% |████████████████████████████████| 92kB 265kB/s
Installing collected packages: mysqlclient
Running setup.py install for mysqlclient … done
Successfully installed mysqlclient-1.3.13

お!
[vagrant@localhost python]$ python3 app.py
(1, 'Banana', 300)
(2, 'Mango', 640)
(3, 'Kiwi', 280)

python3でメール送信

#!/bin/env python3

import smtplib
from email.mime.text import MIMEText
import datetime

# Charset used for both the body encoding and the Content-Type header.
jp = 'iso-2022-jp'

raw_msg = "hogehoge"
# MIMEText takes the body as str and encodes it with the given charset itself.
msg = MIMEText(raw_msg, 'plain', jp)

fromaddr = "hoge"
toaddr = "hogehoge"

# Today's date (YYYY-MM-DD) is used as the subject line.
d = datetime.datetime.today()
date = d.strftime("%Y-%m-%d")

msg['Subject'] = date
msg['From'] = fromaddr
msg['To'] = toaddr

try:
    # The context manager sends QUIT and closes the socket on exit.
    with smtplib.SMTP("localhost") as server:
        server.send_message(msg)
except (smtplib.SMTPException, OSError) as exc:
    # Report the reason instead of hiding it; a bare message makes
    # delivery problems impossible to diagnose.
    print("Error: unable to send email:", exc)
else:
    # Only reached when the local SMTP server accepted the message;
    # acceptance does not guarantee delivery to the recipient's mailbox.
    print("successfully sent email")

[vagrant@localhost python]$ python3 mail.py
successfully sent email

あれ、こない。vagrantだから? ec2でやってみるか。

ec2からコマンドを打つ
[ec2-user@ip-***-**-**-** html]$ python3 mail.py
successfully sent email

なに、こない。

pythonからsqliteに接続する

import sqlite3

# Database file; created in the current directory if it does not exist.
dbpath = "test.sqlite"
conn = sqlite3.connect(dbpath)

try:
    cur = conn.cursor()
    # executescript runs several statements in one call: rebuild the table
    # from scratch and load three sample rows.
    cur.executescript("""
    DROP TABLE IF EXISTS items;

    CREATE TABLE items(
        item_id INTEGER PRIMARY KEY,
        name TEXT UNIQUE,
        price INTEGER
    );
    INSERT INTO items(name, price)VALUES('Apple', 800);
    INSERT INTO items(name, price)VALUES('Orange', 780);
    INSERT INTO items(name, price)VALUES('Banana', 430);
    """)

    conn.commit()

    # Read every row back as a list of (item_id, name, price) tuples.
    cur.execute("SELECT item_id, name, price FROM items")
    item_list = cur.fetchall()
finally:
    # Release the file handle even if a statement failed.
    conn.close()

for it in item_list:
    print(it)

[vagrant@localhost python]$ python3 app.py
(1, 'Apple', 800)
(2, 'Orange', 780)
(3, 'Banana', 430)

import sqlite3

# Database file; created in the current directory if it does not exist.
filepath = "test2.sqlite"
conn = sqlite3.connect(filepath)

try:
    cur = conn.cursor()

    # Rebuild the table from scratch on every run.
    cur.execute("DROP TABLE IF EXISTS items")
    cur.execute("""CREATE TABLE items(
        item_id INTEGER PRIMARY KEY,
        name TEXT,
        price INTEGER)""")
    conn.commit()

    # Single insert with `?` placeholders; sqlite3 binds the values itself.
    cur.execute(
        "INSERT INTO items (name,price) VALUES (?,?)",
        ("Orange", 520))
    conn.commit()

    # Bulk insert: executemany runs the statement once per tuple.
    data = [("Mango", 770), ("Kiwi", 400), ("Grape", 800),
            ("Peach", 940), ("Persimmon", 700), ("Banana", 400)]
    cur.executemany(
        "INSERT INTO items(name,price) VALUES(?,?)", data)
    conn.commit()

    # Select the rows whose price lies in the inclusive range.
    price_range = (400, 700)
    cur.execute(
        "SELECT * FROM items WHERE price>=? AND price<=?", price_range)
    fr_list = cur.fetchall()
finally:
    # Release the file handle even if a statement failed.
    conn.close()

for fr in fr_list:
    print(fr)

# Console output recorded in the original post:
# [vagrant@localhost python]$ python3 app.py
# (1, 'Orange', 520)
# (3, 'Kiwi', 400)
# (6, 'Persimmon', 700)
# (7, 'Banana', 400)

csvファイルを読み込む

文字コードにshift_jisを指定して読み込むと、
[vagrant@localhost python]$ python3 app.py
Traceback (most recent call last):
  File "app.py", line 4, in <module>
    csv = codecs.open(filename, "r", "shift_jis").read()
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/codecs.py", line 698, in read
    return self.reader.read(size)
UnicodeDecodeError: 'shift_jis' codec can't decode byte 0xef in position 0: illegal multibyte sequence

utf-8にします。

import codecs

# Despite its name, this file is read as UTF-8 text.
filename = "list-sjis.csv"

# "utf-8-sig" strips a leading byte-order mark if the file has one, so the
# first cell is not polluted by "\ufeff"; plain UTF-8 files decode the same.
# The with-block closes the file as soon as it has been read.
with open(filename, encoding="utf-8-sig") as fp:
    text = fp.read()

# splitlines() copes with LF, CRLF and a missing final newline alike;
# blank lines are skipped.
data = [row.split(",") for row in text.splitlines() if row]

# Print the second and third column of every row.
for c in data:
    print(c[1], c[2])

[vagrant@localhost python]$ python3 app.py
商品名 値段
石鹸 300
手袋 150
マスク 230

import csv, codecs

# Despite its name, this file is read as UTF-8 text.
filename = "list-sjis.csv"

# newline="" lets the csv module handle line endings itself, as its docs ask.
# "utf-8-sig" drops a leading byte-order mark if the file has one.
# The with-block closes the file when the loop is done.
with open(filename, newline="", encoding="utf-8-sig") as fp:
    reader = csv.reader(fp, delimiter=",", quotechar='"')
    # Print the second and third column of every row.
    for cells in reader:
        print(cells[1], cells[2])

このようにも書けますね。

pyyaml

[vagrant@localhost python]$ pip3 install pyyaml
Collecting pyyaml
Downloading https://files.pythonhosted.org/packages/9e/a3/1d13970c3f36777c583f136c136f804d70f500168edc1edea6daa7200769/PyYAML-3.13.tar.gz (270kB)
100% |████████████████████████████████| 276kB 507kB/s
Installing collected packages: pyyaml
Running setup.py install for pyyaml … done
Successfully installed pyyaml-3.13

import yaml

# YAML forbids tab characters for indentation; the nested levels below are
# indented with spaces so the scanner accepts them.
yaml_str = """
Date: 2018-08-10
PriceList:
  -
    item_id: 1000
    name: Banana
    color: yellow
    price: 800
  -
    item_id: 1001
    name: Orange
    color: orange
    price: 1400
  -
    item_id: 1002
    name: Apple
    color: red
    price: 2400
"""

# safe_load builds only plain Python types (dict, list, str, int, date...).
# yaml.load() without an explicit Loader is deprecated and rejected by
# PyYAML 6.
data = yaml.safe_load(yaml_str)

# Print the name and price of each entry in the list.
for item in data['PriceList']:
    print(item["name"], item["price"])

yaml.scanner.ScannerError: while scanning for the next token
found character '\t' that cannot start any token
in "<unicode string>", line 4, column 1:

^
何故だ? → YAMLではインデントにタブ文字を使えないためです。タブをスペースに置き換えると読み込めます。

import yaml

# Sample records to serialize.
customer = [
    {"name": "Yamada", "age": "35", "gender": "man"},
    {"name": "Sato", "age": "58", "gender": "woman"},
    {"name": "Kato", "age": "42", "gender": "man"},
    {"name": "Nishi", "age": "22", "gender": "man"},
]

# Serialize the list to a YAML document and show it.
yaml_str = yaml.dump(customer)
print(yaml_str)
print("--- --- ---")

# Parse the text back. safe_load builds only plain Python types;
# yaml.load() without an explicit Loader is deprecated and rejected by
# PyYAML 6.
data = yaml.safe_load(yaml_str)

for p in data:
    print(p["name"])

書き出しは出来ますね。

[vagrant@localhost python]$ python3 app.py
- {age: '35', gender: man, name: Yamada}
- {age: '58', gender: woman, name: Sato}
- {age: '42', gender: man, name: Kato}
- {age: '22', gender: man, name: Nishi}

--- --- ---
Yamada
Sato
Kato
Nishi

import yaml

# `&name` defines an anchor on a value and `*name` is an alias that reuses it.
# YAML forbids tab characters for indentation, so spaces are used below.
yaml_str = """
color_def:
  - &color1 "#FF0000"
  - &color2 "#00FF00"
  - &color3 "#0000FF"

color:
  title: *color1
  body: *color2
  link: *color3
"""

# safe_load resolves the aliases and builds only plain Python types.
# yaml.load() without an explicit Loader is deprecated and rejected by
# PyYAML 6.
data = yaml.safe_load(yaml_str)
print("title=", data["color"]["title"])
print("body=", data["color"]["body"])
print("link=", data["color"]["link"])

yaml.scanner.ScannerError: while scanning for the next token
found character '\t' that cannot start any token
in "<unicode string>", line 3, column 1:
- &color1 "#FF0000"
なんだこれ? → これもインデントのタブ文字が原因です。YAMLのインデントにはスペースを使います。

jsonを扱う

import urllib.request as req
import os.path, random
import json

url = "hoge"
savename = "hogehoge"

# Download only when the local copy is missing. The check must look at the
# saved file, not the URL: a URL never exists as a local path, so testing it
# re-downloaded the data on every run.
if not os.path.exists(savename):
    req.urlretrieve(url, savename)

# The with-block closes the file once the JSON has been parsed.
with open(savename, "r", encoding="utf-8") as fp:
    data = json.load(fp)

# Pick one random entry and print its 'kami' and 'simo' fields.
r = random.choice(data)
print(r['kami'], r['simo'])

jsonを作る。

import json

# Per-fruit prices, nested under the "price" key of the outer document.
fruit_prices = {"apple": 80, "orange": 55, "banana": 40}

# Document to serialize: a date plus the price table.
price = {"date": "2018-08-11", "price": fruit_prices}

# Encode the dictionary as a JSON string and show it.
s = json.dumps(price)
print(s)

[vagrant@localhost python]$ python3 app.py
{"price": {"apple": 80, "orange": 55, "banana": 40}, "date": "2018-08-11"}