anaconda

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# NOTE: the line above is an IPython/Jupyter magic, not Python syntax.
# Running this file with plain `python myapp.py` fails on it with
# "SyntaxError: invalid syntax".

# Plot the straight line y = 4x + 3 with a grid.
x = np.arange(0,11)  # integers 0..10 (the stop value 11 is excluded)
y = 4*x+3
plt.plot(x,y)
plt.grid()

[vagrant@localhost python]$ python myapp.py
  File "myapp.py", line 3
%matplotlib inline
^
SyntaxError: invalid syntax

ん?これって、anacondaが入ってないって事?

[vagrant@localhost python]$ pyenv -v
pyenv 1.2.13

[vagrant@localhost python]$ pyenv install anaconda3-2019.03
Downloading Anaconda3-2019.03-Linux-x86_64.sh…
-> https://repo.continuum.io/archive/Anaconda3-2019.03-Linux-x86_64.sh
Installing Anaconda3-2019.03-Linux-x86_64…
Installed Anaconda3-2019.03-Linux-x86_64 to /home/vagrant/.pyenv/versions/anaconda3-2019.03

[vagrant@localhost python]$ python --version
Python 3.7.3
(anaconda3-2019.03)

(anaconda3-2019.03) [vagrant@localhost python]$ python myapp.py
  File "myapp.py", line 3
% matplotlib inline
^
SyntaxError: invalid syntax

これはどうやら、%matplotlib inline が Jupyter(IPython)専用のマジックコマンドで、コマンドラインの python ではそもそも構文エラーになるからっぽいですね(anacondaの有無は関係なし)。
まあよかった、原因がわかって。

centosのpythonをupdateしよう

import pandas as pd
from sklearn.datasets import load_boston

# Load the dataset object returned by scikit-learn's load_boston().
boston = load_boston()

# Build a DataFrame with one column per feature name, then append the
# target values as an extra 'MEDV' column.
boston_df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
boston_df["MEDV"] = boston.target

# Preview the first rows (the result is only displayed in an interactive session).
boston_df.head()

TypeError: descriptor ‘__subclasses__’ of ‘type’ object needs an argument

[vagrant@localhost python]$ sudo yum install -y https://centos6.iuscommunity.org/ius-release.rpm

まず、python36を入れます。
[vagrant@localhost python]$ sudo yum install -y python36*

[vagrant@localhost python]$ pyenv versions
system
* 3.5.2 (set by /home/vagrant/.pyenv/version)
[vagrant@localhost python]$ pyenv install 3.6.4
Downloading Python-3.6.4.tar.xz…
-> https://www.python.org/ftp/python/3.6.4/Python-3.6.4.tar.xz
Installing Python-3.6.4…
Installed Python-3.6.4 to /home/vagrant/.pyenv/versions/3.6.4

[vagrant@localhost python]$ pyenv global 3.6.4
[vagrant@localhost python]$ python -V
Python 3.6.4

[vagrant@localhost python]$ pip list
Package Version
-------------------- -------
absl-py 0.8.0
astor 0.8.0
chainer 6.3.0
cycler 0.10.0
decorator 4.4.0
filelock 3.0.12
gast 0.2.2
google-pasta 0.1.7
grpcio 1.23.0
h5py 2.9.0
imageio 2.5.0
joblib 0.13.2
Keras 2.2.5
Keras-Applications 1.0.8
Keras-Preprocessing 1.1.0
kiwisolver 1.1.0
Markdown 3.1.1
matplotlib 3.1.1
networkx 2.3
numpy 1.17.1
pandas 0.25.1
Pillow 6.1.0
pip 19.2.3
protobuf 3.7.1
pyparsing 2.4.2
python-dateutil 2.8.0
pytz 2019.2
PyWavelets 1.0.3
PyYAML 5.1.2
scikit-image 0.15.0
scikit-learn 0.21.3
scipy 1.3.1
setuptools 28.8.0
six 1.12.0
tensorboard 1.14.0
tensorflow 1.14.0
tensorflow-estimator 1.14.0
termcolor 1.1.0
Theano 1.0.4
typing 3.6.6
typing-extensions 3.6.6
Werkzeug 0.15.5
wheel 0.33.6
wrapt 1.11.2

[vagrant@localhost python]$ python myapp.py
/home/vagrant/.pyenv/versions/3.6.4/lib/python3.6/site-packages/pandas/compat/__init__.py:84: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
warnings.warn(msg)
/home/vagrant/.pyenv/versions/3.6.4/lib/python3.6/site-packages/pandas/compat/__init__.py:84: UserWarning: Could not import the lzma module. Your installed Python is incomplete. Attempting to use lzma compression will result in a RuntimeError.
warnings.warn(msg)

なにいいいいいいいいいいいいいいいいいいいいいい

linear regression

線形回帰モデル(Linear Regression)とは、回帰式を用いて説明変数から目的変数の値を予測するモデルのこと。

$y = b_1 x_1 + b_2 x_2 + b_3 x_3 + \dots + b_k x_k + e$ ($e$ は誤差)

scikit-learn には線形回帰による予測を行うクラスとして
sklearn.linear_model.LinearRegression が用意されている。
{code}
sklearn.linear_model.LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)
{/code}

[vagrant@localhost python]$ sudo ln -s /usr/bin/python3.6 /usr/bin/python3
[vagrant@localhost python]$ sudo ln -s /usr/bin/pip3.6 /usr/bin/pip3
[vagrant@localhost python]$ python -V
Python 3.5.2

あれ、/usr/bin/python3ではなく、/usr/bin/pythonか。。

python3 線形回帰分析

import pandas as pd
import numpy as np

from sklearn import linear_model

# Simple linear regression: predict 'alcohol' from 'density'.
# The CSV is semicolon-separated, hence sep=";".
wine = pd.read_csv("winequality-red.csv", sep=";")
# head must be *called*; a bare `wine.head` does nothing in a script.
print(wine.head())

clf = linear_model.LinearRegression()

# Explanatory variable. Selecting with a list of column names keeps the
# result two-dimensional (n_samples, 1), which is what fit() expects for X.
# `.loc` is the label-based indexer (`.log` does not exist), and `.values`
# replaces the deprecated `.as_matrix()`.
X = wine.loc[:, ['density']].values

# Target variable as a one-dimensional array.
Y = wine['alcohol'].values

clf.fit(X, Y)

print(clf.coef_)        # regression coefficient(s)
print(clf.intercept_)   # intercept
print(clf.score(X, Y))  # coefficient of determination on the training data

[vagrant@localhost python]$ python myapp.py
Traceback (most recent call last):
  File "myapp.py", line 1, in <module>
    import pandas as pd
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/__init__.py", line 55, in <module>
    from pandas.core.api import (
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/core/api.py", line 5, in <module>
    from pandas.core.arrays.integer import (
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/core/arrays/__init__.py", line 1, in <module>
    from .array_ import array  # noqa: F401
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/core/arrays/array_.py", line 7, in <module>
    from pandas.core.dtypes.common import (
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/core/dtypes/common.py", line 11, in <module>
    from pandas.core.dtypes.dtypes import (
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/core/dtypes/dtypes.py", line 53, in <module>
    class Registry:
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/site-packages/pandas/core/dtypes/dtypes.py", line 84, in Registry
    self, dtype: Union[Type[ExtensionDtype], str]
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/typing.py", line 552, in __getitem__
    dict(self.__dict__), parameters, _root=True)
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/typing.py", line 512, in __new__
    for t2 in all_params - {t1} if not isinstance(t2, TypeVar)):
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/typing.py", line 512, in <genexpr>
    for t2 in all_params - {t1} if not isinstance(t2, TypeVar)):
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/typing.py", line 1077, in __subclasscheck__
    if super().__subclasscheck__(cls):
  File "/home/vagrant/.pyenv/versions/3.5.2/lib/python3.5/abc.py", line 225, in __subclasscheck__
    for scls in cls.__subclasses__():
TypeError: descriptor '__subclasses__' of 'type' object needs an argument

python3.5.2が行けなかったか。。

sk-learnを使ってみよう

--------------- -------
cycler 0.10.0
joblib 0.13.2
kiwisolver 1.1.0
matplotlib 3.0.3
numpy 1.17.1
pandas 0.25.1
pip 19.2.3
pyparsing 2.4.2
python-dateutil 2.8.0
pytz 2019.2
scikit-learn 0.21.3
scipy 1.3.1
setuptools 20.10.1
six 1.12.0

from sklearn import svm

# XOR truth table: each row is [p, q, p XOR q].
# NOTE: the second row used to be [0, 1, 0], which is not XOR (0 XOR 1 = 1);
# a run recorded with that table predicted all zeros and scored 3 / 4.
xor_data = [
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 0],
]

# Split every row into the input pair [p, q] and the expected output r.
data = [row[:2] for row in xor_data]
label = [row[2] for row in xor_data]

# Train on the four samples, then predict the same four samples.
clf = svm.SVC(gamma="auto")
clf.fit(data, label)
pre = clf.predict(data)
print("予想結果:", pre)

# Count the predictions that match the expected labels.
ok = sum(1 for answer, p in zip(label, pre) if p == answer)
total = len(label)
print("正解率:", ok, "/", total, "=", ok/total)

[vagrant@localhost python]$ python myapp.py
予想結果: [0 0 0 0]
正解率: 3 / 4 = 0.75

OK、取り敢えず環境は整ってきた

python astモジュール

The ast module makes it easy to handle Python abstract syntax trees in Python applications. The abstract syntax itself can change with every release of Python. Using this module helps you learn the current grammar programmatically.

To create an abstract syntax tree, pass ast.PyCF_ONLY_AST as a flag to the built-in function compile(), or use the helper function parse() provided by this module. The result is a tree of objects whose classes inherit from ast.AST. Abstract syntax trees can be compiled into Python code objects using the built-in function compile().

[vagrant@localhost test]$ python --version
Python 3.5.2
[vagrant@localhost test]$ cat << 'EOF' > helloworld.py
> #!/usr/bin/env python
> # -*- coding: utf-8 -*-
>
> def main():
>   print('hello, world!')
>
> if __name__ == '__main__':
>   main()
> EOF

[vagrant@localhost test]$ python
Python 3.5.2 (default, Jul 28 2018, 11:25:01)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-23)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> FILENAME = 'helloworld.py'
>>> with open(FILENAME, 'r') as f:
... source = f.read()
  File "<stdin>", line 2
source = f.read()
^
IndentationError: expected an indented block

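あれ、うまくいかんな。。(with文の次の行をインデントしていないので IndentationError になっている。「...」プロンプトでは行頭に空白を入れてから source = f.read() と入力する必要がある。)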

ec2でpythonにpost

[ec2-user@ip-xxx-xx-xx-xx app]$ python -V
Python 2.7.14

[ec2-user@ip-xxx-xx-xx-xx app]$ python35
Python 3.5.5 (default, Apr 25 2018, 23:51:32)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-11)] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>>

python2系だからか?
/usr/binを見ます。

うまくいきませんね。
他の方法を試してみましょうか。。

vagrantでajaxでpythonにpostする

index.php

<!DOCTYPE html>
<html lang="ja">
<head>
  <title>Ajax</title>
</head>

<body>
  <h1>Ajax</h1>
  <form id="form">
    <div><label>送信する数字</label><input type="number" id="number" value="0"></div>
    <div>
      <label>送信するテキスト</label>
      <textarea id="text"></textarea>
    </div>
    <button type="submit" class="btn btn-primary">Submit</button>
  </form>
  <div id="result"></div>

  <script src="https://code.jquery.com/jquery-3.2.1.min.js"></script>
  <script type="text/javascript">
    // Send the form fields to the Python CGI script via Ajax and show the
    // reply inside #result instead of navigating away from the page.
    $(document).ready(function(){
      $('#form').submit(function(event){
        // Use the event object passed to the handler; the implicit global
        // `event` (window.event) is deprecated and should not be relied on.
        event.preventDefault();
        $.ajax({
          url:'http://localhost:8000/cgi-bin/index.py',
          type: 'post',
          dataType: 'text',
          data: {
            number: $('#number').val(),
            text: $('#text').val()
          }
        })
        .done(function(response){
          // The reply is inserted as HTML, so the server side is expected
          // to escape any user-supplied text it echoes back.
          $('#result').html(response);
        })
        .fail(function(){
          $('#result').html('Failed.');
        });
      });
    });
    </script>
  </body>
</html>

index.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""CGI endpoint that echoes the posted ``number`` and ``text`` fields as HTML."""

import cgi
import cgitb
import html

# Render tracebacks in the response while developing.
cgitb.enable()

form = cgi.FieldStorage()
# The accessor is ``getfirst`` (all lowercase); ``getFirst`` does not exist
# on FieldStorage and raises AttributeError.
text = form.getfirst("text")
n = form.getfirst("number")

# An emptied number input is posted as "", which int() rejects; report it
# the same way as a missing field instead of crashing.
try:
    number = "None" if n is None else int(n)
except ValueError:
    number = "None"

# Escape the free-text field so posted markup is shown literally rather than
# being injected into the page that inserts this response as HTML.
safe_text = "None" if text is None else html.escape(text)

# Header block (terminated by the blank line), then the body.
print('Content-type: text/html\nAccess-Control-Allow-Origin: *\n')
print("<p>送信された数字: {}</p>".format(number))
print("<p>送信されたテキスト: {}</p>".format(safe_text))
python -m http.server --cgi

何故だ? 問題はHTML側ではないと思うので、AWSもしくはsakuraでやってみるか。

index.phpをindex.htmlに変更します。

192.168.35.1 - - [26/Aug/2018 10:19:03] code 403, message CGI script is not executable ('/cgi-bin/index.py')
192.168.35.1 - - [26/Aug/2018 10:19:03] "POST /cgi-bin/index.py HTTP/1.1" 403 -

なに?
[vagrant@localhost app]$ cd cgi-bin
[vagrant@localhost cgi-bin]$ chmod 755 index.py

192.168.35.1 - - [26/Aug/2018 10:23:35] "GET / HTTP/1.1" 200 -
192.168.35.1 - - [26/Aug/2018 10:23:43] "POST /cgi-bin/index.py HTTP/1.1" 200 -
: そのようなファイルやディレクトリはありません
192.168.35.1 - - [26/Aug/2018 10:23:43] CGI script exit status 0x7f00

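う~ん、なんでだろう。(終了ステータス0x7f00は127=コマンドが見つからない、の意味。ファイル名が空のままエラーが出ているので、おそらくindex.pyの改行コードがCRLFで、shebangの「python\r」を探しに行って失敗している。改行コードをLFにして要確認。)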
jsのdocument.titleで取得してphpファイルに送ることもできるが、後々のことを考えるとpythonでやりたいですね。

クロスバリデーション

from sklearn import svm, metrics
import random, re

def f_tonum(n):
    """Return `n` as a float when it consists only of digits and dots, else unchanged."""
    return float(n) if re.match(r'^[0-9\.]+$', n) else n


def f_cols(li):
    """Split one CSV line on commas and convert every cell with `f_tonum`."""
    return [f_tonum(cell) for cell in li.strip().split(',')]


# Read the whole file inside a `with` block so the handle is always closed.
with open('iris.csv', 'r', encoding='utf-8') as f:
    lines = f.read().split("\n")

# Ignore blank lines (for example the one produced by a trailing newline);
# they would otherwise become one-cell rows that break the later row[4] lookup.
rows = [f_cols(li) for li in lines if li.strip()]
# Drop the first row (presumably the header) and shuffle the remaining rows.
csv = rows[1:]
random.shuffle(csv)

# Deal the shuffled rows round-robin into K folds.
K = 5
csvk = [[] for _ in range(K)]
for i, row in enumerate(csv):
    csvk[i % K].append(row)

def split_data_label(rows):
    """Separate rows into feature lists and labels.

    For every row the first four items become the feature vector and the
    item at index 4 becomes the label.

    Returns:
        A ``(data, label)`` tuple of two lists in row order.
    """
    # Single pass over `rows`, so one-shot iterables are handled as well.
    pairs = [(entry[0:4], entry[4]) for entry in rows]
    features = [pair[0] for pair in pairs]
    targets = [pair[1] for pair in pairs]
    return (features, targets)

def calc_score(test, train):
    """Fit an SVC on `train` and return its accuracy on `test`.

    Both arguments are lists of rows that `split_data_label` can split
    into features and labels.
    """
    x_test, y_test = split_data_label(test)
    x_train, y_train = split_data_label(train)
    model = svm.SVC()
    model.fit(x_train, y_train)
    predicted = model.predict(x_test)
    return metrics.accuracy_score(y_test, predicted)

# K-fold cross-validation: each fold is the test set once, and all the
# other folds together form the training set.
score_list = []
for k, testc in enumerate(csvk):
    trainc = []
    for j, fold in enumerate(csvk):
        # Exclude the test fold by position. Comparing the lists themselves
        # would also drop any other fold with equal contents (for example
        # several empty folds).
        if j != k:
            trainc += fold
    sc = calc_score(testc, trainc)
    score_list.append(sc)
print("各正解率=", score_list)
print("平均成果率=", sum(score_list) / len(score_list))

各正解率= [0.9666666666666667, 1.0, 1.0, 0.9333333333333333, 1.0]
平均成果率= 0.9800000000000001

import pandas as pd
from sklearn import svm, metrics
# GridSearchCV lives in sklearn.model_selection; the old
# sklearn.grid_search and sklearn.cross_validation modules were removed.
from sklearn.model_selection import GridSearchCV

train_csv = pd.read_csv("./mnist/train.csv")
test_csv = pd.read_csv("./mnist/t10k.csv")

# Column 0 is used as the label and columns 1..576 as the features.
# `.iloc` selects by position and replaces the deprecated `.ix`.
# NOTE(review): assumes the old `.ix` calls were positional here (column
# labels read from the CSV header are not integers) - confirm.
train_label = train_csv.iloc[:, 0]
train_data = train_csv.iloc[:, 1:577]
test_label = test_csv.iloc[:, 0]
test_data = test_csv.iloc[:, 1:577]
print("学習データ数=", len(train_label))

# Hyper-parameter grid: a linear kernel over C, and an RBF kernel over C x gamma.
params = [
    {"C": [1, 10, 100, 1000], "kernel": ["linear"]},
    {"C": [1, 10, 100, 1000], "kernel": ["rbf"], "gamma": [0.001, 0.0001]},
]

# n_jobs=-1 runs the search on all available cores.
clf = GridSearchCV(svm.SVC(), params, n_jobs=-1)
clf.fit(train_data, train_label)
print("学習器=", clf.best_estimator_)

pre = clf.predict(test_data)
# accuracy_score takes (y_true, y_pred) in that order.
ac_score = metrics.accuracy_score(test_label, pre)
print("正解率=", ac_score)

RandomForestClassifier

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed.
from sklearn.model_selection import train_test_split

# The file has no header row, so columns are numbered 0, 1, 2, ...
mr = pd.read_csv("mushroom.csv", header=None)

# Column 0 is the class label; every other cell is a one-character symbol
# that is turned into a number with ord().
label = []
data = []
for row_index, row in mr.iterrows():
    # `.iloc` selects by position and replaces the deprecated `.ix`.
    label.append(row.iloc[0])
    data.append([ord(v) for v in row.iloc[1:]])

# Hold out part of the data for testing (train_test_split defaults).
data_train, data_test, label_train, label_test = \
    train_test_split(data, label)

clf = RandomForestClassifier()
clf.fit(data_train, label_train)

predict = clf.predict(data_test)

ac_score = metrics.accuracy_score(label_test, predict)
cl_report = metrics.classification_report(label_test, predict)
print("正解率=", ac_score)
print("レポート=\n", cl_report)

[vagrant@localhost python]$ python3 app.py
正解率= 1.0
レポート=
precision recall f1-score support

e 1.00 1.00 1.00 1031
p 1.00 1.00 1.00 1000

avg / total 1.00 1.00 1.00 2031

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed.
from sklearn.model_selection import train_test_split

# The file has no header row, so columns are numbered 0, 1, 2, ...
mr = pd.read_csv("mushroom.csv", header=None)

# One-hot encode every attribute column: each symbol of a column gets the
# next free slot index for that column, and the cell becomes a 12-slot
# vector with a single 1 in that slot.
label = []
data = []
attr_list = []  # per column: {"dic": symbol -> slot index, "cnt": next free slot}
for row_index, row in mr.iterrows():
    # `.iloc` selects by position and replaces the deprecated `.ix`.
    label.append(row.iloc[0])
    exdata = []
    for col, v in enumerate(row.iloc[1:]):
        # The first row creates the per-column bookkeeping; later rows reuse it.
        if row_index == 0:
            attr = {"dic": {}, "cnt": 0}
            attr_list.append(attr)
        else:
            attr = attr_list[col]
        # Fixed width of 12 slots per column; a column with more than 12
        # distinct symbols would raise IndexError below.
        d = [0] * 12
        if v in attr["dic"]:
            idx = attr["dic"][v]
        else:
            idx = attr["cnt"]
            attr["dic"][v] = idx
            attr["cnt"] += 1
        # Set the slot and extend the row for EVERY cell, not only the first
        # time a symbol is seen.
        d[idx] = 1
        exdata += d
    # Exactly one encoded row per input row, so data and label stay aligned.
    data.append(exdata)

data_train, data_test, label_train, label_test = \
    train_test_split(data, label)

clf = RandomForestClassifier()
clf.fit(data_train, label_train)
predict = clf.predict(data_test)
ac_score = metrics.accuracy_score(label_test, predict)
print("正解率=", ac_score)