def NBAccuracy(features_train, labels_train, features_test, labels_test):
    from sklearn.naive_bayes import GaussianNB
    clf = GaussianNB()
    clf.fit(features_train, labels_train)   # train the classifier
    pred = clf.predict(features_test)       # predict on the held-out set

    from sklearn.metrics import accuracy_score
    accuracy = accuracy_score(labels_test, pred)  # fraction of correct predictions
    return accuracy

from class_vis import prettyPicture
from prep_terrain_data import makeTerrainData
from classify import NBAccuracy

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

def submitAccuracy():
    accuracy = NBAccuracy(features_train, labels_train, features_test, labels_test)
    return accuracy
GaussianNB Deployment
#!/usr/bin/python
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify
import numpy as np
import pylab as pl
features_train, labels_train, features_test, labels_test = makeTerrainData()
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]
clf = classify(features_train, labels_train)
prettyPicture(clf, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
#!/usr/bin/python
#from ***plots import *
import warnings
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np
def prettyPicture(clf, X_test, y_test):
    x_min = 0.0; x_max = 1.0
    y_min = 0.0; y_max = 1.0

    # plot the decision boundary: assign a color to each point in the mesh
    h = .01  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)
    # separate fast (label 0) from slow (label 1) test points
    # so they can be plotted in different colors
    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]
    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]
    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]
    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]

    plt.scatter(grade_sig, bumpy_sig, color="b", label="fast")
    plt.scatter(grade_bkg, bumpy_bkg, color="r", label="slow")
    plt.legend()
    plt.xlabel("bumpiness")
    plt.ylabel("grade")
    plt.savefig("test.png")
import base64
import json
import subprocess
def output_image(name, format, bytes):
    image_start = "BEGIN_IMAGE_f9825uweof8jw9fj4r8"
    image_end = "END_IMAGE_0238jfw08fjsiufhw8frs"
    data = {}
    data['name'] = name
    data['format'] = format
    data['bytes'] = base64.encodestring(bytes)
    print image_start+json.dumps(data)+image_end
#!/usr/bin/python
import random
def makeTerrainData(n_points=1000):
    # build a toy dataset with two features (grade, bumpiness)
    # and a binary label (fast/slow)
    random.seed(42)
    grade = [random.random() for ii in range(0,n_points)]
    bumpy = [random.random() for ii in range(0,n_points)]
    error = [random.random() for ii in range(0,n_points)]
    y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)]
    for ii in range(0, len(y)):
        if grade[ii]>0.8 or bumpy[ii]>0.8:
            y[ii] = 1.0

    # split into train/test sets
    X = [[gg, ss] for gg, ss in zip(grade, bumpy)]
    split = int(0.75*n_points)
    X_train = X[0:split]
    X_test = X[split:]
    y_train = y[0:split]
    y_test = y[split:]
    # split the points by label, for optional plotting
    grade_sig = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==0]
    bumpy_sig = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==0]
    grade_bkg = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==1]
    bumpy_bkg = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==1]

    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]
    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]
    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]
    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]

    test_data = {"fast":{"grade":grade_sig, "bumpiness":bumpy_sig}
            , "slow":{"grade":grade_bkg, "bumpiness":bumpy_bkg}}

    return X_train, y_train, X_test, y_test
Scatter Plot
bumpiness: smooth – very bumpy
slope: flat – very steep
Is a new point more like the red ×'s or the blue ○'s? That question is the most important one in machine learning.
Decision surface: Linear
Naive Bayes
Zooming ahead on supervised classification with python!
goal: draw decision boundary
http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html
The example below comes from that page; just run it in a Python interpreter.
>>> import numpy as np >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) >>> Y = np.array([1, 1, 1, 2, 2, 2]) >>> from sklearn.naive_bayes import GaussianNB >>> clf = GaussianNB() >>> clf.fit(X, Y) GaussianNB(priors=None) >>> print(clf.predict([[-0.8, -1]])) [1] >>> clf_pf = GaussianNB() >>> clf_pf.partial_fit(X, Y, np.unique(Y)) GaussianNB(priors=None) >>> print(clf_pf.predict([[-0.8, -1]])) [1]
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.scatter(grade_sig, bumpy_sig, color = "b", label = "fast!")
plt.scatter(grade_bkg, bumpy_bkg, color = "r", label = "slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
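To turn the predictions into a single number, compare them against the true test labels. A short sketch using sklearn.metrics.accuracy_score, reusing the variable names from the snippet above:

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(labels_test, pred)  # fraction of test points classified correctly
print(accuracy)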
#coding:utf-8
import math
import sys
from collections import defaultdict
class NaiveBayes:
    """Multinomial Naive Bayes"""
    def __init__(self):
        self.categories = set()    # set of categories
        self.vocabularies = set()  # set of vocabulary words
        self.wordcount = {}        # wordcount[cat][word]: word counts per category
        self.catcount = {}         # catcount[cat]: document counts per category
        self.denominator = {}      # denominator[cat]: precomputed denominators

    def train(self, data):
        """Train the Naive Bayes classifier"""
        # extract the categories from the document set and initialize the dictionaries
        for d in data:
            cat = d[0]
            self.categories.add(cat)
        for cat in self.categories:
            self.wordcount[cat] = defaultdict(int)
            self.catcount[cat] = 0
        # count categories and words over the document set
        for d in data:
            cat, doc = d[0], d[1:]
            self.catcount[cat] += 1
            for word in doc:
                self.vocabularies.add(word)
                self.wordcount[cat][word] += 1
        # precompute the denominator of each word's conditional probability (for speed)
        for cat in self.categories:
            self.denominator[cat] = sum(self.wordcount[cat].values()) + len(self.vocabularies)
    def classify(self, doc):
        """Return the category with the largest log posterior log(P(cat|doc))"""
        best = None
        max_score = -sys.maxint
        for cat in self.catcount.keys():
            p = self.score(doc, cat)
            if p > max_score:
                max_score = p
                best = cat
        return best
    def wordProb(self, word, cat):
        """Compute the conditional word probability P(word|cat)"""
        # apply Laplace smoothing; wordcount[cat] is a defaultdict(int),
        # so words never seen in the category default to 0
        return float(self.wordcount[cat][word] + 1) / float(self.denominator[cat])
    def score(self, doc, cat):
        """Compute the log posterior log(P(cat|doc)) of a category given a document"""
        total = sum(self.catcount.values())  # total number of documents
        score = math.log(float(self.catcount[cat]) / total)  # log P(cat)
        for word in doc:
            # taking logs turns the product of probabilities into a sum
            score += math.log(self.wordProb(word, cat))  # log P(word|cat)
        return score
    def __str__(self):
        total = sum(self.catcount.values())  # total number of documents
        return "documents: %d, vocabularies: %d, categories: %d" % (total, len(self.vocabularies), len(self.categories))
if __name__ == "__main__":
    # Introduction to Information Retrieval 13.2
    data = [["yes", "Chinese", "Beijing", "Chinese"],
            ["yes", "Chinese", "Chinese", "Shanghai"],
            ["yes", "Chinese", "Macao"],
            ["no", "Tokyo", "Japan", "Chinese"]]
    # train the Naive Bayes classifier
    nb = NaiveBayes()
    nb.train(data)
    print nb
    print "P(Chinese|yes) =", nb.wordProb("Chinese", "yes")
    print "P(Tokyo|yes) =", nb.wordProb("Tokyo", "yes")
    print "P(Japan|yes) =", nb.wordProb("Japan", "yes")
    print "P(Chinese|no) =", nb.wordProb("Chinese", "no")
    print "P(Tokyo|no) =", nb.wordProb("Tokyo", "no")
    print "P(Japan|no) =", nb.wordProb("Japan", "no")
    test = ["Chinese", "Chinese", "Chinese", "Tokyo", "Japan"]
    print "log P(yes|test) =", nb.score(test, "yes")
    print "log P(no|test) =", nb.score(test, "no")
    print nb.classify(test)
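For reference, this training set reproduces the worked example in Introduction to Information Retrieval 13.2 (8 word tokens in "yes", 3 in "no", vocabulary size 6), so with Laplace smoothing the output should come out approximately as:
documents: 4, vocabularies: 6, categories: 2
P(Chinese|yes) = 6/14 = 3/7 ≈ 0.4286
P(Tokyo|yes) = P(Japan|yes) = 1/14 ≈ 0.0714
P(Chinese|no) = P(Tokyo|no) = P(Japan|no) = 2/9 ≈ 0.2222
and the test document is classified as "yes".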
Naive Bayes
self-driving car: a supervised classification case
acerous vs. non-acerous (without horns vs. with horns)
a horse has no horns, so it is categorized as acerous
machine learning: you are given a bunch of examples and their features
pick the right features, and you can classify a new example
supervised classification examples
-from an album of tagged photos, recognize someone in a picture (as Facebook does)
-given someone’s music choices and a bunch of features of that music (tempo, genre, etc.) recommend a new song
unsupervised learning
-analyze bank data for weird-looking transactions, and flag those for fraud
-cluster students into types based on learning styles
Features and Labels
Let It Go
Features: intensity, tempo, genre, gender
tempo: relaxed – fast
intensity: light – soaring
some songs she likes, some she doesn't
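A minimal sketch of this setup as code: song features in, like/don't-like labels out. The numeric values and the 0–1 scaling are made up for illustration, not course data:

from sklearn.naive_bayes import GaussianNB

# each row: [tempo, intensity] scaled to 0..1 (hypothetical values)
features = [[0.2, 0.3], [0.3, 0.2], [0.8, 0.9], [0.9, 0.7]]
labels = [1, 1, 0, 0]  # 1 = she likes the song, 0 = she doesn't

clf = GaussianNB()
clf.fit(features, labels)
print(clf.predict([[0.25, 0.25]]))  # a relaxed, light song -> expect [1]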
Scatter Plot
Introduction to Machine Learning
Machine learning is everywhere in Silicon Valley
-speech recognition
-self-driving cars
-the age of big data
-product optimization
Bikes and Buses
a_bus = 0.8 m/s^2, v_max = 20 m/s
a_bike = 1.2 m/s^2, v_max = 12 m/s
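With constant acceleration up to a speed cap, the time to reach top speed is t = v_max/a and the distance covered while accelerating is v_max²/(2a). A small sketch comparing the two vehicles on those formulas:

def time_and_distance_to_top_speed(a, v_max):
    # constant acceleration from rest: v = a*t, d = v^2/(2*a)
    t = v_max / a
    d = v_max**2 / (2 * a)
    return t, d

for name, a, v_max in [("bus", 0.8, 20.0), ("bike", 1.2, 12.0)]:
    t, d = time_and_distance_to_top_speed(a, v_max)
    print("%s: reaches %.0f m/s after %.1f s and %.0f m" % (name, v_max, t, d))

The bus needs 25 s and 250 m to hit top speed; the bike gets there in 10 s and 60 m, which is why the bike leads over short distances.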
Air Pressure = Weight / Area
P = F / A
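A quick worked example of P = F/A: the pressure under a standing person. All the numbers here are assumptions for illustration:

m = 70.0       # mass, kg (assumed)
g = 9.8        # m/s^2
A = 0.05       # total sole contact area, m^2 (assumed)
P = m * g / A  # pressure = weight / area
print("P = %.0f Pa" % P)  # about 13,700 Pa

Halve the area (stand on one foot) and the pressure doubles.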
Special Relativity
-Unintuitive
-Very Fast
c = 3 * 10^8 m/s (more precisely, 299,792,458 m/s)
Where to Begin?
Einstein, Galileo
Two postulates, logic, conclusion
Spaceship Flyby2
t' = γt
γ = 1/√(1 − β²), where β = v/c
an orbiting satellite:
v = 14000 km/hr × (1000 m/km) × (1 hr/3600 s) ≈ 3890 m/s, so β = v/c ≈ 1.3 × 10^-5
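Plugging that satellite speed into the formulas above shows how tiny the effect is; a quick numeric check:

import math

c = 3e8                    # speed of light, m/s
v = 14000 * 1000 / 3600.0  # 14000 km/hr in m/s, about 3890
beta = v / c
gamma = 1 / math.sqrt(1 - beta**2)
print("beta  = %.2e" % beta)    # ~1.30e-05
print("gamma = %.12f" % gamma)  # ~1.000000000084, barely above 1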
W = ΔPE = PE_f − PE_0
Dead Reckoning
Direction, speed, duration
east, 12 km/h, 2.5 hrs
south, 20 km/h, 1 hr
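Each leg of dead reckoning contributes speed × duration along its heading; summing the legs gives the net displacement. A sketch for the two legs above:

import math

# (east, south) displacement in km for each leg: speed * duration
legs = [(12 * 2.5, 0),  # east at 12 km/h for 2.5 hrs -> 30 km east
        (0, 20 * 1.0)]  # south at 20 km/h for 1 hr   -> 20 km south

east = sum(leg[0] for leg in legs)
south = sum(leg[1] for leg in legs)
distance = math.sqrt(east**2 + south**2)
print("net: %.0f km east, %.0f km south, %.1f km from start" % (east, south, distance))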
Conservation of Charge
The total charge in the universe never changes.
Closed System -> No charge being added to or removed from the system.
Friction
Conduction
Induction
Infinite source and sink of electrons
Grounding
Electric Potential Energy
When r is small, Ue is highest.
Ue = K·q1·q2 / r
K = 9 × 10^9 N·m²/C²
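A quick numeric check of Ue = K·q1·q2/r for two elementary charges; the 1 nm separation is my choice for illustration:

K = 9e9         # Coulomb constant, N*m^2/C^2
q1 = 1.602e-19  # C (elementary charge)
q2 = 1.602e-19  # C
r = 1e-9        # m, assumed separation
Ue = K * q1 * q2 / r
print("Ue = %.2e J" % Ue)  # ~2.31e-19 J; halving r doubles Ue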
E = 1000 N/C
d = 1 cm
m_p = 1.673 × 10^-27 kg
q = 1.602 × 10^-19 C
F = Eq
ma = Eq
a = Eq/m
ΔKE = Eqx
W = Fx = Eqx
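Plugging the numbers above into a = Eq/m and ΔKE = Eqd for a proton crossing the 1 cm gap:

E = 1000.0       # field, N/C
d = 0.01         # distance, m (1 cm)
m_p = 1.673e-27  # proton mass, kg
q = 1.602e-19    # proton charge, C

F = E * q    # force on the proton, ~1.6e-16 N
a = F / m_p  # acceleration, ~9.6e10 m/s^2
dKE = F * d  # work done = kinetic energy gained, ~1.6e-18 J
print("F = %.2e N, a = %.2e m/s^2, dKE = %.2e J" % (F, a, dKE))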
Lightning
Electricity
Leyden Jar -> charged
Ben Franklin's contribution
charge comes in + and −; conservation of charge
CHARGE is a property of matter
Why is mass so important?
Fe vs. Fg
cause: charge Q vs. mass m
effect on: charge Q vs. mass m
force law: Fe ∝ 1/r² vs. Fg ∝ 1/r²
Direction of force
Attraction / Repulsion
– Opposite charges attract
– Like charges repel
Coulomb
Electricity is really strong
proton: +1.6 × 10^-19 C
neutron: 0
electron: −1.6 × 10^-19 C
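One way to see that electricity is really strong: compare the electric and gravitational attraction between a proton and an electron. The r² cancels in the ratio, so the separation doesn't matter. G and the electron mass are standard constants, not from these notes:

K = 9e9          # Coulomb constant, N*m^2/C^2
G = 6.674e-11    # gravitational constant, N*m^2/kg^2
q = 1.6e-19      # elementary charge, C
m_p = 1.673e-27  # proton mass, kg
m_e = 9.11e-31   # electron mass, kg

# Fe/Fg = (K q^2 / r^2) / (G m_p m_e / r^2); the r^2 cancels
ratio = (K * q**2) / (G * m_p * m_e)
print("Fe/Fg = %.1e" % ratio)  # ~2.3e39: electricity wins by 39 orders of magnitude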
Making Graphs!
x(t) = x_max·sin(ωt + π/2)
Adjusting the Period
F_s = −kx
F = ma
ma = −kx, so a = −(k/m)·x
k = 50 N/m
T = 1 s
m = k·T²/(4π²) ≈ 1.27 kg
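For SHM a = −ω²x with ω = √(k/m), the period is T = 2π√(m/k); solving for m reproduces the 1.27 kg above:

import math

k = 50.0  # spring constant, N/m
T = 1.0   # desired period, s

# T = 2*pi*sqrt(m/k)  =>  m = k * T^2 / (4*pi^2)
m = k * T**2 / (4 * math.pi**2)
print("m = %.2f kg" % m)  # ~1.27 kg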
The simple pendulum
SHM: a = −ω²x
F = ma
−mg·sinθ = ma
g_moon = 1.6 m/s²
g_earth = 9.8 m/s²
for T_earth = T_moon, with T = 2π√(L/g):
L_m = (g_m/g_e)·L_e = (1.6/9.8)·L_e
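A quick check of that scaling with T = 2π√(L/g), assuming a 1 m pendulum on Earth (the length is my choice):

import math

g_e = 9.8  # m/s^2, Earth
g_m = 1.6  # m/s^2, Moon
L_e = 1.0  # m, assumed Earth pendulum length

L_m = (g_m / g_e) * L_e  # shorter pendulum keeps the period the same
T_e = 2 * math.pi * math.sqrt(L_e / g_e)
T_m = 2 * math.pi * math.sqrt(L_m / g_m)
print("L_m = %.3f m, T_e = %.3f s, T_m = %.3f s" % (L_m, T_e, T_m))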