n! / ((n-k)!・k!)
p(heads)=0.5
flip coin 5 times
p(#head)=1
5!/(4!*1!) = 5, 2^5 = 32
0.15625
probability p(heads)=0.8
flip coin 3 times, p(#heads = 1)
0.096
5*(0.8)^4 *(0.2)^1
随机应变 ABCD: Always Be Coding and … : хороший
n! / ((n-k)!・k!)
p(heads)=0.5
flip coin 5 times
p(#head)=1
5!/(4!*1!) = 5, 2^5 = 32
0.15625
probability p(heads)=0.8
flip coin 3 times, p(#heads = 1)
0.096
5*(0.8)^4 *(0.2)^1
outlier: 外れ値
real age -> age in database
type ->
diminish type from target
lower quartile
upper quartile
interquartile range
percentile
20,21,22,24,211
upper 20% percentile
from math import sqrt
data3 = [13.04, 1.32, 22.65, 17.44, 29.54, 23.22, 17.65, 10.12, 26.73, 16.43]


def mean(data):
    """Return the arithmetic mean of ``data``.

    ``data`` must be a non-empty sized iterable of numbers; an empty
    input raises ``ZeroDivisionError``.
    """
    # float() forces true division even for all-integer input on Python 2.
    return sum(data) / float(len(data))


def variance(data):
    """Return the population variance of ``data``: mean of (x - mu)**2."""
    mu = mean(data)
    return mean([(x - mu) ** 2 for x in data])


def stddev(data):
    """Return the population standard deviation (square root of variance)."""
    return sqrt(variance(data))


# Single-argument print() behaves the same on Python 2 and Python 3.
print(stddev(data3))
standard score(偏差値) = (data – mean)/standard deviation
xi…,xi…,xn
mean:5
variance:16
standard deviation:4
xi:9
multiply by 1.5
standard score:(9-5)/4 = 1
μ:7.5
σ:6
σ^2:36
yi:13.5
z:1
correction factor(補正率)
incremental mean
def mean(oldmean, n, x):
    """Return the mean after adding one value to an existing sample.

    oldmean -- mean of the ``n`` values seen so far
    n       -- number of values already included in ``oldmean``
    x       -- the new value being added

    The result is ``(oldmean * n + x) / (n + 1)``.
    """
    # Explicit float() replaces the former ``from __future__ import division``,
    # which is only legal as the first statement of a module.
    return (oldmean * n + x) / float(n + 1)


currentmean = 10
currentcount = 5
new = 4
print(mean(currentmean, currentcount, new))
def likelihood(dist, data):
    """Return the likelihood of ``data`` under the distribution ``dist``.

    dist -- mapping from outcome to its probability
    data -- iterable of observed outcomes (a string is iterated per character)

    The result is the product of ``dist[outcome]`` over every observation,
    so an empty ``data`` gives 1. An outcome missing from ``dist`` raises
    ``KeyError``.
    """
    result = 1
    for outcome in data:
        # Accumulate the product; the bare expression ``l*dist[i]`` used
        # previously discarded it and left the result stuck at 1.
        result *= dist[outcome]
    return result
# Each entry is ((dist, data), expected_likelihood).
# Plain ASCII quotes are required; typographic quotes are not string delimiters.
tests = [
    (({'A': 0.2, 'B': 0.2, 'C': 0.2, 'D': 0.2, 'E': 0.2}, 'ABCEDDECAB'),
     1.024e-07),
    (({'Good': 0.6, 'Bad': 0.2, 'Indifferent': 0.2},
      ['Good', 'Bad', 'Indifferent', 'Good', 'Good', 'Bad']),
     0.001728),
    (({'Z': 0.6, 'X': 0.333, 'Y': 0.067}, 'ZXYYZXYXYZY'),
     1.07686302456e-08),
    (({'Z': 0.6, 'X': 0.233, 'Y': 0.067, 'W': 0.1}, 'WXYZYZZZZW'),
     8.133206112e-07),
]

for args, expected in tests:
    # Accept a relative error below 1% against the expected likelihood.
    if abs(likelihood(*args) / expected - 1) < 0.01:
        print('Correct')
    else:
        print('Incorrect')
[/python]
data2 = []
def variance
return
print variance(data2)
mean =
data = []
ndata = data – mu
ndata = []
ndata.append()
data3 = [13.04, 1.32, 22.65, 17.44, 29.54, 23.22, 17.65, 10.12, 26.73, 16.43]


def mean(data):
    """Return the arithmetic mean of ``data`` (non-empty sequence of numbers)."""
    # float() forces true division even for all-integer input on Python 2.
    return sum(data) / float(len(data))


def variance(data):
    """Return the population variance of ``data``, using an explicit loop.

    Collects the squared deviation of every value from the mean, then
    averages those squared deviations.
    """
    mu = mean(data)
    ndata = []
    for x in data:
        ndata.append((x - mu) ** 2)
    return mean(ndata)
another simple pattern comes here
data3 = [13.04, 1.32, 22.65, 17.44, 29.54, 23.22, 17.65, 10.12, 26.73, 16.43]


def mean(data):
    """Return the arithmetic mean of ``data`` (non-empty sequence of numbers)."""
    # float() forces true division even for all-integer input on Python 2.
    return sum(data) / float(len(data))


def variance(data):
    """Return the population variance of ``data`` via a list comprehension."""
    mu = mean(data)
    return mean([(x - mu) ** 2 for x in data])


# The data set defined in this snippet is data3 (the original printed data2).
print(variance(data3))
mean
data1 = [49., 66, 24, 98, 37, 64, 98, 27, 56, 93, 68, 78, 22, 25, 11]


def mean(data):
    """Return the arithmetic mean of ``data`` (non-empty sequence of numbers)."""
    # float() forces true division even for all-integer input on Python 2.
    return sum(data) / float(len(data))


print(mean(data1))
median
data1 = [1, 2, 5, 10, -20]


def median(data):
    """Return the middle element of ``data`` after sorting.

    For an even number of elements the lower of the two middle values is
    returned (no averaging). ``data`` itself is not modified.
    """
    sdata = sorted(data)
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    index = (len(data) - 1) // 2
    return sdata[index]


print(median(data1))
data1 = [1,2,5,10,-20,5,5]
def mode(data):
…
return
print mode(data1)
5
for i in range(len(data))
if :
data.count(data.[i])
data1 = [1, 2, 5, 10, -20, 5, 5]


def mode(data):
    """Return the most frequently occurring value in ``data``.

    ``data`` must provide ``count()`` (e.g. a list or tuple). When several
    values share the highest count, the one appearing first in ``data`` is
    returned. An empty ``data`` raises ``ValueError``.
    """
    # max() keeps the first item with the largest key, which matches the
    # strict ">" comparison of the original hand-written loop.
    return max(data, key=data.count)


print(mode(data1))
μ= 1/n Σi Xi
σ^2 = 1/n Σ(xi – μ)^2
σ^2 = ΣXi^2/N - (ΣXi)^2/N^2
3,4,5,6,7
N = 5
ΣXi = 25
ΣXi^2 = 135
μ = 5
σ^2 = 2
Variance: spread of data
standard deviation
17, 19, 18, 17, 19 -> mean=18
-1 1 0 -1 1
variance = 0.8
std deviation = 0.8944
7, 38, 4, 23, 18 -> mean=18
-11 20 -14 5 0
variance = 148.4
std deviation = 12.18
mean = μ
variance
1/n Σ(Xi – μ)^2
standard deviation = √variance
data 3,4,5,6,7
mean 5
variance 2
std dev 1.414
data 8, 9, 10, 11, 12
mean 10
variance 2
std dev 1.414
data 15, 20, 25, 30, 35
mean 25
variance 50
std dev 7.071
MEAN, MEDIAN, MODE
house prices
190k 170k 165k 180k 165k
Mean = 1/N ΣXi = 174k
Median
picks the one in the middle
Mode
most frequently used number
3, 9, 3, 8, 2, 9, 1, 9, 2, 4
mean = 5
median = 3, 4
mode = 9
# Similarity of two people based on the Euclidean distance between their
# ratings of the items both have rated.
#
# prefs   - Hash of person => { item => rating }
# person1 - key of the first person in prefs
# person2 - key of the second person in prefs
#
# Returns 0 when the two share no items, otherwise
# 1.0 / (1 + sum of squared rating differences), which is 1.0 when all
# shared ratings are equal.
def sim_distance(prefs, person1, person2)
  shared = shared_items_a(prefs, person1, person2)
  return 0 if shared.empty?

  sum_of_squares = shared.inject(0) do |result, item|
    result + (prefs[person1][item] - prefs[person2][item])**2
  end
  # 1.0 avoids integer division, which would truncate to 0 for Integer ratings.
  1.0 / (1 + sum_of_squares)
end
# Items rated by both people, as an Array in the order they appear in
# prefs[person1].
def shared_items_a(prefs, person1, person2)
  other_ratings = prefs[person2]
  prefs[person1].keys.select { |item| other_ratings.key?(item) }
end
ピアソン相関係数
# Pearson correlation coefficient between two people's ratings, computed
# over the items both have rated.
#
# prefs   - Hash of person => { item => rating }
# person1 - key of the first person in prefs
# person2 - key of the second person in prefs
#
# Returns 0 when there are no shared items or when the denominator is zero
# (e.g. one person gave every shared item the same rating); otherwise the
# correlation as a Float.
def sim_pearson(prefs, person1, person2)
  shared = shared_items_a(prefs, person1, person2)
  return 0 if shared.empty?

  # Float count so the divisions below never truncate for Integer ratings.
  n = shared.size.to_f
  ratings1 = shared.map { |si| prefs[person1][si] }
  ratings2 = shared.map { |si| prefs[person2][si] }

  sum1 = ratings1.inject(0) { |acc, r| acc + r }
  sum2 = ratings2.inject(0) { |acc, r| acc + r }
  sum1_sq = ratings1.inject(0) { |acc, r| acc + r**2 }
  sum2_sq = ratings2.inject(0) { |acc, r| acc + r**2 }
  sum_products = shared.inject(0) do |acc, si|
    acc + prefs[person1][si] * prefs[person2][si]
  end

  num = sum_products - (sum1 * sum2 / n)
  den_sq = (sum1_sq - sum1**2 / n) * (sum2_sq - sum2**2 / n)
  # Rounding can push the product marginally below zero, which would make
  # Math.sqrt raise; treat that the same as a zero denominator.
  return 0 if den_sq <= 0

  num / Math.sqrt(den_sq)
end
類似度
# The n entries of prefs most similar to person.
#
# prefs      - Hash of person => { item => rating }
# person     - key in prefs to compare everyone else against
# n          - maximum number of matches to return (default 5)
# similarity - Symbol naming the similarity method to call; it is invoked
#              as similarity(prefs, person, other)
#
# Returns an Array of [score, other] pairs, highest score first.
def top_matches(prefs, person, n=5, similarity=:sim_pearson)
  scores = []
  prefs.each_key do |other|
    next if other == person

    scores << [__send__(similarity, prefs, person, other), other]
  end
  scores.sort.reverse[0, n]
end

p top_matches(critics_ja, 'xxx')
# Recommend items for person using a similarity-weighted average of the
# ratings given by everyone else.
#
# prefs      - Hash of person => { item => rating }
# person     - key in prefs to build recommendations for
# similarity - Symbol naming the similarity method to call; it is invoked
#              as similarity(prefs, person, other)
#
# People whose similarity to person is <= 0 are ignored. Only items that
# person has not rated, or has rated 0, are scored.
#
# Returns an Array of [weighted_average, item] pairs, highest first.
def get_recommendations(prefs, person, similarity=:sim_pearson)
  totals_h = Hash.new(0)
  sim_sums_h = Hash.new(0)

  prefs.each do |other, ratings|
    next if other == person

    sim = __send__(similarity, prefs, person, other)
    next if sim <= 0

    ratings.each do |item, rating|
      # The original read the undefined name `pref` here.
      next unless !prefs[person].key?(item) || prefs[person][item] == 0

      totals_h[item] += rating * sim
      sim_sums_h[item] += sim
    end
  end

  rankings = totals_h.map { |item, total| [total / sim_sums_h[item], item] }
  rankings.sort.reverse
end

p get_recommendations(critics_ja, 'xxx')
# Invert a person => { item => score } Hash into item => { person => score }.
#
# prefs - Hash of person => { item => score }
#
# Returns a new Hash; prefs is not modified.
def transform_prefs(prefs)
  result = {}
  prefs.each do |person, score_h|
    score_h.each do |item, score|
      result[item] ||= {}
      result[item][person] = score
    end
  end
  result
end

# critics_ja is the data set used by the other examples (was misspelled
# `ciritics_ja`).
menu = transform_prefs(critics_ja)
p top_matches(menu, 'xxx')
# Register every item with its tag. The loop variable has its own name so
# the `goods` object being iterated is not rebound.
for item in goods.get_all():
    Recomender.register(item.id, tag=item.tag)

# Record each user's history of item ids.
for member in user.get_all():
    Recomender.like(member.id, member.history.goods_ids)

# NOTE(review): the calls below look like alternative ways to run the same
# update (default, proc=4, and proc=4 with scope=[k, 4] for k = 1..4) rather
# than a sequence meant to run back to back -- confirm against the Recomender
# documentation. All original calls are kept, in their original order.
Recomender.update_all()
Recomender.update_all(proc=4)
Recomender.update_all(proc=4, scope=[1, 4])
Recomender.update_all(proc=4, scope=[2, 4])
Recomender.update_all(proc=4, scope=[3, 4])
Recomender.update_all(proc=4, scope=[4, 4])
# Register a single new item under a tag.
new_goods_id = 2100
tag = "book"
Recomender.register(new_goods_id, tag=tag)

# Fetch results for one item, then refresh that item and everything.
goods_id = 102
# The original passed the undefined name `good_id` here.
print(Recomender.get(goods_id, count=5))
Recomender.update(goods_id)
Recomender.update_all()

# Record the item ids associated with one user.
user_id = "xxxx"
goods_ids = [102, 102, 103, 104]
Recomender.like(user_id, goods_ids)
# Change an item's tag, then remove the item and the user.
# goods_id and user_id are the values assigned earlier in these notes.
new_tag = "computer"
Recomender.change_tag(goods_id, new_tag)
Recomender.remove(goods_id)
Recomender.remove_user(user_id)