## ユークリッド距離

```def sim_distance(prefs, person1, preson2)
shared_items_a = shared_items_a(prefs, person1, person2)
return 0 if shared_items_a.size == 0
sum_of_squares = shared_items_a.inject(0){|result, item|
result + (prefs[person1][item]-prefs[person2][item])**2
}
return 1/(1+sum_of_squares)
end

# Lists the items that both people have scored.
#
# prefs   - Hash of person => { item => score }.
# person1 - key of the first person in prefs.
# person2 - key of the second person in prefs.
#
# Returns an Array with the keys present in both people's score hashes.
def shared_items_a(prefs, person1, person2)
  first_items = prefs[person1].keys
  second_items = prefs[person2].keys
  first_items & second_items
end
```

ピアソン相関係数

```def sim_pearson(prefs, person1, person2)
shared_items_a = shared_items_a(prefs, person1, person2)

n = shared_items_a.size
return 0 if n == 0

sum1 = shared_items_a.inject(0) {|result,si|
result + prefs[person1][si]
}
sum2 = shared_items_a.inject(0) {|result,si|
result + prefs[person2][si]
}
sum1_sq = shared_items_a.inject(0) {|result,si|
result + prefs[person1][si]**2
}
sum2_sq = shared_items_a.inject(0) {|result,si|
result + prefs[person2][si]**2
}
sum_products = shared_items_a.inject(0) {|result,si|
result + prefs[person1][si]*prefs[person2][si]
}

num = sum_products - (sum1*sum2/n)
den = Math.sqrt((sum1_sq - sum1**2/n)*(sum2_sq - sum2**2/n))
return 0 if den == 0
return num/den
end
```

```def top_matches(prefs, person, n=5, similarity=:sim_pearson)
scores = Array.new
prefs.each do |key,value|
if key != person
scores << [__send__(similarity, prefs, person, key),key]
end
end
scores.sort.reverse[0,n]
end

# Print the closest matches to 'xxx' (at most five, using the default
# Pearson similarity).
# NOTE(review): critics_ja is not defined in this snippet - presumably a
# person => { item => score } hash; confirm where it comes from.
p top_matches(critics_ja, 'xxx')
```
```def get_recommendations(prefs, person, similarity=:sim_pearson)
totals_h = Hash.new(0)
sim_sums_h = Hash.new(0)

prefs.each do |other,val|
next if other == person
sim = __send__(similarity,prefs,person,other)
next if sim <= 0
prefs[other].each do |item, val|
if !prefs[person].keys.include?(item)||pref[person][item]==0
totals_h[item] += prefs[other][item]*sim
sim_sums_h[item] += sim
end
end
end

rankings = Array.new
totals_h.each do |item,total|
rankings << [total/sim_sums_h[item], item]
end
rankings.sort.reverse
end

# Print the recommendations for 'xxx' using the default Pearson similarity.
# NOTE(review): critics_ja is not defined in this snippet - presumably a
# person => { item => score } hash; confirm where it comes from.
p get_recommendations(critics_ja, 'xxx')
```
```def transform_prefs(prefs)
result = Hash.new
prefs.each do |person, score_h|
score_h.each do |item, score|
result[item] ||= Hash.new
result[item][person] = score
end
end
result
end

# Flip the data to item => { person => score } so that top_matches
# compares items with each other instead of people.
# NOTE(review): critics_ja is not defined in this snippet; the name is
# spelled as in the other examples that use it.
menu = transform_prefs(critics_ja)
p top_matches(menu, 'xxx')
```

## レコメンドエンジン

```for goods in goods.get_all():
Recomender.register(goods.id, tag=goods.tag)

for user in user.get_all():
Recomender.like(user.id, user.history.goods_ids)

Recomender.update_all()

Recomender.update_all(proc=4)

Recomender.update_all(proc=4, scope=[1, 4])
Recomender.update_all(proc=4, scope=[2, 4])
Recomender.update_all(proc=4, scope=[3, 4])
Recomender.update_all(proc=4, scope=[4, 4])
```
```new_goods_id = 2100
tag = "book"
Recomender.register(new_goods_id, tag=tag)

goods_id = 102
print Recomender.get(good_id, count=5)

Recomender.update(goods_id)

Recomender.update_all()

user_id = "xxxx"
goods_ids = [102, 102, 103, 104]
Recomender.like(user_id, goods_ids)
```
```new_tag = "computer"
Recomender.change_tag(goods_id, new_tag)

Recomender.remove(goods_id)

Recomender.remove_user(user_id)
```

## 逆引き索引

```# -*- coding: utf-8 -*-
__future__ import absolute_import, unicode_literals

# 商品ID:10の購入者
from collections import defaultdict

ITEM_10_BUY_USERS = ['A', 'C', 'E', 'G']

INDEX_BASE = 'INDEX_BUY_HISTORY_USER_{}'
INDEX = {
'INDEX_BUY_HISTORY_USER_A':[10,20,50,60,90],
'INDEX_BUY_HISTORY_USER_B':[20,20,50,60,90],
'INDEX_BUY_HISTORY_USER_A':[10,30,50,60,90],
'INDEX_BUY_HISTORY_USER_A':[30,40,50,60],
'INDEX_BUY_HISTORY_USER_A':[10],
'INDEX_BUY_HISTORY_USER_A':[70,80,90],
'INDEX_BUY_HISTORY_USER_A':[10,70,90],
}

result = defaultdict(int)
for user_id in ITEM_10_BUY_USERS:
buy_history = INDEX.get(INDEX_BASE.format(user_id))
for item_id in buy_history:
result[item_id] += 1

l = []
for key in result:
l.append((key, result[key]))

l.sort(key=lambda x: x[1], reverse=True)
print l
```

## php レコメンドエンジン

```php
// Keep a per-item list of viewers in Redis.
// The key uses the 'Viewer:Item:' prefix (with trailing colon) so it is
// the same key the Jaccard calculation reads with lRange.

// Drop any existing entry for this user so the list has no duplicates.
$Redis->lRem('Viewer:Item:' . $item_id, $user_id);
// Put the user at the head of the list.
$Redis->lPush('Viewer:Item:' . $item_id, $user_id);
// Keep only the first 1000 entries (indexes 0..999).
$Redis->lTrim('Viewer:Item:' . $item_id, 0, 999);
```

Jaccard指数の計算

```php
/**
 * Compute the Jaccard index (|intersection| / |union| of the viewer lists)
 * for every ordered pair of items, and store each score in a sorted set
 * keyed by the first item.
 *
 * $item_ids => array of item ids, e.g. [1, 2, 3, 4, 5]
 */

foreach ($item_ids as $item_id1) {
    $base = $Redis->lRange('Viewer:Item:' . $item_id1, 0, 999);
    if (count($base) === 0) {
        continue;
    }
    foreach ($item_ids as $item_id2) {
        if ($item_id1 === $item_id2) {
            continue;
        }
        $target = $Redis->lRange('Viewer:Item:' . $item_id2, 0, 999);
        if (count($target) === 0) {
            continue;
        }

        // Size of the union and of the intersection of the two viewer lists.
        $join = floatval(count(array_unique(array_merge($base, $target))));
        $intersect = floatval(count(array_intersect($base, $target)));
        if ($intersect == 0 || $join == 0) {
            continue;
        }
        $jaccard = $intersect / $join;

        // Sorted set: score = Jaccard index, member = the other item's id.
        $Redis->zAdd('Jaccard:Item:' . $item_id1, $jaccard, $item_id2);
    }
}
```
```php
// Read every member of the item's Jaccard sorted set, highest score first.
$Redis->zRevRange('Jaccard:Item:' . $item_id, 0, -1);
```

## Estimators

Maximum likelihood estimator
laplacian estimator

100101 P(head)=0.5
11011 P(head)=0.8

DATA $x_1, x_2, \dots, x_n$
$\hat{p} = \frac{1}{n}\sum_{i=1}^{n} x_i$, always between 0 and 1

MLE

## Correlation And Causation

Deep insight
correlation, causation

Sick
In hospital 40, died 4 10%
home 8000, died 20 0.25%

Chances of dying in hospital are 40 times larger than at home

hospital died
sick 36 4 11.1%
healthy 4 0 0%

At home
sick 40 20 50%
healthy 7960 20 0.251%

P(exactly one head)

P(first flip is only head)
= 4

```def test(coins, flips):
f=FlipPredictor(coins)
quesses=[]
for flip in flips:
f.update(flip)
quesses.append(f.Pheads())
return guesses

print test([0.5,0.4,0.3],'HHTH')
```
```from __future__ import division
class FlipPredictor(object):
def __init__(self,coins):
self.coins=coins
n=len(coins)
self.probs=[1/n]*n
def Pheads(self):

def update(self,result):
```

## Density

Probability for continuous spaces

f(x) = 1/360 for 0 < x <= 360
Date & time you were born: P(x) = 0, f(x) = 0.0166
f(x <= noon) = 2 * f(x > noon)
a=0.0555 1/18
b=0.0277 1/3*1/12

## Cancer

P(c)= p0 = 0.1, p(¬c)=0.9
p(pos|c)= p1 = 0.9, p(pos|¬c)=0.2
p(neg|¬c)= p2 = 0.8, p(neg|c)= 0.1

p(pos) = p0*p1 + (1-p0)*(1-p2) = 0.09 + 0.18 = 0.27

```def f(p0, p1, p2):
return p0*p1 + (1-p0)*(1-p2)
print f(0.1, 0.9, 0.8)
```

program bayes rule

```def f(p0, p1, p2):
return p0*p1 / (p0 * p1 + (1-p0)*(1-p2))
print f(0.1, 0.9, 0.8)
```
```def f(p0,p1,p2):
return p0 * (1-p1)/(p0 * (1-p1)+(1-p0)*p2)
print f(0.1, 0.9, 0.8)
```

## Flip Two Coins

```def f(p1, p2):
return p1 * p2

print f(0.5, 0.8)
```

c1 p(H|c1)=p1
c2 p(H|c2)=p2

p(c1)=p0=0.3
p(c2)=1-p0=0.7
p1 = 0.5
p2 = 0.9
0.15+0.7*0.9
p(H)=0.78

```def f(p0, p1, p2):
return p0 * p1 + (1-p0) * p2

print f(0.3, 0.5, 0.9)
```