μ= 1/n Σi Xi
σ^2 = 1/n Σ(xi – μ)^2
σ^2 = ΣXi^2/N - (ΣXi)^2/N^2
3,4,5,6,7
N = 5
ΣXi = 25
ΣXi^2 = 135
μ = ΣXi/N = 25/5 = 5
σ^2 = 2
ソフトウェアエンジニアの技術ブログ:Software engineer tech blog
随机应变 ABCD: Always Be Coding and … : хороший
μ= 1/n Σi Xi
σ^2 = 1/n Σ(xi – μ)^2
σ^2 = ΣXi^2/N - (ΣXi)^2/N^2
3,4,5,6,7
N = 5
ΣXi = 25
ΣXi^2 = 135
μ = ΣXi/N = 25/5 = 5
σ^2 = 2
Variance: spread of data
standard deviation
17, 19, 18, 17, 19 -> mean=18
-1 1 0 -1 1
variance = 0.8
std deviation = 0.8944
7, 38, 4, 23, 18 -> mean=18
-11 20 -14 5 0
variance = 148.4
std deviation = 12.18
mean = μ
variance
1/n Σ(Xi – μ)^2
standard deviation = √variance
data 3,4,5,6,7
mean 5
variance 2
std dev 1.414
data 8, 9, 10, 11, 12
mean 10
variance 2
std dev 1.414
data 15, 20, 25, 30, 35
mean 25
variance 50
std dev 7.071
MEAN, MEDIAN, MODE
house prices
190k 170k 165k 180k 165k
Mean = 1/N ΣXi = 870k/5 = 174k
Median
picks the one in the middle
Mode
most frequently used number
3, 9, 3, 8, 2, 9, 1, 9, 2, 4
mean = 5
median = 3.5 (even count: average of the two middle values, 3 and 4)
mode = 9
# Euclidean-distance-based similarity between two people.
#
# prefs   - Hash of person => { item => numeric rating }.
# person1 - key of the first person in prefs.
# person2 - key of the second person in prefs.
#
# Returns 0 when the two people have rated no item in common, otherwise
# 1 / (1 + sum of squared rating differences) as a Float in (0, 1].
def sim_distance(prefs, person1, person2)
  shared = shared_items_a(prefs, person1, person2)
  return 0 if shared.empty?

  sum_of_squares = shared.inject(0) do |result, item|
    result + (prefs[person1][item] - prefs[person2][item])**2
  end

  # Float numerator: with Integer ratings 1/(1+n) would truncate to 0.
  1.0 / (1 + sum_of_squares)
end

# Items rated by both person1 and person2 (intersection of their item keys).
def shared_items_a(prefs, person1, person2)
  prefs[person1].keys & prefs[person2].keys
end
ピアソン相関係数
# Pearson correlation coefficient between two people's ratings, computed
# over the items both of them have rated.
#
# prefs   - Hash of person => { item => numeric rating }.
# person1 - key of the first person in prefs.
# person2 - key of the second person in prefs.
#
# Returns 0 when there are no shared items or when either person's shared
# ratings have no variance; otherwise a Float correlation.
def sim_pearson(prefs, person1, person2)
  shared = shared_items_a(prefs, person1, person2)
  return 0 if shared.empty?

  # Float divisor: with Integer ratings the divisions below would truncate.
  n = shared.size.to_f

  sum1 = shared.inject(0) { |result, si| result + prefs[person1][si] }
  sum2 = shared.inject(0) { |result, si| result + prefs[person2][si] }
  sum1_sq = shared.inject(0) { |result, si| result + prefs[person1][si]**2 }
  sum2_sq = shared.inject(0) { |result, si| result + prefs[person2][si]**2 }
  sum_products = shared.inject(0) do |result, si|
    result + prefs[person1][si] * prefs[person2][si]
  end

  num = sum_products - (sum1 * sum2 / n)
  spread = (sum1_sq - sum1**2 / n) * (sum2_sq - sum2**2 / n)
  # spread is mathematically >= 0; guard against zero variance and against a
  # tiny negative value from rounding, which would make Math.sqrt raise.
  return 0 if spread <= 0

  num / Math.sqrt(spread)
end
類似度
# Rank everyone in prefs except `person` by similarity to `person`.
#
# prefs      - Hash of person => { item => rating }.
# person     - key of the person to compare the others against.
# n          - maximum number of results to return (default 5).
# similarity - Symbol naming the similarity method to call with
#              (prefs, person, other); defaults to :sim_pearson.
#
# Returns up to n [score, other_person] pairs, best match first.
def top_matches(prefs, person, n=5, similarity=:sim_pearson)
  scored = prefs.each_with_object([]) do |(other, _ratings), acc|
    next if other == person
    acc << [__send__(similarity, prefs, person, other), other]
  end
  ranked = scored.sort.reverse
  ranked[0, n]
end

p top_matches(critics_ja, 'xxx')
# Recommend items to `person` using a similarity-weighted average of the
# ratings given by everyone else.
#
# prefs      - Hash of person => { item => numeric rating }.
# person     - key of the person to build recommendations for.
# similarity - Symbol naming the similarity method to call with
#              (prefs, person, other); defaults to :sim_pearson.
#
# Only people with a similarity greater than 0 contribute, and only items
# that `person` has not rated (or has rated 0) are considered.
#
# Returns [predicted_score, item] pairs sorted from highest score to lowest.
def get_recommendations(prefs, person, similarity=:sim_pearson)
  totals_h = Hash.new(0)
  sim_sums_h = Hash.new(0)
  own_ratings = prefs[person]

  prefs.each do |other, ratings|
    next if other == person

    sim = __send__(similarity, prefs, person, other)
    next if sim <= 0

    ratings.each do |item, score|
      # Skip items the person has already given a non-zero rating.
      next if own_ratings.key?(item) && own_ratings[item] != 0

      totals_h[item] += score * sim
      sim_sums_h[item] += sim
    end
  end

  # Normalise each total by the sum of the similarities that contributed to it.
  rankings = totals_h.map { |item, total| [total / sim_sums_h[item], item] }
  rankings.sort.reverse
end

p get_recommendations(critics_ja, 'xxx')
# Invert a person => { item => score } Hash into item => { person => score },
# so the person-based similarity helpers can be applied to items instead.
#
# prefs - Hash of person => { item => score }.
#
# Returns a new Hash of item => { person => score }; prefs is not modified.
def transform_prefs(prefs)
  result = Hash.new
  prefs.each do |person, score_h|
    score_h.each do |item, score|
      result[item] ||= Hash.new
      result[item][person] = score
    end
  end
  result
end

# The data set is named critics_ja wherever else this file uses it.
menu = transform_prefs(critics_ja)
p top_matches(menu, 'xxx')
# Register every product with the recommender, keyed by id and tagged.
# The loop variable has its own name so that `goods` is not rebound to a
# single record while it is being iterated.
for item in goods.get_all():
    Recomender.register(item.id, tag=item.tag)

# Feed each user's purchase history into the recommender.
for member in user.get_all():
    Recomender.like(member.id, member.history.goods_ids)

# Different ways of invoking the full rebuild.
# NOTE(review): presumably `proc` is the number of worker processes and
# scope=[k, n] selects the k-th of n partitions - confirm against the
# Recomender documentation.
Recomender.update_all()
Recomender.update_all(proc=4)
Recomender.update_all(proc=4, scope=[1, 4])
Recomender.update_all(proc=4, scope=[2, 4])
Recomender.update_all(proc=4, scope=[3, 4])
Recomender.update_all(proc=4, scope=[4, 4])
# Register a newly added product under a tag.
new_goods_id = 2100
tag = "book"
Recomender.register(new_goods_id, tag=tag)

# Fetch up to 5 recommendations for an existing product.
goods_id = 102
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(Recomender.get(goods_id, count=5))

# Refresh one product, then everything.
Recomender.update(goods_id)
Recomender.update_all()

# Record the products a user has liked/purchased.
user_id = "xxxx"
goods_ids = [102, 102, 103, 104]
Recomender.like(user_id, goods_ids)
# Move the product to a different tag.
new_tag = "computer"
Recomender.change_tag(goods_id, new_tag)

# Drop the product, then the user, from the recommender.
Recomender.remove(goods_id)
Recomender.remove_user(user_id)
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

from collections import defaultdict

# Users who bought item ID 10.
ITEM_10_BUY_USERS = ['A', 'C', 'E', 'G']

# Purchase-history index: one key per user, built from INDEX_BASE.
INDEX_BASE = 'INDEX_BUY_HISTORY_USER_{}'
INDEX = {
    'INDEX_BUY_HISTORY_USER_A': [10, 20, 50, 60, 90],
    'INDEX_BUY_HISTORY_USER_B': [20, 20, 50, 60, 90],
    'INDEX_BUY_HISTORY_USER_C': [10, 30, 50, 60, 90],
    'INDEX_BUY_HISTORY_USER_D': [30, 40, 50, 60],
    'INDEX_BUY_HISTORY_USER_E': [10],
    'INDEX_BUY_HISTORY_USER_F': [70, 80, 90],
    'INDEX_BUY_HISTORY_USER_G': [10, 70, 90],
}

# Count how often each item appears in the histories of item 10's buyers.
result = defaultdict(int)
for user_id in ITEM_10_BUY_USERS:
    # A user without an index entry contributes nothing instead of crashing.
    buy_history = INDEX.get(INDEX_BASE.format(user_id), [])
    for item_id in buy_history:
        result[item_id] += 1

# (item_id, count) pairs, most frequently co-purchased first.
l = sorted(result.items(), key=lambda x: x[1], reverse=True)
print(l)
// Record $user_id as a viewer of $item_id in a Redis list.
// The list commands are lRem / lPush / lTrim (letter "l"), and the key uses
// the 'Viewer:Item:' prefix that the Jaccard computation reads from.
$viewerKey = 'Viewer:Item:' . $item_id;

// Remove any earlier occurrence of this user (count 0 = all occurrences).
$Redis->lRem($viewerKey, $user_id, 0);
// NOTE(review): the original call was the invalid name "1plus"; lPush (add at
// the head of the list) is assumed - confirm.
$Redis->lPush($viewerKey, $user_id);
// Keep only list positions 0..999.
$Redis->lTrim($viewerKey, 0, 999);
Jaccard指数の計算
/**
 * Compute the Jaccard index between every ordered pair of items and store it
 * in a per-item sorted set 'Jaccard:Item:<id>' (score = index, member = the
 * other item's id).
 *
 * $item_ids => array of item ids, e.g. [1,2,3,4,5]
 */
foreach ($item_ids as $item_id1) {
    $base = $Redis->lRange('Viewer:Item:' . $item_id1, 0, 999);
    if (count($base) === 0) {
        continue;
    }

    foreach ($item_ids as $item_id2) {
        // An item is not compared with itself.
        if ($item_id1 === $item_id2) {
            continue;
        }

        $target = $Redis->lRange('Viewer:Item:' . $item_id2, 0, 999);
        if (count($target) === 0) {
            continue;
        }

        // |A ∪ B| and |A ∩ B| over the two viewer lists.
        $join = floatval(count(array_unique(array_merge($base, $target))));
        $intersect = floatval(count(array_intersect($base, $target)));
        if ($intersect == 0 || $join == 0) {
            continue;
        }

        $jaccard = $intersect / $join;
        $Redis->zAdd('Jaccard:Item:' . $item_id1, $jaccard, $item_id2);
    }
}
// Read every member of the item's Jaccard sorted set, highest score first.
$Redis->zRevRange("Jaccard:Item:{$item_id}", 0, -1);
Maximum likelihood estimator
laplacian estimator
100101 P(head)=0.5
11011 P(head)=0.8
DATA x1 x2 .. xn
1/n ΣiXi between 0-1
MLE
Deep insight
correlation, causation
Sick
In hospital 40, died 4 10%
home 8000, died 20 0.25%
Chances of dying in hospital are 40 times larger than at home
hospital died
sick 36 4 11.1%
healthy 4 0 0%
At home
sick 40 20 50%
healthy 7960 20 0.251%
$$\frac{P(\text{exactly one head})}{P(\text{first flip is the only head})} = 4$$
def test(coins, flips):
    """Feed a sequence of flips to a FlipPredictor and collect its guesses.

    coins: list of head-probabilities, passed to FlipPredictor.
    flips: iterable of flip results, passed one at a time to update().

    Returns the list of Pheads() values, one taken after each flip.
    """
    f = FlipPredictor(coins)
    guesses = []
    for flip in flips:
        f.update(flip)
        guesses.append(f.Pheads())
    return guesses


# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(test([0.5, 0.4, 0.3], 'HHTH'))
from __future__ import division


class FlipPredictor(object):
    """Predict the next flip of a coin drawn at random from a known set.

    One coin from `coins` is assumed to be in use, each equally likely at the
    start. Every observed flip updates the belief about which coin it is
    (Bayes' rule), and Pheads() gives the resulting probability that the next
    flip is heads.
    """

    def __init__(self, coins):
        """coins: list of P(heads) values, one per candidate coin."""
        self.coins = coins
        n = len(coins)
        # Uniform prior over which coin is being flipped.
        self.probs = [1 / n] * n

    def Pheads(self):
        """Return P(next flip is heads) under the current beliefs."""
        return sum(p * c for p, c in zip(self.probs, self.coins))

    def update(self, result):
        """Update the per-coin beliefs after observing one flip.

        result: 'H' for heads or 'T' for tails.
        Raises ValueError for any other value.
        """
        if result == 'H':
            likelihoods = list(self.coins)
        elif result == 'T':
            likelihoods = [1 - c for c in self.coins]
        else:
            raise ValueError("result must be 'H' or 'T', got %r" % (result,))

        weighted = [p * l for p, l in zip(self.probs, likelihoods)]
        total = sum(weighted)
        if total == 0:
            # The observation is impossible under every coin; keep the
            # current beliefs rather than dividing by zero.
            return
        self.probs = [w / total for w in weighted]
Probability for continuous spaces
f(x) = 1/360 for 0 < x <= 360
Date * Time you were born
P(x)= 0
f(x)= 0.0166
f(x<=noon) = 2*f(x>noon)
a=0.0555 1/18
b=0.0277 1/3*1/12