variance algorithm

data2 = []
def variance

return

print variance(data2)

mean =
data = []
ndata = data – mu
ndata = []
ndata.append()

data3 = [13.04, 1.32, 22.65, 17.44, 29.54, 23.22, 17.65, 10.12, 26.73, 16.43]


def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    The sum is converted to float before dividing so that integer input is
    not truncated by Python 2's integer division.

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    return float(sum(data)) / len(data)


def variance(data):
    """Return the population variance of ``data``: (1/n) * sum((x - mu)**2).

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    mu = mean(data)
    # Explicit-loop version: collect the squared deviations, then average them.
    ndata = []
    for x in data:
        ndata.append((x - mu) ** 2)
    sigma2 = mean(ndata)
    return sigma2

another simple pattern comes here

data3 = [13.04, 1.32, 22.65, 17.44, 29.54, 23.22, 17.65, 10.12, 26.73, 16.43]


def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    The sum is converted to float before dividing so that integer input is
    not truncated by Python 2's integer division.

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    return float(sum(data)) / len(data)


def variance(data):
    """Return the population variance of ``data`` (comprehension version).

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    mu = mean(data)
    return mean([(x - mu) ** 2 for x in data])


# Print the variance of the sample list defined at the top of this snippet.
print(variance(data3))

programming estimation

mean

data1=[49., 66, 24, 98, 37, 64, 98, 27, 56, 93, 68, 78, 22, 25, 11]


def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    The sum is converted to float before dividing, so the result is correct
    even when every element is an int (Python 2 would otherwise truncate).

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    return float(sum(data)) / len(data)


print(mean(data1))

median

data1=[1,2,5,10,-20]


def median(data):
    """Return the median of ``data``.

    For an odd number of values this is the middle element of the sorted
    data. For an even number it is the average of the two middle elements,
    returned as a float.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if not data:
        raise ValueError("median() requires at least one data point")
    sdata = sorted(data)
    # Floor division keeps the index an int on both Python 2 and Python 3.
    mid = len(sdata) // 2
    if len(sdata) % 2 == 1:
        return sdata[mid]
    return (sdata[mid - 1] + sdata[mid]) / 2.0


print(median(data1))

data1 = [1,2,5,10,-20,5,5]

def mode(data):

return

print mode(data1)
5

for i in range(len(data))
if :
data.count(data.[i])

data1=[1,2,5,10,-20,5,5]


def mode(data):
    """Return the most frequent value in ``data``.

    When several values share the highest count, the one that appears first
    in ``data`` is returned.

    Raises:
        ValueError: if ``data`` is empty.
    """
    if not data:
        raise ValueError("mode() requires at least one data point")
    best = None
    best_count = 0
    for value in data:
        occurrences = data.count(value)
        # Strict '>' keeps the earliest value when counts tie.
        if occurrences > best_count:
            best = value
            best_count = occurrences
    return best


print(mode(data1))

Variance

Variance: spread of data
standard deviation

17, 19, 18, 17, 19 -> mean=18
-1 1 0 -1 1
variance = 0.8
std deviation = 0.8944

7, 38, 4, 23, 18 -> mean=18
-11 20 -14 5 0
variance = 148.4
std deviation = 12.18

mean = μ
variance
1/n Σ(Xi – μ)^2

standard deviation = √variance

data 3,4,5,6,7
mean 5
variance 2
std dev 1.414

data 8, 9, 10, 11, 12
mean 10
variance 2
std dev 1.414

data 15, 20, 25, 30, 35
mean 25
variance 50
std dev 7.071

MMM

MEAN, MEDIAN, MODE

house prices
190k 170k 165k 180k 165k
Mean = 1/N ΣXi = 174k

Median
picks the one in the middle

Mode
most frequently used number

3, 9, 3, 8, 2, 9, 1, 9, 2, 4
mean = 5
median = 3.5 (average of the two middle values, 3 and 4)
mode = 9

ユークリッド距離

# Distance-based similarity between two people in prefs.
#
# prefs   - Hash of person => { item => numeric score }.
# person1 - key of the first person.
# person2 - key of the second person.
#
# Returns 0 when the two people share no items, otherwise
# 1 / (1 + sum of squared score differences) as a Float in (0, 1].
def sim_distance(prefs, person1, person2)
  shared = shared_items_a(prefs, person1, person2)
  return 0 if shared.empty?

  sum_of_squares = shared.inject(0) do |result, item|
    result + (prefs[person1][item] - prefs[person2][item])**2
  end
  # 1.0 forces float division; with integer scores 1/(1+n) would truncate to 0.
  1.0 / (1 + sum_of_squares)
end

# Returns an Array of the items that both person1 and person2 have scored,
# in the order they appear in person1's ratings.
def shared_items_a(prefs, person1, person2)
  theirs = prefs[person2]
  prefs[person1].keys.select { |item| theirs.key?(item) }
end

ピアソン相関係数

# Pearson correlation coefficient between two people's scores on the items
# they have both rated.
#
# prefs   - Hash of person => { item => numeric score }.
# person1 - key of the first person.
# person2 - key of the second person.
#
# Returns 0 when there are no shared items or when either person's shared
# scores have no variance; otherwise a Float in [-1, 1].
def sim_pearson(prefs, person1, person2)
  shared = shared_items_a(prefs, person1, person2)

  n = shared.size
  return 0 if n == 0

  # Work in floats throughout so integer scores are not truncated by the
  # divisions below.
  xs = shared.map { |item| prefs[person1][item].to_f }
  ys = shared.map { |item| prefs[person2][item].to_f }

  sum1 = xs.inject(0.0) { |acc, x| acc + x }
  sum2 = ys.inject(0.0) { |acc, y| acc + y }
  sum1_sq = xs.inject(0.0) { |acc, x| acc + x**2 }
  sum2_sq = ys.inject(0.0) { |acc, y| acc + y**2 }
  sum_products = xs.zip(ys).inject(0.0) { |acc, (x, y)| acc + x * y }

  num = sum_products - (sum1 * sum2 / n)
  den_sq = (sum1_sq - sum1**2 / n) * (sum2_sq - sum2**2 / n)
  # Rounding can push the product slightly below zero; Math.sqrt would raise.
  return 0 if den_sq <= 0

  num / Math.sqrt(den_sq)
end

類似度

# Scores every key in prefs other than person with the given similarity
# method and returns the n highest [score, key] pairs, best first.
#
# similarity is the Symbol of a method taking (prefs, person, other).
def top_matches(prefs, person, n=5, similarity=:sim_pearson)
  candidates = prefs.keys.reject { |other| other == person }
  scored = candidates.map do |other|
    [__send__(similarity, prefs, person, other), other]
  end
  scored.sort.reverse.slice(0, n)
end

# Print the best matches for 'xxx' using top_matches' defaults (n=5, :sim_pearson).
# NOTE(review): critics_ja is not defined in this snippet; presumably the ratings hash. Confirm.
p top_matches(critics_ja, 'xxx')
# Recommends items that person has not scored yet, using a
# similarity-weighted average of everyone else's scores.
#
# prefs      - Hash of person => { item => numeric score }.
# person     - key of the person to recommend for.
# similarity - Symbol of a method taking (prefs, person, other).
#
# Returns an Array of [predicted_score, item] pairs, highest score first.
# People whose similarity to person is zero or negative are ignored.
def get_recommendations(prefs, person, similarity=:sim_pearson)
  totals_h = Hash.new(0)
  sim_sums_h = Hash.new(0)

  prefs.each do |other, ratings|
    next if other == person
    sim = __send__(similarity, prefs, person, other)
    next if sim <= 0

    ratings.each do |item, score|
      # Only consider items the person has not scored (or scored as 0).
      next unless !prefs[person].key?(item) || prefs[person][item] == 0
      totals_h[item] += score * sim
      sim_sums_h[item] += sim
    end
  end

  # Normalise by the summed similarity so items rated by many people are
  # not favoured merely for having more raters.
  rankings = totals_h.map { |item, total| [total / sim_sums_h[item], item] }
  rankings.sort.reverse
end

# Print the recommendations for 'xxx' using the default similarity (:sim_pearson).
# NOTE(review): critics_ja is not defined in this snippet; presumably the ratings hash. Confirm.
p get_recommendations(critics_ja, 'xxx')
# Inverts a person => { item => score } hash into item => { person => score }.
def transform_prefs(prefs)
  prefs.each_with_object({}) do |(person, score_h), flipped|
    score_h.each do |item, score|
      (flipped[item] ||= {})[person] = score
    end
  end
end

# Invert the ratings to item => { person => score }, then rank the entries of
# the inverted hash that are most similar to 'xxx'.
# NOTE(review): critics_ja is not defined in this snippet; presumably the ratings hash. Confirm.
menu = transform_prefs(critics_ja)
p top_matches(menu, 'xxx')

レコメンドエンジン

# Walk-through of the recommender API.
# NOTE(review): Recomender, goods and user are not defined in this snippet;
# they are presumably provided by the surrounding application. Confirm.

# Register every product together with its tag.
# A distinct loop variable avoids rebinding the `goods` collection itself.
for item in goods.get_all():
    Recomender.register(item.id, tag=item.tag)

# Feed each user's purchase history into the recommender.
for member in user.get_all():
    Recomender.like(member.id, member.history.goods_ids)

# Rebuild the recommendations for all products.
Recomender.update_all()

# Same rebuild with proc=4 (presumably the number of worker processes; confirm).
Recomender.update_all(proc=4)

# One call per scope [k, 4], k = 1..4 (presumably one shard per call; confirm).
Recomender.update_all(proc=4, scope=[1, 4])
Recomender.update_all(proc=4, scope=[2, 4])
Recomender.update_all(proc=4, scope=[3, 4])
Recomender.update_all(proc=4, scope=[4, 4])

# Register a newly added product.
new_goods_id = 2100
tag = "book"
Recomender.register(new_goods_id, tag=tag)

# Fetch up to 5 recommendations for a product.
goods_id = 102
print(Recomender.get(goods_id, count=5))

# Refresh a single product's recommendations.
Recomender.update(goods_id)

Recomender.update_all()

# Record that a user liked a list of products.
user_id = "xxxx"
goods_ids = [102, 102, 103, 104]
Recomender.like(user_id, goods_ids)

# Change a product's tag.
new_tag = "computer"
Recomender.change_tag(goods_id, new_tag)

# Remove a product, then a user, from the recommender.
Recomender.remove(goods_id)

Recomender.remove_user(user_id)

逆引き索引

# -*- coding: utf-8 -*-
# Must remain the first statement of the module; enables absolute imports and
# unicode string literals when run on Python 2.
from __future__ import absolute_import, unicode_literals

# 商品ID:10の購入者
from collections import defaultdict

# Users who bought item 10.
ITEM_10_BUY_USERS = ['A', 'C', 'E', 'G']

# Key template and lookup table: one purchase-history list per user.
# Each user needs a distinct key; repeated keys in a dict literal silently
# overwrite one another.
INDEX_BASE = 'INDEX_BUY_HISTORY_USER_{}'
INDEX = {
    'INDEX_BUY_HISTORY_USER_A': [10, 20, 50, 60, 90],
    'INDEX_BUY_HISTORY_USER_B': [20, 20, 50, 60, 90],
    'INDEX_BUY_HISTORY_USER_C': [10, 30, 50, 60, 90],
    'INDEX_BUY_HISTORY_USER_D': [30, 40, 50, 60],
    'INDEX_BUY_HISTORY_USER_E': [10],
    'INDEX_BUY_HISTORY_USER_F': [70, 80, 90],
    'INDEX_BUY_HISTORY_USER_G': [10, 70, 90],
}

# Count how often each item appears in the histories of item 10's buyers.
result = defaultdict(int)
for user_id in ITEM_10_BUY_USERS:
    # Fall back to an empty history for users missing from the index.
    buy_history = INDEX.get(INDEX_BASE.format(user_id), [])
    for item_id in buy_history:
        result[item_id] += 1

# Rank once, after all users are counted: (item_id, count), most frequent first.
ranking = sorted(result.items(), key=lambda pair: pair[1], reverse=True)
print(ranking)

php レコメンドエンジン

// Record that $user_id viewed $item_id in the Redis list "Viewer:Item:<id>".
// The key carries a trailing colon so it matches the key read back when the
// Jaccard index is computed.
// NOTE(review): $Redis is assumed to be a connected phpredis client. Confirm.
$Redis->lRem('Viewer:Item:' . $item_id, $user_id, 0);  // drop earlier entries so each viewer appears once
$Redis->lPush('Viewer:Item:' . $item_id, $user_id);    // newest viewer goes to the head of the list
$Redis->lTrim('Viewer:Item:' . $item_id, 0, 999);      // keep only the first 1000 entries

Jaccard指数の計算

/**
 * Computes the Jaccard index between the viewer lists of every ordered pair
 * of items and stores it in the sorted set "Jaccard:Item:<item_id1>", with
 * the index as score and the other item's id as member.
 *
 * $item_ids => array of item ids, e.g. [1, 2, 3, 4, 5]
 *
 * Pairs where either item has no viewers, or where the two viewer lists do
 * not overlap, are skipped.
 * NOTE(review): $Redis is assumed to be a connected phpredis client. Confirm.
 */
foreach ($item_ids as $item_id1) {
    // De-duplicate so repeated viewers cannot inflate the intersection.
    $base = array_unique($Redis->lRange('Viewer:Item:' . $item_id1, 0, 999));
    if (count($base) === 0) {
        continue;
    }
    foreach ($item_ids as $item_id2) {
        if ($item_id1 === $item_id2) {
            continue;
        }
        $target = array_unique($Redis->lRange('Viewer:Item:' . $item_id2, 0, 999));
        if (count($target) === 0) {
            continue;
        }

        // |A ∪ B| and |A ∩ B| as floats, so the division below is a float division.
        $join = floatval(count(array_unique(array_merge($base, $target))));
        $intersect = floatval(count(array_intersect($base, $target)));
        if ($intersect == 0 || $join == 0) {
            continue;
        }
        $jaccard = $intersect / $join;

        $Redis->zAdd('Jaccard:Item:' . $item_id1, $jaccard, $item_id2);
    }
}

// Read back the items related to $item_id, highest Jaccard index first.
$Redis->zRevRange('Jaccard:Item:' . $item_id, 0, -1);