central limit theorem

coin: each flip is 0 or 1; for a fair coin, $P\left(\sum_i x_i = k\right) = \frac{n!}{(n-k)!\,k!} \cdot \frac{1}{2^n}$
Pascal's triangle

flip a coin 1000 times
mean
standard deviation

import random
from math import sqrt

def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    The sum is converted to ``float`` before dividing, so integer or
    boolean samples are not truncated by integer division.  An empty
    ``data`` raises ``ZeroDivisionError``.
    """
    total = float(sum(data))
    count = len(data)
    return total / count

def variance(data):
    """Return the population variance of ``data``.

    Computed as the sum of squared deviations from ``mean(data)``
    divided by ``len(data)`` (not ``len(data) - 1``).  Each sample is
    converted to ``float`` before the mean is subtracted.
    """
    center = mean(data)
    squared_deviations = [(float(sample) - center) ** 2 for sample in data]
    return sum(squared_deviations) / len(data)

def stddev(data):
    """Return the standard deviation: the square root of ``variance(data)``."""
    spread = variance(data)
    return sqrt(spread)

def flip(N):
    """Simulate ``N`` coin flips and return the outcomes as a list of booleans.

    Each outcome is ``True`` when a fresh ``random.random()`` draw is
    greater than 0.5, and ``False`` otherwise.
    """
    outcomes = []
    for _ in range(N):
        outcomes.append(random.random() > 0.5)
    return outcomes

# Simulate N coin flips and report the mean and standard deviation of the
# outcomes (each outcome is a boolean, counted as 0 or 1).
N = 1000
f = flip(N)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(mean(f))
print(stddev(f))

Here comes standard deviation

from math import sqrt

# Ten sample observations used as input for stddev() below.
data3 = [
    13.04, 1.32, 22.65, 17.44, 29.54,
    23.22, 17.65, 10.12, 26.73, 16.43,
]


def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    The sum is converted to ``float`` before dividing so that integer
    samples are not truncated by Python 2 integer division.  An empty
    ``data`` raises ``ZeroDivisionError``.
    """
    return float(sum(data)) / len(data)
def variance(data):
    """Return the population variance of ``data``.

    This is the mean of the squared deviations of each sample from
    ``mean(data)``.
    """
    center = mean(data)
    squared_deviations = [(value - center) ** 2 for value in data]
    return mean(squared_deviations)
def stddev(data):
    """Return the standard deviation: the square root of ``variance(data)``."""
    return sqrt(variance(data))

# Report the standard deviation of the sample data; print(...) with a single
# argument behaves the same on Python 2 and 3.
print(stddev(data3))

standard score(偏差値) = (data – mean)/standard deviation

x1, …, xi, …, xn
mean:5
variance:16
standard deviation:4
xi:9

multiply by 1.5

standard score:(9-5)/4 = 1
μ:7.5
σ:6
σ^2:36
yi:13.5
z:1

correction factor(補正率)

incremental mean

from __future__ import division

def mean(oldmean, n, x):
    """Return the updated mean after adding one more observation.

    ``oldmean`` is the mean of the ``n`` values seen so far and ``x`` is
    the new value.  The previous total is rebuilt as ``oldmean * n``,
    ``x`` is added, and the result is divided by the new count
    ``n + 1``.  This relies on true division (the snippet enables it
    with ``from __future__ import division``).
    """
    running_total = oldmean * n + x
    new_count = n + 1
    return running_total / new_count

# Example: five values with mean 10 have been seen so far; fold in the value 4.
currentmean = 10
currentcount = 5
new = 4

# print(...) with a single argument behaves the same on Python 2 and 3.
print(mean(currentmean, currentcount, new))

def likelihood(dist, data):
    """Return the likelihood of observing ``data`` under ``dist``.

    ``dist`` maps each possible outcome to its probability and ``data``
    is an iterable of observed outcomes (for example a string of
    single-character outcomes, or a list of labels).  The result is the
    product of ``dist[outcome]`` over every observation; empty ``data``
    yields 1.

    Raises ``KeyError`` if an observation is not a key of ``dist``.
    """
    product = 1
    for outcome in data:
        # Accumulate in place; a bare ``product * dist[outcome]`` would
        # discard the result and always return 1.
        product *= dist[outcome]
    return product

# Each test case pairs the (dist, data) arguments for likelihood() with the
# expected likelihood.
tests = [
    (({'A': 0.2, 'B': 0.2, 'C': 0.2, 'D': 0.2, 'E': 0.2}, 'ABCEDDECAB'),
     1.024e-07),
    (({'Good': 0.6, 'Bad': 0.2, 'Indifferent': 0.2},
      ['Good', 'Bad', 'Indifferent', 'Good', 'Good', 'Bad']),
     0.001728),
    (({'Z': 0.6, 'X': 0.333, 'Y': 0.067}, 'ZXYYZXYXYZY'),
     1.07686302456e-08),
    (({'Z': 0.6, 'X': 0.233, 'Y': 0.067, 'W': 0.1}, 'WXYZYZZZZW'),
     8.133206112e-07),
]

for t, l in tests:
    # Accept any answer within 1% relative error of the expected value.
    if abs(likelihood(*t) / l - 1) < 0.01:
        print('Correct')
    else:
        print('Incorrect')

variance algorithm

data2 = []
def variance

return

print variance(data2)

mean =
data = []
ndata = data – mu
ndata = []
ndata.append()

# Ten sample observations for the variance() implementation below.
data3 = [
    13.04, 1.32, 22.65, 17.44, 29.54,
    23.22, 17.65, 10.12, 26.73, 16.43,
]
def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    ``float(...)`` is applied to the sum so that integer samples are
    not truncated by Python 2 integer division.  An empty ``data``
    raises ``ZeroDivisionError``.
    """
    return float(sum(data)) / len(data)
def variance(data):
    """Return the population variance of ``data``, built step by step.

    Collects the squared deviation of every sample from ``mean(data)``
    into a list, then returns the mean of that list.
    """
    center = mean(data)
    squared_deviations = []
    for value in data:
        squared_deviations.append((value - center) ** 2)
    return mean(squared_deviations)

another simple pattern comes here

# Ten sample observations for the comprehension-based variance() below.
data3 = [
    13.04, 1.32, 22.65, 17.44, 29.54,
    23.22, 17.65, 10.12, 26.73, 16.43,
]
def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    The sum is converted to ``float`` first, so integer samples divide
    exactly instead of being truncated by Python 2 integer division.
    An empty ``data`` raises ``ZeroDivisionError``.
    """
    return float(sum(data)) / len(data)
def variance(data):
    """Return the population variance of ``data``.

    The result is the mean of the squared deviations from ``mean(data)``.
    """
    mu = mean(data)

    def squared_deviation(sample):
        # Squared distance of one sample from the mean.
        return (sample - mu) ** 2

    return mean([squared_deviation(sample) for sample in data])

# Print the variance of data3, the dataset this snippet defines (the
# original passed data2, which this snippet never assigns).  print(...)
# with a single argument behaves the same on Python 2 and 3.
print(variance(data3))

programming estimation

mean

# Sample data for mean(); the first entry is written as a float
# (presumably to keep the sum a float under Python 2).
data1 = [
    49.0, 66, 24, 98, 37,
    64, 98, 27, 56, 93,
    68, 78, 22, 25, 11,
]

def mean(data):
    """Return the arithmetic mean of ``data`` as a float.

    Converting the sum to ``float`` makes the result exact for
    all-integer input too, rather than relying on the data containing a
    float to avoid Python 2 integer division.  An empty ``data`` raises
    ``ZeroDivisionError``.
    """
    return float(sum(data)) / len(data)

# Report the mean of the sample data; print(...) with a single argument
# behaves the same on Python 2 and 3.
print(mean(data1))

median

# Unsorted sample with an odd number of values, used as input for median().
data1 = [1, 2, 5, 10, -20]
def median(data):
    """Return the middle value of ``data`` after sorting.

    For an odd number of samples this is the exact median.  For an even
    number it returns the lower of the two middle values.  ``data``
    itself is not modified; a sorted copy is used.

    Raises ``IndexError`` if ``data`` is empty.
    """
    sdata = sorted(data)
    # Floor division keeps the index an int under true division
    # (Python 3, or Python 2 with ``from __future__ import division``).
    index = (len(sdata) - 1) // 2
    return sdata[index]

# Report the median of the sample data; print(...) with a single argument
# behaves the same on Python 2 and 3.
print(median(data1))

data1 = [1,2,5,10,-20,5,5]

def mode(data):

return

print mode(data1)
5

for i in range(len(data))
if :
data.count(data.[i])

# Sample in which 5 appears three times, used as input for mode().
data1 = [1, 2, 5, 10, -20, 5, 5]
def mode(data):
    """Return the most frequently occurring value in ``data``.

    ``data`` must be a sequence with a ``count`` method, such as a list.
    When several values share the highest count, the one that appears
    first in ``data`` is returned.

    Raises ``ValueError`` if ``data`` is empty.
    """
    if not data:
        raise ValueError("mode() requires at least one data point")
    # max() returns the first element with the largest key, which keeps
    # the first-seen value on ties.
    return max(data, key=data.count)

# Report the mode of the sample data; print(...) with a single argument
# behaves the same on Python 2 and 3.
print(mode(data1))

Variance

Variance: spread of data
standard deviation

17, 19, 18, 17, 19 -> mean=18
-1 1 0 -1 1
variance = 0.8
std deviation = 0.8944

7, 38, 4, 23, 18 -> mean=18
-11 20 -14 5 0
variance = 148.4
std deviation = 12.18

mean = μ
variance
1/n Σ(Xi – μ)^2

standard deviation = √variance

data 3,4,5,6,7
mean 5
variance 2
std dev 1.414

data 8, 9, 10, 11, 12
mean 10
variance 2
std dev 1.414

data 15, 20, 25, 30, 35
mean 25
variance 50
std dev 7.071

MMM

MEAN, MEDIAN, MODE

house prices
190k 170k 165k 180k 165k
Mean = 1/N ΣXi = 174k

Median
picks the one in the middle

Mode
most frequently used number

3, 9, 3, 8, 2, 9, 1, 9, 2, 4
mean = 5
median = 3.5 (even count: the two middle values are 3 and 4)
mode = 9