Break Even Point

million = 1000000


def Q(state, action, U):
    """Return the expected utility of taking `action` from amount `state`.

    Args:
        state: Current amount; the payoff is added to it before `U` is
            applied.
        action: 'hold' adds one million with certainty; 'gamble' adds
            three million or nothing, each outcome weighted 0.5.
        U: Utility function, called with the resulting amount.

    Returns:
        U(state + 1 million) for 'hold'; the equally weighted average of
        U(state + 3 million) and U(state) for 'gamble'.

    Raises:
        ValueError: If `action` is neither 'hold' nor 'gamble'.
    """
    if action == 'hold':
        return U(state + 1*million)
    if action == 'gamble':
        return U(state + 3*million) * .5 + U(state) * .5
    # An unknown action used to fall through and yield None, which would
    # only surface later as a confusing comparison error; fail loudly here.
    raise ValueError('unknown action: %r' % (action,))

# Utility function used for the comparison: base-10 logarithm of the amount.
U = math.log10

# Starting amount for the comparison: one million.
c = million
# Expected utility of each action from `c`, evaluated in the order
# (gamble, hold). This is a bare expression: its value is only displayed
# when run in an interactive session.
tuple(Q(c, choice, math.log10) for choice in ('gamble', 'hold'))
@memo
def win_diff(state):
    """Return the utility of `state` measured as a score differential.

    `state` is unpacked as a 4-tuple whose last three fields are
    (me, you, pending); the first field is not used here.

    If either side has reached `goal` (counting `pending` toward `me`),
    the result is `me + pending - you`. Otherwise it is the largest value
    of `Q_pig(state, action, win_diff)` over `pig_actions(state)`, so the
    function recurses through `Q_pig`.

    NOTE(review): `memo`, `goal`, `Q_pig` and `pig_actions` are defined
    outside this block; `memo` is presumably a caching decorator -- confirm.
    """
    (_, me, you, pending) = state
    if me + pending >= goal or you >= goal:
        return me + pending - you
    return max(Q_pig(state, action, win_diff)
               for action in pig_actions(state))

# Every state of the form (0, me, you, pending) with each score in 0..40,
# keeping only those where me + pending does not exceed `goal`.
states = [(0, me, you, pending)
          for me in range(41) for you in range(41) for pending in range(41)
          if me + pending <= goal]
len(states)  # bare expression: value is only shown in an interactive session

from collections import defaultdict

# Count how many states fall under each (max_wins, max_diffs) result pair.
# NOTE(review): max_wins and max_diffs are defined elsewhere; the recorded
# output below shows them returning 'hold' or 'roll'.
r = defaultdict(int)
for s in states:
    r[max_wins(s), max_diffs(s)] += 1
dict(r)  # bare expression: value is only shown in an interactive session
# Recorded output of the expression above:
# {('hold', 'hold'): 1204, ('hold', 'roll'): 381,
#  ('roll', 'roll'): 29741, ('roll', 'hold'): 3975}


def story():
    """Print, per `pending` value, where max_wins and max_diffs disagree.

    Only states for which the two functions return different results are
    counted. For each such state the tally is keyed by the state's
    `pending` field: slot 0 is incremented when max_wins returned 'roll',
    slot 1 otherwise. One line is printed per `pending` value, in sorted
    order, formatted as '%4d: %3d %3d' (pending, slot 0, slot 1).
    """
    # Local tally; intentionally shadows the module-level `r`.
    r = defaultdict(lambda: [0, 0])
    for s in states:
        w, d = max_wins(s), max_diffs(s)
        if w != d:
            _, _, _, pending = s
            i = 0 if (w == 'roll') else 1
            r[pending][i] += 1
    for (pending, (wrolls, drolls)) in sorted(r.items()):
        # Parenthesised single-argument form prints identically under
        # both the Python 2 print statement and the Python 3 function.
        print('%4d: %3d %3d' % (pending, wrolls, drolls))