million = 1000000


def Q(state, action, U):
    """Return the expected utility of taking `action` in `state` under `U`.

    `state` is a number and `U` is a one-argument callable applied to the
    resulting amounts.

    - 'hold':   the outcome is `state + 1*million` with certainty.
    - 'gamble': the outcome is `state + 3*million` or `state`, each weighted .5.

    Any other `action` falls through and returns None.
    """
    if action == 'hold':
        return U(state + 1*million)
    if action == 'gamble':
        return U(state + 3*million) * .5 + U(state) * .5


U = math.log10
c = 1*million
# Compare both actions from a starting amount of one million under log10 utility.
Q(c, 'gamble', math.log10), Q(c, 'hold', math.log10)
@memo
def win_diff(state):
    """Return the utility of `state` measured as a score differential.

    `state` is a 4-tuple `(p, me, you, pending)`.  Once `me + pending` or
    `you` has reached `goal`, the value is `me + pending - you`.  Otherwise it
    is the largest `Q_pig(state, action, win_diff)` over the actions returned
    by `pig_actions(state)`, so the function recurses through `Q_pig`.
    """
    _, me, you, pending = state  # first component is not needed here
    if me + pending >= goal or you >= goal:
        return me + pending - you
    return max(Q_pig(state, action, win_diff)
               for action in pig_actions(state))
# Every (0, me, you, pending) tuple with each score in 0..40 and
# me + pending not exceeding `goal`.  The first component is fixed at 0
# (presumably the player to move -- confirm against the state definition).
states = [(0, me, you, pending)
          for me in range(41)
          for you in range(41)
          for pending in range(41)
          if me + pending <= goal]
len(states)
from collections import defaultdict
# Count how many states fall under each (max_wins, max_diffs) pair of choices.
r = defaultdict(int)
for s in states:
    r[(max_wins(s), max_diffs(s))] += 1
# Show the tallies as a plain dict.
dict(r)
{('hold', 'hold'): 1204,
('hold', 'roll'): 381,
('roll', 'roll'): 29741,
('roll', 'hold'): 3975}
[/python]
[python]
def story():
    """Print, per `pending` value, a breakdown of states where the policies differ.

    For each state in the module-level `states` where `max_wins(s)` and
    `max_diffs(s)` disagree, the state is tallied under its `pending` value:
    in the first counter when `max_wins` chose 'roll', in the second counter
    otherwise.  One line per `pending` value is printed in ascending order,
    formatted as '%4d: %3d %3d'.
    """
    r = defaultdict(lambda: [0, 0])
    for s in states:
        w, d = max_wins(s), max_diffs(s)
        if w != d:
            _, _, _, pending = s
            i = 0 if (w == 'roll') else 1
            r[pending][i] += 1
    for (pending, (wrolls, drolls)) in sorted(r.items()):
        # The parenthesised single-argument form prints identically under
        # Python 2 (print statement) and Python 3 (print function).
        print('%4d: %3d %3d' % (pending, wrolls, drolls))
[/python]