Skip to content

Commit f3594e6

Browse files
committed
bug fix and hints
1 parent 39d121a commit f3594e6

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

machine_learning/q_learning.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ def get_policy() -> dict[State, int]:
118118
"""
119119
Extract a deterministic policy from the Q-table.
120120
121+
121122
>>> q_table.clear()
122123
>>> q_table[(1, 2)][1] = 2.0
123124
>>> q_table[(1, 2)][2] = 1.0
@@ -127,7 +128,7 @@ def get_policy() -> dict[State, int]:
127128
policy: dict[State, int] = {}
128129
for s, a_dict in q_table.items():
129130
if a_dict:
130-
policy[s] = max(a_dict, key=a_dict.get)
131+
policy[s] = max(a_dict, key=lambda a: a_dict[a])
131132
return policy
132133

133134

0 commit comments

Comments
 (0)