@@ -21,6 +21,8 @@ let environment = gym.make("Blackjack-v0")
21
21
/// Total iteration count; fixed at 10,000.
let totalIterations: Int = 10_000

/// Five percent of `totalIterations` (500 for the current value).
/// The multiplication is done before the integer division so the
/// percentage is not truncated to zero.
let learningPhase: Int = totalIterations * 5 / 100

/// Alias for `Bool`, used as the return type of the solver's strategy functions.
typealias Strategy = Bool
25
+
24
26
class BlackjackState {
25
27
var playerSum : Int = 0
26
28
var dealerCard : Int = 0
@@ -69,7 +71,7 @@ class Solver {
69
71
Q [ prior. playerSum] [ prior. dealerCard] [ prior. useableAce] [ action] += priorQ + postQ
70
72
}
71
73
72
- func qLearningStrategy( observation: BlackjackState , iteration: Int ) -> Bool {
74
+ func qLearningStrategy( observation: BlackjackState , iteration: Int ) -> Strategy {
73
75
let hitReward = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce] [ 0 ]
74
76
let stayReward = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce] [ 1 ]
75
77
@@ -87,11 +89,11 @@ class Solver {
87
89
}
88
90
}
89
91
90
/// Picks an action at random; takes no observation, so the game state is ignored.
///
/// - Returns: The `Strategy` value produced by `Strategy.random()`.
func randomStrategy() -> Strategy {
    let coinFlip: Strategy = .random()
    return coinFlip
}
93
95
94
- func markovStrategy( observation: BlackjackState ) -> Bool {
96
+ func markovStrategy( observation: BlackjackState ) -> Strategy {
95
97
// hit @ 80% probability unless over 18, in which case do the reverse
96
98
let flip = Float . random ( in: 0 ..< 1 )
97
99
let threshHold : Float = 0.8
@@ -122,15 +124,15 @@ class Solver {
122
124
}
123
125
}
124
126
125
/// Chooses an action from the fixed table exposed by `normalStrategyLookup(playerSum:)`.
///
/// - Parameter observation: Current state. `playerSum` selects the lookup row and
///   `dealerCard` is used as a 1-based column into that row.
/// - Returns: `true` when `playerSum` is 0, or when the row's entry for the dealer's
///   card is `"H"`; `false` otherwise. (Presumably `true` means "hit" — confirm
///   against the caller.)
func normalStrategy(observation: BlackjackState) -> Strategy {
    // A zero player sum short-circuits to `true` without consulting the table.
    guard observation.playerSum != 0 else {
        return true
    }

    let lookupRow = Array(normalStrategyLookup(playerSum: observation.playerSum))
    let column = observation.dealerCard - 1

    // Fail with a descriptive message instead of a bare index trap when the
    // dealer card does not map onto the lookup row.
    precondition(
        lookupRow.indices.contains(column),
        "dealerCard \(observation.dealerCard) is outside the lookup row for playerSum \(observation.playerSum)"
    )

    // Compare against the single character "H"; a padded literal could never
    // match one element of the row.
    return lookupRow[column] == "H"
}
132
134
133
- func strategy( observation: BlackjackState , solver: SolverType , iteration: Int ) -> Bool {
135
+ func strategy( observation: BlackjackState , solver: SolverType , iteration: Int ) -> Strategy {
134
136
switch solver {
135
137
case . random:
136
138
return randomStrategy ( )
0 commit comments