@@ -69,41 +69,41 @@ class Solver {
69
69
Q [ prior. playerSum] [ prior. dealerCard] [ prior. useableAce] [ action] += priorQ + postQ
70
70
}
71
71
72
- func getQLearningStrategy ( observation: BlackjackState , iteration: Int ) -> Bool {
72
+ func qLearningStrategy ( observation: BlackjackState , iteration: Int ) -> Bool {
// Chooses an action for `observation` from the Q table and returns it as a Bool.
// Early on it may return a random choice; otherwise it compares the two Q values
// stored for this state (last index 0 and 1) and returns true when the value at
// index 1 is the larger one. Exact ties are broken randomly.
// Side effect: sets `alpha` to 0.0 once `iteration` exceeds `learningPhase`.
//
// NOTE(review): the driver loop in this file turns `true` into action 1, so a
// `true` result selects the action whose value is read into `stayReward`. The
// other strategies here treat `true` as "hit" -- the hit/stay names may be
// swapped relative to the action encoding; confirm against the environment.
73
73
let hitReward = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce] [ 0 ]
74
74
let stayReward = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce] [ 1 ]
75
75
// Exploration: a fresh threshold is drawn from 1...learningPhase on every call,
// so the chance of a random action falls as `iteration` grows and is zero once
// iteration >= learningPhase. The closed range requires learningPhase >= 1.
76
- if ( iteration < Int . random ( in: 1 ... learningPhase) ) {
77
- return getRandomStrategy ( )
76
+ if iteration < Int . random ( in: 1 ... learningPhase) {
77
+ return randomStrategy ( )
78
78
} else {
79
79
// quit learning after initial phase
// (presumably `alpha` is the learning rate used by the Q update -- TODO confirm)
80
- if ( iteration > learningPhase) { alpha = 0.0 }
80
+ if iteration > learningPhase { alpha = 0.0 }
81
81
}
82
82
// Exploitation: pick by comparing the two stored values; a tie falls back to a
// random choice.
83
83
if hitReward == stayReward {
84
- return getRandomStrategy ( )
84
+ return randomStrategy ( )
85
85
} else {
86
86
return hitReward < stayReward
87
87
}
88
88
}
89
89
90
- func getRandomStrategy ( ) -> Bool {
90
+ func randomStrategy ( ) -> Bool {
// Returns a random Bool from Bool.random(); takes no input and ignores the
// game state entirely.
91
91
return Bool . random ( )
92
92
}
93
93
94
- func getMarkovStrategy ( observation: BlackjackState ) -> Bool {
94
+ func markovStrategy ( observation: BlackjackState ) -> Bool {
95
95
// Fixed stochastic policy that depends only on observation.playerSum.
// Hit with ~80% probability while playerSum is below 18; at 18 or above do the
// reverse (hit with ~20% probability). `true` is the "hit" outcome.
96
96
let flip = Float . random ( in: 0 ..< 1 )
97
97
let threshHold : Float = 0.8
98
98
99
- if ( observation. playerSum < 18 ) {
99
+ if observation. playerSum < 18 {
100
100
// flip is drawn from [0, 1), so this is true about 80% of the time
return flip < threshHold
101
101
} else {
102
102
// true only when flip lands above 0.8, i.e. about 20% of the time
return flip > threshHold
103
103
}
104
104
}
105
105
106
- func getNormalStrategyLookup ( playerSum: Int ) -> String {
106
+ func normalStrategyLookup ( playerSum: Int ) -> String {
107
107
// see figure 11: https://ieeexplore.ieee.org/document/1299399/
108
108
switch playerSum {
109
109
case 10 : return " HHHHHSSHHH "
@@ -122,24 +122,24 @@ class Solver {
122
122
}
123
123
}
124
124
125
- func getNormalStrategy ( observation: BlackjackState ) -> Bool {
125
+ func normalStrategy ( observation: BlackjackState ) -> Bool {
// Table-driven policy: fetches the strategy string for the player's sum and
// returns true when the entry for the dealer's card is the H literal compared
// against below.
126
126
// A player sum of 0 returns true immediately, without consulting the table.
if observation. playerSum == 0 {
127
127
return true
128
128
}
129
- let lookupString = getNormalStrategyLookup ( playerSum: observation. playerSum)
129
+ let lookupString = normalStrategyLookup ( playerSum: observation. playerSum)
130
130
// The string is indexed by dealerCard - 1, one character per dealer card.
// NOTE(review): assumes 1 <= dealerCard <= length of the lookup string; an
// index outside that range traps at runtime -- confirm the dealerCard range.
return Array ( lookupString) [ observation. dealerCard - 1 ] == " H "
131
131
}
132
132
133
- func getStrategy ( observation: BlackjackState , solver: SolverType , iteration: Int ) -> Bool {
133
+ func strategy ( observation: BlackjackState , solver: SolverType , iteration: Int ) -> Bool {
// Single entry point for choosing an action: forwards to the policy selected by
// `solver` and returns that policy's Bool decision unchanged.
// `iteration` is only passed to the Q-learning policy; the other three ignore it.
// `observation` is ignored by the random policy.
134
134
// No default case: the four SolverType cases are listed explicitly.
switch solver {
135
135
case . random:
136
- return getRandomStrategy ( )
136
+ return randomStrategy ( )
137
137
case . markov:
138
- return getMarkovStrategy ( observation: observation)
138
+ return markovStrategy ( observation: observation)
139
139
case . qlearning:
// may also set `alpha` to 0.0 as a side effect once iteration > learningPhase
140
- return getQLearningStrategy ( observation: observation, iteration: iteration)
140
+ return qLearningStrategy ( observation: observation, iteration: iteration)
141
141
case . normal:
142
- return getNormalStrategy ( observation: observation)
142
+ return normalStrategy ( observation: observation)
143
143
}
144
144
}
145
145
}
@@ -154,19 +154,18 @@ for solver in SolverType.allCases {
154
154
environment. reset ( )
155
155
156
156
while !isDone {
157
-
158
157
let priorState = BlackjackState ( pythonState: environment. _get_obs ( ) )
159
- let action : Int = learner. getStrategy ( observation: priorState,
160
- solver: solver,
161
- iteration: i) ? 1 : 0
158
+ let action : Int = learner. strategy ( observation: priorState,
159
+ solver: solver,
160
+ iteration: i) ? 1 : 0
162
161
163
162
let ( pythonPostState, reward, done, _) = environment. step ( action) . tuple4
164
- let postState = BlackjackState ( pythonState: pythonPostState)
165
163
166
164
if solver == . qlearning {
165
+ let postState = BlackjackState ( pythonState: pythonPostState)
167
166
learner. updateQLearningStrategy ( prior: priorState,
168
167
action: action,
169
- reward: Int ( reward) ! ,
168
+ reward: Int ( reward) ?? 0 ,
170
169
post: postState)
171
170
}
172
171
0 commit comments