@@ -44,17 +44,17 @@ class Solver {
44
44
var alpha : Float = 0.5 // NOTE(review): presumably the Q-learning step size (learning rate) — confirm in updateQLearningStrategy
45
45
let gamma : Float = 0.2 // NOTE(review): presumably the discount factor applied to future reward — confirm in updateQLearningStrategy
46
46
47
- let numberOfPlayerStates = 32 // 21 + 10 + 1 offset
48
- let numberOfDealerVisibleStates = 11 // 10 + 1 offset
49
- let numberOfAceStates = 2 // useable / not bool
50
- let numberOfPlayerActions = 2 // hit / stay
47
+ let numPlayerStates = 32 // size of Q's first axis (indexed by playerSum): 21 + 10 + 1 offset
48
+ let numDealerVisibleStates = 11 // size of Q's second axis (indexed by dealerCard): 10 + 1 offset
49
+ let numAceStates = 2 // size of Q's third axis (indexed by useableAce): useable / not
50
+ let numPlayerActions = 2 // size of Q's last axis; qLearningStrategy reads index 0 as stay and index 1 as hit
51
51
52
52
init ( ) { // Zero-fills Q as a four-level nested array, read elsewhere as Q[playerSum][dealerCard][useableAce][action].
53
53
Q = Array ( repeating: Array ( repeating: Array ( repeating: Array ( repeating: 0.0 ,
54
- count: numberOfPlayerActions ) ,
55
- count: numberOfAceStates ) ,
56
- count: numberOfDealerVisibleStates ) ,
57
- count: numberOfPlayerStates )
54
+ count: numPlayerActions ) , // innermost level: one slot per action
55
+ count: numAceStates ) , // third level: indexed by useableAce
56
+ count: numDealerVisibleStates ) , // second level: indexed by dealerCard
57
+ count: numPlayerStates ) // outermost level: indexed by playerSum
58
58
}
59
59
60
60
func updateQLearningStrategy( prior: BlackjackState ,
@@ -72,8 +72,9 @@ class Solver {
72
72
}
73
73
74
74
func qLearningStrategy( observation: BlackjackState , iteration: Int ) -> Strategy {
75
- let hitReward = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce] [ 0 ]
76
- let stayReward = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce] [ 1 ]
75
+ let qLookup = Q [ observation. playerSum] [ observation. dealerCard] [ observation. useableAce]
76
+ let stayReward = qLookup [ 0 ]
77
+ let hitReward = qLookup [ 1 ]
77
78
78
79
if iteration < Int . random ( in: 1 ... learningPhase) {
79
80
return randomStrategy ( )
@@ -85,7 +86,7 @@ class Solver {
85
86
if hitReward == stayReward {
86
87
return randomStrategy ( )
87
88
} else {
88
- return hitReward < stayReward
89
+ return hitReward > stayReward
89
90
}
90
91
}
91
92
0 commit comments