Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 64bb0fe

Browse files
committed
more cleanup
1 parent 19e878c commit 64bb0fe

File tree

1 file changed

+12
-11
lines changed

1 file changed

+12
-11
lines changed

Gym/Blackjack/main.swift

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,17 @@ class Solver {
4444
var alpha: Float = 0.5
4545
let gamma: Float = 0.2
4646

47-
let numberOfPlayerStates = 32 // 21 + 10 + 1 offset
48-
let numberOfDealerVisibleStates = 11 // 10 + 1 offset
49-
let numberOfAceStates = 2 // useable / not bool
50-
let numberOfPlayerActions = 2 // hit / stay
47+
let numPlayerStates = 32 // 21 + 10 + 1 offset
48+
let numDealerVisibleStates = 11 // 10 + 1 offset
49+
let numAceStates = 2 // useable / not bool
50+
let numPlayerActions = 2 // hit / stay
5151

5252
init() {
5353
Q = Array(repeating: Array(repeating: Array(repeating: Array(repeating: 0.0,
54-
count: numberOfPlayerActions),
55-
count: numberOfAceStates),
56-
count: numberOfDealerVisibleStates),
57-
count: numberOfPlayerStates)
54+
count: numPlayerActions),
55+
count: numAceStates),
56+
count: numDealerVisibleStates),
57+
count: numPlayerStates)
5858
}
5959

6060
func updateQLearningStrategy(prior: BlackjackState,
@@ -72,8 +72,9 @@ class Solver {
7272
}
7373

7474
func qLearningStrategy(observation: BlackjackState, iteration: Int) -> Strategy {
75-
let hitReward = Q[observation.playerSum][observation.dealerCard][observation.useableAce][0]
76-
let stayReward = Q[observation.playerSum][observation.dealerCard][observation.useableAce][1]
75+
let qLookup = Q[observation.playerSum][observation.dealerCard][observation.useableAce]
76+
let stayReward = qLookup[0]
77+
let hitReward = qLookup[1]
7778

7879
if iteration < Int.random(in: 1...learningPhase) {
7980
return randomStrategy()
@@ -85,7 +86,7 @@ class Solver {
8586
if hitReward == stayReward {
8687
return randomStrategy()
8788
} else {
88-
return hitReward < stayReward
89+
return hitReward > stayReward
8990
}
9091
}
9192

0 commit comments

Comments
 (0)