import random
import math
import numpy as np
class BlackjackAI:
# Initialize Blackjack AI agent settings
def __init__(self, use_basic_strategy=False, temperature=1.0, num_decks=6):
# Initialize game state and agent properties
self.state = None # Current state of the game
self.q_table = {} # Stores the Q-values for state-action pairs
self.actions = ['hit', 'stand', 'double down', 'split'] # Possible actions
self.learning_rate = 0.05 # How quickly the agent adopts new values
self.total_games_played = 0 # Track number of games played
self.discount_factor = 0.95 # Discount factor for future rewards
self.exploration_rate = 1.0 # Exploration rate for choosing random actions
self.epsilon_min = 0.05 # Minimum exploration rate
self.epsilon_decay = 0.9995 # Decay rate of exploration rate per game
self.card_count = 0 # Counter for card counting
self.use_basic_strategy = use_basic_strategy # Whether to use basic strategy
        self.temperature = temperature  # Softmax temperature used when sampling exploratory actions
        self.num_decks = num_decks  # Number of decks in the shoe
        self.win_threshold = 0.65  # Win rate above which adjust_exploration boosts exploration
        self.reward_variance_threshold = 1.0  # Assumed default: reward variance above which adjust_exploration also boosts exploration
        self.shuffle_deck()  # Build and shuffle the initial shoe; also sets self.deck and self.deck_cut
    # Rebuild and shuffle the shoe, then reset the cut position and the running count
    def shuffle_deck(self):
        self.deck = [2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11] * 4 * self.num_decks
        random.shuffle(self.deck)
        self.deck_cut = int(0.25 * len(self.deck))  # Reshuffle once roughly 75% of the shoe has been dealt
        self.card_count = 0  # A fresh shoe resets the running count
    # Draw a card from the shoe, reshuffling first if the cut card has been reached
    def deal_card(self):
        if len(self.deck) <= self.deck_cut:
            self.shuffle_deck()
        card = self.deck.pop()
        self.adjust_card_count(card)
        return card
# Initialize the Q-table for all possible game states
def initialize_q_table(self):
self.q_table = {}
for player_hand_value in range(4, 22):
for dealer_face_up_card in range(2, 12):
for soft in [False, True]:
for hand_size in range(2, 12):
for can_split in [False, True]:
actions = {'hit': 0, 'stand': 0, 'double down': 0}
if can_split:
actions['split'] = 0
self.q_table[(player_hand_value, dealer_face_up_card, soft, hand_size, can_split)] = actions
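        # Illustrative sketch of the resulting layout: a hard 16 against a dealer 10
        # in a two-card hand that cannot be split is keyed as
        #   (16, 10, False, 2, False) -> {'hit': 0, 'stand': 0, 'double down': 0}
        # and the loops above create 18 * 10 * 2 * 10 * 2 = 7,200 such entries.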
# Dynamically adjust the learning rate based on the number of games played
def update_learning_rate(self):
self.learning_rate = max(0.01, self.learning_rate * 0.9999)
# Adjust the card counting strategy based on the value of dealt card
def adjust_card_count(self, card):
# Basic Hi-Lo card counting system adjustment for multiple decks
if card in [10, 11]: # High cards: 10 (including 10, J, Q, K) and Aces
self.card_count -= 1
elif card in [2, 3, 4, 5, 6]: # Low cards: 2 to 6
self.card_count += 1
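        # Worked example (illustrative): dealing 5, 10, 2, 11 moves the running
        # count by +1 - 1 + 1 - 1 = 0, while dealing 3, 4, 6 instead would add +3,
        # signalling a shoe that is now richer in tens and aces.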
    # Return the hand's value, whether it is soft (an ace still counted as 11), and its size
    def calculate_hand_value(self, hand):
        value = sum(hand)
        aces = hand.count(11)
        while value > 21 and aces:
            value -= 10
            aces -= 1
        is_soft = aces > 0  # Soft only if an ace still counts as 11 after adjustment
        return value, is_soft, len(hand)
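        # Worked example (illustrative): [11, 6] evaluates to (17, True, 2), a soft 17;
        # drawing a 10 makes the raw total 27, the ace drops to 1, and the hand
        # becomes (17, False, 3), a hard 17.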
# Calculate the bet size based on the true count of cards
def calculate_bet_size(self):
true_count = self.card_count / ((len(self.deck) / 52) or 1) # Avoid division by zero
if true_count > 1:
return min(100, 10 * true_count) # Example betting strategy: bet more when count is high
return 10
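        # Worked example (illustrative): with a running count of +6 and 156 cards
        # (three decks) left in the shoe, the true count is 6 / 3 = 2, so the bet is
        # min(100, 10 * 2) = 20 rather than the flat 10-unit minimum.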
# Select an action based on the current state using either basic strategy or Q-learning
def select_action(self, player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split):
state = (player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)
if self.use_basic_strategy:
return self.basic_strategy(player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)
if random.random() < self.exploration_rate:
# Smart exploration: probabilities favor actions with higher Q-values
total = sum(math.exp(self.q_table[state][action] / self.temperature) for action in self.q_table[state])
probabilities = {action: math.exp(self.q_table[state][action] / self.temperature) / total for action in self.q_table[state]}
actions, weights = zip(*probabilities.items())
return random.choices(actions, weights=weights)[0]
return max(self.q_table[state], key=self.q_table[state].get)
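        # Worked example (illustrative): with temperature 1.0 and Q-values
        # {'hit': 0.5, 'stand': 0.0}, exploration picks 'hit' with probability
        # e^0.5 / (e^0.5 + e^0) ~= 0.62 and 'stand' with probability ~0.38, so
        # higher-valued actions stay favoured even while exploring.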
# Define basic strategy rules for the game
def basic_strategy(self, player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split):
# Rules for splitting
if can_split and hand_size == 2:
if player_hand_value == 16: # Split 8s
return 'split'
            elif player_hand_value == 12 and dealer_face_up_card not in [2, 3, 7, 8, 9, 10, 11]:  # Split 6s except against these dealer cards
                return 'split'
# Additional rules can be added here
if player_hand_value == 18 and can_split and dealer_face_up_card == 9:
return 'split' if self.card_count > 0 else 'stand' # Example of adjustment based on count
# Rules for double down
if hand_size == 2: # Usually, double down is considered only on the first move
if player_hand_value == 11:
return 'double down'
elif player_hand_value == 10 and dealer_face_up_card not in [10, 11]:
return 'double down'
# Additional rules can be added here
# General hit/stand rules
if is_soft:
if player_hand_value >= 19:
return 'stand'
if player_hand_value == 18 and dealer_face_up_card in range(2, 7):
return 'stand'
return 'hit'
else:
if player_hand_value >= 17:
return 'stand'
if player_hand_value >= 13 and dealer_face_up_card < 7:
return 'stand'
if player_hand_value == 12 and 4 <= dealer_face_up_card <= 6:
return 'stand'
return 'hit'
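        # Worked example (illustrative): a hard 16 against a dealer 10 falls through
        # the split and double-down rules and hits, while a hard 13 against a
        # dealer 5 stands under the value >= 13 and dealer < 7 rule.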
# Update the Q-table with new Q-value based on reward received
def update_q_table(self, state, action, reward, next_state):
current_q = self.q_table[state][action]
future_q = max(self.q_table[next_state].values()) if next_state else 0
self.q_table[state][action] = current_q + self.learning_rate * (reward + self.discount_factor * future_q - current_q)
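        # Worked example (illustrative): with learning_rate 0.05, discount 0.95, a
        # terminal reward of +1 (future_q = 0) and a current Q-value of 0.2, the
        # update gives 0.2 + 0.05 * (1 + 0.95 * 0 - 0.2) = 0.24.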
# Simulate one game of blackjack, potentially returning the game's total reward
def play_one_game(self, explain=False):
player_hand = [self.deal_card(), self.deal_card()]
dealer_hand = [self.deal_card(), self.deal_card()]
player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
dealer_value, dealer_is_soft, dealer_hand_size = self.calculate_hand_value(dealer_hand)
dealer_face_up_card = dealer_hand[0]
        state = (player_value, dealer_face_up_card, is_soft, hand_size, False)  # Splitting is not exercised in this loop, so can_split stays False
game_over = False
total_reward = 0
while not game_over:
# Game play logic including player actions, dealer response, and reward calculation
action = self.select_action(player_value, dealer_face_up_card, is_soft, hand_size, False)
if explain:
print(f"State: {state}, Chosen Action: {action}, Current Card Count: {self.card_count}")
if action == 'hit':
player_hand.append(self.deal_card())
player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
if player_value > 21:
if explain:
print("Explanation: Player busts with a hand value of", player_value)
reward = -1
if not self.use_basic_strategy:
self.update_q_table(state, action, reward, None)
total_reward += reward
break
state = (player_value, dealer_face_up_card, is_soft, hand_size, False)
if explain:
print("Explanation: Player hits and now has a hand value of", player_value)
            elif action == 'double down':
                player_hand.append(self.deal_card())
                player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
                if player_value > 21:
                    reward = -2  # The doubled stake is lost on a bust
                else:
                    # Play out the dealer's hand so the doubled stake is settled against it
                    while dealer_value < 17:
                        dealer_hand.append(self.deal_card())
                        dealer_value, dealer_is_soft, dealer_hand_size = self.calculate_hand_value(dealer_hand)
                    reward = 2 * self.calculate_reward(player_value, dealer_value)
                if explain:
                    print("Explanation: Player chooses to double down.")
                    print("Double down result:", "Bust" if player_value > 21 else f"Final hand value of {player_value} vs dealer {dealer_value}")
                if not self.use_basic_strategy:
                    self.update_q_table(state, action, reward, None)
                total_reward += reward
                game_over = True
else: # 'stand'
while dealer_value < 17:
dealer_hand.append(self.deal_card())
dealer_value, dealer_is_soft, dealer_hand_size = self.calculate_hand_value(dealer_hand)
reward = self.calculate_reward(player_value, dealer_value)
if explain:
print("Explanation: Player stands with a hand value of", player_value)
print(f"Game ended. Dealer's hand value: {dealer_value}, Player's hand value: {player_value}, Reward: {reward}")
if not self.use_basic_strategy:
self.update_q_table(state, action, reward, None)
total_reward += reward
game_over = True
return total_reward
def play_split_hand(self, hand, dealer_hand, explain):
# This function needs to handle a single split hand versus the dealer
# For simplicity, it just mimics the main play_one_game logic for one hand
player_value, is_soft, hand_size = self.calculate_hand_value(hand)
dealer_face_up_card = dealer_hand[0]
can_split = False # No further splits allowed
state = (player_value, dealer_face_up_card, is_soft, hand_size, can_split)
total_reward = 0
while True:
action = self.select_action(player_value, dealer_face_up_card, is_soft, hand_size, can_split)
if action == 'hit':
hand.append(self.deal_card())
player_value, is_soft, hand_size = self.calculate_hand_value(hand)
if player_value > 21:
if explain:
print("Split hand busts!")
return -1
state = (player_value, dealer_face_up_card, is_soft, hand_size, can_split)
else: # 'stand'
break
# Now compare with dealer's hand
dealer_value, _, _ = self.calculate_hand_value(dealer_hand)
while dealer_value < 17:
dealer_hand.append(self.deal_card())
dealer_value, _, _ = self.calculate_hand_value(dealer_hand)
return self.calculate_reward(player_value, dealer_value)
def test(self, num_games, explain=False):
results = {'win': 0, 'lose': 0, 'draw': 0}
total_rewards = 0
for _ in range(num_games):
reward = self.play_one_game(explain=explain)
total_rewards += reward
if reward > 0:
results['win'] += 1
elif reward < 0:
results['lose'] += 1
else:
results['draw'] += 1
print("Test Results:", results)
print("Total Rewards:", total_rewards)
return results, total_rewards
def calculate_reward(self, player_hand_value, dealer_hand_value):
if player_hand_value > 21:
return -1
elif dealer_hand_value > 21:
return 1
elif player_hand_value > dealer_hand_value:
return 1
elif player_hand_value < dealer_hand_value:
return -1
else:
return 0
    # Train the agent, decaying exploration each game and boosting it when a recent window wins unusually often
    def train(self, num_episodes):
        self.initialize_q_table()
        recent_games = 100  # Number of games in each window used to adjust the exploration rate
        win_threshold = 0.65  # Threshold of win rate that triggers increased exploration
        win_count = 0
        for i in range(num_episodes):
            reward = self.play_one_game()
            self.update_learning_rate()  # Update learning rate dynamically
            # Track wins for the current window of recent games
            if reward > 0:
                win_count += 1
            # Usual per-game decay of the exploration rate
            self.exploration_rate *= self.epsilon_decay
            # At the end of every 'recent_games' window, re-inflate exploration if the win rate looks unusually high
            if (i + 1) % recent_games == 0:
                if win_count / recent_games > win_threshold:
                    self.exploration_rate *= 1.10  # Increase exploration by 10%
                win_count = 0  # Reset win count for the next window
            # Ensure exploration rate does not fall below the minimum threshold
            self.exploration_rate = max(self.exploration_rate, self.epsilon_min)
    # Alternative exploration adjustment based on win rate and reward variance (defined but not called by train)
    def adjust_exploration(self, win_rate, recent_rewards):
if win_rate > self.win_threshold:
self.exploration_rate *= 1.10 # Encourages exploration
elif np.std(recent_rewards) > self.reward_variance_threshold:
self.exploration_rate *= 1.05
else:
self.exploration_rate *= self.epsilon_decay
self.exploration_rate = max(self.exploration_rate, self.epsilon_min)
if __name__ == "__main__":
# Create instances of the AI and test both Q-learning and basic strategy approaches
print("Training and testing Q-learning model:")
q_learning_ai = BlackjackAI(use_basic_strategy=False)
q_learning_ai.train(1000000)
q_learning_results = q_learning_ai.test(10000, explain=False)
print("Q-learning Test Results:", q_learning_results)
print("Testing basic strategy model:")
basic_strategy_ai = BlackjackAI(use_basic_strategy=True)
basic_strategy_results = basic_strategy_ai.test(10000, explain=False)
print("Basic Strategy Test Results:", basic_strategy_results)