import random
import math
import numpy as np
class BlackjackAI:
    """Blackjack agent that plays by a fixed basic strategy or by tabular Q-learning.

    Cards are represented by their blackjack value: 2-9 at face value, 10 for
    every ten-valued card and 11 for an ace (demoted to 1 when the hand would
    otherwise bust).  A state is the tuple
    ``(player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)``.
    """

    # Values of the thirteen ranks of one suit (10, J, Q, K all count as 10).
    _CARD_VALUES = (2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11)

    def __init__(self, use_basic_strategy=False, temperature=1.0, num_decks=6):
        """Set up hyper-parameters, the shoe and the Q-table.

        Args:
            use_basic_strategy: If true, actions come from ``basic_strategy``
                and the Q-table is never updated.
            temperature: Softmax temperature used when exploring.
            num_decks: Number of 52-card decks in the shoe (at least 1).

        Raises:
            ValueError: If ``num_decks`` is smaller than 1.
        """
        if num_decks < 1:
            raise ValueError("num_decks must be at least 1")
        self.state = None  # Current state of the game
        self.q_table = {}  # Stores the Q-values for state-action pairs
        self.actions = ['hit', 'stand', 'double down', 'split']  # Possible actions
        self.learning_rate = 0.05  # How quickly the agent adopts new values
        self.total_games_played = 0  # Number of games played so far
        self.discount_factor = 0.95  # Discount factor for future rewards
        self.exploration_rate = 1.0  # Probability of taking an exploratory action
        self.epsilon_min = 0.05  # Minimum exploration rate
        self.epsilon_decay = 0.9995  # Decay of the exploration rate per game
        self.card_count = 0  # Running Hi-Lo count
        self.use_basic_strategy = use_basic_strategy
        self.temperature = temperature
        self.num_decks = num_decks
        # Thresholds read by adjust_exploration() and train().
        self.win_threshold = 0.65  # Win rate above which exploration is boosted
        self.reward_variance_threshold = 1.0  # Reward std-dev above which exploration is boosted
        self.deck = []
        self.deck_cut = 0
        self.shuffle_deck()  # Builds and shuffles the shoe, sets deck_cut
        self.initialize_q_table()

    def shuffle_deck(self):
        """Rebuild the full shoe, shuffle it, and reset the cut position and count."""
        self.deck = list(self._CARD_VALUES) * 4 * self.num_decks
        random.shuffle(self.deck)
        self.deck_cut = int(0.75 * len(self.deck))  # Reset cut card position
        # A fresh shoe invalidates everything counted so far.
        self.card_count = 0

    def deal_card(self):
        """Draw one card from the shoe, reshuffling first when below the cut.

        The shoe is reshuffled as soon as fewer than ``deck_cut`` cards remain
        (or it is empty).  Every dealt card is fed to the Hi-Lo count.

        Returns:
            The blackjack value of the drawn card (2-11).
        """
        if not self.deck or len(self.deck) < self.deck_cut:
            self.shuffle_deck()
        card = self.deck.pop()
        self.adjust_card_count(card)
        return card

    @staticmethod
    def _default_action_values(can_split):
        """Return zero-initialised Q-values for the actions legal in a state."""
        values = {'hit': 0, 'stand': 0, 'double down': 0}
        if can_split:
            values['split'] = 0
        return values

    def _q_values(self, state):
        """Return the action-value dict for ``state``, creating it on first use."""
        if state not in self.q_table:
            # The last element of a state tuple is the can_split flag.
            self.q_table[state] = self._default_action_values(state[-1])
        return self.q_table[state]

    def initialize_q_table(self):
        """Reset the Q-table to zeros for all commonly reachable states."""
        self.q_table = {}
        for player_hand_value in range(4, 22):
            for dealer_face_up_card in range(2, 12):
                for soft in (False, True):
                    for hand_size in range(2, 12):
                        for can_split in (False, True):
                            key = (player_hand_value, dealer_face_up_card, soft, hand_size, can_split)
                            self.q_table[key] = self._default_action_values(can_split)

    def update_learning_rate(self):
        """Decay the learning rate by a factor of 0.9999, never below 0.01."""
        self.learning_rate = max(0.01, self.learning_rate * 0.9999)

    def adjust_card_count(self, card):
        """Update the running Hi-Lo count for one dealt card.

        Tens and aces subtract one, 2-6 add one, 7-9 leave the count unchanged.
        """
        if card in (10, 11):  # High cards: 10, J, Q, K and aces
            self.card_count -= 1
        elif card in (2, 3, 4, 5, 6):  # Low cards: 2 to 6
            self.card_count += 1

    def calculate_hand_value(self, hand):
        """Evaluate a hand.

        Args:
            hand: List of card values, aces given as 11.

        Returns:
            Tuple ``(value, is_soft, hand_size)`` where ``value`` is the best
            total (aces demoted from 11 to 1 as needed to avoid busting) and
            ``is_soft`` is true when an ace is still counted as 11.
        """
        value = sum(hand)
        aces = hand.count(11)
        while value > 21 and aces:
            value -= 10
            aces -= 1
        # Softness must be judged after demotion: [11, 11] is a soft 12.
        is_soft = aces > 0
        return value, is_soft, len(hand)

    def calculate_bet_size(self):
        """Return the bet for the next hand based on the true count.

        The true count is the running count divided by the decks remaining in
        the shoe.  The bet is 10 units, or ``10 * true_count`` capped at 100
        when the true count exceeds 1.
        """
        decks_remaining = (len(self.deck) / 52) or 1  # Avoid division by zero
        true_count = self.card_count / decks_remaining
        if true_count > 1:
            return min(100, 10 * true_count)
        return 10

    def select_action(self, player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split):
        """Choose an action for the given state.

        With ``use_basic_strategy`` the fixed strategy decides.  Otherwise,
        with probability ``exploration_rate`` an action is sampled from a
        softmax over the state's Q-values; in all other cases the action with
        the highest Q-value is returned.

        Returns:
            One of the action strings stored for the state.
        """
        if self.use_basic_strategy:
            return self.basic_strategy(player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)
        state = (player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)
        q_values = self._q_values(state)
        greedy_action = max(q_values, key=q_values.get)
        # A non-positive temperature has no softmax; fall back to greedy.
        if self.temperature > 0 and random.random() < self.exploration_rate:
            # Shift by the maximum so math.exp can never overflow; the
            # resulting distribution is unchanged.
            best_q = q_values[greedy_action]
            weights = [math.exp((q - best_q) / self.temperature) for q in q_values.values()]
            return random.choices(list(q_values), weights=weights)[0]
        return greedy_action

    def basic_strategy(self, player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split):
        """Return the action prescribed by a simplified basic strategy.

        Rules are checked in order: splits, doubles, soft totals, hard totals.
        """
        # Rules for splitting
        if can_split and hand_size == 2:
            if is_soft and player_hand_value == 12:  # A pair of aces is always split
                return 'split'
            if player_hand_value == 16:  # Split 8s
                return 'split'
            # Split 6s except against these dealer cards
            if player_hand_value == 12 and dealer_face_up_card not in (2, 3, 7, 8, 9, 10, 11):
                return 'split'
            if player_hand_value == 18 and dealer_face_up_card == 9:
                # Count-based deviation: split 9s only with a positive count.
                return 'split' if self.card_count > 0 else 'stand'
        # Rules for double down (only considered on the first move)
        if hand_size == 2:
            if player_hand_value == 11:
                return 'double down'
            if player_hand_value == 10 and dealer_face_up_card not in (10, 11):
                return 'double down'
        # General hit/stand rules
        if is_soft:
            if player_hand_value >= 19:
                return 'stand'
            if player_hand_value == 18 and 2 <= dealer_face_up_card <= 6:
                return 'stand'
            return 'hit'
        if player_hand_value >= 17:
            return 'stand'
        if player_hand_value >= 13 and dealer_face_up_card < 7:
            return 'stand'
        if player_hand_value == 12 and 4 <= dealer_face_up_card <= 6:
            return 'stand'
        return 'hit'

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update.

        Args:
            state: State in which ``action`` was taken.
            action: Action string that was taken.
            reward: Reward received for the transition.
            next_state: Resulting state, or ``None`` when the game ended.
        """
        q_values = self._q_values(state)
        current_q = q_values.get(action, 0)
        future_q = max(self._q_values(next_state).values()) if next_state is not None else 0
        target = reward + self.discount_factor * future_q
        q_values[action] = current_q + self.learning_rate * (target - current_q)

    def _play_dealer(self, dealer_hand):
        """Draw cards into ``dealer_hand`` until it totals 17 or more; return the total."""
        dealer_value, _, _ = self.calculate_hand_value(dealer_hand)
        while dealer_value < 17:
            dealer_hand.append(self.deal_card())
            dealer_value, _, _ = self.calculate_hand_value(dealer_hand)
        return dealer_value

    def play_one_game(self, explain=False):
        """Play one hand against the dealer and return the total reward.

        Rewards are +1/-1/0 for win/loss/push and doubled after a double down.
        When not using basic strategy, the Q-table is updated after every
        action, including non-terminal hits (reward 0).  Splitting is never
        offered here (``can_split`` is always false); any action other than
        'hit' or 'double down' is treated as 'stand'.

        Args:
            explain: If true, print a trace of states, actions and outcomes.
        """
        player_hand = [self.deal_card(), self.deal_card()]
        dealer_hand = [self.deal_card(), self.deal_card()]
        player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
        dealer_face_up_card = dealer_hand[0]
        can_split = False
        state = (player_value, dealer_face_up_card, is_soft, hand_size, can_split)
        total_reward = 0
        self.total_games_played += 1
        while True:
            action = self.select_action(player_value, dealer_face_up_card, is_soft, hand_size, can_split)
            if explain:
                print(f"State: {state}, Chosen Action: {action}, Current Card Count: {self.card_count}")
            next_state = None  # None marks a terminal transition
            if action == 'hit':
                player_hand.append(self.deal_card())
                player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
                if player_value > 21:
                    if explain:
                        print("Explanation: Player busts with a hand value of", player_value)
                    reward = -1
                else:
                    if explain:
                        print("Explanation: Player hits and now has a hand value of", player_value)
                    reward = 0
                    next_state = (player_value, dealer_face_up_card, is_soft, hand_size, can_split)
            elif action == 'double down':
                # Exactly one more card, then the hand stands at doubled stakes.
                player_hand.append(self.deal_card())
                player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
                if explain:
                    print("Explanation: Player chooses to double down.")
                    print("Double down result:", "Bust" if player_value > 21 else f"Continues with hand value of {player_value}")
                if player_value > 21:
                    reward = -2
                else:
                    dealer_value = self._play_dealer(dealer_hand)
                    reward = 2 * self.calculate_reward(player_value, dealer_value)
            else:  # 'stand'
                dealer_value = self._play_dealer(dealer_hand)
                reward = self.calculate_reward(player_value, dealer_value)
                if explain:
                    print("Explanation: Player stands with a hand value of", player_value)
                    print(f"Game ended. Dealer's hand value: {dealer_value}, Player's hand value: {player_value}, Reward: {reward}")
            if not self.use_basic_strategy:
                self.update_q_table(state, action, reward, next_state)
            total_reward += reward
            if next_state is None:
                return total_reward
            state = next_state

    def play_split_hand(self, hand, dealer_hand, explain):
        """Play a single split hand against the dealer and return its reward.

        Drawn cards are appended to ``hand`` and ``dealer_hand`` in place.  No
        further split is allowed, the Q-table is not updated, and any action
        other than 'hit' is treated as 'stand'.

        Returns:
            -1 on a bust, otherwise the result of ``calculate_reward``.
        """
        player_value, is_soft, hand_size = self.calculate_hand_value(hand)
        dealer_face_up_card = dealer_hand[0]
        can_split = False  # No further splits allowed
        while True:
            action = self.select_action(player_value, dealer_face_up_card, is_soft, hand_size, can_split)
            if action != 'hit':
                # Stand: let the dealer finish and compare totals.
                dealer_value = self._play_dealer(dealer_hand)
                return self.calculate_reward(player_value, dealer_value)
            hand.append(self.deal_card())
            player_value, is_soft, hand_size = self.calculate_hand_value(hand)
            if player_value > 21:
                if explain:
                    print("Split hand busts!")
                return -1

    def test(self, num_games, explain=False):
        """Play ``num_games`` games and report the outcome counts.

        Returns:
            Tuple ``(results, total_rewards)`` where ``results`` maps 'win',
            'lose' and 'draw' to the number of games with positive, negative
            and zero reward.
        """
        results = {'win': 0, 'lose': 0, 'draw': 0}
        total_rewards = 0
        for _ in range(num_games):
            reward = self.play_one_game(explain=explain)
            total_rewards += reward
            if reward > 0:
                results['win'] += 1
            elif reward < 0:
                results['lose'] += 1
            else:
                results['draw'] += 1
        print("Test Results:", results)
        print("Total Rewards:", total_rewards)
        return results, total_rewards

    def calculate_reward(self, player_hand_value, dealer_hand_value):
        """Return +1 for a player win, -1 for a loss and 0 for a push.

        A player bust loses even when the dealer also busts.
        """
        if player_hand_value > 21:
            return -1
        if dealer_hand_value > 21:
            return 1
        if player_hand_value > dealer_hand_value:
            return 1
        if player_hand_value < dealer_hand_value:
            return -1
        return 0

    def train(self, num_episodes):
        """Play ``num_episodes`` games while annealing the learning parameters.

        The learning rate and exploration rate decay once per game.  Every 100
        games the exploration rate is boosted by 10% if the win rate over that
        window exceeded ``win_threshold``.  The exploration rate is kept in
        ``[epsilon_min, 1.0]``.
        """
        recent_games = 100  # Window size for the win-rate check
        if not self.q_table:
            self.initialize_q_table()
        win_count = 0
        for episode in range(1, num_episodes + 1):
            self.update_learning_rate()
            if self.play_one_game() > 0:
                win_count += 1
            self.exploration_rate *= self.epsilon_decay  # Usual decay
            if episode % recent_games == 0:
                if win_count / recent_games > self.win_threshold:
                    self.exploration_rate *= 1.10  # Increase exploration by 10%
                win_count = 0  # Start a new window
            self.exploration_rate = min(1.0, max(self.exploration_rate, self.epsilon_min))

    def adjust_exploration(self, win_rate, recent_rewards):
        """Adapt the exploration rate to recent performance.

        Exploration is boosted by 10% when ``win_rate`` exceeds
        ``win_threshold``, otherwise by 5% when the standard deviation of
        ``recent_rewards`` exceeds ``reward_variance_threshold``.  The usual
        decay is then applied and the rate is kept in ``[epsilon_min, 1.0]``.
        """
        if win_rate > self.win_threshold:
            self.exploration_rate *= 1.10  # Encourages exploration
        elif len(recent_rewards) > 0 and np.std(recent_rewards) > self.reward_variance_threshold:
            self.exploration_rate *= 1.05
        self.exploration_rate *= self.epsilon_decay
        self.exploration_rate = min(1.0, max(self.exploration_rate, self.epsilon_min))
if __name__ == "__main__":
    # Train a Q-learning agent, then compare it with the fixed basic strategy.
    print("Training and testing Q-learning model:")
    q_learning_ai = BlackjackAI(use_basic_strategy=False)
    # The Q-table must exist and be trained before the agent is evaluated;
    # testing an untrained agent only measures random play.
    q_learning_ai.initialize_q_table()
    q_learning_ai.train(50000)
    q_learning_results = q_learning_ai.test(10000, explain=False)
    print("Q-learning Test Results:", q_learning_results)
    print("Testing basic strategy model:")
    basic_strategy_ai = BlackjackAI(use_basic_strategy=True)
    basic_strategy_results = basic_strategy_ai.test(10000, explain=False)
    print("Basic Strategy Test Results:", basic_strategy_results)