IndividualProject/blackjack.py
import random
import math
import numpy as np
class BlackjackAI:
    # Initialize Blackjack AI agent settings
    def __init__(self, use_basic_strategy=False, temperature=1.0, num_decks=6):
        # Initialize game state and agent properties
        self.state = None  # Current state of the game
        self.q_table = {}  # Stores the Q-values for state-action pairs
        self.actions = ['hit', 'stand', 'double down', 'split']  # Possible actions
        self.learning_rate = 0.05  # How quickly the agent adopts new values
        self.total_games_played = 0  # Track number of games played
        self.discount_factor = 0.95  # Discount factor for future rewards
        self.exploration_rate = 1.0  # Exploration rate for choosing random actions
        self.epsilon_min = 0.05  # Minimum exploration rate
        self.epsilon_decay = 0.9995  # Decay rate of the exploration rate per game
        self.card_count = 0  # Running count for card counting
        self.use_basic_strategy = use_basic_strategy  # Whether to use basic strategy
        self.temperature = temperature  # Softmax temperature: how sharply exploration favors high Q-values
        self.num_decks = num_decks  # Number of decks in play
        self.deck = [2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11] * 4 * num_decks  # Build the shoe
        self.deck_cut = int(0.25 * len(self.deck))  # Reshuffle once fewer than 25% of the cards remain
        self.shuffle_deck()  # Start from a shuffled shoe
    # Shuffle the shoe and reset the cut card position
    def shuffle_deck(self):
        random.shuffle(self.deck)
        self.deck_cut = int(0.25 * len(self.deck))  # Reset cut card position

    # Draw a card from the shoe, rebuilding and reshuffling once the cut card is reached
    def deal_card(self):
        if len(self.deck) < self.deck_cut:
            self.deck = [2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11] * 4 * self.num_decks
            self.shuffle_deck()
            self.card_count = 0  # A fresh shoe resets the running count
        card = self.deck.pop()
        self.adjust_card_count(card)
        return card
    # Initialize the Q-table for all possible game states
    def initialize_q_table(self):
        self.q_table = {}
        for player_hand_value in range(4, 22):
            for dealer_face_up_card in range(2, 12):
                for soft in [False, True]:
                    for hand_size in range(2, 12):
                        for can_split in [False, True]:
                            actions = {'hit': 0, 'stand': 0, 'double down': 0}
                            if can_split:
                                actions['split'] = 0
                            self.q_table[(player_hand_value, dealer_face_up_card, soft, hand_size, can_split)] = actions
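    # The table enumerates 18 player totals x 10 dealer up-cards x 2 soft flags
    # x 10 hand sizes x 2 split flags = 7,200 states, each mapping to per-action Q-values.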
    # Dynamically adjust the learning rate based on the number of games played
    def update_learning_rate(self):
        self.learning_rate = max(0.01, self.learning_rate * 0.9999)
    # Adjust the running count based on the value of the dealt card
    def adjust_card_count(self, card):
        # Basic Hi-Lo card counting adjustment, applied across all decks in the shoe
        if card in [10, 11]:  # High cards: tens (10, J, Q, K) and aces
            self.card_count -= 1
        elif card in [2, 3, 4, 5, 6]:  # Low cards: 2 through 6
            self.card_count += 1
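    # Hi-Lo example: after seeing 5, K, A the running count is +1 - 1 - 1 = -1;
    # a positive count means proportionally more high cards remain in the shoe.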
    def calculate_hand_value(self, hand):
        value = sum(hand)
        aces = hand.count(11)
        # Downgrade aces from 11 to 1 while the hand would otherwise bust
        while value > 21 and aces:
            value -= 10
            aces -= 1
        is_soft = aces > 0  # Soft if an ace still counts as 11
        return value, is_soft, len(hand)
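    # Example: [11, 7] -> (18, True, 2); drawing a 10 gives [11, 7, 10] -> (18, False, 3)
    # because the ace is downgraded to 1 to avoid busting.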
    # Calculate the bet size based on the true count of cards
    def calculate_bet_size(self):
        true_count = self.card_count / ((len(self.deck) / 52) or 1)  # Avoid division by zero
        if true_count > 1:
            return min(100, 10 * true_count)  # Example betting strategy: bet more when count is high
        return 10
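    # Example: a running count of +6 with 156 cards (3 decks) remaining gives a
    # true count of 2.0, so the bet is min(100, 10 * 2.0) = 20 units.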
    # Select an action based on the current state using either basic strategy or Q-learning
    def select_action(self, player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split):
        state = (player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)
        if self.use_basic_strategy:
            return self.basic_strategy(player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split)
        if random.random() < self.exploration_rate:
            # Softmax (Boltzmann) exploration: probabilities favor actions with higher Q-values
            total = sum(math.exp(self.q_table[state][action] / self.temperature) for action in self.q_table[state])
            probabilities = {action: math.exp(self.q_table[state][action] / self.temperature) / total for action in self.q_table[state]}
            actions, weights = zip(*probabilities.items())
            return random.choices(actions, weights=weights)[0]
        return max(self.q_table[state], key=self.q_table[state].get)
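    # Boltzmann example: with temperature 1.0 and Q-values {hit: 0.5, stand: 0.1},
    # P(hit) = e^0.5 / (e^0.5 + e^0.1) ~= 0.60, so better actions are sampled more often
    # while worse ones are still tried occasionally.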
    # Define basic strategy rules for the game
    def basic_strategy(self, player_hand_value, dealer_face_up_card, is_soft, hand_size, can_split):
        # Rules for splitting
        if can_split and hand_size == 2:
            if player_hand_value == 16:  # Always split 8s
                return 'split'
            elif is_soft and player_hand_value == 12:  # Always split aces
                return 'split'
            elif player_hand_value == 12 and dealer_face_up_card in range(2, 7):  # Split 6s against dealer 2-6
                return 'split'
            # Additional rules can be added here
            if player_hand_value == 18 and dealer_face_up_card == 9:  # Pair of 9s against a 9
                return 'split' if self.card_count > 0 else 'stand'  # Example of adjustment based on count
        # Rules for double down
        if hand_size == 2:  # Doubling down is only considered on the first move
            if player_hand_value == 11:
                return 'double down'
            elif player_hand_value == 10 and dealer_face_up_card not in [10, 11]:
                return 'double down'
            # Additional rules can be added here
        # General hit/stand rules
        if is_soft:
            if player_hand_value >= 19:
                return 'stand'
            if player_hand_value == 18 and dealer_face_up_card in range(2, 7):
                return 'stand'
            return 'hit'
        else:
            if player_hand_value >= 17:
                return 'stand'
            if 13 <= player_hand_value <= 16 and dealer_face_up_card < 7:
                return 'stand'
            if player_hand_value == 12 and 4 <= dealer_face_up_card <= 6:
                return 'stand'
            return 'hit'
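    # Examples: hard 16 vs. a dealer 10 falls through to 'hit', while
    # hard 13 vs. a dealer 6 returns 'stand'.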
    # Update the Q-table with a new Q-value based on the reward received
    def update_q_table(self, state, action, reward, next_state):
        current_q = self.q_table[state][action]
        future_q = max(self.q_table[next_state].values()) if next_state else 0
        self.q_table[state][action] = current_q + self.learning_rate * (reward + self.discount_factor * future_q - current_q)
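    # This implements the standard update Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
    # Example: with Q(s,a) = 0, alpha = 0.05, and a terminal reward of +1,
    # the updated value is 0 + 0.05 * (1 + 0 - 0) = 0.05.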
    # Simulate one game of blackjack and return the game's total reward
    def play_one_game(self, explain=False):
        player_hand = [self.deal_card(), self.deal_card()]
        dealer_hand = [self.deal_card(), self.deal_card()]
        player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
        dealer_value, dealer_is_soft, dealer_hand_size = self.calculate_hand_value(dealer_hand)
        dealer_face_up_card = dealer_hand[0]
        state = (player_value, dealer_face_up_card, is_soft, hand_size, False)  # can_split is always False here
        game_over = False
        total_reward = 0
        while not game_over:
            # Game play logic: player action, dealer response, and reward calculation
            action = self.select_action(player_value, dealer_face_up_card, is_soft, hand_size, False)
            if explain:
                print(f"State: {state}, Chosen Action: {action}, Current Card Count: {self.card_count}")
            if action == 'hit':
                player_hand.append(self.deal_card())
                player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
                if player_value > 21:
                    if explain:
                        print("Explanation: Player busts with a hand value of", player_value)
                    reward = -1
                    if not self.use_basic_strategy:
                        self.update_q_table(state, action, reward, None)
                    total_reward += reward
                    break
                next_state = (player_value, dealer_face_up_card, is_soft, hand_size, False)
                if not self.use_basic_strategy:
                    # Propagate the value of the successor state for a non-terminal hit
                    self.update_q_table(state, action, 0, next_state)
                state = next_state
                if explain:
                    print("Explanation: Player hits and now has a hand value of", player_value)
            elif action == 'double down':
                player_hand.append(self.deal_card())
                player_value, is_soft, hand_size = self.calculate_hand_value(player_hand)
                if player_value > 21:
                    reward = -2
                else:
                    # The dealer still plays out the hand; the doubled bet doubles the outcome
                    while dealer_value < 17:
                        dealer_hand.append(self.deal_card())
                        dealer_value, dealer_is_soft, dealer_hand_size = self.calculate_hand_value(dealer_hand)
                    reward = 2 * self.calculate_reward(player_value, dealer_value)
                if explain:
                    print("Explanation: Player chooses to double down.")
                    print("Double down result:", "Bust" if player_value > 21 else f"Final hand value of {player_value}, reward {reward}")
                if not self.use_basic_strategy:
                    self.update_q_table(state, action, reward, None)
                total_reward += reward
                game_over = True
            else:  # 'stand'
                while dealer_value < 17:
                    dealer_hand.append(self.deal_card())
                    dealer_value, dealer_is_soft, dealer_hand_size = self.calculate_hand_value(dealer_hand)
                reward = self.calculate_reward(player_value, dealer_value)
                if explain:
                    print("Explanation: Player stands with a hand value of", player_value)
                    print(f"Game ended. Dealer's hand value: {dealer_value}, Player's hand value: {player_value}, Reward: {reward}")
                if not self.use_basic_strategy:
                    self.update_q_table(state, action, reward, None)
                total_reward += reward
                game_over = True
        return total_reward
    # Play out a single split hand against the dealer
    # (currently unused: play_one_game never selects the 'split' action)
    def play_split_hand(self, hand, dealer_hand, explain):
        # For simplicity, this mirrors the main play_one_game logic for one hand
        player_value, is_soft, hand_size = self.calculate_hand_value(hand)
        dealer_face_up_card = dealer_hand[0]
        can_split = False  # No further splits allowed
        state = (player_value, dealer_face_up_card, is_soft, hand_size, can_split)
        while True:
            action = self.select_action(player_value, dealer_face_up_card, is_soft, hand_size, can_split)
            if action == 'hit':
                hand.append(self.deal_card())
                player_value, is_soft, hand_size = self.calculate_hand_value(hand)
                if player_value > 21:
                    if explain:
                        print("Split hand busts!")
                    return -1
                state = (player_value, dealer_face_up_card, is_soft, hand_size, can_split)
            else:  # 'stand' (or any non-hit action) ends the hand
                break
        # Now compare with the dealer's completed hand
        dealer_value, _, _ = self.calculate_hand_value(dealer_hand)
        while dealer_value < 17:
            dealer_hand.append(self.deal_card())
            dealer_value, _, _ = self.calculate_hand_value(dealer_hand)
        return self.calculate_reward(player_value, dealer_value)
    def test(self, num_games, explain=False):
        results = {'win': 0, 'lose': 0, 'draw': 0}
        total_rewards = 0
        for _ in range(num_games):
            reward = self.play_one_game(explain=explain)
            total_rewards += reward
            if reward > 0:
                results['win'] += 1
            elif reward < 0:
                results['lose'] += 1
            else:
                results['draw'] += 1
        print("Test Results:", results)
        print("Total Rewards:", total_rewards)
        return results, total_rewards
    def calculate_reward(self, player_hand_value, dealer_hand_value):
        if player_hand_value > 21:
            return -1
        elif dealer_hand_value > 21:
            return 1
        elif player_hand_value > dealer_hand_value:
            return 1
        elif player_hand_value < dealer_hand_value:
            return -1
        else:
            return 0
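    # Reward encoding: +1 win, -1 loss, 0 push; a player bust counts as a loss
    # even when the dealer also busts, matching standard casino rules.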
    def train(self, num_episodes):
        self.initialize_q_table()
        recent_games = 100  # Window of games used to adjust the exploration rate
        win_threshold = 0.65  # Win rate above which exploration is increased
        win_count = 0
        for i in range(num_episodes):
            reward = self.play_one_game()
            self.update_learning_rate()  # Anneal the learning rate dynamically
            # Track wins within the current window
            if reward > 0:
                win_count += 1
            # Adjust the exploration rate every 'recent_games' episodes
            if (i + 1) % recent_games == 0:
                win_rate = win_count / recent_games
                if win_rate > win_threshold:
                    self.exploration_rate = min(1.0, self.exploration_rate * 1.10)  # Increase exploration by 10%, capped at 1.0
                else:
                    self.exploration_rate *= self.epsilon_decay  # Usual decay
                win_count = 0  # Reset the window
            # Ensure the exploration rate does not fall below the minimum threshold
            self.exploration_rate = max(self.exploration_rate, self.epsilon_min)
    # Alternative exploration adjustment (unused helper); thresholds are taken as
    # parameters because the class does not define them as attributes, and the
    # defaults here are illustrative
    def adjust_exploration(self, win_rate, recent_rewards, win_threshold=0.65, reward_variance_threshold=0.5):
        if win_rate > win_threshold:
            self.exploration_rate *= 1.10  # Encourage exploration when winning often
        elif np.std(recent_rewards) > reward_variance_threshold:
            self.exploration_rate *= 1.05  # Explore more when rewards are volatile
        else:
            self.exploration_rate *= self.epsilon_decay
        self.exploration_rate = max(self.exploration_rate, self.epsilon_min)
if __name__ == "__main__":
    # Create instances of the AI and test both Q-learning and basic strategy approaches
    print("Training and testing Q-learning model:")
    q_learning_ai = BlackjackAI(use_basic_strategy=False)
    q_learning_ai.train(1000000)
    q_learning_results = q_learning_ai.test(10000, explain=False)
    print("Q-learning Test Results:", q_learning_results)

    print("Testing basic strategy model:")
    basic_strategy_ai = BlackjackAI(use_basic_strategy=True)
    basic_strategy_results = basic_strategy_ai.test(10000, explain=False)
    print("Basic Strategy Test Results:", basic_strategy_results)