From d547590543f4a3755869d58c983d66ccc8422e7b Mon Sep 17 00:00:00 2001 From: Steven Date: Wed, 19 Jun 2024 23:51:17 -0700 Subject: [PATCH] Update CFR abstraction training --- src/aiplayer.py | 172 ++++++++++++--- src/base.py | 49 +++-- src/environment.py | 10 +- src/holdem_flop.py | 470 ----------------------------------------- src/kuhn.py | 8 +- src/postflop_holdem.py | 104 ++++++--- 6 files changed, 253 insertions(+), 560 deletions(-) delete mode 100644 src/holdem_flop.py diff --git a/src/aiplayer.py b/src/aiplayer.py index 2bfea73..993e497 100644 --- a/src/aiplayer.py +++ b/src/aiplayer.py @@ -73,9 +73,44 @@ def trash_talk_fold(self): self.engine.say(random.choice(self.get_trash_talk("opponent_fold"))) self.engine.runAndWait() - def place_bet(self, observed_env) -> int: # AI will call every time + def process_action(self, action, observed_env): + if action == "k": # check + if observed_env.game_stage == 2: + self.current_bet = 2 + else: + self.current_bet = 0 + + self.engine.say("I Check") + elif action == "c": + if observed_env.get_highest_current_bet() == self.player_balance: + self.engine.say("I call your all-in. You think I'm afraid?") + else: + self.engine.say(random.choice(self.get_trash_talk("c"))) + # If you call on the preflop + self.current_bet = observed_env.get_highest_current_bet() + elif action == "f": + self.engine.say(random.choice(self.get_trash_talk("f"))) + else: + self.current_bet = int(action[1:]) + if self.current_bet == self.player_balance: + self.engine.say(random.choice(self.get_trash_talk("all_in"))) + else: + self.engine.say(random.choice(self.get_trash_talk("b", self.current_bet))) + + self.engine.runAndWait() + + def place_bet(self, observed_env): + raise NotImplementedError + - # Strategy with Heuristic +class EquityAIPlayer(AIPlayer): + def __init__(self, balance) -> None: + super().__init__(balance) + + def place_bet(self, observed_env) -> int: # AI will call every time + """ + A Strategy implemented with human heuristics + """ if "k" in observed_env.valid_actions(): action = "k" else: @@ -83,7 +118,7 @@ def place_bet(self, observed_env) -> int: # AI will call every time card_str = [str(card) for card in self.hand] community_cards = [str(card) for card in observed_env.community_cards] - # if observed_env.game_stage == 2: + equity = calculate_equity(card_str, community_cards) # fold, check / call, raise @@ -96,7 +131,9 @@ def place_bet(self, observed_env) -> int: # AI will call every time ): # If you are the dealer, raise more of the time strategy = { "k": np_strategy[0], - f"b{min(max(observed_env.BIG_BLIND, int(observed_env.total_pot_balance / 3)), self.player_balance)}": np_strategy[2], + f"b{min(max(observed_env.BIG_BLIND, int(observed_env.total_pot_balance / 3)), self.player_balance)}": np_strategy[ + 2 + ], f"b{min(observed_env.total_pot_balance, self.player_balance)}": np_strategy[1], } else: @@ -138,37 +175,118 @@ def place_bet(self, observed_env) -> int: # AI will call every time print("equity", equity) print("AI strategy ", strategy) action = getAction(strategy) + self.process_action(action, observed_env) + return action - # history = HoldEmHistory(observed_env.history) - # strategy = observed_env.get_average_strategy() - # print("AI strategy", strategy) - # print("AI action", action) +import joblib +from abstraction import calculate_equity, predict_cluster_fast +from postflop_holdem import HoldemInfoSet, HoldEmHistory - if action == "k": # check - if observed_env.game_stage == 2: - self.current_bet = 2 - else: - self.current_bet = 0 +import copy 
- self.engine.say("I Check") - elif action == "c": - if observed_env.get_highest_current_bet() == self.player_balance: - self.engine.say("I call your all-in. You think I'm afraid?") + +class CFRAIPlayer(AIPlayer): + def __init__(self, balance) -> None: + super().__init__(balance) + + self.infosets = joblib.load("../src/infoSets_batch_7.joblib") + + def perform_postflop_abstraction(self, observed_env): + history = copy.deepcopy(observed_env.history) + + pot_total = observed_env.BIG_BLIND * 2 + # Compute preflop pot size + flop_start = history.index("/") + for i, action in enumerate(history[:flop_start]): + if action[0] == "b": + bet_size = int(action[1:]) + pot_total = 2 * bet_size + + # Remove preflop actions + abstracted_history = history[:2] + + # Bet Abstraction (card abstraction is done later) + stage_start = flop_start + stage = self.get_stage(history[stage_start + 1 :]) + latest_bet = 0 + while True: + abstracted_history += ["/"] + + if ( + len(stage) >= 4 and stage[3] != "c" + ): # length 4 that isn't a call, we need to condense down + abstracted_history += [stage[0]] + + if stage[-1] == "c": + if len(stage) % 2 == 1: # ended on dealer + abstracted_history += ["bMAX", "c"] + else: + if stage[0] == "k": + abstracted_history += ["k", "bMAX", "c"] + else: + abstracted_history += ["bMIN", "bMAX", "c"] else: - self.engine.say(random.choice(self.get_trash_talk("c"))) - # If you call on the preflop - self.current_bet = observed_env.get_highest_current_bet() - elif action == "f": - self.engine.say(random.choice(self.get_trash_talk("f"))) + for i, action in enumerate(stage): + if action[0] == "b": + bet_size = int(action[1:]) + latest_bet = bet_size + pot_total += bet_size + + # this is a raise on a small bet + if abstracted_history[-1] == "bMIN": + abstracted_history += ["bMAX"] + # this is a raise on a big bet + elif abstracted_history[-1] == "bMAX": + abstracted_history[-1] = "k" # turn into a check + else: # first bet + if bet_size >= pot_total: + abstracted_history += ["bMAX"] + else: + abstracted_history += ["bMIN"] + + elif action == "c": + pot_total += latest_bet + abstracted_history += ["c"] + else: + abstracted_history += [action] + + # Proceed to next stage or exit if final stage + if "/" not in history[stage_start + 1 :]: + break + stage_start = history[stage_start + 1 :].index("/") + (stage_start + 1) + stage = self.get_stage(history[stage_start + 1 :]) + + return abstracted_history + + def get_stage(self, history): + if "/" in history: + return history[: history.index("/")] else: - self.current_bet = int(action[1:]) - if self.current_bet == self.player_balance: - self.engine.say(random.choice(self.get_trash_talk("all_in"))) + return history + + def place_bet(self, observed_env): + if observed_env.game_stage == 2: # preflop + if "k" in observed_env.valid_actions(): + action = "k" else: - self.engine.say(random.choice(self.get_trash_talk("b", self.current_bet))) + action = "c" + else: + abstracted_history = self.perform_postflop_abstraction(observed_env) + print("abstracted history", abstracted_history) + infoset_key = HoldEmHistory(abstracted_history).get_infoSet_key_online() + strategy = self.infosets[infoset_key].get_average_strategy() + print(infoset_key) + print("AI strategy ", strategy) + action = getAction(strategy) + if action == "bMIN": + action = "b" + str( + max(observed_env.BIG_BLIND, int(1 / 3 * observed_env.total_pot_balance)) + ) + elif action == "bMAX": + action = "b" + str(min(observed_env.total_pot_balance, self.player_balance)) - self.engine.runAndWait() + 
self.process_action(action, observed_env)
         return action
 
diff --git a/src/base.py b/src/base.py
index 2fc5ba9..e8ffe1a 100644
--- a/src/base.py
+++ b/src/base.py
@@ -161,11 +161,10 @@ def __init__(
         create_history,
         n_players: int = 2,
         iterations: int = 1000000,
-        tracker_interval=1000,
     ):
         self.n_players = n_players
         self.iterations = iterations
-        self.tracker_interval = tracker_interval
+        self.tracker_interval = int(iterations / 10)
         self.infoSets: Dict[str, InfoSet] = {}
         self.create_infoSet = create_infoSet
         self.create_history = create_history
@@ -193,7 +192,7 @@ def vanilla_cfr(
         if history.is_terminal():
             if debug:
                 print(f"history: {history.history} utility: {history.terminal_utility(i)}")
-                time.sleep(1)
+                time.sleep(0.1)
             return history.terminal_utility(i)
         elif history.is_chance():
             a = (
@@ -206,9 +205,6 @@ def vanilla_cfr(
         infoSet = self.get_infoSet(history)
         assert infoSet.player() == history.player()
 
-        if debug:
-            print("infoset", infoSet.to_dict())
-
         v = 0
         va = {}
 
@@ -233,19 +229,30 @@ def vanilla_cfr(
 
         # Update regret matching values
         infoSet.get_strategy()
 
+        if debug:
+            print("infoset", infoSet.to_dict())
+            print("strategy", infoSet.strategy)
+
         return v
 
     def vanilla_cfr_speedup(self, history: History, t: int, pi_0: float, pi_1: float, debug=False):
         """
         We double the speed by updating both player values simultaneously, since this is a zero-sum game.
+        NOTE: This doesn't work super well, and I don't understand why. The trick for the speedup is to assume that whatever the opponent gains
+        is the opposite of what we gain (zero-sum game). However, we need to make sure we always return the correct utility.
+
         """
 
         # Return payoff for terminal states
+        # ['3d7c', '4cQd', '/', '7sKd9c', 'bMIN', 'f']
         if history.is_terminal():
             if debug:
-                print(history.history, history.terminal_utility(0))
-                time.sleep(1)
-            return history.terminal_utility(0)
+                print(
+                    f"utility returned: {history.terminal_utility((len(history.get_last_game_stage()) + 1) % 2)}, history: {history.history}"
+                )
+            return history.terminal_utility(
+                (len(history.get_last_game_stage()) + 1) % 2
+            )  # overfit solution for holdem
         elif history.is_chance():
             a = (
                 history.sample_chance_outcome()
@@ -257,9 +264,6 @@ def vanilla_cfr_speedup(self, history: History, t: int, pi_0: float, pi_1: float
         infoSet = self.get_infoSet(history)
         assert infoSet.player() == history.player()
 
-        if debug:
-            print("infoset", infoSet.to_dict())
-
         v = 0
         va = {}
 
@@ -285,6 +289,12 @@ def vanilla_cfr_speedup(self, history: History, t: int, pi_0: float, pi_1: float
         # Update regret matching values
         infoSet.get_strategy()
 
+        if debug:
+            print("infoset", infoSet.to_dict())
+            print("va", va)
+            print("strategy", infoSet.strategy)
+            time.sleep(0.1)
+
         return v
 
     def vanilla_cfr_manim(
@@ -356,11 +366,11 @@ def solve(self, method="vanilla_speedup", debug=False):
                 for player in range(self.n_players):
                     if player == 0:
                         util_0 += self.vanilla_cfr_manim(
-                            self.create_history(), player, t, 1, 1, histories
+                            self.create_history(t), player, t, 1, 1, histories
                         )
                     else:
                         util_1 += self.vanilla_cfr_manim(
-                            self.create_history(), player, t, 1, 1, histories
+                            self.create_history(t), player, t, 1, 1, histories
                         )
 
                 print(histories)
@@ -371,11 +381,11 @@ def solve(self, method="vanilla_speedup", debug=False):
                 ):  # This is the slower way, we can speed by updating both players
                     if player == 0:
                         util_0 += self.vanilla_cfr(
-                            self.create_history(), player, t, 1, 1, debug=debug
+                            self.create_history(t), player, t, 1, 1, debug=debug
                         )
                     else:
                         util_1 += self.vanilla_cfr(
-                            self.create_history(), player, t, 1, 1, debug=debug
+
self.create_history(t), player, t, 1, 1, debug=debug ) if (t + 1) % self.tracker_interval == 0: @@ -384,13 +394,14 @@ def solve(self, method="vanilla_speedup", debug=False): self.tracker(self.infoSets) self.tracker.pprint() - if t % 2500 == 0: + if t % 500000 == 0: self.export_infoSets(f"infoSets_{t}.joblib") + self.export_infoSets("infoSets_solved.joblib") if method == "manim": return histories - def export_infoSets(self, filename = "infoSets.joblib"): + def export_infoSets(self, filename="infoSets.joblib"): joblib.dump(self.infoSets, filename) def get_expected_value( @@ -525,4 +536,4 @@ def __call__(self, infoSets: Dict[str, InfoSet]): def pprint(self): infoSets = self.tracker_hist[-1] for infoSet in infoSets.values(): - print(infoSet.infoSet, infoSet.get_average_strategy()) + print(infoSet.infoSet, "Regret: ", infoSet.regret, "Average Strategy: ", infoSet.get_average_strategy()) diff --git a/src/environment.py b/src/environment.py index 6ec79b2..37b8952 100644 --- a/src/environment.py +++ b/src/environment.py @@ -2,7 +2,8 @@ from evaluator import * from typing import List from player import Player -from aiplayer import AIPlayer +from postflop_holdem import PostflopHoldemHistory, PostflopHoldemInfoSet +from aiplayer import CFRAIPlayer class PokerEnvironment: @@ -42,7 +43,7 @@ def __init__(self) -> None: self.SMALL_BLIND = 1 self.BIG_BLIND = 2 - self.INPUT_CARDS = True + self.INPUT_CARDS = False self.history = [] self.players_balance_history = [] # List of "n" list for "n" players @@ -54,7 +55,7 @@ def get_player(self, idx) -> Player: return self.players[idx] def add_AI_player(self): # Add a dumb AI - self.players.append(AIPlayer(self.new_player_balance)) + self.players.append(CFRAIPlayer(self.new_player_balance)) self.AI_player_idx = len(self.players) - 1 def get_winning_players(self) -> List: @@ -358,7 +359,7 @@ def end_round(self): if player.playing_current_round: player.trash_talk_win() else: - player.get_trash_lose() + player.trash_talk_lose() else: for player in self.players: @@ -366,6 +367,5 @@ def end_round(self): if player.playing_current_round: player.trash_talk_fold() - self.game_stage = 6 # mark end of round self.distribute_pot_to_winning_players() diff --git a/src/holdem_flop.py b/src/holdem_flop.py deleted file mode 100644 index 44544b7..0000000 --- a/src/holdem_flop.py +++ /dev/null @@ -1,470 +0,0 @@ -""" -Abstracted version of Holdem Poker, used for training. - -To make this computationally feasible to solve on my macbook, I start solving at the flop. - -Card Abstraction -- 10 clusters for flop -- 5 clusters for turn -- 5 clusters for river - -10 * 5 * 5 = 250 clusters - -Bet abstraction (ONLY allow these 11 sequences) -- k ("check") -- bMIN ("bet 1/3 pot, or big blind if pot is too") -- bMAX ("bet the pot size") -- c ("call") -- f ("fold") - -kk -kbMINf -kbMINc -kbPOTf -kbPOTc -bMINf -bMINc -bMINbMAXf # opponent raises on you -bMINbMAXc # opponent raises on you -bPOTf -bPOTc - -11^3 = 1331 possible sequences (3 stages: flop, turn, river) - -In total, we have 250 * 1331 = 332750 information sets. - -This keeps it manageable. Anything more is in orders of millions... 
-""" - -import base -import numpy as np -from base import Player, Action -from tqdm import tqdm -from typing import List -from abstraction import ( - predict_cluster_fast, -) -from fast_evaluator import phEvaluatorSetup, evaluate_cards -import time - -DISCRETE_ACTIONS = ["k", "bMIN", "bMAX", "c", "f"] - - -# ----- GLOBAL VARIABLES Load the pre-generated dataset ----- -def load_dataset(): - global boards, player_hands, opponent_hands - global player_flop_clusters, player_turn_clusters, player_river_clusters - global opp_preflop_clusters, opp_flop_clusters, opp_turn_clusters, opp_river_clusters - global winners - - # Load the pre-generated dataset - boards = np.load("dataset/boards.npy").tolist() - player_hands = np.load("dataset/player_hands.npy").tolist() - opponent_hands = np.load("dataset/opponent_hands.npy").tolist() - - # Load player clusters - player_flop_clusters = np.load("dataset/player_flop_clusters.npy").tolist() - player_turn_clusters = np.load("dataset/player_turn_clusters.npy").tolist() - player_river_clusters = np.load("dataset/player_river_clusters.npy").tolist() - - # Load opponent clusters - opp_flop_clusters = np.load("dataset/opp_flop_clusters.npy").tolist() - opp_turn_clusters = np.load("dataset/opp_turn_clusters.npy").tolist() - opp_river_clusters = np.load("dataset/opp_river_clusters.npy").tolist() - - winners = np.load("dataset/winners.npy") - - -class HoldEmHistory(base.History): - """ - Example of history: - First two actions are the cards dealt to the players. The rest of the actions are the actions taken by the players. - 1. ['AkTh', 'QdKd', '/', 'QhJdKs', 'bMIN', 'c', '/', 'Ah', 'k', 'k', ...] - - Infoset: - [4, 'bMIN', 'c', '10', 'k', 'k', ...] - - ---- ACTIONS ---- - - k = check - - bX = bet X amount (this includes raising) - - c = call - - f = fold (you cannot fold if the other player just checked) - - Every round starts the same way: - Small blind = 1 chip - Big blind = 2 chips - - Total chips = 100BB per player. - Minimum raise = X to match bet, and Y is the raise amount - If no raise before, then the minimum raise amount is 2x the bet amount (preflop would be 2x big blind). - Else it is whatever was previously raised. This is not the same as 2x the previous bet amount. Just the Y raise amount. - - Ex: The bet is 10$. I raise to 50$, so I raised by 40$ (Y = 40). The next player's minimum raise is not 100$, but rather to 90$, since (it's 50$ to match the bet, and 40$ to match the raise). - - Minimum bet = 1 chip (0.5BB) - - The API for the history is inspired from the Slumbot API, https://www.slumbot.com/ - - I want to avoid all the extra overhead, so taking inspiration from `environment.py` with the `PokerEnvironment` - """ - - def __init__(self, history: List[Action] = [], sample_id=0): - super().__init__(history) - self.sample_id = sample_id % len(player_hands) - self.stage_i = history.count("/") - - def is_terminal(self): - if len(self.history) == 0: - return False - folded = self.history[-1] == "f" - is_showdown = self.stage_i == 3 and self._game_stage_ended() # call # check,check - if folded or is_showdown: - return True - else: - return False - - def actions(self): - if self.is_chance(): # draw cards - return ( - [] - ) # This should return the entire deck with current cards removed, but I do this for speedup by loading an existing dataset - - elif not self.is_terminal(): - """ - To limit this game going to infinity, I only allow 11 betting seqeunces. - Else the branching factor huge. 
- - kk - kbMINf - kbMINc - kbMAXf - kbMAXc - bMINf - bMINc - bMINbMAXf - bMINbMAXc - bMAXf - bMAXc - - This is easy calculation. If someone raises, then treat that as bMAX. - - If we raise and the opponent raises, then we treat that as bMAX. So this way, we can always - treat the last action as bMAX. - - bMINbMAX = kBMAX - """ - assert ( - not self._game_stage_ended() - ) # game_stage_ended would mean that it is a chance node - - if self.history[-1] == "k": - return ["k", "bMIN", "bMAX"] - elif self.history[-1] == "bMIN": - return ["f", "c", "bMAX"] - elif self.history[-1] == "bMAX": - return ["f", "c"] - else: - return ["k", "bMIN", "bMAX", "f"] - - else: - raise Exception("Cannot call actions on a terminal history") - - def player(self): - """ - 1. ['AkTh', 'QdKd', '/', 'Qh', 'b2', 'c', '/', '2d', b2', 'f'] - """ - if len(self.history) <= 3: - return -1 - elif self._game_stage_ended(): - return -1 - elif self.history[-1] == "/": - return -1 - else: - return len(self.history) % 2 - - def _game_stage_ended(self): - return self.history[-1] == "c" or self.history[-1] == "f" or self.history[-2:] == ["k", "k"] - - def is_chance(self): - return super().is_chance() - - def sample_chance_outcome(self): - assert self.is_chance() - - if len(self.history) == 0: - return "".join(player_hands[self.sample_id]) - elif len(self.history) == 1: - return "".join(opponent_hands[self.sample_id]) - elif self.history[-1] != "/": - return "/" - elif self.stage_i == 1: - return "".join(boards[self.sample_id][:3]) - elif self.stage_i == 2: - return boards[self.sample_id][3] - elif self.stage_i == 3: - return boards[self.sample_id][4] - - def terminal_utility(self, i: Player) -> int: - assert self.is_terminal() # We can only call the utility for a terminal history - assert i in [0, 1] # Only works for 2 player games for now - - winner = winners[self.sample_id] - - pot_size = self._get_total_pot_size() - - last_game_stage_start_idx = max(loc for loc, val in enumerate(self.history) if val == "/") - last_game_stage = self.history[last_game_stage_start_idx + 1:] - - if self.history[-1] == "f": - if len(last_game_stage) % 2 == i: - return -pot_size - else: - return pot_size - - # showdown - if winner == 0: # tie - return pot_size / 2 - - if (winner == 1 and i == 0) or (winner == -1 and i == 1): - return pot_size - else: - return -pot_size - - def _get_total_pot_size(self): - total = 0 # starting balance is 4 - stage_total = 4 - for idx, action in enumerate(self.history): - if action == "/": - total += stage_total - stage_total = 0 - elif action == "bMIN": - stage_total += max(2, int(total / 3)) # bet 1/3 pot - elif action == "bMAX": - stage_total += total # bet the pot - elif action == "c": - if self.history[idx - 1] == "bMIN": - stage_total += max(2, int(total / 3)) - elif self.history[idx - 1] == "bMAX" and self.history[idx - 2] == "bMIN": - stage_total = 2 * total - elif self.history[idx - 1] == "bMAX": - stage_total += total - - stage_total = total - - total += stage_total - return total - - def __add__(self, action: Action): - new_history = HoldEmHistory(self.history + [action], self.sample_id) - return new_history - - def get_infoSet_key(self) -> List[Action]: - """ - This is where we abstract away cards and bet sizes. 
- """ - assert not self.is_chance() - assert not self.is_terminal() - - player = self.player() - infoset = [] - # ------- CARD ABSTRACTION ------- - # Assign cluster ID for FLOP/TURN/RIVER - stage_i = 0 - for i, action in enumerate(self.history): - if action not in DISCRETE_ACTIONS: - if action == "/": - stage_i += 1 - continue - if stage_i == 1: - if player == 0: - infoset.append(str(player_flop_clusters[self.sample_id])) - else: - infoset.append(str(opp_flop_clusters[self.sample_id])) - elif stage_i == 2: - assert len(action) == 2 - if player == 0: - infoset.append(str(player_turn_clusters[self.sample_id])) - else: - infoset.append(str(opp_turn_clusters[self.sample_id])) - elif stage_i == 3: - assert len(action) == 2 - if player == 0: - infoset.append(str(player_river_clusters[self.sample_id])) - else: - infoset.append(str(opp_river_clusters[self.sample_id])) - else: - infoset.append(action) - - return infoset - - -class HoldemInfoSet(base.InfoSet): - """ - Information Sets (InfoSets) cannot be chance histories, nor terminal histories. - This condition is checked when infosets are created. - - This infoset is an abstracted versions of the history in this case. - See the `get_infoSet_key(self)` function for these - - There are 2 abstractions we are doing: - 1. Card Abstraction (grouping together similar hands) - 2. Action Abstraction - - I've imported my abstractions from `abstraction.py`. - - """ - - def __init__(self, infoSet_key: List[Action], actions: List[Action], player: Player): - assert len(infoSet_key) >= 1 - super().__init__(infoSet_key, actions, player) - - -def create_infoSet(infoSet_key: List[Action], actions: List[Action], player: Player): - """ - We create an information set from a history. - """ - return HoldemInfoSet(infoSet_key, actions, player) - - -def create_history(sample_id): - return HoldEmHistory(sample_id=sample_id) - - -class HoldemCFR(base.CFR): - def __init__( - self, - create_infoSet, - create_history, - n_players: int = 2, - iterations: int = 1000000, - ): - super().__init__(create_infoSet, create_history, n_players, iterations) - - -from joblib import Parallel, delayed - - -def evaluate_winner(board, player_hand, opponent_hand): - p1_score = evaluate_cards(*(board + player_hand)) - p2_score = evaluate_cards(*(board + opponent_hand)) - if p1_score < p2_score: - return 1 - elif p1_score > p2_score: - return -1 - else: - return 0 - - -def generate_dataset(num_samples=250000, save=True): - """ - To make things faster, we pre-generate the boards and hands. 
We also pre-cluster the hands - """ - boards, player_hands, opponent_hands = phEvaluatorSetup(num_samples) - - np_boards = np.array(boards) - np_player_hands = np.array(player_hands) - np_opponent_hands = np.array(opponent_hands) - - player_flop_cards = np.concatenate((np_player_hands, np_boards[:, :3]), axis=1).tolist() - player_turn_cards = np.concatenate((np_player_hands, np_boards[:, :4]), axis=1).tolist() - player_river_cards = np.concatenate((np_player_hands, np_boards), axis=1).tolist() - opp_flop_cards = np.concatenate((np_opponent_hands, np_boards[:, :3]), axis=1).tolist() - opp_turn_cards = np.concatenate((np_opponent_hands, np_boards[:, :4]), axis=1).tolist() - opp_river_cards = np.concatenate((np_opponent_hands, np_boards), axis=1).tolist() - - curr = time.time() - print("generating clusters") - - player_flop_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=10) - for cards in tqdm(player_flop_cards) - ) - player_turn_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=5) - for cards in tqdm(player_turn_cards) - ) - player_river_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=5) - for cards in tqdm(player_river_cards) - ) - - opp_flop_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=10) - for cards in tqdm(opp_flop_cards) - ) - opp_turn_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=500, total_clusters=5) - for cards in tqdm(opp_turn_cards) - ) - opp_river_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=200, total_clusters=5) - for cards in tqdm(opp_river_cards) - ) - - winners = Parallel(n_jobs=-1)( - delayed(evaluate_winner)(board, player_hand, opponent_hand) - for board, player_hand, opponent_hand in tqdm(zip(boards, player_hands, opponent_hands)) - ) - - print("saving datasets") - np.save("dataset/boards.npy", boards) - np.save("dataset/player_hands.npy", player_hands) - np.save("dataset/opponent_hands.npy", opponent_hands) - np.save("dataset/winners.npy", winners) - print("continuing to save datasets") - - np.save("dataset/player_flop_clusters.npy", player_flop_clusters) - np.save("dataset/player_turn_clusters.npy", player_turn_clusters) - np.save("dataset/player_river_clusters.npy", player_river_clusters) - - np.save("dataset/opp_flop_clusters.npy", opp_flop_clusters) - np.save("dataset/opp_turn_clusters.npy", opp_turn_clusters) - np.save("dataset/opp_river_clusters.npy", opp_river_clusters) - - print(time.time() - curr) - - -if __name__ == "__main__": - generate_dataset() - load_dataset() - cfr = HoldemCFR(create_infoSet, create_history) - # cfr.infoSets = joblib.load("infoSets_2500.joblib") - # print("finished loading") - # cfr.solve(debug=True) - cfr.solve() - # cfr.solve_multiprocess( - # initializer=load_dataset, - # ) - -# """ -# When we work with these abstractions, we have two types: -# 1. Action Abstraction -# 2. Card Abstraction - -# Both of these are implemented in a different way. 
- -# """ - -# hist: HoldEmHistory = create_history() -# assert hist.player() == -1 -# hist1 = hist + "AkTh" -# assert hist1.player() == -1 -# hist2 = hist1 + "QdKd" -# assert hist2.player() == 0 -# print(hist2.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river)) -# hist3 = hist2 + "b2" -# assert hist3.player() == 1 -# hist4 = hist3 + "c" -# assert hist4.player() == -1 -# # Below are chance events, so it doesn't matter which player it is -# hist5 = hist4 + "/" -# assert hist5.player() == -1 -# hist6 = hist5 + "QhKsKh" -# assert hist6.player() == 1 -# hist7 = hist6 + "b1" -# hist8: HoldEmHistory = hist7 + "b3" -# curr = time.time() -# print(hist8.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river), time.time() - curr) - -# # cfr = base.CFR(create_infoSet, create_history) -# # cfr.solve() diff --git a/src/kuhn.py b/src/kuhn.py index cf80e27..689bc61 100644 --- a/src/kuhn.py +++ b/src/kuhn.py @@ -117,7 +117,7 @@ class KuhnInfoSet(base.InfoSet): """ def __init__(self, infoSet_key: List[Action], actions: List[Action], player: Player): - assert len(infoSet) >= 2 + assert len(infoSet_key) >= 2 super().__init__(infoSet_key, actions, player) @@ -128,12 +128,12 @@ def create_infoSet(infoSet_key: List[Action], actions: List[Action], player: Pla return KuhnInfoSet(infoSet_key, actions, player) -def create_history(): +def create_history(t): return KuhnHistory() if __name__ == "__main__": - cfr = base.CFR(create_infoSet, create_history) - cfr.solve() + cfr = base.CFR(create_infoSet, create_history, iterations=5000) + cfr.solve(debug=False, method="vanilla") # TODO: Add playing option, right now there is old code in research/kuhn, # which is not oop diff --git a/src/postflop_holdem.py b/src/postflop_holdem.py index 380e34d..829d90e 100644 --- a/src/postflop_holdem.py +++ b/src/postflop_holdem.py @@ -5,10 +5,10 @@ Card Abstraction - 10 clusters for flop -- 5 clusters for turn -- 5 clusters for river +- 10 clusters for turn +- 10 clusters for river -10 * 5 * 5 = 250 clusters +10^3 = 1000 clusters Bet abstraction (ONLY allow these 11 sequences) - k ("check") @@ -31,7 +31,8 @@ 11^3 = 1331 possible sequences (3 stages: flop, turn, river) -In total, we have 250 * 1331 = 332750 information sets. +In total, we have 1000 * 1331 = 1 331 000 information sets. +However, i noticed that only ~10% of the information sets are actually visited, since huge transitions are not possible. This keeps it manageable. Anything more is in orders of millions... """ @@ -42,7 +43,7 @@ from tqdm import tqdm from typing import List from abstraction import ( - predict_cluster_pre, + predict_cluster_fast, ) from fast_evaluator import phEvaluatorSetup, evaluate_cards import time @@ -75,7 +76,7 @@ def load_dataset(): winners = np.load("dataset/winners.npy") -class HoldEmHistory(base.History): +class PostflopHoldemHistory(base.History): """ Example of history: First two actions are the cards dealt to the players. The rest of the actions are the actions taken by the players. 
@@ -110,7 +111,7 @@ class HoldEmHistory(base.History):
 
     def __init__(self, history: List[Action] = [], sample_id=0):
         super().__init__(history)
-        self.sample_id = sample_id % len(player_hands)
+        self.sample_id = sample_id
         self.stage_i = history.count("/")
 
     def is_terminal(self):
@@ -159,8 +160,10 @@ def actions(self):
 
             if self.history[-1] == "k":
                 return ["k", "bMIN", "bMAX"]
-            elif self.history[-1] == "bMIN":
+            elif self.history[-2:] == ["k", "bMIN"]:
                 return ["f", "c"]
+            elif self.history[-1] == "bMIN":
+                return ["bMAX", "f", "c"]
             elif self.history[-1] == "bMAX":
                 return ["f", "c"]
             else:
@@ -237,8 +240,6 @@ def terminal_utility(self, i: Player) -> int:
         else:
             return -pot_size / 2
 
-
-
     def _get_total_pot_size(self, history):
         total = 0
         stage_total = 4  # assume preflop is a check + call, so 4 in pot (1 BB = 2 chips)
@@ -262,9 +263,50 @@ def _get_total_pot_size(self, history):
         return total
 
     def __add__(self, action: Action):
-        new_history = HoldEmHistory(self.history + [action], self.sample_id)
+        new_history = PostflopHoldemHistory(self.history + [action], self.sample_id)
         return new_history
 
+    def get_infoSet_key_online(self) -> str:
+        history = self.history
+        player = self.player()
+        infoset = []
+        # ------- CARD ABSTRACTION -------
+        # Assign cluster ID for FLOP/TURN/RIVER
+        stage_i = 0
+        hand = []
+        if player == 0:
+            hand = [history[0][:2], history[0][2:4]]
+        else:
+            hand = [history[1][:2], history[1][2:4]]
+        community_cards = []
+        for i, action in enumerate(history):
+            if action not in DISCRETE_ACTIONS:
+                if action == "/":
+                    stage_i += 1
+                    continue
+                if stage_i != 0:
+                    community_cards += [history[i][j : j + 2] for j in range(0, len(action), 2)]
+                print(hand + community_cards)
+                if stage_i == 1:
+                    assert len(action) == 6
+                    infoset.append(
+                        str(predict_cluster_fast(hand + community_cards, total_clusters=10))
+                    )
+                elif stage_i == 2:
+                    assert len(action) == 2
+                    infoset.append(
+                        str(predict_cluster_fast(hand + community_cards, total_clusters=5))
+                    )
+                elif stage_i == 3:
+                    assert len(action) == 2
+                    infoset.append(
+                        str(predict_cluster_fast(hand + community_cards, total_clusters=5))
+                    )
+            else:
+                infoset.append(action)
+
+        return "".join(infoset)
+
     def get_infoSet_key(self) -> List[Action]:
         """
         This is where we abstract away cards and bet sizes.
@@ -305,7 +347,7 @@ def get_infoSet_key(self) -> List[Action]:
         return infoset
 
 
-class HoldemInfoSet(base.InfoSet):
+class PostflopHoldemInfoSet(base.InfoSet):
     """
     Information Sets (InfoSets) cannot be chance histories, nor terminal histories.
     This condition is checked when infosets are created.
@@ -330,14 +372,14 @@ def create_infoSet(infoSet_key: List[Action], actions: List[Action], player: Pla
     """
     We create an information set from a history.
""" - return HoldemInfoSet(infoSet_key, actions, player) + return PostflopHoldemInfoSet(infoSet_key, actions, player) def create_history(sample_id): - return HoldEmHistory(sample_id=sample_id) + return PostflopHoldemHistory(sample_id=sample_id) -class PostFlopHoldemCFR(base.CFR): +class PostflopHoldemCFR(base.CFR): def __init__( self, create_infoSet, @@ -383,29 +425,24 @@ def generate_dataset(num_samples=50000, save=True): print("generating clusters") player_flop_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=10) - for cards in tqdm(player_flop_cards) + delayed(predict_cluster_fast)(cards, total_clusters=10) for cards in tqdm(player_flop_cards) ) player_turn_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=5) - for cards in tqdm(player_turn_cards) + delayed(predict_cluster_fast)(cards, total_clusters=10) for cards in tqdm(player_turn_cards) ) player_river_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=5) + delayed(predict_cluster_fast)(cards, total_clusters=10) for cards in tqdm(player_river_cards) ) opp_flop_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=1000, total_clusters=10) - for cards in tqdm(opp_flop_cards) + delayed(predict_cluster_fast)(cards, total_clusters=10) for cards in tqdm(opp_flop_cards) ) opp_turn_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=500, total_clusters=5) - for cards in tqdm(opp_turn_cards) + delayed(predict_cluster_fast)(cards, total_clusters=10) for cards in tqdm(opp_turn_cards) ) opp_river_clusters = Parallel(n_jobs=-1)( - delayed(predict_cluster_fast)(cards, n=200, total_clusters=5) - for cards in tqdm(opp_river_cards) + delayed(predict_cluster_fast)(cards, total_clusters=10) for cards in tqdm(opp_river_cards) ) winners = Parallel(n_jobs=-1)( @@ -436,14 +473,11 @@ def generate_dataset(num_samples=50000, save=True): if __name__ == "__main__": # Train in batches of 50,000 hands ITERATIONS = 50000 - cfr = PostFlopHoldemCFR(create_infoSet, create_history, iterations=ITERATIONS) + cfr = PostflopHoldemCFR(create_infoSet, create_history, iterations=ITERATIONS) for i in range(20): - if i == 0: - load_dataset() - else: - generate_dataset(save=False, num_samples=ITERATIONS) - cfr.solve(debug=False, method="vanilla") - cfr.export_infoSets(f"infoSets_batch_{i}.joblib") + generate_dataset(save=False, num_samples=ITERATIONS) + cfr.solve(debug=False, method="vanilla_speedup") + cfr.export_infoSets(f"new_vanilla_speedup_infoSets_batch_{i}.joblib") # load_dataset() # cfr.infoSets = joblib.load("infoSets_2500.joblib") @@ -462,7 +496,7 @@ def generate_dataset(num_samples=50000, save=True): # """ -# hist: HoldEmHistory = create_history() +# hist: PostflopHoldemHistory = create_history() # assert hist.player() == -1 # hist1 = hist + "AkTh" # assert hist1.player() == -1 @@ -479,7 +513,7 @@ def generate_dataset(num_samples=50000, save=True): # hist6 = hist5 + "QhKsKh" # assert hist6.player() == 1 # hist7 = hist6 + "b1" -# hist8: HoldEmHistory = hist7 + "b3" +# hist8: PostflopHoldemHistory = hist7 + "b3" # curr = time.time() # print(hist8.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river), time.time() - curr)