diff --git a/main/.buildinfo b/main/.buildinfo
index 9c7a504a7..8047a875a 100644
--- a/main/.buildinfo
+++ b/main/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 1f81fbac4ab0bac2800b5c84300b339b
+config: af7d9b5182bf4b44c58f9e1dfdc7928a
 tags: d77d1c0d9ca2f4c8421862c7c5a0d620
diff --git a/main/_modules/pettingzoo/classic/hanabi/hanabi/index.html b/main/_modules/pettingzoo/classic/hanabi/hanabi/index.html
index ccb81611f..7c2c5f47a 100644
--- a/main/_modules/pettingzoo/classic/hanabi/hanabi/index.html
+++ b/main/_modules/pettingzoo/classic/hanabi/hanabi/index.html
@@ -394,7 +394,7 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 
 <span class="sd">This environment is part of the &lt;a href=&#39;..&#39;&gt;classic environments&lt;/a&gt;. Please read that page first for general information.</span>
 
-<span class="sd">| Import               | `from pettingzoo.classic import hanabi_v4` |</span>
+<span class="sd">| Import               | `from pettingzoo.classic import hanabi_v5` |</span>
 <span class="sd">|----------------------|--------------------------------------------|</span>
 <span class="sd">| Actions              | Discrete                                   |</span>
 <span class="sd">| Parallel API         | Yes                                        |</span>
@@ -417,8 +417,8 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 <span class="sd">Hanabi takes in a number of arguments defining the size and complexity of the game. Default is a full 2 player hanabi game.</span>
 
 <span class="sd">``` python</span>
-<span class="sd">hanabi_v4.env(colors=5, rank=5, players=2, hand_size=5, max_information_tokens=8,</span>
-<span class="sd">max_life_tokens=3, observation_type=1)</span>
+<span class="sd">hanabi_v5.env(colors=5, rank=5, players=2, hand_size=5, max_information_tokens=8,</span>
+<span class="sd">max_life_tokens=3, observation_type=&quot;card_knowledge&quot;)</span>
 <span class="sd">```</span>
 
 <span class="sd">`colors`: Number of colors the cards can take (affects size of deck)</span>
@@ -431,7 +431,11 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 
 <span class="sd">`max_life_tokens`: Maximum number of life tokens (more tokens makes the game easier by allowing more information to be revealed)</span>
 
-<span class="sd">`observation_type`: 0: Minimal observation. 1: First-order common knowledge observation (default).</span>
+<span class="sd">`observation_type`:</span>
+<span class="sd">    &quot;minimal&quot;: Minimal observation (what a human sees).</span>
+<span class="sd">    &quot;card_knowledge&quot;: includes per-card knowledge of past hints, as well as simple inferred knowledge of the form</span>
+<span class="sd">        &quot;this card is not red, because it was not revealed as red in a past hint&quot;.</span>
+<span class="sd">    &quot;seer&quot;: shows all cards, including the player&#39;s own cards, regardless of what hints have been given.</span>
 
 <span class="sd">### Observation Space</span>
 
@@ -533,6 +537,7 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 
 <span class="sd">### Version History</span>
 
+<span class="sd">* v5: Switched environment to depend on OpenSpiel (using Shimmy) for future compatibility (1.23.0)</span>
 <span class="sd">* v4: Fixed bug in arbitrary calls to observe() (1.8.0)</span>
 <span class="sd">* v3: Legal action mask in observation replaced illegal move list in infos (1.5.0)</span>
 <span class="sd">* v2: Fixed default parameters (1.4.2)</span>
@@ -541,39 +546,17 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 
 <span class="sd">&quot;&quot;&quot;</span>
 
-<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Union</span>
+<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Union</span>
 
 <span class="kn">import</span> <span class="nn">gymnasium</span>
 <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
 <span class="kn">from</span> <span class="nn">gymnasium</span> <span class="kn">import</span> <span class="n">spaces</span>
 <span class="kn">from</span> <span class="nn">gymnasium.utils</span> <span class="kn">import</span> <span class="n">EzPickle</span>
+<span class="kn">from</span> <span class="nn">shimmy.openspiel_compatibility</span> <span class="kn">import</span> <span class="n">OpenSpielCompatibilityV0</span>
 
 <span class="kn">from</span> <span class="nn">pettingzoo</span> <span class="kn">import</span> <span class="n">AECEnv</span>
-<span class="kn">from</span> <span class="nn">pettingzoo.utils</span> <span class="kn">import</span> <span class="n">agent_selector</span><span class="p">,</span> <span class="n">wrappers</span>
-
-<span class="c1"># importing Hanabi and throw error message if pypi package is not installed correctly.</span>
-<span class="k">try</span><span class="p">:</span>
-    <span class="kn">from</span> <span class="nn">hanabi_learning_environment.rl_env</span> <span class="kn">import</span> <span class="n">HanabiEnv</span>
-
-<span class="k">except</span> <span class="ne">ModuleNotFoundError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
-    <span class="k">raise</span> <span class="ne">ImportError</span><span class="p">(</span>
-        <span class="p">(</span>
-            <span class="s2">&quot;Hanabi is not installed.</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span>
-            <span class="s2">&quot;Run ´pip3 install hanabi_learning_environment´ from within your project environment.</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span>
-            <span class="s2">&quot;Consult hanabi/README.md for detailed information.&quot;</span><span class="p">,</span>
-        <span class="p">)</span>
-    <span class="p">)</span> <span class="kn">from</span> <span class="nn">e</span>
-<span class="sd">&quot;&quot;&quot;</span>
-<span class="sd">Wrapper class around Deepmind&#39;s Hanabi Learning Environment.</span>
-<span class="sd">&quot;&quot;&quot;</span>
-
-
-<span class="k">class</span> <span class="nc">HanabiScorePenalty</span><span class="p">:</span>
-    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">env</span><span class="p">):</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">env</span> <span class="o">=</span> <span class="n">env</span>
-
-    <span class="k">def</span> <span class="fm">__float__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="k">return</span> <span class="o">-</span><span class="nb">float</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">env</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">state</span><span class="o">.</span><span class="n">score</span><span class="p">())</span>
+<span class="kn">from</span> <span class="nn">pettingzoo.utils</span> <span class="kn">import</span> <span class="n">wrappers</span>
+<span class="kn">from</span> <span class="nn">pettingzoo.utils.agent_selector</span> <span class="kn">import</span> <span class="n">agent_selector</span>
 
 
 <div class="viewcode-block" id="env"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.env">[docs]</a><span class="k">def</span> <span class="nf">env</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
@@ -585,18 +568,16 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
     <span class="k">else</span><span class="p">:</span>
         <span class="n">env</span> <span class="o">=</span> <span class="n">raw_env</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
 
-    <span class="n">env</span> <span class="o">=</span> <span class="n">wrappers</span><span class="o">.</span><span class="n">TerminateIllegalWrapper</span><span class="p">(</span><span class="n">env</span><span class="p">,</span> <span class="n">illegal_reward</span><span class="o">=</span><span class="n">HanabiScorePenalty</span><span class="p">(</span><span class="n">env</span><span class="p">))</span>
+    <span class="n">env</span> <span class="o">=</span> <span class="n">wrappers</span><span class="o">.</span><span class="n">TerminateIllegalWrapper</span><span class="p">(</span><span class="n">env</span><span class="p">,</span> <span class="n">illegal_reward</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
     <span class="n">env</span> <span class="o">=</span> <span class="n">wrappers</span><span class="o">.</span><span class="n">AssertOutOfBoundsWrapper</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
     <span class="n">env</span> <span class="o">=</span> <span class="n">wrappers</span><span class="o">.</span><span class="n">OrderEnforcingWrapper</span><span class="p">(</span><span class="n">env</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">env</span></div>
 
 
 <div class="viewcode-block" id="raw_env"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env">[docs]</a><span class="k">class</span> <span class="nc">raw_env</span><span class="p">(</span><span class="n">AECEnv</span><span class="p">,</span> <span class="n">EzPickle</span><span class="p">):</span>
-<span class="w">    </span><span class="sd">&quot;&quot;&quot;This class capsules endpoints provided within deepmind/hanabi-learning-environment/rl_env.py.&quot;&quot;&quot;</span>
-
     <span class="n">metadata</span> <span class="o">=</span> <span class="p">{</span>
         <span class="s2">&quot;render_modes&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;human&quot;</span><span class="p">],</span>
-        <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;hanabi_v4&quot;</span><span class="p">,</span>
+        <span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="s2">&quot;hanabi_v5&quot;</span><span class="p">,</span>
         <span class="s2">&quot;is_parallelizable&quot;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
         <span class="s2">&quot;render_fps&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span>
     <span class="p">}</span>
@@ -621,7 +602,7 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
         <span class="n">hand_size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span>
         <span class="n">max_information_tokens</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span>
         <span class="n">max_life_tokens</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">3</span><span class="p">,</span>
-        <span class="n">observation_type</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span>
+        <span class="n">observation_type</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;card_knowledge&quot;</span><span class="p">,</span>
         <span class="n">random_start_player</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
         <span class="n">render_mode</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
     <span class="p">):</span>
@@ -634,9 +615,10 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 <span class="sd">              - hand_size: int, Hand size in [2,5].</span>
 <span class="sd">              - max_information_tokens: int, Number of information tokens (&gt;=0).</span>
 <span class="sd">              - max_life_tokens: int, Number of life tokens (&gt;=1).</span>
-<span class="sd">              - observation_type: int.</span>
-<span class="sd">                    0: Minimal observation.</span>
-<span class="sd">                    1: First-order common knowledge observation.</span>
+<span class="sd">              - observation_type: str.</span>
+<span class="sd">                    &quot;minimal&quot;: Minimal observation.</span>
+<span class="sd">                    &quot;card_knowledge&quot;: First-order common knowledge observation.</span>
+<span class="sd">                    &quot;seer&quot;: Full information of all cards.</span>
 <span class="sd">              - random_start_player: bool, Random start player.</span>
 
 <span class="sd">        Common game configurations:</span>
@@ -647,27 +629,29 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 <span class="sd">                &quot;max_information_tokens&quot;: 8,</span>
 <span class="sd">                &quot;max_life_tokens&quot;: 3,</span>
 <span class="sd">                &quot;hand_size&quot;: (4 if players &gt;= 4 else 5)</span>
-<span class="sd">                &quot;observation_type&quot;: 1,</span>
-<span class="sd">                &quot;hand_size&quot;: 2</span>
+<span class="sd">                &quot;observation_type&quot;: &quot;card_knowledge&quot;,</span>
+<span class="sd">                &quot;hand_size&quot;: 2,</span>
 <span class="sd">                }</span>
 
 <span class="sd">            Hanabi-Small : {</span>
 <span class="sd">                &quot;colors&quot;: 2,</span>
 <span class="sd">                &quot;ranks&quot;: 5,</span>
 <span class="sd">                &quot;players&quot;: 2,</span>
-<span class="sd">                &quot;max_information_tokens&quot;: 3</span>
+<span class="sd">                &quot;max_information_tokens&quot;: 3,</span>
 <span class="sd">                &quot;hand_size&quot;: 2,</span>
-<span class="sd">                &quot;max_life_tokens&quot;: 1</span>
-<span class="sd">                &quot;observation_type&quot;: 1}</span>
+<span class="sd">                &quot;max_life_tokens&quot;: 1,</span>
+<span class="sd">                &quot;observation_type&quot;: &quot;card_knowledge&quot;,</span>
+<span class="sd">                }</span>
 
 <span class="sd">            Hanabi-Very-Small : {</span>
 <span class="sd">                &quot;colors&quot;: 1,</span>
 <span class="sd">                &quot;ranks&quot;: 5,</span>
 <span class="sd">                &quot;players&quot;: 2,</span>
-<span class="sd">                &quot;max_information_tokens&quot;: 3</span>
+<span class="sd">                &quot;max_information_tokens&quot;: 3,</span>
 <span class="sd">                &quot;hand_size&quot;: 2,</span>
-<span class="sd">                &quot;max_life_tokens&quot;: 1</span>
-<span class="sd">                &quot;observation_type&quot;: 1}</span>
+<span class="sd">                &quot;max_life_tokens&quot;: 1,</span>
+<span class="sd">                &quot;observation_type&quot;: &quot;card_knowledge&quot;,</span>
+<span class="sd">                }</span>
 
 <span class="sd">        &quot;&quot;&quot;</span>
         <span class="n">EzPickle</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
@@ -706,39 +690,30 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
             <span class="s2">&quot;observation_type&quot;</span><span class="p">:</span> <span class="n">observation_type</span><span class="p">,</span>
             <span class="s2">&quot;random_start_player&quot;</span><span class="p">:</span> <span class="n">random_start_player</span><span class="p">,</span>
         <span class="p">}</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="p">:</span> <span class="n">HanabiEnv</span> <span class="o">=</span> <span class="n">HanabiEnv</span><span class="p">(</span><span class="n">config</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_config</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span> <span class="o">=</span> <span class="n">OpenSpielCompatibilityV0</span><span class="p">(</span>
+            <span class="n">game_name</span><span class="o">=</span><span class="s2">&quot;hanabi&quot;</span><span class="p">,</span> <span class="n">render_mode</span><span class="o">=</span><span class="n">render_mode</span><span class="p">,</span> <span class="n">config</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_config</span>
+        <span class="p">)</span>
 
         <span class="c1"># List of agent names</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">agents</span> <span class="o">=</span> <span class="p">[</span><span class="sa">f</span><span class="s2">&quot;player_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">&quot;</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">players</span><span class="p">)]</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">[:]</span>
-
+        <span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">possible_agents</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span><span class="p">:</span> <span class="nb">str</span>
 
-        <span class="c1"># Sets hanabi game to clean state and updates all internal dictionaries</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
-
-        <span class="c1"># Set action_spaces and observation_spaces based on params in hanabi_env</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">action_spaces</span> <span class="o">=</span> <span class="p">{</span>
-            <span class="n">name</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Discrete</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">num_moves</span><span class="p">())</span> <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span>
+            <span class="n">a</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">action_space</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span>
         <span class="p">}</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">observation_spaces</span> <span class="o">=</span> <span class="p">{</span>
-            <span class="n">player_name</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Dict</span><span class="p">(</span>
+            <span class="n">a</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Dict</span><span class="p">(</span>
                 <span class="p">{</span>
-                    <span class="s2">&quot;observation&quot;</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span>
-                        <span class="n">low</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
-                        <span class="n">high</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
-                        <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">vectorized_observation_shape</span><span class="p">()[</span><span class="mi">0</span><span class="p">],),</span>
-                        <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">,</span>
-                    <span class="p">),</span>
+                    <span class="s2">&quot;observation&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">observation_space</span><span class="p">(</span><span class="n">a</span><span class="p">),</span>
                     <span class="s2">&quot;action_mask&quot;</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span>
                         <span class="n">low</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
                         <span class="n">high</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
-                        <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">num_moves</span><span class="p">(),),</span>
+                        <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">action_space</span><span class="p">(</span><span class="n">a</span><span class="p">)</span><span class="o">.</span><span class="n">n</span><span class="p">,),</span>
                         <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">,</span>
                     <span class="p">),</span>
                 <span class="p">}</span>
             <span class="p">)</span>
-            <span class="k">for</span> <span class="n">player_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span>
+            <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span>
         <span class="p">}</span>
 
         <span class="bp">self</span><span class="o">.</span><span class="n">render_mode</span> <span class="o">=</span> <span class="n">render_mode</span>
@@ -749,10 +724,6 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 <div class="viewcode-block" id="raw_env.action_space"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env.action_space">[docs]</a>    <span class="k">def</span> <span class="nf">action_space</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">agent</span><span class="p">):</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">action_spaces</span><span class="p">[</span><span class="n">agent</span><span class="p">]</span></div>
 
-    <span class="k">def</span> <span class="nf">_seed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
-        <span class="n">config</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">_config</span><span class="p">)</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span> <span class="o">=</span> <span class="n">HanabiEnv</span><span class="p">(</span><span class="n">config</span><span class="o">=</span><span class="n">config</span><span class="p">)</span>
-
     <span class="nd">@staticmethod</span>
     <span class="k">def</span> <span class="nf">_raise_error_if_config_values_out_of_range</span><span class="p">(</span>
         <span class="n">colors</span><span class="p">,</span>
@@ -799,22 +770,24 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
                 <span class="sa">f</span><span class="s2">&quot;Config parameter </span><span class="si">{</span><span class="n">max_life_tokens</span><span class="si">}</span><span class="s2"> is out of bounds. See description in hanabi.py.&quot;</span>
             <span class="p">)</span>
 
-        <span class="k">elif</span> <span class="ow">not</span> <span class="p">(</span><span class="mi">0</span> <span class="o">&lt;=</span> <span class="n">observation_type</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">):</span>
+        <span class="k">elif</span> <span class="ow">not</span> <span class="p">(</span><span class="n">observation_type</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;minimal&quot;</span><span class="p">,</span> <span class="s2">&quot;card_knowledge&quot;</span><span class="p">,</span> <span class="s2">&quot;seer&quot;</span><span class="p">]):</span>
             <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
-                <span class="sa">f</span><span class="s2">&quot;Config parameter </span><span class="si">{</span><span class="n">observation_type</span><span class="si">}</span><span class="s2"> is out of bounds. See description in hanabi.py.&quot;</span>
+                <span class="sa">f</span><span class="s2">&quot;Config parameter </span><span class="si">{</span><span class="n">observation_type</span><span class="si">}</span><span class="s2"> must be either &#39;minimal&#39;, &#39;card_knowledge&#39;, or &#39;seer&#39;. See description in hanabi.py.&quot;</span>
             <span class="p">)</span>
 
     <span class="nd">@property</span>
     <span class="k">def</span> <span class="nf">observation_vector_dim</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">vectorized_observation_shape</span><span class="p">()</span>
+        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">observation_space</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">shape</span>
 
     <span class="nd">@property</span>
     <span class="k">def</span> <span class="nf">legal_moves</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
-        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">infos</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span><span class="p">][</span><span class="s2">&quot;legal_moves&quot;</span><span class="p">]</span>
+        <span class="n">mask</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">infos</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span><span class="p">][</span><span class="s2">&quot;action_mask&quot;</span><span class="p">]</span>
+        <span class="k">return</span> <span class="p">[</span><span class="n">i</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">mask</span><span class="p">))</span> <span class="k">if</span> <span class="n">mask</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="mi">1</span><span class="p">]</span>
 
     <span class="nd">@property</span>
     <span class="k">def</span> <span class="nf">all_moves</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
-        <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">num_moves</span><span class="p">()))</span>
+        <span class="n">mask</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">infos</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span><span class="p">][</span><span class="s2">&quot;action_mask&quot;</span><span class="p">]</span>
+        <span class="k">return</span> <span class="p">[</span><span class="n">i</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">mask</span><span class="p">))]</span>
 
     <span class="c1"># ToDo: Fix Return value</span>
 <div class="viewcode-block" id="raw_env.reset"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env.reset">[docs]</a>    <span class="k">def</span> <span class="nf">reset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">options</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
@@ -824,35 +797,45 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
 <span class="sd">            observation: Optional list of integers of length self.observation_vector_dim, describing observations of</span>
 <span class="sd">            current agent (agent_selection).</span>
 <span class="sd">        &quot;&quot;&quot;</span>
-        <span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">_seed</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
+        <span class="c1"># if seed is not None:</span>
+        <span class="c1">#     config = dict(seed=seed, **self._config)</span>
+        <span class="c1">#     self.hanabi_env = OpenSpielCompatibilityV0(</span>
+        <span class="c1">#         pyspiel.load_game(&quot;hanabi&quot;, config), render_mode=self.render_mode</span>
+        <span class="c1">#     )</span>
 
         <span class="bp">self</span><span class="o">.</span><span class="n">agents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span><span class="p">[:]</span>
-        <span class="c1"># Reset underlying hanabi reinforcement learning environment</span>
-        <span class="n">obs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
 
-        <span class="c1"># Reset agent and agent_selection</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">_reset_agents</span><span class="p">(</span><span class="n">player_number</span><span class="o">=</span><span class="n">obs</span><span class="p">[</span><span class="s2">&quot;current_player&quot;</span><span class="p">])</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">seed</span><span class="p">)</span>
 
-        <span class="bp">self</span><span class="o">.</span><span class="n">rewards</span> <span class="o">=</span> <span class="p">{</span><span class="n">agent</span><span class="p">:</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">agent</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">}</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_rewards</span> <span class="o">=</span> <span class="p">{</span><span class="n">name</span><span class="p">:</span> <span class="mi">0</span> <span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">}</span>
-        <span class="c1"># Reset internal state</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">_process_latest_observations</span><span class="p">(</span><span class="n">obs</span><span class="o">=</span><span class="n">obs</span><span class="p">)</span></div>
+        <span class="c1"># if self.hanabi_env._env.num_distinct_actions() != self.hanabi_env.</span>
 
-    <span class="k">def</span> <span class="nf">_reset_agents</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">player_number</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
-<span class="w">        </span><span class="sd">&quot;&quot;&quot;Rearrange self.agents as pyhanabi starts a different player after each reset().&quot;&quot;&quot;</span>
-        <span class="c1"># Shifts self.agents list as long order starting player is not according to player_number</span>
-        <span class="k">while</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;player_&quot;</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">player_number</span><span class="p">):</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">agents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
-
-        <span class="c1"># Agent order list, on which the agent selector operates on.</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">_agent_selector</span> <span class="o">=</span> <span class="n">agent_selector</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">)</span>
+        <span class="c1"># Reset spaces</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">action_spaces</span> <span class="o">=</span> <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">action_space</span><span class="p">(</span><span class="n">a</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">}</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">observation_spaces</span> <span class="o">=</span> <span class="p">{</span>
+            <span class="n">a</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Dict</span><span class="p">(</span>
+                <span class="p">{</span>
+                    <span class="s2">&quot;observation&quot;</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">observation_space</span><span class="p">(</span><span class="n">a</span><span class="p">),</span>
+                    <span class="s2">&quot;action_mask&quot;</span><span class="p">:</span> <span class="n">spaces</span><span class="o">.</span><span class="n">Box</span><span class="p">(</span>
+                        <span class="n">low</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
+                        <span class="n">high</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
+                        <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">action_space</span><span class="p">(</span><span class="n">a</span><span class="p">)</span><span class="o">.</span><span class="n">n</span><span class="p">,),</span>
+                        <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">int8</span><span class="p">,</span>
+                    <span class="p">),</span>
+                <span class="p">}</span>
+            <span class="p">)</span>
+            <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span>
+        <span class="p">}</span>
 
-        <span class="c1"># Reset agent_selection</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_selector</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">rewards</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">rewards</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_rewards</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">_cumulative_rewards</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">agent_selection</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">rewards</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">rewards</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">terminations</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">terminations</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">truncations</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">truncations</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">infos</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">infos</span>
 
-    <span class="k">def</span> <span class="nf">_step_agents</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_selector</span><span class="o">.</span><span class="n">next</span><span class="p">()</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">_agent_selector</span> <span class="o">=</span> <span class="n">agent_selector</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">)</span>
+        <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_agent_selector</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span></div>
 
 <div class="viewcode-block" id="raw_env.step"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env.step">[docs]</a>    <span class="k">def</span> <span class="nf">step</span><span class="p">(</span>
         <span class="bp">self</span><span class="p">,</span> <span class="n">action</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">observe</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">as_vector</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>
@@ -869,9 +852,6 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
             <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">truncations</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span><span class="p">]</span>
         <span class="p">):</span>
             <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_was_dead_step</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
-        <span class="n">action</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
-
-        <span class="n">agent_on_turn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span>
 
         <span class="k">if</span> <span class="n">action</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">legal_moves</span><span class="p">:</span>
             <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
@@ -879,58 +859,18 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
             <span class="p">)</span>
 
         <span class="k">else</span><span class="p">:</span>
-            <span class="c1"># Iterate agent_selection</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">_step_agents</span><span class="p">()</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">action</span><span class="p">)</span>
 
-            <span class="c1"># Apply action</span>
-            <span class="n">all_observations</span><span class="p">,</span> <span class="n">reward</span><span class="p">,</span> <span class="n">done</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">action</span><span class="o">=</span><span class="n">action</span><span class="p">)</span>
-
-            <span class="c1"># Update internal state</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">_process_latest_observations</span><span class="p">(</span>
-                <span class="n">obs</span><span class="o">=</span><span class="n">all_observations</span><span class="p">,</span> <span class="n">reward</span><span class="o">=</span><span class="n">reward</span><span class="p">,</span> <span class="n">done</span><span class="o">=</span><span class="n">done</span>
-            <span class="p">)</span>
-
-            <span class="c1"># sets current reward for 0 to initialize reward accumulation</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">_cumulative_rewards</span><span class="p">[</span><span class="n">agent_on_turn</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">_accumulate_rewards</span><span class="p">()</span></div>
+            <span class="bp">self</span><span class="o">.</span><span class="n">agent_selection</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">agent_selection</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">rewards</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">rewards</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">terminations</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">terminations</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">truncations</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">truncations</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">infos</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">infos</span></div>
 
 <div class="viewcode-block" id="raw_env.observe"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env.observe">[docs]</a>    <span class="k">def</span> <span class="nf">observe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">agent_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
-        <span class="n">observation</span> <span class="o">=</span> <span class="p">(</span>
-            <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">infos</span><span class="p">[</span><span class="n">agent_name</span><span class="p">][</span><span class="s2">&quot;observations_vectorized&quot;</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span>
-            <span class="k">if</span> <span class="n">agent_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">infos</span>
-            <span class="k">else</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros_like</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">observation_spaces</span><span class="p">[</span><span class="n">agent_name</span><span class="p">]</span><span class="o">.</span><span class="n">low</span><span class="p">)</span>
-        <span class="p">)</span>
-
-        <span class="n">legal_moves</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">infos</span><span class="p">[</span><span class="n">agent_name</span><span class="p">][</span><span class="s2">&quot;legal_moves&quot;</span><span class="p">]</span>
-        <span class="n">action_mask</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">num_moves</span><span class="p">(),</span> <span class="s2">&quot;int8&quot;</span><span class="p">)</span>
-        <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">legal_moves</span><span class="p">:</span>
-            <span class="n">action_mask</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
-
-        <span class="k">return</span> <span class="p">{</span><span class="s2">&quot;observation&quot;</span><span class="p">:</span> <span class="n">observation</span><span class="p">,</span> <span class="s2">&quot;action_mask&quot;</span><span class="p">:</span> <span class="n">action_mask</span><span class="p">}</span></div>
-
-    <span class="k">def</span> <span class="nf">_process_latest_observations</span><span class="p">(</span>
-        <span class="bp">self</span><span class="p">,</span> <span class="n">obs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">reward</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">done</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
-    <span class="p">):</span>
-<span class="w">        </span><span class="sd">&quot;&quot;&quot;Updates internal state.&quot;&quot;&quot;</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">latest_observations</span> <span class="o">=</span> <span class="n">obs</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">rewards</span> <span class="o">=</span> <span class="p">{</span><span class="n">a</span><span class="p">:</span> <span class="n">reward</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">}</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">terminations</span> <span class="o">=</span> <span class="p">{</span><span class="n">player_name</span><span class="p">:</span> <span class="n">done</span> <span class="k">for</span> <span class="n">player_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">}</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">truncations</span> <span class="o">=</span> <span class="p">{</span><span class="n">player_name</span><span class="p">:</span> <span class="n">done</span> <span class="k">for</span> <span class="n">player_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span><span class="p">}</span>
-
-        <span class="c1"># Here we have to deal with the player index with offset = 1</span>
-        <span class="bp">self</span><span class="o">.</span><span class="n">infos</span> <span class="o">=</span> <span class="p">{</span>
-            <span class="n">player_name</span><span class="p">:</span> <span class="nb">dict</span><span class="p">(</span>
-                <span class="n">legal_moves</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">latest_observations</span><span class="p">[</span><span class="s2">&quot;player_observations&quot;</span><span class="p">][</span>
-                    <span class="nb">int</span><span class="p">(</span><span class="n">player_name</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
-                <span class="p">][</span><span class="s2">&quot;legal_moves_as_int&quot;</span><span class="p">],</span>
-                <span class="c1"># legal_moves_as_dict=self.latest_observations[&#39;player_observations&#39;][int(player_name[-1])][&#39;legal_moves&#39;],</span>
-                <span class="n">observations_vectorized</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">latest_observations</span><span class="p">[</span><span class="s2">&quot;player_observations&quot;</span><span class="p">][</span>
-                    <span class="nb">int</span><span class="p">(</span><span class="n">player_name</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
-                <span class="p">][</span><span class="s2">&quot;vectorized&quot;</span><span class="p">],</span>
-                <span class="c1"># observations=self.latest_observations[&#39;player_observations&#39;][int(player_name[-1])</span>
-            <span class="p">)</span>
-            <span class="k">for</span> <span class="n">player_name</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">agents</span>
-        <span class="p">}</span>
+        <span class="n">observation</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">observe</span><span class="p">(</span><span class="n">agent_name</span><span class="p">)</span>
+        <span class="n">mask</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">infos</span><span class="p">[</span><span class="n">agent_name</span><span class="p">][</span><span class="s2">&quot;action_mask&quot;</span><span class="p">]</span>
+        <span class="k">return</span> <span class="p">{</span><span class="s2">&quot;observation&quot;</span><span class="p">:</span> <span class="n">observation</span><span class="p">,</span> <span class="s2">&quot;action_mask&quot;</span><span class="p">:</span> <span class="n">mask</span><span class="p">}</span></div>
 
 <div class="viewcode-block" id="raw_env.render"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env.render">[docs]</a>    <span class="k">def</span> <span class="nf">render</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">        </span><span class="sd">&quot;&quot;&quot;Prints player&#39;s data.</span>
@@ -942,17 +882,10 @@ <h1>Source code for pettingzoo.classic.hanabi.hanabi</h1><div class="highlight">
                 <span class="s2">&quot;You are calling render method without specifying any render mode.&quot;</span>
             <span class="p">)</span>
             <span class="k">return</span>
-
-        <span class="n">player_data</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">latest_observations</span><span class="p">[</span><span class="s2">&quot;player_observations&quot;</span><span class="p">]</span>
-        <span class="nb">print</span><span class="p">(</span>
-            <span class="s2">&quot;Active player:&quot;</span><span class="p">,</span>
-            <span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span><span class="p">[</span><span class="n">player_data</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s2">&quot;current_player_offset&quot;</span><span class="p">]],</span>
-        <span class="p">)</span>
-        <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">d</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">player_data</span><span class="p">):</span>
-            <span class="nb">print</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">possible_agents</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
-            <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;========&quot;</span><span class="p">)</span>
-            <span class="nb">print</span><span class="p">(</span><span class="n">d</span><span class="p">[</span><span class="s2">&quot;pyhanabi&quot;</span><span class="p">])</span>
-            <span class="nb">print</span><span class="p">()</span></div>
+        <span class="k">try</span><span class="p">:</span>
+            <span class="bp">self</span><span class="o">.</span><span class="n">hanabi_env</span><span class="o">.</span><span class="n">render</span><span class="p">()</span>
+        <span class="k">except</span> <span class="ne">NotImplementedError</span><span class="p">:</span>
+            <span class="k">return</span></div>
 
 <div class="viewcode-block" id="raw_env.close"><a class="viewcode-back" href="../../../../../environments/classic/hanabi/#pettingzoo.classic.hanabi.hanabi.raw_env.close">[docs]</a>    <span class="k">def</span> <span class="nf">close</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
         <span class="k">pass</span></div></div>
diff --git a/main/environments/classic/hanabi/index.html b/main/environments/classic/hanabi/index.html
index 8e2ccc6a4..e939a20d9 100644
--- a/main/environments/classic/hanabi/index.html
+++ b/main/environments/classic/hanabi/index.html
@@ -395,7 +395,7 @@ <h1>Hanabi<a class="headerlink" href="#hanabi" title="Permalink to this heading"
 <table class="docutils align-default">
 <thead>
 <tr class="row-odd"><th class="head"><p>Import</p></th>
-<th class="head"><p><code class="docutils literal notranslate"><span class="pre">from</span> <span class="pre">pettingzoo.classic</span> <span class="pre">import</span> <span class="pre">hanabi_v4</span></code></p></th>
+<th class="head"><p><code class="docutils literal notranslate"><span class="pre">from</span> <span class="pre">pettingzoo.classic</span> <span class="pre">import</span> <span class="pre">hanabi_v5</span></code></p></th>
 </tr>
 </thead>
 <tbody>
@@ -436,8 +436,8 @@ <h1>Hanabi<a class="headerlink" href="#hanabi" title="Permalink to this heading"
 <section id="environment-arguments">
 <h2>Environment arguments<a class="headerlink" href="#environment-arguments" title="Permalink to this heading">#</a></h2>
 <p>Hanabi takes in a number of arguments defining the size and complexity of the game. Default is a full 2 player hanabi game.</p>
-<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">hanabi_v4</span><span class="o">.</span><span class="n">env</span><span class="p">(</span><span class="n">colors</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">rank</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">players</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">hand_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">max_information_tokens</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span>
-<span class="n">max_life_tokens</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">observation_type</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">hanabi_v5</span><span class="o">.</span><span class="n">env</span><span class="p">(</span><span class="n">colors</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">rank</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">players</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">hand_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">max_information_tokens</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span>
+<span class="n">max_life_tokens</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">observation_type</span><span class="o">=</span><span class="s2">&quot;minimal&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
 <p><code class="docutils literal notranslate"><span class="pre">colors</span></code>: Number of colors the cards can take (affects size of deck)</p>
@@ -445,7 +445,11 @@ <h2>Environment arguments<a class="headerlink" href="#environment-arguments" tit
 <p><code class="docutils literal notranslate"><span class="pre">hand_size</span></code>: Size of player’s hands. Standard game is (4 if players &gt;= 4 else 5)</p>
 <p><code class="docutils literal notranslate"><span class="pre">max_information_tokens</span></code>: Maximum number of information tokens (more tokens makes the game easier by allowing more information to be revealed)</p>
 <p><code class="docutils literal notranslate"><span class="pre">max_life_tokens</span></code>: Maximum number of life tokens (more tokens makes the game easier by allowing more information to be revealed)</p>
-<p><code class="docutils literal notranslate"><span class="pre">observation_type</span></code>: 0: Minimal observation. 1: First-order common knowledge observation (default).</p>
+<p><code class="docutils literal notranslate"><span class="pre">observation_type</span></code>:
+“minimal”: Minimal observation (what a human sees).
+“card_knowledge”: includes per-card knowledge of past hints, as well as simple inferred knowledge of the form
+“this card is not red, because it was not revealed as red in a past hint”.
+“seer”: shows all cards, including the player’s own cards, regardless of what hints have been given.</p>
 </section>
 <section id="observation-space">
 <h2>Observation Space<a class="headerlink" href="#observation-space" title="Permalink to this heading">#</a></h2>
@@ -692,6 +696,7 @@ <h2>Rewards<a class="headerlink" href="#rewards" title="Permalink to this headin
 <section id="version-history">
 <h2>Version History<a class="headerlink" href="#version-history" title="Permalink to this heading">#</a></h2>
 <ul class="simple">
+<li><p>v5: Switched environment to depend on OpenSpiel (using Shimmy) for future compatibility (1.23.0)</p></li>
 <li><p>v4: Fixed bug in arbitrary calls to observe() (1.8.0)</p></li>
 <li><p>v3: Legal action mask in observation replaced illegal move list in infos (1.5.0)</p></li>
 <li><p>v2: Fixed default parameters (1.4.2)</p></li>
@@ -708,9 +713,8 @@ <h2>API<a class="headerlink" href="#api" title="Permalink to this heading">#</a>
 
 <dl class="py class">
 <dt class="sig sig-object py" id="pettingzoo.classic.hanabi.hanabi.raw_env">
-<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">pettingzoo.classic.hanabi.hanabi.</span></span><span class="sig-name descname"><span class="pre">raw_env</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">colors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ranks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">players</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hand_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_information_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span 
class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">8</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_life_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">observation_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_start_player</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">render_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../../../_modules/pettingzoo/classic/hanabi/hanabi/#raw_env"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" 
href="#pettingzoo.classic.hanabi.hanabi.raw_env" title="Permalink to this definition">#</a></dt>
-<dd><p>This class capsules endpoints provided within deepmind/hanabi-learning-environment/rl_env.py.</p>
-<p>Initializes the <cite>raw_env</cite> class.</p>
+<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">pettingzoo.classic.hanabi.hanabi.</span></span><span class="sig-name descname"><span class="pre">raw_env</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">colors</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ranks</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">players</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">2</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">hand_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">5</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_information_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span 
class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">8</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">max_life_tokens</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">3</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">observation_type</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'card_knowledge'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">random_start_player</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">render_mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="../../../_modules/pettingzoo/classic/hanabi/hanabi/#raw_env"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a 
class="headerlink" href="#pettingzoo.classic.hanabi.hanabi.raw_env" title="Permalink to this definition">#</a></dt>
+<dd><p>Initializes the <cite>raw_env</cite> class.</p>
 <dl class="simple">
 <dt>Parameter descriptions :</dt><dd><ul class="simple">
 <li><p>colors: int, Number of colors in [2,5].</p></li>
@@ -720,8 +724,9 @@ <h2>API<a class="headerlink" href="#api" title="Permalink to this heading">#</a>
 <li><p>max_information_tokens: int, Number of information tokens (&gt;=0).</p></li>
 <li><p>max_life_tokens: int, Number of life tokens (&gt;=1).</p></li>
 <li><dl class="simple">
-<dt>observation_type: int.</dt><dd><p>0: Minimal observation.
-1: First-order common knowledge observation.</p>
+<dt>observation_type: str.</dt><dd><p>“minimal”: Minimal observation.
+“card_knowledge”: First-order common knowledge observation.
+“seer”: Full information of all cards.</p>
 </dd>
 </dl>
 </li>
@@ -735,25 +740,27 @@ <h2>API<a class="headerlink" href="#api" title="Permalink to this heading">#</a>
 “max_information_tokens”: 8,
 “max_life_tokens”: 3,
 “hand_size”: (4 if players &gt;= 4 else 5)
-“observation_type”: 1,
-“hand_size”: 2
+“observation_type”: “card_knowledge”,
+“hand_size”: 2,
 }</p>
 </dd>
 <dt>Hanabi-Small<span class="classifier">{</span></dt><dd><p>“colors”: 2,
 “ranks”: 5,
 “players”: 2,
-“max_information_tokens”: 3
+“max_information_tokens”: 3,
 “hand_size”: 2,
-“max_life_tokens”: 1
-“observation_type”: 1}</p>
+“max_life_tokens”: 1,
+“observation_type”: “card_knowledge”,
+}</p>
 </dd>
 <dt>Hanabi-Very-Small<span class="classifier">{</span></dt><dd><p>“colors”: 1,
 “ranks”: 5,
 “players”: 2,
-“max_information_tokens”: 3
+“max_information_tokens”: 3,
 “hand_size”: 2,
-“max_life_tokens”: 1
-“observation_type”: 1}</p>
+“max_life_tokens”: 1,
+“observation_type”: “card_knowledge”,
+}</p>
 </dd>
 </dl>
 </dd>
diff --git a/main/searchindex.js b/main/searchindex.js
index a41369dff..3d3b054c0 100644
--- a/main/searchindex.js
+++ b/main/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["404", "README", "api/aec", "api/parallel", "api/utils", "api/wrappers", "api/wrappers/pz_wrappers", "api/wrappers/shimmy_wrappers", "api/wrappers/supersuit_wrappers", "content/basic_usage", "content/environment_creation", "content/environment_tests", "content/tutorials", "environments/atari", "environments/atari/basketball_pong", "environments/atari/boxing", "environments/atari/combat_plane", "environments/atari/combat_tank", "environments/atari/double_dunk", "environments/atari/entombed_competitive", "environments/atari/entombed_cooperative", "environments/atari/flag_capture", "environments/atari/foozpong", "environments/atari/ice_hockey", "environments/atari/joust", "environments/atari/mario_bros", "environments/atari/maze_craze", "environments/atari/othello", "environments/atari/pong", "environments/atari/quadrapong", "environments/atari/space_invaders", "environments/atari/space_war", "environments/atari/surround", "environments/atari/tennis", "environments/atari/video_checkers", "environments/atari/volleyball_pong", "environments/atari/warlords", "environments/atari/wizard_of_wor", "environments/butterfly", "environments/butterfly/cooperative_pong", "environments/butterfly/knights_archers_zombies", "environments/butterfly/pistonball", "environments/classic", "environments/classic/chess", "environments/classic/connect_four", "environments/classic/gin_rummy", "environments/classic/go", "environments/classic/hanabi", "environments/classic/leduc_holdem", "environments/classic/rps", "environments/classic/texas_holdem", "environments/classic/texas_holdem_no_limit", "environments/classic/tictactoe", "environments/envs", "environments/mpe", "environments/mpe/simple", "environments/mpe/simple_adversary", "environments/mpe/simple_crypto", "environments/mpe/simple_push", "environments/mpe/simple_reference", "environments/mpe/simple_speaker_listener", "environments/mpe/simple_spread", "environments/mpe/simple_tag", 
"environments/mpe/simple_world_comm", "environments/sisl", "environments/sisl/multiwalker", "environments/sisl/pursuit", "environments/sisl/waterworld", "environments/third_party_envs", "index", "release_notes/index", "tutorials/cleanrl/advanced_PPO", "tutorials/cleanrl/implementing_PPO", "tutorials/cleanrl/index", "tutorials/environmentcreation/1-project-structure", "tutorials/environmentcreation/2-environment-logic", "tutorials/environmentcreation/3-action-masking", "tutorials/environmentcreation/4-testing-your-environment", "tutorials/environmentcreation/5-using-your-environment", "tutorials/environmentcreation/index", "tutorials/index", "tutorials/langchain/index", "tutorials/langchain/langchain", "tutorials/rllib/holdem", "tutorials/rllib/index", "tutorials/rllib/pistonball", "tutorials/sb3/connect_four", "tutorials/sb3/index", "tutorials/sb3/kaz", "tutorials/sb3/waterworld", "tutorials/tianshou/advanced", "tutorials/tianshou/beginner", "tutorials/tianshou/index", "tutorials/tianshou/intermediate"], "filenames": ["404.md", "README.md", "api/aec.md", "api/parallel.md", "api/utils.md", "api/wrappers.md", "api/wrappers/pz_wrappers.md", "api/wrappers/shimmy_wrappers.md", "api/wrappers/supersuit_wrappers.md", "content/basic_usage.md", "content/environment_creation.md", "content/environment_tests.md", "content/tutorials.md", "environments/atari.md", "environments/atari/basketball_pong.md", "environments/atari/boxing.md", "environments/atari/combat_plane.md", "environments/atari/combat_tank.md", "environments/atari/double_dunk.md", "environments/atari/entombed_competitive.md", "environments/atari/entombed_cooperative.md", "environments/atari/flag_capture.md", "environments/atari/foozpong.md", "environments/atari/ice_hockey.md", "environments/atari/joust.md", "environments/atari/mario_bros.md", "environments/atari/maze_craze.md", "environments/atari/othello.md", "environments/atari/pong.md", "environments/atari/quadrapong.md", "environments/atari/space_invaders.md", 
"environments/atari/space_war.md", "environments/atari/surround.md", "environments/atari/tennis.md", "environments/atari/video_checkers.md", "environments/atari/volleyball_pong.md", "environments/atari/warlords.md", "environments/atari/wizard_of_wor.md", "environments/butterfly.md", "environments/butterfly/cooperative_pong.md", "environments/butterfly/knights_archers_zombies.md", "environments/butterfly/pistonball.md", "environments/classic.md", "environments/classic/chess.md", "environments/classic/connect_four.md", "environments/classic/gin_rummy.md", "environments/classic/go.md", "environments/classic/hanabi.md", "environments/classic/leduc_holdem.md", "environments/classic/rps.md", "environments/classic/texas_holdem.md", "environments/classic/texas_holdem_no_limit.md", "environments/classic/tictactoe.md", "environments/envs.md", "environments/mpe.md", "environments/mpe/simple.md", "environments/mpe/simple_adversary.md", "environments/mpe/simple_crypto.md", "environments/mpe/simple_push.md", "environments/mpe/simple_reference.md", "environments/mpe/simple_speaker_listener.md", "environments/mpe/simple_spread.md", "environments/mpe/simple_tag.md", "environments/mpe/simple_world_comm.md", "environments/sisl.md", "environments/sisl/multiwalker.md", "environments/sisl/pursuit.md", "environments/sisl/waterworld.md", "environments/third_party_envs.md", "index.md", "release_notes/index.md", "tutorials/cleanrl/advanced_PPO.md", "tutorials/cleanrl/implementing_PPO.md", "tutorials/cleanrl/index.md", "tutorials/environmentcreation/1-project-structure.md", "tutorials/environmentcreation/2-environment-logic.md", "tutorials/environmentcreation/3-action-masking.md", "tutorials/environmentcreation/4-testing-your-environment.md", "tutorials/environmentcreation/5-using-your-environment.md", "tutorials/environmentcreation/index.md", "tutorials/index.md", "tutorials/langchain/index.md", "tutorials/langchain/langchain.md", "tutorials/rllib/holdem.md", "tutorials/rllib/index.md", 
"tutorials/rllib/pistonball.md", "tutorials/sb3/connect_four.md", "tutorials/sb3/index.md", "tutorials/sb3/kaz.md", "tutorials/sb3/waterworld.md", "tutorials/tianshou/advanced.md", "tutorials/tianshou/beginner.md", "tutorials/tianshou/index.md", "tutorials/tianshou/intermediate.md"], "titles": ["404 - Page Not Found", "PettingZoo docs", "AEC API", "Parallel API", "Utils", "Wrappers", "PettingZoo Wrappers", "Shimmy Compatibility Wrappers", "Supersuit Wrappers", "Basic Usage", "Environment Creation", "Testing Environments", "Tutorials", "Atari", "Basketball Pong", "Boxing", "Combat: Plane", "Combat: Tank", "Double Dunk", "Emtombed: Competitive", "Emtombed: Cooperative", "Flag Capture", "Foozpong", "Ice Hockey", "Joust", "Mario Bros", "Maze Craze", "Othello", "Pong", "Quadrapong", "Space Invaders", "Space War", "Surround", "Tennis", "Video Checkers", "Volleyball Pong", "Warlords", "Wizard of Wor", "Butterfly", "Cooperative Pong", "Knights Archers Zombies (\u2018KAZ\u2019)", "Pistonball", "Classic", "Chess", "Connect Four", "Gin Rummy", "Go", "Hanabi", "Leduc Hold\u2019em", "Rock Paper Scissors", "Texas Hold\u2019em", "Texas Hold\u2019em No Limit", "Tic Tac Toe", "&lt;no title&gt;", "MPE", "Simple", "Simple Adversary", "Simple Crypto", "Simple Push", "Simple Reference", "Simple Speaker Listener", "Simple Spread", "Simple Tag", "Simple World Comm", "SISL", "Multiwalker", "Pursuit", "Waterworld", "Third-Party Environments", "&lt;no title&gt;", "Release Notes", "CleanRL: Advanced PPO", "CleanRL: Implementing PPO", "CleanRL Tutorial", "Tutorial: Repository Structure", "Tutorial: Environment Logic", "Tutorial: Action Masking", "Tutorial: Testing Your Environment", "&lt;no title&gt;", "Environment Creation Tutorial", "&lt;no title&gt;", "LangChain Tutorial", "LangChain: Creating LLM agents", "RLlib: DQN for Simple Poker", "Ray RLlib Tutorial", "RLlib: PPO for Pistonball", "SB3: Action Masked PPO for Connect Four", "Stable-Baselines3 Tutorial", "SB3: PPO for 
Knights-Archers-Zombies", "SB3: PPO for Waterworld", "Tianshou: CLI and Logging", "Tianshou: Basic API Usage", "Tianshou Tutorial", "Tianshou: Training Agents"], "terms": {"thi": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "folder": [1, 4, 70, 71], "contain": [1, 9, 10, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 65, 70, 82], "For": [1, 2, 7, 8, 10, 11, 13, 40, 41, 43, 45, 46, 47, 52, 57, 65, 67, 75, 76, 82, 86, 87, 88, 89], "more": [1, 2, 7, 9, 10, 25, 27, 30, 31, 37, 39, 45, 46, 47, 49, 54, 61, 68, 70, 73, 74, 76, 83, 84, 85, 86, 87, 88, 89], "inform": [1, 2, 7, 8, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 73, 82, 83, 84, 85, 86, 87, 88, 89], "about": [1, 6, 41, 47, 54, 65, 67, 70, 74, 75, 81, 86], "how": [1, 2, 11, 19, 20, 24, 25, 26, 30, 36, 37, 39, 40, 41, 46, 54, 55, 56, 61, 65, 70, 71, 72, 73, 76, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93], "contribut": [1, 41, 68, 70], "go": [1, 42, 45, 70, 81], "our": [1, 6, 9, 10, 12, 13, 43, 45, 46, 48, 51, 72, 77], "md": [1, 70, 74], "can": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 57, 60, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 75, 76, 77, 81, 82, 86, 87, 88, 89, 90, 93], "found": [1, 11, 43, 44, 45, 46, 47, 48, 50, 51, 52, 71, 86, 88, 89], "top": [1, 30, 40, 45, 46, 65, 66, 75], "file": [1, 4, 12, 70, 72, 73, 77, 90], "python": [1, 9, 43, 69, 70, 84, 90, 91, 92, 93], "where": [1, 2, 3, 4, 6, 7, 8, 9, 10, 13, 15, 16, 17, 20, 
21, 25, 30, 31, 32, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 59, 63, 64, 66, 67, 70, 74, 75, 82, 87], "i": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93], "declar": [1, 10, 45], "exampl": [1, 2, 6, 11, 13, 43, 45, 46, 47, 49, 67, 70, 71, 75, 76, 81, 82, 83, 86, 87, 88, 90, 91, 93], "chess": [1, 42, 68, 70, 76, 84, 86], "classic": [1, 2, 3, 6, 17, 27, 28, 30, 34, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 68, 70, 82, 83, 85, 90, 91, 93], "py": [1, 10, 38, 47, 70, 71, 74, 75, 76, 77], "To": [1, 2, 6, 7, 8, 9, 10, 11, 13, 22, 27, 32, 34, 38, 39, 42, 54, 64, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "gener": [1, 2, 3, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 71, 76, 82], "you": [1, 2, 3, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 40, 42, 43, 45, 46, 48, 51, 54, 64, 68, 71, 72, 74, 79, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93], "need": [1, 9, 10, 11, 19, 20, 21, 27, 31, 67, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "execut": [1, 2, 3, 8, 9, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67, 71, 72, 75, 76, 87, 91], "_script": 1, "gen_envs_md": 1, "script": [1, 71, 86, 88, 89, 92], "cd": 1, "instal": [1, 7, 8, 10, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "requir": [1, 8, 9, 10, 11, 20, 25, 38, 49, 66, 70, 74, 86, 88, 90, 91, 93], "packag": [1, 7, 8, 10, 65, 70, 74, 90, 91, 93], "pip": [1, 7, 8, 9, 10, 13, 38, 42, 54, 64], "e": [1, 2, 3, 6, 8, 
9, 30, 40, 41, 43, 54, 65, 75, 76, 77, 82], "r": [1, 8, 10, 71, 82], "txt": [1, 74], "onc": [1, 3, 18, 32, 42, 70], "make": [1, 2, 4, 6, 8, 9, 11, 20, 27, 30, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 54, 65, 66, 67, 70, 77, 81], "dirhtml": 1, "rebuild": 1, "automat": [1, 2, 9, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67, 86], "everi": [1, 2, 3, 6, 9, 13, 41, 63, 66, 67, 89, 91], "time": [1, 2, 6, 8, 9, 10, 14, 16, 18, 22, 23, 24, 26, 27, 28, 29, 30, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 48, 49, 51, 63, 65, 66, 67, 70, 71, 76, 82, 86, 88, 89], "chang": [1, 2, 3, 6, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 46, 47, 65, 66, 67, 70, 71, 83, 86], "made": [1, 44, 64, 65, 68, 70, 82], "sphinx": [1, 70], "autobuild": 1, "b": [1, 8, 43, 71, 85, 88], "_build": 1, "By": [2, 4, 47, 62, 63, 65, 66], "default": [2, 3, 4, 8, 9, 10, 13, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 86, 88, 90], "pettingzoo": [2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 72, 74, 75, 76, 77, 79, 81, 83, 85, 87, 88, 89, 90, 91, 93], "model": [2, 9, 81, 82, 83, 85, 86, 87, 88, 89, 90, 93], "game": [2, 3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 70, 71, 75, 76, 82, 86, 87, 88, 89, 91, 92], "agent": [2, 3, 4, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 76, 84, 86, 87, 88, 89, 90, 
91, 92], "environ": [2, 3, 4, 5, 6, 8, 12, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 73, 74, 76, 81, 87], "cycl": [2, 3, 6, 9, 10, 40, 54, 66, 70], "allow": [2, 3, 5, 7, 8, 9, 10, 11, 13, 22, 25, 47, 49, 52, 54, 70, 73, 86, 87, 88], "support": [2, 3, 8, 9, 10, 11, 13, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 69, 70, 71, 86, 87, 88], "ani": [2, 6, 8, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 74, 75, 77, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "type": [2, 3, 6, 8, 11, 40, 43, 47, 70, 71, 81, 83, 86, 90, 92, 93], "multi": [2, 9, 13, 54, 64, 68, 69, 82, 84, 87, 92], "rl": [2, 9, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70, 71, 73, 86, 87, 92], "consid": [2, 9, 41, 66], "provid": [2, 3, 5, 6, 10, 40, 41, 47, 73, 81, 82, 83, 85, 87, 88, 92, 93], "standard": [2, 8, 11, 13, 46, 47, 49, 51, 69, 70, 81, 92], "turn": [2, 3, 6, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 67, 69, 70, 83], "base": [2, 3, 6, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41, 42, 44, 47, 52, 54, 55, 56, 57, 58, 61, 67, 68, 69, 70, 82], "mani": [2, 8, 11, 40, 42, 68, 70, 73, 76, 82], "which": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 15, 18, 27, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 57, 59, 65, 66, 67, 70, 72, 76, 81, 82, 83, 86, 88, 91], "implement": [2, 3, 4, 6, 7, 8, 9, 11, 13, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 65, 66, 67, 68, 70, 73, 74, 81, 82, 84, 87], "illeg": [2, 6, 9, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70, 82, 86], "we": [2, 3, 6, 9, 10, 11, 12, 13, 43, 64, 70, 72, 74, 75, 76, 77, 82, 86, 87, 88, 89], "tutori": [2, 3, 68, 70, 
71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "creat": [2, 3, 4, 6, 9, 10, 11, 26, 38, 47, 64, 67, 69, 70, 71, 72, 74, 75, 79, 81, 83, 85, 86, 87, 88, 89, 90, 91, 93], "simpl": [2, 3, 4, 5, 6, 8, 9, 10, 39, 41, 52, 54, 69, 73, 82, 84, 87, 91], "rock": [2, 3, 10, 70, 84, 91], "paper": [2, 3, 10, 43, 64, 66, 70, 71, 84, 91], "scissor": [2, 3, 10, 70, 84, 91], "show": [2, 12, 49, 52, 71, 72, 73, 83, 84, 85, 86, 87, 88, 89, 91, 93], "simultan": [2, 3, 49, 59, 69, 74], "also": [2, 8, 9, 10, 14, 15, 18, 22, 27, 28, 29, 30, 33, 34, 35, 40, 47, 54, 62, 65, 66, 67, 70, 82, 86], "repres": [2, 8, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 67, 69], "interact": [2, 3, 4, 7, 38, 54, 68, 69, 81, 82], "follow": [2, 3, 5, 6, 8, 9, 10, 13, 27, 43, 46, 47, 48, 49, 50, 51, 52, 54, 62, 68, 70, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "from": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "import": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "rps_v2": [2, 6, 10, 49, 82, 91], "env": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 22, 26, 28, 30, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 82, 83, 84, 85, 86, 88, 89, 90, 91, 93], "render_mod": [2, 3, 7, 10, 12, 13, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 72, 82, 85, 86, 
88, 89, 90, 91], "human": [2, 3, 7, 9, 10, 11, 12, 13, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 64, 65, 66, 67, 68, 69, 72, 82, 86, 88, 89, 90, 91], "reset": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 18, 20, 22, 27, 28, 29, 33, 34, 35, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 54, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89], "seed": [2, 3, 9, 10, 12, 13, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 86, 88, 89, 90, 93], "42": [2, 3, 13, 38, 42, 69], "agent_it": [2, 6, 7, 9, 13, 38, 42, 54, 64, 69, 82, 83, 85, 86, 88, 89], "observ": [2, 3, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 68, 69, 70, 72, 75, 76, 81, 82, 83, 85, 86, 87, 88, 89, 90, 93], "reward": [2, 3, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89, 90], "termin": [2, 3, 6, 7, 9, 10, 12, 13, 20, 38, 41, 42, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89], "truncat": [2, 3, 6, 7, 9, 10, 12, 13, 38, 42, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89], "info": [2, 3, 6, 7, 9, 10, 12, 13, 38, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89], "last": [2, 6, 7, 8, 9, 10, 13, 19, 20, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 52, 54, 64, 65, 66, 67, 69, 82, 83, 85, 86, 88, 89], "none": [2, 3, 4, 6, 7, 9, 10, 12, 13, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "els": [2, 6, 7, 10, 12, 13, 38, 40, 47, 54, 64, 71, 72, 82, 83, 85, 86, 
88, 89, 90, 93], "action_spac": [2, 3, 6, 7, 9, 10, 12, 13, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 74, 75, 76, 82, 83, 86, 88, 90, 93], "sampl": [2, 3, 6, 7, 9, 12, 13, 38, 42, 54, 64, 71, 72, 82, 86, 88], "would": [2, 3, 6, 7, 8, 9, 13, 38, 39, 42, 43, 45, 46, 47, 54, 64, 70, 74], "insert": [2, 3, 6, 7, 9, 13, 38, 42, 54, 64], "your": [2, 3, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 42, 54, 64, 69, 71, 74, 79, 82, 83, 85], "polici": [2, 3, 4, 6, 7, 8, 9, 12, 13, 38, 41, 42, 54, 64, 65, 69, 71, 72, 83, 85, 86, 87, 88, 89, 90, 91, 93], "step": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 25, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "close": [2, 3, 6, 7, 9, 10, 13, 15, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 54, 55, 56, 58, 64, 65, 66, 67, 70, 71, 82, 85, 86, 88, 89], "often": [2, 10, 13, 54], "includ": [2, 5, 6, 9, 10, 39, 45, 53, 54, 64, 65, 68, 69, 70, 71, 73, 74, 81], "order": [2, 6, 21, 27, 40, 45, 47, 49, 70, 81, 86], "mark": [2, 7, 52], "valid": [2, 5, 6, 10, 27, 82], "invalid": [2, 76, 86], "us": [2, 4, 6, 7, 8, 9, 11, 12, 13, 19, 20, 21, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "chess_v6": [2, 43], "option": [2, 3, 10, 11, 13, 30, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 54, 65, 66, 67, 70, 75, 76, 86, 90, 93], "depend": [2, 9, 13, 18, 30, 37, 38, 42, 46, 54, 64, 65, 67, 70, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "action_mask": [2, 7, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70, 76, 82, 83, 86], "elif": [2, 75, 76], "isinst": [2, 11, 71, 90, 93], "dict": [2, 3, 9, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 55, 
56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 88, 90, 93], "note": [2, 6, 8, 9, 10, 17, 19, 20, 25, 26, 27, 34, 37, 40, 42, 43, 45, 54, 63, 64, 66, 68, 71, 77, 83, 85, 86, 87, 88, 89], "either": [2, 14, 21, 22, 25, 28, 35, 39, 40, 41, 44, 46, 47, 49, 52, 70], "store": [2, 9, 10, 47, 74], "shimmi": [2, 5, 68, 70], "": [2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 19, 20, 21, 22, 23, 25, 27, 29, 30, 34, 35, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 70, 71, 72, 73, 82, 83, 84, 85, 86, 88, 89, 90], "openspiel": [2, 70], "custom": [2, 3, 69, 70, 74, 75, 76, 77, 79, 86], "see": [2, 7, 8, 10, 13, 19, 20, 38, 54, 55, 56, 63, 70, 71, 73, 81, 83, 84, 85, 86, 87, 88, 89], "creation": [2, 9, 70], "A": [2, 3, 5, 9, 10, 14, 21, 25, 26, 32, 33, 34, 40, 41, 42, 43, 44, 45, 47, 50, 51, 66, 68, 73, 74, 88], "closer": [2, 59], "look": [2, 9, 10, 11, 13, 26, 40, 70], "gradient": [2, 71], "algorithm": [2, 8, 12, 68, 71, 72, 73, 83, 84, 85, 87, 92], "huang": [2, 42], "2022": [2, 7, 70, 87], "class": [2, 3, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 74, 75, 76, 82, 83, 85, 86], "util": [2, 3, 5, 9, 11, 69, 70, 71, 74, 75, 76, 81, 83, 86, 87, 90, 93], "sourc": [2, 3, 6, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 81, 84], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 25, 26, 27, 30, 31, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 58, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 82, 83, 85, 86, 88, 89, 90, 91, 93], "one": [2, 6, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 
39, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 60, 68, 70], "If": [2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 14, 17, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 44, 45, 47, 49, 52, 54, 64, 65, 67, 68, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "ar": [2, 3, 4, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 74, 79, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "unsur": [2, 3], "have": [2, 3, 4, 6, 8, 9, 11, 12, 13, 15, 17, 18, 26, 27, 37, 42, 43, 44, 47, 49, 54, 56, 57, 65, 66, 67, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89, 90, 91, 93], "correctli": [2, 3], "try": [2, 3, 14, 29, 30, 32, 35, 57, 67, 71, 75, 82, 86, 88, 89], "run": [2, 3, 11, 12, 17, 25, 32, 62, 67, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "api_test": [2, 11], "document": [2, 3, 4, 7, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 45, 48, 51, 54, 56, 65, 70, 71, 73, 74, 82, 83, 84, 85, 86, 88, 89, 92], "develop": [2, 3, 4, 13, 68, 70, 81], "websit": [2, 3, 70], "list": [2, 3, 9, 10, 12, 43, 44, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 70, 72, 86, 90, 93], "str": [2, 3, 10, 11, 43, 44, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 71, 86, 88, 89, 90], "name": [2, 3, 4, 8, 9, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 70, 71, 74, 75, 76, 82, 83, 85, 86, 88, 89], "all": [2, 3, 4, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 59, 61, 63, 64, 65, 66, 67, 70, 74, 81, 82, 86, 87], "current": [2, 3, 4, 6, 8, 9, 10, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 68, 70, 82, 83, 85, 86], 
"typic": [2, 3, 9, 10, 54, 67], "integ": [2, 3, 9, 43, 44, 47, 49, 50, 86], "These": [2, 3, 9, 10, 42, 43, 47, 54, 67, 68, 79, 81, 84, 87, 92], "mai": [2, 3, 9, 10, 43, 46, 67, 68, 86], "an": [2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 81, 82, 84, 86, 87, 88, 89, 92], "progress": [2, 3, 9], "ad": [2, 3, 8, 9, 11, 39, 41, 49, 65, 66, 67, 70, 77], "remov": [2, 3, 8, 9, 10, 13, 40, 45, 47, 65, 66, 70, 86, 88], "agentid": [2, 3, 55, 56, 57, 58, 59, 60, 61, 62, 63], "num_ag": [2, 3, 9, 12, 72], "length": [2, 3, 8, 9, 12, 46, 47, 65, 67, 72, 90], "possible_ag": [2, 3, 9, 10, 12, 55, 56, 57, 58, 59, 60, 61, 62, 63, 70, 72, 75, 76, 82, 83, 85, 86, 88, 89], "could": [2, 3, 9, 47, 51], "equival": [2, 3, 6, 8, 9], "space": [2, 3, 6, 8, 9, 10, 13, 38, 39, 40, 41, 42, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 68, 70, 71, 75, 76, 83, 86, 88, 89, 90, 93], "cannot": [2, 3, 9, 14, 27, 40, 44, 47, 54, 57, 60, 68, 70], "through": [2, 3, 6, 9, 10, 11, 17, 19, 20, 21, 26, 70, 74, 81], "plai": [2, 3, 9, 10, 18, 25, 32, 39, 43, 47, 49, 52, 68, 75, 84, 86, 88, 89, 90, 91, 93], "max_num_ag": [2, 3, 9], "agent_select": [2, 6, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 86], "correspond": [2, 8, 9, 10, 26, 40, 43, 44, 48, 52, 67], "select": [2, 4, 9, 10, 12, 18, 34, 41, 66, 70, 72, 81, 82], "taken": [2, 9, 31, 47, 49, 76], "bool": [2, 3, 45, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 71], "float": [2, 3, 6, 12, 45, 46, 55, 56, 57, 58, 59, 60, 61, 62, 63, 67, 71, 72, 90], "call": [2, 4, 6, 8, 9, 10, 11, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 81, 86], "kei": [2, 3, 9, 15, 16, 17, 31, 39, 40, 41, 57, 66, 70, 71, 73, 86, 88, 89], "instantan": [2, 9], "after": [2, 6, 9, 10, 11, 16, 17, 18, 20, 25, 27, 34, 39, 41, 43, 
44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 66, 67, 70, 82, 83, 85, 86, 88, 89], "doe": [2, 6, 8, 9, 11, 14, 18, 22, 27, 28, 29, 33, 34, 35, 47, 82, 86, 87, 88], "directli": [2, 9, 23, 33, 41, 47, 64, 87], "access": [2, 6, 9, 12, 54, 72, 86, 87, 93], "rather": [2, 8, 9, 88, 89], "return": [2, 3, 6, 8, 9, 10, 11, 12, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 62, 65, 66, 67, 70, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "intern": [2, 9, 10, 11, 64], "variabl": [2, 10, 40, 70], "structur": [2, 9, 40, 65, 70, 75, 79, 82], "like": [2, 3, 8, 9, 10, 11, 12, 13, 17, 22, 39, 40, 43, 47, 54, 68, 72, 82, 83, 85, 86], "0": [2, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "first": [2, 4, 6, 8, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 74, 81, 87], "1": [2, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 71, 72, 75, 76, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "second": [2, 9, 11, 14, 18, 22, 27, 28, 29, 33, 34, 35, 43, 46, 52, 54, 66, 86, 87, 88, 91], "n": [2, 8, 9, 12, 40, 43, 46, 49, 56, 61, 71, 72, 75, 76, 83, 86, 90, 93], "nth": [2, 9, 46], "each": [2, 3, 9, 10, 12, 13, 15, 18, 20, 22, 25, 26, 27, 29, 30, 35, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 74, 86, 88, 89, 91], "observation_spac": [2, 3, 9, 10, 12, 39, 40, 41, 43, 44, 46, 47, 49, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 
72, 74, 75, 76, 83, 86, 90, 93], "gymnasium": [2, 5, 7, 8, 9, 10, 55, 56, 57, 58, 59, 60, 61, 62, 63, 68, 69, 70, 71, 75, 76, 83, 86, 90, 93], "actiontyp": [2, 3], "accept": [2, 8, 9, 10, 16, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 86], "switch": [2, 9, 23, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70], "control": [2, 8, 9, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 74, 87], "next": [2, 6, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 87], "int": [2, 3, 10, 43, 44, 46, 47, 48, 49, 50, 51, 70, 71, 82, 83, 85, 86, 88, 89, 90], "start": [2, 8, 10, 21, 30, 39, 40, 41, 43, 44, 46, 47, 49, 52, 54, 65, 66, 67, 70, 71, 72, 75, 79, 86, 88, 89], "state": [2, 3, 9, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 81, 83, 85, 87], "obstyp": [2, 3], "function": [2, 4, 5, 6, 9, 10, 11, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 62, 63, 65, 66, 67, 68, 70, 71, 74, 82, 83, 86, 90, 93], "render": [2, 3, 6, 7, 9, 10, 12, 13, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 72, 74, 75, 76, 83, 85, 86, 88, 89, 90, 91], "np": [2, 3, 8, 10, 12, 62, 71, 72, 75, 76, 83, 90, 93], "ndarrai": [2, 3, 47], "specifi": [2, 4, 8, 9, 10, 13, 38, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67, 71], "self": [2, 10, 12, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 71, 72, 74, 75, 76, 82, 83, 84, 85, 86], "mode": [2, 3, 6, 8, 9, 10, 11, 16, 17, 26, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70, 71, 85], "displai": [2, 3, 10, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67], "window": [2, 3, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 66, 67, 70], "other": [2, 5, 6, 8, 9, 10, 11, 13, 20, 22, 25, 26, 29, 30, 32, 35, 37, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 59, 60, 61, 63, 65, 66, 67, 70, 81], "rgb_arrai": [2, 3, 9, 11, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70, 72, 85], "numpi": [2, 3, 8, 9, 10, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 71, 72, 75, 76, 83, 90, 93], "arrai": [2, 3, 8, 9, 10, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 72, 82], "outsid": [2, 3, 6, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70, 74], "ansi": [2, 3, 6, 9, 10, 11, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67], "string": [2, 3, 6, 8, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67], "print": [2, 3, 6, 9, 10, 11, 12, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "specif": [2, 3, 8, 14, 16, 17, 22, 26, 28, 29, 30, 35, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 61, 65, 66, 67, 68, 70, 71, 81], "resourc": [2, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 83, 85], "should": [2, 6, 9, 10, 11, 18, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "releas": [2, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 64, 65, 66, 67, 68], "subprocess": [2, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67], "network": [2, 8, 10, 12, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 71, 72, 83, 90, 93], "connect": [2, 10, 39, 40, 41, 42, 43, 46, 47, 49, 52, 65, 66, 67, 70, 84, 87], "In": [3, 6, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 43, 51, 54, 55, 56, 57, 63, 65, 68, 70, 76, 86], "addit": [3, 8, 11, 19, 20, 35, 37, 41, 45, 48, 49, 51, 65, 70], "main": [3, 25, 26, 43, 44, 45, 46, 47, 48, 50, 51, 52, 56, 81, 82], "secondari": 3, "action": [3, 6, 7, 8, 9, 10, 12, 13, 38, 39, 40, 41, 
42, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 71, 72, 74, 75, 79, 81, 83, 85, 87, 90], "via": [3, 8, 9, 10, 13, 38, 42, 54, 64, 65, 68, 70, 74, 93], "parallel_env": [3, 6, 10, 11, 12, 38, 71, 72, 85, 88, 89], "around": [3, 6, 10, 15, 16, 17, 19, 20, 23, 24, 31, 40, 54, 65, 66, 89], "paradigm": [3, 70], "partial": [3, 11, 68, 70], "stochast": 3, "posg": 3, "detail": [3, 45, 48, 51, 54, 87], "similar": [3, 5, 6, 9, 10, 31, 43, 46, 51, 60, 63, 69], "rllib": [3, 70], "multiag": [3, 55, 64, 68], "except": [3, 6, 8, 9, 13, 43, 44, 45, 46, 47, 48, 50, 51, 52, 60, 63, 82, 86, 88, 89], "differ": [3, 6, 11, 24, 27, 42, 45, 47, 49, 54, 58, 59, 66, 81, 82, 88], "between": [3, 5, 6, 8, 10, 22, 23, 27, 30, 37, 40, 41, 54, 70, 81, 82, 92], "convert": [3, 5, 6, 8, 10, 12, 70, 72, 86, 91], "aec": [3, 5, 8, 9, 10, 69, 70, 83, 84, 86, 87, 88, 89], "split": [3, 56], "sequenti": [3, 12, 46, 69, 70, 71, 72, 85], "onli": [3, 6, 10, 11, 13, 17, 18, 19, 20, 26, 30, 40, 42, 45, 47, 49, 54, 59, 65, 70, 71, 86, 87, 92], "previou": [3, 10, 13, 43, 47, 65, 70, 76], "two": [3, 8, 11, 13, 15, 18, 22, 23, 25, 28, 29, 30, 39, 40, 41, 43, 45, 46, 49, 51, 65, 67, 70, 75, 86, 87, 91], "gridworld": 3, "butterfli": [3, 4, 6, 9, 10, 11, 12, 39, 40, 41, 69, 70, 71, 72, 83, 85, 88], "pistonball_v6": [3, 4, 6, 9, 10, 11, 12, 38, 41, 72, 83, 85], "while": [3, 6, 7, 8, 9, 12, 23, 38, 41, 46, 49, 52, 60, 61, 67, 69, 72, 86], "It": [3, 9, 10, 39, 43, 71, 72, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93], "live": [3, 6, 10, 25, 30, 37, 70], "parallel_api_test": [3, 11, 77], "gym": [3, 13, 70, 71], "tupl": [3, 8, 90, 93], "receiv": [3, 9, 14, 20, 22, 26, 27, 28, 29, 30, 33, 35, 36, 39, 45, 47, 48, 49, 51, 52, 54, 62, 65, 66, 67, 82], "dictionari": [3, 9, 10, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70], "obsdict": 3, "And": [3, 10], "frame": [3, 5, 8, 9, 13, 18, 34, 43, 46, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70, 87, 88, 91], "altern": [3, 22], "global": [3, 9, 39, 40, 41, 59, 61, 67], 
"view": [3, 39, 47, 71, 73, 83, 85, 92], "appropri": [3, 15, 39], "central": [3, 39], "train": [3, 4, 12, 39, 43, 68, 71, 72, 73, 87, 90, 91, 92], "decentr": [3, 39], "method": [3, 8, 9, 10, 11, 39, 43, 54, 70, 86], "qmix": [3, 39], "take": [3, 4, 5, 6, 8, 9, 10, 11, 13, 18, 27, 34, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 81, 82, 86, 87, 88, 89], "must": [3, 9, 10, 14, 22, 23, 27, 28, 29, 32, 34, 35, 39, 40, 41, 43, 44, 46, 47, 49, 52, 54, 57, 58, 60, 61, 65, 66, 67, 86], "same": [3, 8, 11, 18, 29, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 65, 66, 67, 77, 82, 86, 89], "valu": [3, 4, 6, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 86, 88, 89], "ha": [4, 8, 9, 11, 12, 13, 18, 22, 25, 26, 30, 40, 43, 44, 46, 47, 52, 58, 59, 61, 66, 67, 70, 71, 72, 77, 86, 88, 89], "some": [4, 5, 9, 10, 11, 13, 14, 16, 17, 22, 26, 28, 30, 35, 54, 70, 76, 81, 82], "help": [4, 9, 10, 20, 25, 43, 63, 69, 71, 72, 81, 82, 83, 85, 86, 88, 89, 90, 91, 93], "trivial": 4, "design": [4, 10, 12, 20, 68, 71, 72, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "easier": [4, 47, 70, 71], "present": 4, "sum": [4, 14, 18, 22, 24, 25, 27, 28, 29, 33, 34, 35, 47, 54, 61, 82, 86, 88, 89], "over": [4, 8, 10, 13, 25, 27, 34, 37, 39, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 57, 67, 70, 81, 83, 88], "episod": [4, 12, 40, 57, 70, 71, 72, 91], "establish": 4, "simplest": 4, "possibl": [4, 8, 9, 10, 11, 13, 20, 26, 30, 32, 39, 40, 43, 47, 49, 52, 54], "baselin": [4, 8, 13], "random": [4, 8, 11, 12, 13, 38, 39, 41, 42, 47, 54, 64, 67, 70, 71, 72, 75, 76, 82, 86, 88, 89, 90, 91, 92, 93], "average_total_reward": 4, "max_episod": 4, "100": [4, 10, 15, 26, 30, 39, 41, 45, 47, 51, 65, 75, 76, 82, 86, 88, 89], "max_step": 4, "10000000000": 4, "both": [4, 5, 6, 8, 10, 20, 21, 23, 25, 27, 32, 37, 41, 43, 44, 45, 
46, 47, 49, 51, 52, 54, 57, 59, 67, 75, 87], "limit": [4, 42, 48, 67, 70, 86], "number": [4, 6, 8, 10, 11, 12, 13, 14, 15, 22, 26, 27, 28, 30, 32, 33, 35, 40, 41, 43, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 88, 92], "evalu": [4, 8, 11, 13], "when": [4, 6, 8, 9, 10, 11, 12, 13, 16, 17, 20, 23, 24, 26, 27, 30, 31, 33, 36, 37, 39, 40, 44, 47, 54, 63, 65, 66, 67, 70, 72, 86], "hit": [4, 16, 17, 24, 25, 27, 30, 31, 36, 37, 40, 62], "stop": [4, 82, 83, 85], "imag": [4, 8, 13, 41, 43, 70, 85], "along": [4, 8, 39, 43, 74], "chosen": [4, 8, 65, 82], "all_ag": 4, "pass": [4, 8, 10, 11, 22, 23, 26, 40, 46, 70, 74], "true": [4, 8, 9, 11, 13, 16, 17, 30, 39, 40, 41, 45, 47, 54, 65, 66, 67, 70, 71, 75, 76, 82, 85, 86, 88, 89, 90, 93], "work": [4, 8, 20, 25, 30, 45, 47, 48, 51, 54, 65, 67, 70, 77, 81, 86, 87, 88], "directori": [4, 10, 70, 77], "match": [4, 33, 68, 70, 83, 90], "save_dir": 4, "new": [4, 5, 9, 10, 11, 25, 26, 27, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 68, 70, 79, 81, 86], "dure": [4, 9, 11, 70, 90], "desir": 4, "why": 4, "befor": [4, 6, 8, 9, 10, 26, 27, 40, 42, 43, 70, 74], "save_observ": 4, "fals": [4, 8, 9, 10, 11, 12, 13, 17, 30, 39, 40, 41, 45, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 90, 93], "o": [4, 40, 52, 71, 82, 83, 85, 86, 88, 89, 90, 93], "getcwd": 4, "transform": [5, 8, 10, 46], "input": [5, 6, 10, 38, 54, 67], "output": [5, 6, 8, 10, 11, 54, 70, 83], "appli": [5, 6, 8, 10, 41, 59, 61, 65, 66, 67], "convers": [5, 68, 70], "parallel": [5, 8, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 71, 72, 74, 84, 85, 89], "api": [5, 6, 10, 12, 68, 69, 70, 72, 86, 89, 92], "set": [5, 6, 8, 9, 10, 11, 13, 17, 26, 38, 40, 41, 42, 43, 44, 45, 47, 48, 50, 51, 54, 63, 64, 65, 67, 70, 82, 88, 89], "conveni": [5, 6, 10, 43], 
"reusabl": [5, 6, 10], "logic": [5, 6, 10, 12, 47, 70, 71, 72, 74, 79], "supersuit": [5, 10, 12, 13, 70, 71, 72, 83, 85, 87, 88, 89], "commonli": [5, 7, 9], "pre": [5, 8, 70, 87, 88, 90], "process": [5, 8, 13, 25, 54, 70, 74, 87, 88], "stack": [5, 8, 12, 43, 46, 72, 87, 88], "color": [5, 8, 27, 43, 47, 54, 56, 59, 87, 88], "reduct": [5, 87, 88], "compat": [5, 13, 70, 86], "extern": [5, 7, 70, 81], "reinforc": [5, 7, 8, 13, 42, 64, 68, 69, 73, 84, 87, 92], "learn": [5, 7, 8, 12, 13, 27, 38, 39, 41, 42, 47, 56, 57, 58, 61, 64, 68, 69, 71, 72, 73, 84, 86, 87, 88, 89, 90, 92], "enforc": [6, 70], "clip": [6, 8, 12, 71, 72], "out": [6, 11, 12, 15, 32, 39, 54, 68, 71, 72, 74, 85, 86], "bound": [6, 8, 39, 62, 63], "aec_to_parallel": [6, 70], "aec_env": 6, "aecenv": [6, 10], "parallelenv": [6, 10, 74, 75, 76], "case": [6, 9, 65, 74], "exist": [6, 8, 11, 40, 44, 47, 70], "wrap": [6, 9, 10, 45, 48, 51, 86, 91], "parallel_to_aec_wrapp": 6, "origin": [6, 8, 13, 43, 54, 64, 70], "otherwis": [6, 21, 39, 43, 47, 86], "aec_to_parallel_wrapp": 6, "shown": [6, 51, 66, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "below": [6, 10, 11, 13, 24, 25, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "assumpt": [6, 86], "underli": [6, 9], "updat": [6, 8, 10, 70, 71, 90], "end": [6, 12, 13, 15, 19, 20, 30, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 67, 70, 71, 72, 81], "most": [6, 8, 9, 10, 13, 27, 42, 49, 54], "alloc": [6, 10, 65, 67], "scheme": [6, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 70], "within": [6, 8, 9, 13, 14, 22, 28, 29, 33, 35, 39, 45, 47, 67, 74], "timestep": [6, 39, 65, 71, 73, 75, 76, 83], "particular": [6, 9, 10, 20, 26, 40, 63, 70], "parallel_to_aec": [6, 10, 70], "par_env": 6, "aec_to_prallel_wrapp": 6, "effici": 6, "want": [6, 8, 9, 12, 59, 72], "easi": [6, 10, 88], "combin": [6, 18, 41, 43, 45, 54], "manner": [6, 54], "capturestdoutwrapp": [6, 10], "terminateillegalwrapp": 6, "illegal_reward": 6, "initi": [6, 
10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 86], "so": [6, 8, 9, 10, 11, 13, 27, 29, 30, 32, 35, 41, 43, 47, 52, 54, 62, 68, 70, 71, 82, 86, 88, 89], "code": [6, 9, 12, 40, 54, 64, 65, 70, 87, 92], "behavior": [6, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 41, 70], "basewrapp": [6, 86], "paramet": [6, 8, 9, 12, 41, 47, 59, 61, 67, 70, 71, 72, 84, 87, 90, 93], "inherit": [6, 10], "player": [6, 10, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 68, 70, 71, 75, 82, 87, 90], "lose": [6, 19, 20, 24, 25, 26, 37, 42, 45, 47, 82], "move": [6, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 60, 66, 67, 70, 75, 76, 82, 86], "give": [6, 10, 11, 14, 21, 22, 28, 29, 35, 41, 42, 47, 59, 63, 65, 70, 82, 87], "captur": [6, 13, 26, 27, 34, 43, 46, 67, 71], "instead": [6, 8, 10, 13, 17, 40, 43, 47, 70], "assertoutofboundswrapp": [6, 10], "assert": [6, 11, 71], "given": [6, 8, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 46, 49, 82], "discret": [6, 8, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 71, 75, 76, 83, 86, 87, 89], "clipoutofboundswrapp": [6, 10], "fit": 6, "continu": [6, 8, 12, 32, 41, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70, 72, 74, 85], "emit": 6, "warn": [6, 8, 10, 11, 70, 82], "orderenforcingwrapp": [6, 10], "check": [6, 11, 21, 43, 48, 50, 51, 57, 71, 75, 76], "attribut": [6, 9, 10, 70], "disallow": 6, "error": [6, 9, 10, 11, 47, 70, 83, 85], "get": [6, 9, 
10, 12, 14, 17, 19, 20, 21, 22, 23, 27, 28, 29, 33, 35, 37, 38, 41, 43, 45, 51, 55, 59, 70, 72, 75, 76, 83, 85, 86, 88, 89, 90], "iter": [6, 9, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52], "without": [6, 9, 10, 20, 65, 70, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "70": [7, 45], "variou": [7, 13, 65, 66, 67, 70], "board": [7, 22, 27, 43, 46, 52, 68, 70, 76], "robot": [7, 65], "team": [7, 18, 22, 29, 35, 54, 57, 71], "compet": 7, "suit": [7, 45, 48, 77], "test": [7, 10, 70, 71, 72, 74, 79, 88, 90, 92], "scenario": [7, 38, 68], "assess": 7, "novel": 7, "social": 7, "situat": 7, "familiar": [7, 18], "unfamiliar": 7, "individu": [7, 12, 65, 72], "cooper": [7, 8, 13, 30, 37, 38, 41, 47, 54, 64, 67, 70, 89], "competit": [7, 13, 14, 23, 26, 28, 30, 31, 32, 33, 37, 42, 54, 67], "decept": 7, "reciproc": 7, "trust": 7, "stubborn": 7, "50": [7, 43, 47, 59, 62, 63, 83, 85, 90, 93], "substrat": 7, "250": [7, 26, 31], "load": [7, 70, 83, 85, 86, 88, 89, 90, 91], "dmcontrolmultiagentcompatibilityv0": 7, "dm_control": 7, "locomot": 7, "dm_soccer": 7, "team_siz": 7, "2": [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 57, 58, 59, 60, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "backgammon": [7, 70], "openspielcompatibilityv0": 7, "pyspiel": 7, "load_gam": 7, "prison": [7, 68, 70, 75, 76], "dilemma": 7, "matrix": [7, 40], "meltingpotcompatibilityv0": 7, "substrate_nam": 7, "prisoners_dilemma_in_the_matrix__arena": 7, "dm_control_multiagent_compat": 7, "openspiel_compat": 7, "meltingpot_compat": 7, "research": [7, 8, 13, 42, 45, 48, 51, 54, 73], "pleas": [7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], 
"cite": [7, 8, 42, 45, 48, 51, 54, 64], "softwar": [7, 92], "shimmy2022github": 7, "author": [7, 8, 12, 13, 42, 54, 64, 71, 72, 83, 85, 86, 88, 89, 90, 91, 93], "jun": [7, 13], "jet": [7, 12, 16, 72], "tai": 7, "tower": 7, "elliot": [7, 71, 86, 88, 89], "jordan": 7, "terri": [7, 8, 13], "titl": [7, 8, 13, 42, 54, 64], "url": 7, "http": [7, 10, 12, 64, 70, 71, 72, 83, 85, 86, 87, 88, 89, 90, 91, 93], "github": [7, 12, 64, 70, 71, 72, 74, 83, 85, 86, 87, 88, 89, 90, 91, 93], "com": [7, 12, 64, 70, 71, 72, 83, 85, 86, 88, 89, 90, 91, 93], "farama": [7, 10, 13, 38, 68, 70, 86], "foundat": [7, 13, 68, 70], "version": [7, 10, 11, 13, 67, 70, 74, 83, 90, 91, 93], "year": [7, 8, 13, 42, 54, 64], "companion": [8, 10], "collect": [8, 10, 12, 25, 66, 72, 81, 82, 90, 91, 93], "invad": [8, 13], "greyscal": 8, "4": [8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 88, 90, 91, 93], "atari": [8, 9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 70, 71, 72, 73], "space_invaders_v2": [8, 13, 30], "color_reduction_v0": [8, 12, 71, 72, 85], "frame_stack_v1": [8, 12, 13, 71, 72, 85, 88], "full": [8, 10, 13, 44, 47, 51, 66, 67, 70, 71, 88, 90, 92], "clip_reward_v0": [8, 71], "lower_bound": [8, 71], "upper_bound": [8, 71], "popular": [8, 42, 68, 84], "wai": [8, 13, 25, 62, 76, 81], "handl": [8, 9, 10, 13, 19, 20, 23, 24, 25, 26, 36, 37, 40, 65, 70, 74, 76, 88], "signific": [8, 47, 70], "varianc": [8, 12, 72], "magnitud": [8, 67], "especi": [8, 70], "clip_actions_v0": 8, "box": [8, 13, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 83], "high": [8, 9, 30, 38, 61, 73, 83, 92], "low": [8, 54, 83], "keep": [8, 16, 25, 39, 54, 65, 67, 74, 77, 86], "simplifi": [8, 68], "graphic": [8, 10, 11], "x": [8, 12, 40, 41, 43, 46, 52, 59, 62, 63, 65, 66, 67, 71, 72, 82], "y": [8, 13, 40, 
43, 46, 65, 67, 86, 88, 89], "3": [8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 56, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 88, 89, 90, 91, 93], "shape": [8, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 83, 90, 93], "fulli": [8, 66, 70], "computation": 8, "intens": 8, "argument": [8, 9, 10, 11, 12, 13, 16, 38, 42, 54, 70, 71, 72, 82, 90], "g": [8, 13, 43, 54, 75, 76, 77], "just": [8, 9, 10, 11, 40], "channel": [8, 12, 43, 54, 57, 66, 70, 72], "much": [8, 30, 41, 70, 86], "faster": [8, 13, 16, 30, 62, 70], "suffici": 8, "dtype_v0": [8, 85], "dtype": [8, 82], "recast": 8, "certain": [8, 9, 54, 70, 76], "uint8": 8, "neural": [8, 43, 54], "float16": 8, "float32": [8, 82, 85], "anyth": 8, "flatten_v0": 8, "flatten": [8, 12, 43, 71, 72, 85], "1d": 8, "frame_skip_v0": [8, 13, 71], "num_fram": 8, "skip": [8, 13, 82], "reappli": 8, "old": [8, 10, 12, 26, 72], "ignor": 8, "accumul": [8, 10, 43], "frameskip": 8, "min_skip": 8, "max_skip": 8, "indic": [8, 11, 12, 44, 47, 52, 66, 67, 72, 82], "rang": [8, 10, 12, 41, 47, 67, 71, 72, 86, 88, 89, 90, 93], "randomli": [8, 26, 54, 57, 66, 75], "singl": [8, 9, 11, 20, 43, 47, 55, 65, 66, 70, 73, 81, 86, 87, 88, 89], "delay_observations_v0": 8, "delai": 8, "been": [8, 9, 13, 26, 30, 43, 44, 47, 52, 66, 67, 70, 71, 86, 88, 89], "zero": [8, 12, 14, 18, 22, 27, 28, 29, 33, 34, 35, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 57, 65, 71, 72, 75, 76, 84, 86], "frame_skip": [8, 13], "prefer": 8, "reaction": 8, "fp": [8, 70], "sticky_actions_v0": [8, 13], "repeat_action_prob": [8, 13], "assign": [8, 86], "probabl": [8, 9, 83, 85], "stick": 8, "request": 8, "prevent": [8, 14, 18, 22, 27, 28, 29, 33, 34, 35, 63, 76], "predefin": 8, "pattern": 8, 
"highli": [8, 9, 38, 41, 73], "determinist": [8, 11, 13, 71, 86, 88, 89], "sticki": [8, 27], "cumul": [8, 9], "chanc": 8, "row": [8, 27, 40, 43, 45, 46], "etc": [8, 10, 11, 70, 82], "recommend": [8, 10, 13, 71, 72, 77, 79, 82, 83, 85, 86, 88, 89, 90, 91, 93], "machado": [8, 13], "et": [8, 13], "al": [8, 13], "2018": [8, 13], "revisit": [8, 13], "arcad": [8, 13, 34, 68], "protocol": [8, 13], "open": [8, 10, 13, 17, 68, 73, 84], "problem": [8, 13, 65, 68, 69, 70], "recent": [8, 10, 68], "vector": [8, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 65, 67, 87, 88, 89, 91], "plain": 8, "concaten": 8, "longer": [8, 10, 14, 18, 20, 22, 27, 28, 29, 33, 34, 35, 40, 41, 70], "2d": [8, 46, 67], "3d": 8, "taller": 8, "At": [8, 10, 44, 45, 47, 48, 51, 61, 74], "don": [8, 33, 62, 86], "t": [8, 9, 10, 12, 30, 33, 47, 56, 62, 70, 71, 72, 86], "yet": [8, 49], "fill": [8, 32, 44], "analog": [8, 11], "max_observation_v0": [8, 13, 71], "memori": [8, 21, 26], "result": [8, 10, 11, 40, 54, 64, 67, 70, 71, 86, 88, 90, 91, 93], "becom": [8, 9], "max": [8, 12, 13, 47, 70, 71, 72, 86, 88, 89], "prior": 8, "element": [8, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 67, 70], "intermit": 8, "flash": 8, "being": [8, 9, 16, 35, 40, 47, 49, 54, 62, 63, 66, 67, 70, 76], "constant": [8, 10, 11, 40, 88], "due": [8, 11, 13, 41, 43, 44, 46, 47, 49, 52, 68, 70], "peculiar": 8, "consol": [8, 13, 47], "crt": 8, "tv": 8, "openai": [8, 13, 54, 82], "maxandskip": [8, 13], "do": [8, 9, 11, 13, 40, 44, 54, 67, 68, 71, 81, 82, 86, 87], "normalize_obs_v0": [8, 85], "env_min": [8, 85], "env_max": [8, 85], "linearli": 8, "scale": [8, 41, 54, 65, 67, 84], "known": [8, 27, 45, 59], "minimum": [8, 48, 50, 51, 61, 63, 67], "maximum": [8, 13, 40, 47, 65, 67, 70, 71], "defin": [8, 10, 11, 45, 46, 47, 82, 86, 91], "float64": 8, "finit": 8, "wish": [8, 10, 83], "normal": [8, 11, 12, 30, 33, 40, 63, 65, 71, 72], "anoth": [8, 9, 47, 48, 51, 54, 77], "reshape_v0": 8, "reshap": [8, 71], "resize_v1": [8, 12, 13, 71, 72, 85, 88], 
"x_size": [8, 66, 71, 85, 88], "y_size": [8, 66, 71, 85, 88], "linear_interp": 8, "perform": [8, 47, 54, 71, 73, 87, 88, 90], "interpol": 8, "up": [8, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 43, 45, 54, 56, 66, 75, 76, 88, 89], "size": [8, 40, 46, 47, 54, 66, 67, 90, 93], "down": [8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 66, 67, 75, 76], "area": [8, 22, 29, 33, 35, 54, 62, 81], "linear": [8, 12, 71, 72, 85], "avail": [8, 43, 44, 45, 46, 47, 48, 50, 51, 52, 83], "better": [8, 70], "sens": [8, 67], "nan_noop_v0": 8, "nan": [8, 12, 70, 71, 72], "trigger": 8, "oper": [8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36], "its": [8, 9, 10, 11, 25, 39, 40, 42, 45, 46, 47, 48, 49, 51, 54, 65, 90], "place": [8, 9, 10, 27, 44, 46, 47, 52, 54, 65, 66, 75], "noop": 8, "no_op_act": 8, "nan_zeros_v0": 8, "nan_random_v0": 8, "retriev": [8, 9, 86], "mask": [8, 42, 70, 79, 83, 87], "scale_actions_v0": 8, "__init__": [8, 10, 12, 71, 72, 74, 75, 76, 82, 83, 85], "addition": [8, 26, 41, 64], "agent_indicator_v0": [8, 71], "type_onli": [8, 71], "add": [8, 9, 10, 12, 39, 40, 49, 57, 66, 67, 70, 72, 88, 90], "id": [8, 9, 10, 45, 46, 47, 48, 50, 51, 71, 90], "hot": [8, 47], "append": [8, 9, 47, 82, 85, 86], "increas": [8, 49, 70, 81], "necessari": [8, 10], "treat": [8, 9], "sort": 8, "encod": [8, 43, 46, 47, 48, 49, 52], "madrl": [8, 64], "share": [8, 30, 66, 84, 87], "heterogen": 8, "sinc": [8, 9, 25, 49, 54], "tell": [8, 10, 11, 47, 82, 87], "what": [8, 11, 12, 40, 43, 70, 72], "act": [8, 9, 10, 41, 49, 67, 74, 82, 86, 88, 89], "pars": [8, 82, 90], "_": [8, 12, 71, 72, 83, 86, 88, 89, 90, 93], "identifi": [8, 13, 43, 54, 82], "than": [8, 46, 49, 51, 70, 76, 88, 89], "few": [8, 10, 13, 18, 42, 68, 87], "wa": [8, 10, 12, 13, 20, 41, 47, 64, 68, 70, 72, 76, 82], "introduc": [8, 13], "deep": [8, 64, 83, 90, 92, 93], 
"black_death_v2": 8, "dead": [8, 10, 19, 20, 45, 88], "death": [8, 19, 20, 24, 25, 26, 36, 37, 40, 65, 70, 88], "mechan": 8, "black": [8, 13, 40, 43, 46, 62, 67, 70, 88], "come": [8, 32], "plagu": 8, "fact": [8, 17], "die": [8, 9, 40, 88], "pad_action_space_v0": 8, "pad": [8, 12, 40, 43, 72], "biggest": 8, "per": [8, 13, 18, 24, 47, 67, 71, 88, 90], "pose": 8, "surprisingli": 8, "enabl": [8, 26, 40, 67, 71, 86], "marl": [8, 69, 90, 93], "homogen": 8, "insid": [8, 63], "region": [8, 40], "crop": 8, "pad_observations_v0": 8, "largest": [8, 47, 49, 86], "articl": [8, 13, 42, 54], "microwrapp": 8, "j": [8, 13, 40, 48, 66], "k": [8, 13, 40, 45, 47, 48, 50, 51, 64, 66, 71], "benjamin": [8, 13], "hari": 8, "ananth": 8, "journal": [8, 13, 42, 54], "arxiv": [8, 13, 42, 54], "preprint": [8, 13, 42, 54], "2008": 8, "08932": 8, "2020": [8, 13, 70], "librari": [9, 13, 42, 68, 70, 73, 84, 87], "famili": 9, "problemat": 9, "system": [9, 10, 13, 24, 31, 43, 46, 54, 64], "8": [9, 12, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37, 43, 44, 45, 46, 47, 48, 50, 51, 52, 56, 57, 58, 59, 65, 66, 67, 71, 72, 82, 83, 85, 88, 89, 90, 91, 93], "9": [9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 37, 39, 43, 44, 46, 47, 49, 62, 63, 65, 83, 85, 88, 90, 93], "10": [9, 11, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 34, 39, 40, 46, 47, 54, 56, 59, 62, 65, 67, 83, 85, 88, 89, 90, 91, 93], "11": [9, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 39, 43, 45, 46, 47, 48, 50, 51, 60, 65, 71, 72], "linux": 9, "maco": 9, "pr": [9, 70, 84], "relat": [9, 40, 54, 70], "offici": [9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 70, 73], "veri": [9, 27, 38, 47, 68, 74], "them": [9, 11, 24, 25, 27, 30, 34, 41, 42, 54, 56, 65, 70, 81, 88], "configur": [9, 38, 47, 88], "cooperative_pong": [9, 39], "ball_spe": [9, 39], "18": [9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 43, 45, 47, 61], "left_paddle_spe": [9, 39], "25": [9, 13, 45, 47, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 70, 85, 87], "right_paddle_spe": [9, 39], "is_cake_paddl": 9, "max_cycl": [9, 11, 12, 13, 39, 40, 41, 49, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 72, 82, 85, 88], "900": [9, 39, 40, 88], "bounce_random": [9, 39], "interfac": [9, 68, 69, 81, 87, 91], "max_it": 9, "63": [9, 50], "yield": [9, 86], "abl": [9, 11, 16, 21, 26, 35], "comput": [9, 12, 13, 39, 40, 45, 46, 67, 72, 83], "impli": 9, "thu": [9, 41, 58, 90, 91, 93], "fantast": 9, "weird": 9, "becaus": [9, 10, 18, 20, 27, 82], "lower": [9, 40, 41], "level": [9, 30, 32, 37, 61, 77], "won": [9, 10, 33], "Their": 9, "abov": [9, 24, 26, 35, 41, 74, 83], "though": [9, 10, 54, 79], "matter": [9, 11], "factor": [9, 41, 65, 67, 71], "never": [9, 45, 70], "rese": 9, "There": [9, 13, 25, 26, 39, 40, 45, 47, 70, 81], "downstream": 9, "wrapper": [9, 13, 46, 68, 70, 71, 83, 85, 86, 88], "thei": [9, 10, 11, 12, 14, 15, 17, 20, 26, 27, 28, 30, 33, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 57, 61, 62, 65, 66, 67, 70, 72], "special": 9, "circumst": 9, "Not": 9, "featur": [9, 11, 43, 47, 54, 67, 70, 73, 86, 88, 89], "state_spac": [9, 70], "empti": [9, 10, 44, 46], "mean": [9, 12, 14, 18, 20, 22, 27, 28, 29, 33, 34, 35, 37, 40, 41, 44, 49, 54, 56, 65, 66, 70, 71, 72, 74, 90], "condit": [9, 47, 70, 75, 76], "underneath": 9, "layer": [9, 12, 22, 71, 72], "manual": [9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70], "aspect": [9, 30, 32, 37], "alreadi": [9, 10, 26, 66, 76], "itself": [9, 40, 66, 83, 85, 86], "base_env": 9, "knights_archers_zombies_v10": [9, 10, 38, 40, 69, 88], "cours": 9, "di": [9, 19], "entri": [9, 10, 48, 50, 51], "vacuou": 9, "changeabl": 9, "transit": [9, 12, 72], "point": [9, 14, 15, 16, 17, 18, 21, 
22, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 40, 44, 45, 46, 47, 79, 88], "separ": [9, 11, 77, 86], "studi": [9, 13, 43], "encourag": [9, 11, 13, 70], "actor": [9, 12, 54, 68, 71, 72], "hand": [9, 10, 45, 47, 48, 49, 50, 51], "lightweight": [9, 73, 92], "messag": [9, 11, 54, 57, 70], "ensur": [9, 45, 70], "reason": [9, 10, 11, 54], "incorrect": [9, 70], "howev": [9, 24, 29, 30, 47, 49, 82], "small": [9, 39, 43, 47, 63, 65, 70], "amount": [9, 41, 51], "overhead": 9, "raw_env": [9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "constructor": [9, 39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 83], "modul": [9, 12, 71, 72, 85, 86, 88, 89], "overview": 10, "relev": [10, 40], "carefulli": 10, "comment": [10, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "functool": [10, 75, 76], "agent_selector": 10, "num_it": 10, "reward_map": 10, "def": [10, 12, 62, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "find": [10, 19, 20, 43], "elsewher": 10, "internal_render_mod": 10, "wide": [10, 40, 68, 69], "vareiti": 10, "user": [10, 11, 12, 38, 72, 73], "strongli": 10, "metadata": [10, 38, 70, 74, 75, 76, 86, 88, 89], "hold": [10, 18, 43, 44, 45, 46, 47, 52, 70, 82, 83, 86], "put": [10, 17], "least": [10, 27, 47, 74, 92], "pretti": 10, "init": [10, 12, 71, 72, 83, 85], "v1": [10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 87], "overridden": 10, "infer": 10, "rais": [10, 41, 48, 50, 51], "player_": 10, "map": [10, 16, 17, 20, 21, 26, 27, 31, 47, 54, 66], "agent_name_map": 10, "zip": [10, 86, 88, 89], "len": [10, 12, 72, 85, 88, 89, 90, 93], "here": [10, 13, 82, 88, 91], "_action_spac": 10, "_observation_spac": 10, "lru_cach": [10, 75, 76], "memoiz": 10, "reduc": 
[10, 70], "clock": [10, 38, 43], "line": [10, 12, 40, 52, 67, 70, 72, 87, 92], "disabl": [10, 11, 70], "cach": [10, 11, 70], "maxsiz": [10, 75, 76], "org": [10, 86], "understand": [10, 71, 72, 74, 75, 82, 83, 85, 86, 88, 89, 90, 91, 93], "logger": [10, 90], "agent1": 10, "agent2": 10, "format": [10, 70, 82, 91], "sane": 10, "necessarili": 10, "date": 10, "data": [10, 12, 47, 65, 70, 71, 72, 90, 91, 93], "kept": 10, "_cumulative_reward": 10, "issu": [10, 11, 27, 40, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "num_mov": 10, "cyclic": 10, "_agent_selector": 10, "_was_dead_step": 10, "had": [10, 66], "account": [10, 31], "again": [10, 34], "is_last": 10, "until": [10, 13, 44, 49, 55, 56, 57, 58, 59, 60, 61, 62, 63, 81], "_clear_reward": 10, "_accumulate_reward": 10, "from_parallel": [10, 70], "count": [10, 17, 43, 45, 66], "agent_1": [10, 56, 59, 61, 63], "item_1": 10, "agent_2": [10, 61], "item_2": 10, "env_trunc": 10, "still": [10, 26, 40, 41, 49], "back": [10, 17, 21, 25, 30, 43, 46], "forth": [10, 30], "wrapped_env": 10, "debug": [10, 11, 55, 83, 85], "doc": [10, 70, 71, 74, 82], "agent_3": 10, "deprecatedmodul": 10, "guid": [10, 70, 74], "awai": [10, 29, 35, 58], "obsolet": 10, "toward": [10, 29, 35, 36, 41, 43, 65], "ones": [10, 43, 76], "tri": [10, 11], "knights_archers_zombies_v0": 10, "deprecated_modul": 10, "knights_archers_zombi": [10, 40], "v0": [10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67], "v10": [10, 40], "deprecatedenv": 10, "now": [10, 40, 68, 70, 75, 77, 82], "complianc": 11, "own": [11, 12, 21, 27, 39, 40, 46, 47, 54, 69, 72, 74], "sure": [11, 77], "consist": [11, 43, 54, 70, 75], "num_cycl": [11, 77], "1000": [11, 37, 50, 51, 90, 93], "verbose_progress": 11, "As": [11, 38, 40, 47, 48, 64, 70], "simpli": [11, 49, 70, 82], "complet": [11, 47, 66, 70, 77, 90], "properli": 11, "reproduc": [11, 82], "actual": [11, 82], "seed_test": 
11, "parallel_seed_test": 11, "env_fn": [11, 86, 88, 89], "test_kept_st": 11, "parallel_env_fn": 11, "long": [11, 12, 15, 27, 32, 71, 72, 81], "determin": [11, 13, 49], "fail": [11, 65, 70], "physic": [11, 31, 41, 65, 68, 70], "bare": 11, "detect": [11, 67, 86], "enough": 11, "correct": [11, 60, 70], "off": [11, 13, 17, 25, 39, 41, 65, 67, 71, 87], "usag": [11, 70, 92], "max_cycles_test": 11, "crash": [11, 40], "produc": [11, 13, 67, 87], "render_test": [11, 70], "custom_test": 11, "non": [11, 13, 38, 40, 42, 46, 70, 87], "svg": 11, "lambda": [11, 71, 82, 83, 85, 90, 91], "render_result": 11, "regress": 11, "5": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 88, 90, 91, 93], "inspect": [11, 82], "performance_benchmark": 11, "visual": [11, 38, 41, 87, 88], "intend": [11, 55, 68, 77, 87], "huge": 11, "bug": [11, 39, 40, 43, 44, 45, 46, 47, 48, 50, 51, 52, 64, 65, 66, 67, 70], "good": [11, 17, 54, 56, 57, 58, 62, 63, 70, 86], "test_save_ob": 11, "basic": [12, 26, 72, 75, 92], "ppo": [12, 73, 83, 84, 87], "pistonbal": [12, 38, 70, 72, 73, 83, 84], "inspir": [12, 68, 72], "cleanrl": [12, 70, 90, 93], "exceedingli": [12, 72], "log": [12, 71, 72, 83, 92, 93], "weight": [12, 41, 59, 61, 71, 72, 73], "save": [12, 46, 72, 85, 86, 88, 89, 90, 93], "intent": [12, 72], "rel": [12, 27, 40, 43, 54, 59, 61, 63, 65, 72, 77], "clean": [12, 54, 72, 87], "200": [12, 30, 37, 47, 65, 72, 83], "refer": [12, 45, 48, 51, 54, 65, 67, 69, 70, 72, 81, 86], "jjshoot": [12, 70, 72], "torch": [12, 71, 72, 83, 85, 90, 93], "nn": [12, 71, 72, 83, 85], "optim": [12, 20, 38, 41, 65, 66, 71, 72, 81, 85, 86, 88, 89, 90, 93], "distribut": [12, 65, 66, 67, 71, 72, 84], "categor": [12, 71, 72], "num_act": [12, 49, 72], "super": [12, 71, 72, 82, 86], "_layer_init": [12, 72], "conv2d": [12, 71, 72, 85], "32": [12, 45, 71, 72, 
85], "maxpool2d": [12, 72], "relu": [12, 71, 72, 85], "64": [12, 71, 72, 85, 90, 93], "128": [12, 71, 72, 85, 90, 93], "512": [12, 40, 71, 72, 85], "std": [12, 71, 72], "01": [12, 66, 67, 70, 71, 72], "critic": [12, 41, 54, 71, 72], "sqrt": [12, 71, 72], "bias_const": [12, 71, 72], "orthogonal_": [12, 71, 72], "constant_": [12, 71, 72], "bia": [12, 71, 72], "get_valu": [12, 71, 72], "255": [12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 71, 72], "get_action_and_valu": [12, 71, 72], "hidden": [12, 21, 54, 71, 72, 83, 90], "logit": [12, 71, 72, 83], "prob": [12, 71, 72], "log_prob": [12, 71, 72], "entropi": [12, 71, 72], "batchify_ob": [12, 72], "ob": [12, 71, 72, 82, 83, 85, 86, 88, 89], "devic": [12, 71, 72, 90, 93], "pz": [12, 72], "style": [12, 34, 43, 46, 72], "batch": [12, 71, 72, 87, 90, 93], "axi": [12, 72], "transpos": [12, 72], "height": [12, 40, 41, 72], "width": [12, 40, 72], "tensor": [12, 71, 72, 83], "batchifi": [12, 72], "unbatchifi": [12, 72], "cpu": [12, 71, 72, 86, 88, 90, 93], "enumer": [12, 65, 67, 71, 72], "__name__": [12, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "__main__": [12, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "algo": [12, 71, 72], "param": [12, 71, 72], "cuda": [12, 71, 72, 90, 93], "is_avail": [12, 71, 72, 90, 93], "ent_coef": [12, 71, 72, 88], "vf_coef": [12, 71, 72, 88], "clip_coef": [12, 71, 72], "gamma": [12, 71, 72, 85, 88, 90], "99": [12, 71, 72, 85, 88], "batch_siz": [12, 71, 72, 88, 89, 90, 93], "stack_siz": [12, 72], "frame_s": [12, 72], "125": [12, 41, 47, 72, 85], "total_episod": [12, 72], "setup": [12, 70, 74], "observation_s": [12, 72], "learner": [12, 72], "adam": [12, 71, 72, 90, 93], "lr": [12, 71, 72, 85, 90, 93], "001": [12, 72], "ep": [12, 71, 72, 90], "1e": [12, 65, 71, 72, 86, 89, 90, 93], "storag": [12, 71, 72], "end_step": [12, 72], "total_episodic_return": [12, 72], "rb_ob": [12, 72], "rb_action": [12, 72], "rb_logprob": [12, 72], 
"rb_reward": [12, 72], "rb_term": [12, 72], "rb_valu": [12, 72], "no_grad": [12, 71, 72], "next_ob": [12, 71, 72], "num_step": [12, 71, 72], "rollov": [12, 72], "logprob": [12, 71, 72], "term": [12, 27, 43, 72, 82], "trunc": [12, 72, 82], "reach": [12, 40, 44, 68, 72], "break": [12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 42, 70, 71, 72, 86, 88, 89], "bootstrap": [12, 71, 72], "done": [12, 25, 26, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 71, 72, 81, 91], "rb_advantag": [12, 72], "zeros_lik": [12, 71, 72], "revers": [12, 71, 72], "delta": [12, 71, 72], "rb_return": [12, 72], "b_ob": [12, 71, 72], "start_dim": [12, 72], "end_dim": [12, 72], "b_logprob": [12, 71, 72], "b_action": [12, 71, 72], "b_return": [12, 71, 72], "b_valu": [12, 71, 72], "b_advantag": [12, 71, 72], "b_index": [12, 72], "arang": [12, 71, 72], "clip_frac": [12, 72], "repeat": [12, 27, 47, 72, 81], "shuffl": [12, 71, 72], "batch_index": [12, 72], "newlogprob": [12, 71, 72], "logratio": [12, 71, 72], "ratio": [12, 41, 70, 71, 72], "exp": [12, 62, 71, 72], "calcul": [12, 47, 70, 71, 72], "approx_kl": [12, 71, 72], "joschu": [12, 71, 72], "net": [12, 35, 71, 72, 90, 93], "blog": [12, 71, 72, 87], "kl": [12, 71, 72], "approx": [12, 71, 72], "html": [12, 53, 71, 72, 86, 88, 89], "old_approx_kl": [12, 71, 72], "ab": [12, 71, 72], "item": [12, 67, 71, 72, 82], "advantaeg": [12, 72], "advantag": [12, 71, 72], "loss": [12, 42, 47, 71, 72, 86], "pg_loss1": [12, 71, 72], "pg_loss2": [12, 71, 72], "clamp": [12, 71, 72, 83], "pg_loss": [12, 71, 72], "v_loss_unclip": [12, 71, 72], "v_clip": [12, 71, 72], "v_loss_clip": [12, 71, 72], "v_loss_max": [12, 71, 72], "v_loss": [12, 71, 72], "entropy_loss": [12, 71, 72], "zero_grad": [12, 71, 72], "backward": [12, 40, 71, 72], "y_pred": [12, 71, 72], "y_true": [12, 71, 72], "var_i": [12, 71, 72], "var": [12, 71, 72], "explained_var": [12, 71, 72], "f": [12, 40, 43, 71, 72, 75, 
76, 82, 86, 88, 89, 90, 93], "fraction": [12, 72], "explain": [12, 72], "THE": [12, 72], "eval": [12, 72, 88, 89, 90], "instrument": 13, "modern": [13, 68], "hope": 13, "basketbal": [13, 18], "pong": [13, 38, 70], "combat": [13, 31], "plane": [13, 43, 44, 45, 46, 52], "tank": 13, "doubl": 13, "dunk": 13, "entomb": [13, 19, 20], "flag": [13, 65, 82], "foozpong": [13, 70], "ic": 13, "hockei": 13, "joust": 13, "mario": 13, "bro": 13, "maze": [13, 17, 19, 20], "craze": 13, "othello": 13, "quadrapong": [13, 70], "war": 13, "surround": [13, 41, 46, 66], "tenni": 13, "video": [13, 14, 22, 28, 29, 35, 68, 70, 71, 73], "checker": [13, 70], "volleybal": 13, "warlord": [13, 37], "wizard": 13, "Of": 13, "wor": 13, "uniqu": [13, 38, 42, 54, 64], "rom": [13, 70, 71], "autorom": [13, 71], "path": [13, 16, 32, 40, 70, 71, 77, 83, 85, 86, 88, 89, 90, 93], "rom_path": [13, 71], "launch": [13, 38, 42, 54, 64, 86, 88, 89], "coupl": 13, "variat": [13, 48, 51], "four": [13, 22, 29, 35, 36, 40, 42, 54, 70, 74, 75, 84, 87], "extens": 13, "notabl": 13, "theoret": [13, 68], "memor": 13, "precis": [13, 15, 18], "sequenc": [13, 40, 44, 45, 81], "maxim": [13, 27, 30, 64, 82], "score": [13, 14, 15, 16, 17, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 33, 35, 37, 45, 47, 86], "ideal": 13, "sticky_act": 13, "approach": [13, 43], "flicker": 13, "sprite": 13, "hardwar": 13, "restrict": 13, "knight": [13, 38, 43, 70, 87], "sometim": [13, 51, 54], "even": 13, "pixel": [13, 39, 40, 41, 54, 70], "wise": 13, "space_invaders_v1": 13, "deal": 13, "less": [13, 30, 88], "downscal": 13, "84": [13, 45, 71, 85, 88], "everyth": [13, 40], "screen": [13, 20, 30, 32, 39, 40, 41, 67, 70], "despit": 13, "atari_gam": 13, "obs_typ": 13, "rgb_imag": 13, "full_action_spac": 13, "100000": [13, 83], "auto_rom_install_path": 13, "three": [13, 43, 51, 64], "rgb": [13, 41, 70], "grayscale_imag": 13, "grayscal": 13, "ram": 13, "1024": 13, "bit": [13, 47, 57], "compris": 13, "duplic": 13, "leav": [13, 26, 30, 32], "tool": [13, 
69, 81], "re": [13, 70, 86], "bin": 13, "multiplay": [13, 68], "were": [13, 39, 40, 46, 47, 54, 70], "terry2020arcad": 13, "2009": 13, "09341": 13, "bellemare13arcad": 13, "bellemar": 13, "m": [13, 40, 51, 86, 88, 89], "naddaf": 13, "veness": 13, "bowl": 13, "platform": [13, 92], "artifici": 13, "intellig": [13, 64], "2013": 13, "month": 13, "volum": 13, "47": 13, "page": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70], "253": 13, "279": [13, 47], "machado2018revisit": 13, "marlo": 13, "c": [13, 43], "marc": 13, "talviti": 13, "erik": 13, "joel": 13, "hausknecht": 13, "matthew": 13, "michael": 13, "61": [13, 50], "523": 13, "562": 13, "part": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70, 75, 84], "read": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70], "basketball_pong_v3": 14, "ye": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "No": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70, 86, 91], "first_0": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "second_0": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "210": [14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 47], "160": 
[14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "ball": [14, 22, 28, 29, 33, 35, 36, 39, 41, 63, 70], "oppon": [14, 15, 16, 17, 19, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 44, 45, 46, 48, 49, 66, 90], "hoop": 14, "But": [14, 32, 54], "side": [14, 20, 22, 27], "court": 14, "serv": [14, 22, 28, 29, 33, 35, 70], "timer": [14, 18, 22, 27, 28, 29, 33, 34, 35, 70], "indefinit": [14, 18, 22, 27, 28, 29, 33, 34, 35], "stall": [14, 18, 22, 27, 28, 29, 33, 34, 35, 70], "pure": [14, 18, 22, 27, 28, 29, 33, 34, 35, 92], "olymp": [14, 22, 28, 29, 35], "common": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 47, 49, 71, 81, 86, 90, 93], "describ": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 63, 66, 74], "basketball_pong": [14, 70], "num_play": [14, 22, 28, 35, 48, 50, 51, 70, 82], "choos": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 49, 54, 67, 81], "6": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 52, 63, 65, 67, 71, 75, 76, 82, 88, 90, 91, 93], "fire": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40], "right": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 42, 43, 54, 65, 66, 75, 76], "left": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 46, 47, 54, 65, 66, 70, 75, 76], "v3": [14, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 66, 67], "v2": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67], 
"entir": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 45, 70], "kwarg": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 83, 85, 86], "boxing_v2": 15, "17": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 40, 47], "adversari": [15, 16, 17, 18, 54, 57, 58, 62, 63], "respons": 15, "minut": [15, 88, 89], "1200": 15, "duke": 15, "ring": 15, "punch": 15, "success": [15, 18, 54], "jab": 15, "power": [15, 19, 20, 81], "ko": 15, "whenev": [15, 16, 17, 19, 31], "penal": [15, 16, 17, 21, 31, 45, 61, 62, 67, 86], "upright": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "7": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37, 43, 44, 45, 46, 47, 48, 49, 52, 65, 66, 67, 71, 75, 76, 90], "upleft": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "downright": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "downleft": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "12": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 39, 43, 45, 47, 50, 51, 65], "13": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 43, 45, 47, 50, 51, 65, 71, 72, 83, 85, 88], "14": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 39, 40, 45, 47, 48, 50, 51, 60, 62, 65, 66], "15": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 47, 49, 65], "16": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 40, 47, 62, 66, 71], "minim": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 37, 47, 70, 74, 91, 93], "combat_jet_v1": 16, "256": [16, 71, 88, 89], "posit": [16, 17, 24, 31, 33, 40, 41, 43, 47, 52, 54, 55, 56, 63, 65, 67, 70, 86, 89], "track": [16, 47, 71, 73, 74, 86, 87], "complex": [16, 47, 81], "movement": [16, 41, 54, 76], "fly": 16, "flight": 16, "direct": [16, 21, 39, 40, 41, 43, 54, 67, 75], "speed": [16, 39, 65, 67, 88, 89, 92], "bullet": [16, 17, 
31, 37, 68], "combat_plane_v2": 16, "game_vers": [16, 26, 28], "guided_missil": 16, "bi": 16, "whether": [16, 40, 43, 44, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 71, 81], "missil": 16, "fix": [16, 19, 20, 24, 25, 26, 27, 29, 34, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 64, 65, 66, 67, 70, 71, 83, 85], "combat_plan": 16, "combat_tank_v3": 17, "predict": [17, 31, 33, 83, 86, 88, 89], "blast": 17, "obstacl": [17, 32, 54, 62, 63, 66, 67], "potenti": [17, 25, 70], "combat_tank_v2": 17, "has_maz": 17, "is_invis": 17, "billiard_hit": 17, "field": 17, "invis": [17, 20, 30], "unless": [17, 26, 86, 88], "wall": [17, 20, 26, 32, 41, 66, 76], "bounc": [17, 41], "billiard": 17, "combat_tank": 17, "double_dunk_v3": 18, "stage": [18, 20], "difficult": [18, 30, 68, 86], "strategi": [18, 27, 30, 32, 34, 52], "choic": [18, 49, 90], "120": [18, 41], "begin": [18, 47, 48, 51, 57], "fan": 18, "shot": 18, "double_dunk": 18, "entombed_competitive_v3": 19, "race": [19, 26, 68], "longest": [19, 39], "quickli": [19, 20, 21, 25], "navig": [19, 20, 60, 67], "constantli": [19, 20], "stuck": [19, 20], "easili": [19, 20], "yourself": [19, 20, 26], "escap": [19, 20, 75], "rare": [19, 20], "danger": [19, 20], "zombi": [19, 20, 38, 70, 87], "lurk": [19, 20], "avoid": [19, 20, 30, 32, 61, 67, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "prematur": [19, 20, 24, 25, 26, 36, 37, 40, 65], "entombed_competit": 19, "entombed_cooperative_v3": 20, "averag": [20, 56, 59, 65, 67, 70, 89], "total": [20, 39, 40, 46, 47, 59, 63, 65, 70, 71, 86], "23": [20, 43, 65, 71, 72, 83, 85, 86, 88, 89, 90, 91, 93], "explor": [20, 83], "teammat": 20, "far": [20, 47, 58, 61, 82], "powerup": 20, "coordin": [20, 22, 29, 35, 38, 41, 43, 46, 63, 66, 67], "opposit": 20, "appear": [20, 41], "symmetr": 20, "effect": [20, 21, 76], "halv": 20, "ident": [20, 26], "divid": [20, 41, 86], "section": [20, 43, 44, 45, 46, 47, 48, 50, 51, 52, 66], "immedi": [20, 65], "life": [20, 24, 25, 
37, 47], "entombed_coop": [20, 70], "flag_capture_v2": 21, "battl": [21, 22, 29, 35, 37], "travel": 21, "squar": [21, 40, 43], "bomb": [21, 30], "sent": [21, 57], "locat": [21, 27, 44, 46, 63, 67, 77], "hint": [21, 47, 70, 92], "distanc": [21, 40, 54, 55, 56, 58, 59, 61, 63, 67], "search": 21, "narrow": 21, "flag_captur": 21, "foozpong_v3": 22, "third_0": [22, 29, 35, 36], "fourth_0": [22, 29, 35, 36], "past": [22, 23, 28, 33], "defend": [22, 29, 36], "tradit": [22, 81], "foozbal": 22, "paddl": [22, 29, 39], "goal": [22, 25, 27, 32, 33, 41, 54, 82], "succe": 22, "ice_hockey_v2": 23, "offens": 23, "puck": 23, "defens": 23, "On": 23, "front": [23, 40, 65, 76], "rapid": 23, "maneuv": 23, "ice_hockei": 23, "joust_v3": 24, "mix": [24, 25, 54], "involv": [24, 51, 81], "unforgiv": 24, "world": [24, 40, 54, 66], "care": [24, 26, 37], "essenti": [24, 37], "well": [24, 37, 40, 49, 65, 88], "awar": [24, 37], "npc": [24, 37], "varieti": [24, 69], "wave": 24, "enemi": 24, "expect": [24, 86, 90], "earn": [24, 25], "3000": 24, "mario_bros_v3": 25, "plan": [25, 26, 32], "kick": 25, "pest": 25, "floor": [25, 35], "flip": [25, 27], "knock": [25, 45], "onto": [25, 35], "800": [25, 41, 43, 46, 49, 70], "opportun": [25, 37], "collabor": 25, "steal": 25, "activ": [25, 41, 88], "firebal": 25, "gain": 25, "20000": [25, 90], "bonu": [25, 30], "coin": 25, "wafer": 25, "mario_bro": 25, "maze_craze_v3": 26, "Its": 26, "win": [26, 27, 34, 42, 48, 49, 90], "robber": 26, "travers": 26, "exit": [26, 33, 62, 86, 88, 89], "confus": 26, "block": [26, 54, 62, 76], "disappear": 26, "tabl": [26, 49, 65, 67], "inaccuraci": 26, "blockad": 26, "maze_craz": 26, "visibilty_level": 26, "visibl": [26, 54], "othello_v3": 27, "piec": [27, 34, 43, 46, 70, 81], "diagon": [27, 43, 44, 52], "trap": 27, "rule": [27, 43, 82], "greedi": 27, "heurist": 27, "poor": 27, "interest": 27, "cursor": [27, 34], "fairli": 27, "awhil": 27, "regist": [27, 34, 83], "plu": [27, 41, 50, 67], "greedili": 27, "bad": [27, 70], 
"successfulli": [27, 44], "solv": 27, "think": [27, 74, 75], "token": [27, 44, 47], "talli": 27, "noth": [27, 34, 40, 54, 82], "auto": [27, 68], "pong_v3": [28, 71], "quadrapong_v4": 29, "belong": [29, 67], "v4": [29, 34, 39, 40, 41, 43, 45, 46, 47, 48, 50, 51, 65, 66, 67], "ship": 30, "togeth": [30, 47, 54, 67], "clear": [30, 37], "alien": 30, "30": [30, 65, 66, 67, 83, 85], "fli": 30, "across": [30, 41, 70, 73], "worth": 30, "sabotag": 30, "somehow": 30, "alternating_control": 30, "moving_shield": 30, "zigzaging_bomb": 30, "fast_bomb": 30, "invisible_invad": 30, "hoard": 30, "abil": 30, "forev": [30, 67, 70], "eventu": 30, "shift": 30, "anywai": 30, "shield": 30, "reliabl": [30, 70, 87], "protect": 30, "space_invad": 30, "space_war_v2": 31, "advanc": [31, 47, 73], "acceler": [31, 67, 70], "momentum": [31, 41], "space_war": 31, "surround_v2": 32, "trail": 32, "behind": 32, "slowli": 32, "conserv": 32, "master": [32, 70, 71, 86, 88, 89], "higher": [32, 35, 40, 41], "liter": 32, "room": 32, "forc": [32, 65], "dummi": [32, 75, 76, 83, 85], "tennis_v3": 33, "let": [33, 87], "unlik": [33, 43], "video_checkers_v4": 34, "jump": [34, 35], "hover": 34, "press": [34, 40], "button": 34, "held": 34, "multipl": [34, 67, 68, 70, 82], "video_check": 34, "volleyball_pong_v2": 35, "affect": [35, 47, 54], "motion": [35, 54], "volleyball_pong_v3": 35, "volleyball_pong": [35, 70], "warlords_v3": 36, "man": 36, "stand": [36, 44], "fortress": 36, "fall": [36, 44, 65], "wizard_of_wor": [36, 37], "wizard_of_wor_v3": 37, "against": [37, 86, 88], "2500": 37, "ncp": 37, "benefit": 37, "attack": [37, 40], "archer": [38, 70, 87], "challeng": [38, 39, 68], "pygam": [38, 40, 41, 68, 70], "degre": 38, "emerg": [38, 41, 54], "achiev": [38, 41, 54, 65], "respect": [38, 40, 43, 44, 45, 46, 48, 59, 67], "manual_polici": [38, 70], "manualpolici": 38, "tick": 38, "render_fp": [38, 70], "wasd": 38, "cooperative_pong_v4": 39, "paddle_0": 39, "paddle_1": 39, "280": [39, 47], "480": 39, "560": [39, 41], 
"960": [39, 70], "object": [39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "goe": [39, 45, 65], "edg": [39, 40, 43, 65], "collis": [39, 61, 62, 63, 65, 70], "elast": [39, 41], "alwai": [39, 40, 41, 43, 54, 59, 61, 70], "center": [39, 40, 66], "littl": 39, "tier": 39, "cake": 39, "half": [39, 49, 51], "stai": [39, 41, 66, 88], "max_reward": 39, "off_screen_penalti": 39, "w": [39, 40, 41, 43], "arrow": [39, 40, 66], "cake_paddl": 39, "wed": 39, "angl": [39, 40, 65], "remain": [39, 47], "unchang": 39, "neg": [39, 45, 56, 62, 67, 86], "penalti": [39, 45, 65, 70], "v5": [39, 40, 41, 43, 46, 51, 65, 70], "teleport": 39, "glitch": 39, "occasion": 39, "redesign": [39, 70], "misc": [39, 40, 41, 65, 66, 67, 70], "arg": [39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 83, 85, 90], "pickl": [39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70], "archer_0": [40, 88], "archer_1": 40, "knight_0": 40, "knight_1": 40, "720": 40, "1280": 40, "walk": [40, 79], "border": [40, 41, 54], "bottom": [40, 44, 54, 75, 77], "unpredict": 40, "rotat": [40, 65], "clockwis": 40, "counter": 40, "forward": [40, 76, 83, 85], "kill": [40, 88], "swing": 40, "mace": 40, "arc": 40, "head": [40, 65], "straight": 40, "collid": [40, 61, 67], "vector_st": 40, "x5": 40, "num_arch": 40, "num_knight": 40, "num_sword": 40, "max_arrow": 40, "max_zombi": [40, 88], "someth": [40, 83, 85], "sword": 40, "entiti": [40, 67, 71], "breakdown": 40, "corner": [40, 46, 75], "final": [40, 43, 47, 49, 86, 90], "unit": [40, 41, 54, 66], "absolut": [40, 54, 77], "typemask": 40, "prepend": 40, "use_typemask": 40, "whole": 40, "x11": 40, "experiment": 40, "sequence_spac": 40, "bodi": 40, "512x512": 40, "word": 40, "16x16": 40, "d": [40, 41, 43, 68, 86, 88, 89], "shoot": 40, "q": [40, 48, 83, 90, 93], "spawn": [40, 41, 66, 87, 88], "l": [40, 71], "stab": 40, "u": 40, "spawn_rat": 40, "20": [40, 41, 43, 
47, 48, 57, 63, 67, 82, 85, 86], "killable_knight": 40, "killable_arch": 40, "pad_observ": 40, "line_death": 40, "rate": [40, 71, 90], "form": [40, 47, 54, 66, 67, 70], "40x40": 40, "grid": [40, 44, 52, 66, 68, 75, 76], "touch": [40, 66, 67], "soon": 40, "vectoriz": 40, "v9": 40, "rewrit": [40, 70], "numer": [40, 70], "v8": [40, 65], "cleanup": [40, 70], "sever": [40, 54], "v7": [40, 65], "minor": [40, 41, 54, 70], "v6": [40, 41, 43, 51, 65, 70], "bump": [40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 70, 76], "pymunk": [40, 41, 70, 89], "piston_0": 41, "piston_1": 41, "piston_19": 41, "457": 41, "880": [41, 70], "vertic": [41, 44, 52, 67], "piston": [41, 70], "proport": [41, 66, 67], "accordingli": 41, "overal": 41, "balanc": [41, 46, 49], "local": [41, 59, 61, 67, 70], "time_penalti": [41, 85], "local_ratio": [41, 59, 61, 65, 67, 70], "local_reward": 41, "global_reward": 41, "chipmunk": 41, "engin": [41, 68, 70, 87, 92], "realist": 41, "angri": 41, "bird": 41, "rightmost": [41, 65], "n_piston": [41, 85], "random_drop": [41, 85], "random_rot": [41, 85], "ball_mass": [41, 85], "75": [41, 47, 65, 67, 85], "ball_frict": [41, 85], "ball_elast": [41, 85], "real": [41, 82], "angular": [41, 65], "mass": 41, "friction": 41, "column": [41, 43, 44, 45, 70], "imprecis": [41, 70], "refactor": [41, 67, 70], "gin": [42, 86], "rummi": [42, 86], "hanabi": [42, 70, 86], "leduc": [42, 83], "holdem": [42, 70, 83], "rp": [42, 49, 70], "texa": [42, 48, 70, 86], "tictacto": [42, 52, 70], "mostli": [42, 54], "texas_holdem_v4": [42, 50], "sole": 42, "commun": [42, 47, 48, 50, 51, 54, 57, 59, 60, 61, 63], "legal": [42, 82], "binari": [42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70], "equal": [42, 45, 47, 65], "rlcard": [42, 45, 48, 50, 51, 70], "zha2019rlcard": 42, "toolkit": 42, "card": [42, 45, 47, 48, 50, 51, 68], "zha": 42, "daochen": 42, "lai": 42, "kwei": 42, "herng": 42, "cao": 42, "yuanpu": 42, "songyi": 42, "wei": 42, "ruzh": 42, "guo": 42, "junyu": 42, "hu": 42, "xia": 42, "1910": 
42, "04376": 42, "2019": 42, "player_0": [43, 44, 45, 47, 48, 49, 50, 51, 83], "player_1": [43, 44, 45, 47, 48, 49, 50, 51, 52, 83], "4672": 43, "111": 43, "oldest": 43, "ai": 43, "alphazero": [43, 46], "usual": [43, 44, 45, 46, 47, 48, 50, 51, 52, 74], "8x8": 43, "castl": 43, "white": [43, 46, 47, 66, 70], "queensid": 43, "kingsid": 43, "th": [43, 90], "convolut": 43, "One": [43, 47, 56, 81], "index": [43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 67], "spot": 43, "leelachesszero": 43, "en": [43, 86, 88, 89], "passant": 43, "vulner": 43, "pawn": [43, 76], "8th": 43, "5th": 43, "19": [43, 45, 46, 47, 48, 58, 83, 85], "seen": [43, 63], "fold": [43, 48, 50, 51], "repetit": 43, "latest": 43, "occupi": 43, "orient": [43, 54, 65], "vari": [43, 70, 88, 89], "board_histori": 43, "maintain": [43, 68, 70], "king": [43, 45, 48], "1st": [43, 46, 47], "simpler": [43, 86], "layout": 43, "nevertheless": 43, "incorpor": 43, "facilit": 43, "capabl": [43, 69], "profici": 43, "whose": [43, 44, 45, 46, 47, 48, 50, 51, 52, 67], "alphachesszero": 43, "8x8x73": 43, "dimension": [43, 86, 88, 89], "pick": [43, 45], "56": [43, 50], "queen": [43, 48], "eight": 43, "compass": 43, "ne": 43, "se": 43, "sw": 43, "nw": 43, "underpromot": 43, "bishop": 43, "rook": 43, "seventh": 43, "rank": [43, 45, 47], "promot": 43, "73": 43, "express": 43, "notat": 43, "signifi": 43, "g1": 43, "h": [43, 86, 88, 89], "winner": [43, 45, 46, 48, 49, 50, 51, 52, 86], "loser": [43, 45, 46, 48, 49, 50, 51, 52], "draw": [43, 44, 45, 49, 52], "wrong": 43, "insuffici": 43, "materi": 43, "proper": [43, 46, 70], "arbitrari": [43, 44, 45, 46, 47, 48, 50, 51, 52, 70], "replac": [43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 70, 82], "adopt": [43, 44, 45, 46, 47, 48, 49, 50, 51, 52], "screen_height": [43, 46, 49, 50, 51], "connect_four_v3": [44, 86], "horizont": [44, 52, 65, 67], "drop": [44, 47, 70], "6x7": 44, "placement": [44, 52], "cell": [44, 52], "inclus": 44, "award": [44, 45, 47], "connect_four": 44, "screen_scal": 44, 
"gin_rummy_v4": 45, "110": 45, "52": [45, 50, 51], "deck": [45, 47, 48, 51], "who": 45, "knock_reward": 45, "gin_reward": 45, "opponents_hand_vis": 45, "unknown": 45, "5x52": 45, "spade": [45, 50, 51], "heart": [45, 50, 51], "diamond": [45, 50, 51], "club": [45, 50, 51], "ac": 45, "descript": [45, 46, 47, 48, 50, 51, 65, 67, 83, 85], "discard": [45, 47], "pile": [45, 47], "exclud": 45, "26": [45, 50, 51, 65, 70], "38": [45, 50, 51], "27": [45, 50, 51, 58, 65, 70], "39": [45, 50, 51], "51": [45, 50, 51], "40": [45, 50, 51], "57": [45, 50, 83, 85], "31": [45, 65], "44": 45, "45": 45, "58": [45, 50], "109": 45, "59": 45, "71": [45, 50], "83": [45, 64], "96": 45, "97": 45, "deadwood": 45, "deadwood_count": 45, "slightli": 45, "those": [45, 47, 64], "upgrad": [45, 48, 50, 51, 70], "rlcard_env": [45, 48, 50, 51], "gin_rummi": 45, "go_v5": 46, "black_0": 46, "white_0": 46, "362": 46, "stone": 46, "intersect": 46, "aim": 46, "territori": 46, "decid": 46, "minigo": 46, "komi": 46, "compens": 46, "board_siz": 46, "disadvantag": 46, "inher": 46, "chines": 46, "tournament": 46, "perfectli": [46, 54], "third": [46, 66, 70], "gtp": 46, "2nd": [46, 47], "2n": 46, "hanabi_v4": 47, "658": 47, "firework": 47, "satisfi": 47, "drawn": 47, "hand_siz": 47, "max_information_token": 47, "max_life_token": 47, "observation_typ": 47, "reveal": [47, 48, 49], "knowledg": 47, "construct": [47, 91], "serial": [47, 70], "unari": 47, "thermomet": 47, "That": 47, "lowest": 47, "red": [47, 54, 56, 62, 66, 67], "11111": 47, "explicitli": 47, "10000": 47, "24": [47, 65, 70], "49": 47, "74": 47, "124": 47, "174": 47, "175": 47, "179": 47, "180": 47, "184": 47, "yellow": 47, "185": 47, "189": 47, "green": [47, 54, 56, 62, 67], "190": 47, "195": 47, "199": 47, "blue": [47, 63, 66], "207": 47, "208": 47, "211": 47, "260": 47, "261": 47, "262": 47, "263": 47, "266": 47, "267": 47, "268": 47, "target": [47, 56, 59, 67, 71, 90], "269": 47, "273": 47, "274": 47, "278": 47, "281": 47, "282": 47, "283": 47, 
"307": 47, "308": 47, "342": 47, "0th": 47, "343": 47, "377": 47, "378": 47, "412": 47, "413": 47, "447": 47, "3rd": 47, "445": 47, "482": 47, "4th": 47, "483": 47, "517": 47, "518": 47, "552": 47, "553": 47, "587": 47, "588": 47, "622": 47, "663": 47, "657": 47, "scalar": [47, 49], "uuid": 47, "lost": [47, 67], "negat": 47, "took": [47, 70], "ordinari": 47, "random_start_play": 47, "capsul": 47, "endpoint": 47, "deepmind": [47, 68], "rl_env": 47, "agent_nam": [47, 82], "observation_vector_dim": 47, "as_vector": 47, "legal_mov": 47, "throw": [47, 70], "leduc_holdem_v4": [48, 83], "36": 48, "round": [48, 50, 51, 86], "six": [48, 70, 81], "jack": 48, "bet": [48, 51], "public": [48, 57, 73], "best": [48, 77], "33": 48, "chip": [48, 50, 51], "21": [48, 90, 91, 93], "35": 48, "22": [48, 67, 90, 91, 93], "leduc_holdem": 48, "beat": 49, "extra": 49, "pair": 49, "odd": [49, 70], "exactli": 49, "defeat": 49, "expans": 49, "lizard": [49, 70], "spock": [49, 70], "applic": [49, 81, 82, 84], "greater": [49, 51], "pariti": 49, "therefor": [49, 67], "action_6": 49, "action_7": 49, "merg": [49, 70], "72": 50, "boolean": 50, "depict": [50, 66, 67], "53": [50, 51], "62": [50, 62], "66": [50, 64], "67": 50, "68": 50, "texas_holdem": 50, "texas_holdem_no_limit_v6": [51, 82], "54": [51, 61], "poker": [51, 84], "regular": 51, "union": 51, "pot": 51, "acpc": 51, "texas_holdem_no_limit": [51, 82], "tictactoe_v3": [52, 82, 90, 93], "player_2": 52, "3x3": 52, "similarli": [52, 67], "swap": 52, "_________": 52, "timelin": 53, "crypto": 54, "push": 54, "speaker": [54, 59], "listen": [54, 59], "spread": 54, "tag": [54, 66, 70], "comm": 54, "particl": 54, "landmark": [54, 55, 56, 58, 59, 60, 61], "codebas": 54, "simple_tag_v3": [54, 62], "punish": 54, "vice": 54, "versa": 54, "natur": [54, 76], "mixtur": 54, "static": [54, 70], "circular": 54, "destin": 54, "veloc": [54, 65, 67, 70], "temporarili": [54, 70], "broadcast": 54, "transmit": 54, "signal": [54, 57, 65, 70], "bob": [54, 57], "ev": 
[54, 57], "reconstruct": [54, 57], "circl": [54, 62], "uniformli": 54, "apart": 54, "mind": 54, "compos": [54, 65, 81], "exact": 54, "cardin": [54, 75], "hear": 54, "continuous_act": [54, 55, 56, 57, 58, 59, 60, 61, 62, 63], "oppos": 54, "scene": 54, "grow": 54, "wander": 54, "beyond": [54, 65, 67, 81], "mordatch2017emerg": 54, "ground": [54, 65], "composit": 54, "languag": [54, 81], "popul": 54, "mordatch": 54, "igor": 54, "abbeel": 54, "pieter": 54, "1703": 54, "04908": 54, "2017": [54, 64], "lowe2017multi": 54, "ryan": 54, "wu": 54, "yi": 54, "tamar": 54, "aviv": 54, "harb": 54, "jean": 54, "nip": 54, "mpe": [55, 56, 57, 58, 59, 60, 61, 62, 63, 70], "simple_v3": 55, "agent_0": [55, 56, 58, 59, 61, 62, 63], "inf": [55, 56, 57, 58, 59, 60, 61, 62, 63, 65], "euclidean": [55, 56, 67], "primarili": 55, "purpos": [55, 71, 87], "self_vel": [55, 56, 58, 59, 60, 61, 62, 63], "landmark_rel_posit": [55, 56, 61, 62, 63], "simple_adversary_v3": 56, "adversary_0": [56, 58, 59, 62, 63], "28": [56, 59, 63, 65, 70], "closest": [56, 61], "doesn": [56, 70, 86], "know": 56, "unscal": 56, "cover": [56, 61], "deceiv": 56, "self_po": [56, 61, 62, 63], "goal_rel_posit": [56, 58], "other_agent_rel_posit": [56, 58, 61, 62, 63], "other_agents_rel_posit": 56, "no_act": [56, 58, 59, 60, 61, 62, 63], "move_left": [56, 58, 59, 60, 61, 62, 63], "move_right": [56, 58, 59, 60, 61, 62, 63], "move_down": [56, 58, 59, 60, 61, 62, 63], "move_up": [56, 58, 59, 60, 61, 62, 63], "simple_adversari": 56, "simple_crypto_v3": 57, "eve_0": 57, "bob_0": 57, "alice_0": 57, "alic": 57, "privat": 57, "encrypt": 57, "private_kei": 57, "alices_comm": 57, "say_0": [57, 59, 60, 63], "say_1": [57, 59, 60, 63], "say_2": [57, 59, 60, 63], "say_3": [57, 59, 60, 63], "convei": 57, "simple_crypto": 57, "simple_push_v3": 58, "goal_landmark_id": 58, "all_landmark_rel_posit": [58, 59, 60], "landmark_id": [58, 59], "simple_push": 58, "simple_reference_v3": 59, "goal_id": [59, 60], "say_4": [59, 60], "say_5": [59, 60], 
"say_6": [59, 60], "say_7": [59, 60], "say_8": [59, 60], "say_9": [59, 60], "cartesian": [59, 63], "product": [59, 63], "simple_refer": [59, 60], "simple_speaker_listener_v4": 60, "speaker_0": 60, "listener_0": 60, "grai": 60, "speak": 60, "simple_speaker_listen": 60, "simple_spread_v3": 61, "simple_spread": [61, 70], "adversary_1": [62, 63], "adversary_2": 62, "predat": 62, "prei": 62, "slower": 62, "larg": [62, 65, 67, 70, 92], "infin": 62, "min": 62, "other_agent_veloc": [62, 63], "num_good": [62, 63], "num_adversari": [62, 63], "num_obstacl": [62, 63], "simple_tag": [62, 63], "simple_world_comm_v3": 63, "leadadversary_0": 63, "adversary_3": 63, "34": 63, "192": 63, "food": [63, 67], "forest": 63, "hide": 63, "leader": 63, "chase": 63, "05": [63, 70, 90, 93], "self_in_forest": 63, "leader_comm": 63, "simple_world_comm": 63, "num_food": 63, "num_forest": 63, "multiwalk": [64, 70], "pursuit": [64, 70], "waterworld": [64, 70, 84, 87], "benchmark": [64, 73], "stanford": 64, "laboratori": 64, "waterworld_v4": [64, 67, 70, 89], "ve": 64, "major": [64, 67, 68, 70], "discourag": 64, "compar": 64, "inproceed": 64, "gupta2017coop": 64, "gupta": 64, "jayesh": 64, "egorov": 64, "kochenderf": 64, "mykel": 64, "booktitl": 64, "confer": 64, "autonom": [64, 68], "organ": 64, "springer": 64, "sisl": [65, 66, 67, 70, 89], "multiwalker_v9": 65, "walker_0": 65, "walker_1": 65, "walker_2": 65, "biped": 65, "attempt": [65, 67, 82], "carri": 65, "walker": 65, "multipli": [65, 67], "forward_reward": 65, "terrain": 65, "under": [65, 70], "300": 65, "terminate_on_fal": 65, "remove_on_fal": 65, "fallen": 65, "shared_reward": [65, 66], "exert": 65, "joint": 65, "leg": 65, "simul": [65, 67, 68, 82], "noisi": 65, "lidar": 65, "neighbor": [65, 67], "durat": [65, 85], "cap": 65, "500": [65, 66, 67], "properti": 65, "neighbour": 65, "nois": 65, "position_nois": 65, "angle_nois": 65, "hull": 65, "pi": 65, "hip": 65, "knee": 65, "contact": 65, "sensor": [65, 67], "leftmost": 65, "29": [65, 70], 
"n_walker": 65, "terminate_reward": 65, "fall_reward": 65, "terrain_legth": 65, "among": [65, 66, 67, 82], "caus": [65, 70], "rest": 65, "terrain_length": 65, "improv": [65, 70], "qualiti": [65, 70, 73, 92], "pursuit_v4": 66, "pursuer_0": [66, 67], "pursuer_1": [66, 67], "pursuer_7": 66, "evad": 66, "pursuer": [66, 67, 70], "orang": 66, "caught": 66, "prune": 66, "obs_rang": 66, "alli": 66, "n_evad": [66, 67], "n_pursuer": [66, 67], "n_catch": 66, "freeze_evad": 66, "tag_reward": 66, "catch_reward": 66, "urgency_reward": 66, "constraint_window": 66, "toggl": [66, 67, 71], "term_pursuit": 66, "catch": [66, 70, 75], "anywher": 66, "pursuer_4": 67, "242": [67, 89], "archea": 67, "surviv": 67, "consum": 67, "poison": 67, "radiu": 67, "dynam": 67, "barrier": 67, "thrust": 67, "evenli": 67, "report": 67, "speed_featur": 67, "n_sensor": 67, "purpl": 67, "pursuer_max_accel": 67, "exce": 67, "horizontal_thrust": 67, "vertical_thrust": 67, "n_coop": 67, "food_reward": 67, "destroi": 67, "encounter_reward": 67, "poison_reward": 67, "thrust_penalti": 67, "norm": [67, 71], "n_poison": 67, "sensor_rang": 67, "015": 67, "obstacle_radiu": 67, "obstacle_coord": 67, "evader_spe": 67, "poison_spe": 67, "pursu": 67, "dendrit": 67, "pursuer_spe": 67, "v": [67, 83, 85, 86, 88, 89, 93], "guarante": 68, "discord": [68, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "traffic": 68, "flexibl": 68, "tunabl": 68, "scalabl": 68, "miniatur": 68, "f1tenth": 68, "pybullet": 68, "googl": [68, 70], "riot": 68, "fork": 68, "too": 68, "cook": 68, "crazyfli": 68, "drone": 68, "theori": 68, "samaritan": 68, "stag": 68, "hunt": 68, "chicken": 68, "penni": 68, "uniti": 68, "onlin": 68, "playabl": 68, "webgl": 68, "ml": [68, 70, 84, 90, 91, 93], "link": [68, 70], "demo": [68, 70, 86, 88, 89], "webassembli": 68, "drive": 68, "depreci": [68, 74], "massiv": [68, 70], "role": 68, "ssd": 68, "meltingpot": 68, "machin": 68, "ui": 68, "loop": [68, 70, 85], "blizzard": 68, "hearthston": 68, "bot": 68, "secur": [68, 
70], "conflict": [68, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "resolut": 68, "vehicl": 68, "confin": 68, "bomberman": 68, "skyjo": 68, "m\u016b": 68, "t\u014drere": 68, "zealand": 68, "2023": 70, "pypi": 70, "hotfix": 70, "test_action_flex": 70, "986": 70, "parrellenv": 70, "987": 70, "992": 70, "broken": [70, 83, 85], "rai": [70, 83, 85], "project": [70, 71, 79, 83, 85], "34696": [70, 83, 85], "32999": 70, "homepag": 70, "985": 70, "citat": 70, "cff": 70, "990": 70, "carla": 70, "third_party_env": 70, "991": 70, "changelog": 70, "finish": [70, 86, 88, 89], "deprec": 70, "favor": [70, 90], "return_info": 70, "written": [70, 92], "pyproject": 70, "toml": 70, "compli": 70, "pep": 70, "621": 70, "parti": 70, "langchain": 70, "tianshou": 70, "toreset": 70, "953": 70, "875": 70, "890": 70, "964": 70, "aec_wrapper_fn": 70, "parallel_wrapper_fn": 70, "879": 70, "smoother": 70, "882": 70, "894": 70, "970": 70, "renam": 70, "baseparallelwrap": 70, "baseparallelwrapp": 70, "typo": 70, "876": 70, "908": 70, "cast": [70, 86], "975": 70, "980": 70, "874": 70, "ci": 70, "workflow": 70, "886": 70, "lint": 70, "commit": 70, "hook": 70, "835": 70, "resolv": 70, "pytest": [70, 86], "897": 70, "unnecessari": 70, "891": 70, "984": 70, "906": 70, "gobblet": 70, "cathedr": 70, "907": 70, "904": 70, "942": 70, "979": 70, "environmentcr": 70, "903": 70, "972": 70, "readm": [70, 74], "950": 70, "instruct": [70, 71, 82], "968": 70, "883": 70, "demonstr": [70, 82, 87, 91], "text": [70, 81, 82], "logo": 70, "954": 70, "03": [70, 86, 87], "yank": 70, "hard": [70, 81], "address": 70, "pseudo": 70, "rnd": 70, "thought": 70, "reginald": 70, "mclean": 70, "mikcnt": 70, "cibeah": 70, "sushant1212": 70, "kallinteri": 70, "andrea": 70, "mgoulao": 70, "elliottow": [70, 71, 86, 88, 89], "favicon": 70, "themikeste1": 70, "855": 70, "diagram": 70, "856": 70, "magent2": 70, "standalon": 70, "dsctt": 70, "857": 70, "flake8": [70, 71], "gitlab": 70, "redtachyon": 70, "858": 70, "willdudlei": [70, 90, 
91, 93], "867": 70, "865": 70, "pillow": [70, 83, 85], "dependabot": 70, "859": 70, "869": 70, "to_parallel": 70, "andrewrwilliam": 70, "870": 70, "872": 70, "contributor": 70, "840": 70, "bolundai0216": 70, "841": 70, "845": 70, "846": 70, "propos": 70, "844": 70, "tobirohr": 70, "848": 70, "843": 70, "enh": 70, "gui": 70, "younik": 70, "842": 70, "847": 70, "magent": 70, "artifact": 70, "850": 70, "tut": 70, "852": 70, "overhaul": 70, "838": 70, "853": 70, "raffaelegalliera": 70, "854": 70, "817": 70, "821": 70, "core": 70, "822": 70, "analyt": 70, "825": 70, "content": [70, 82], "823": 70, "thing": 70, "829": 70, "wd": 70, "831": 70, "832": 70, "833": 70, "818": 70, "836": 70, "837": 70, "ref": 70, "839": 70, "07": 70, "gt": 70, "5cat": 70, "802": 70, "unwrap": [70, 82, 86, 88, 89], "808": 70, "theme": 70, "804": 70, "permiss": 70, "andrewtanj": 70, "809": 70, "807": 70, "810": 70, "dep": 70, "814": 70, "09": 70, "singular": 70, "dictat": 70, "meet": 70, "exceed": 70, "prospector": 70, "mahjong": 70, "doudizhu": 70, "pull": [70, 83, 85], "qol": 70, "pyright": 70, "pydocstyl": 70, "concatvecenv": 70, "763": 70, "776": 70, "777": 70, "779": 70, "bunch": 70, "781": 70, "hour": 70, "782": 70, "kir0ul": 70, "737": 70, "config": [70, 71, 83, 85], "787": 70, "780": 70, "domain": 70, "789": 70, "767": 70, "788": 70, "796": 70, "simple_env": 70, "filipinogambino": 70, "798": 70, "autom": 70, "790": 70, "08": 70, "technic": 70, "publish": 70, "734": 70, "731": 70, "pyglet": 70, "732": 70, "738": 70, "buggi": 70, "739": 70, "ezpickl": 70, "741": 70, "bkrl": 70, "743": 70, "749": 70, "manu": 70, "hoffmann": 70, "747": 70, "750": 70, "reflect": 70, "748": 70, "06": [70, 83, 85, 86], "streamlin": 70, "04": [70, 86], "manual_control": 70, "frames_per_second": 70, "autodepr": 70, "kaz": 70, "capabililti": 70, "battlefield": 70, "repo": [70, 82], "gather": 70, "is_paralleliz": 70, "misus": 70, "2021": 70, "learnabl": 70, "upstream": 70, "retreiv": 70, "resurect": 70, "unexpect": 
70, "isort": [70, 82], "scipi": 70, "em": [70, 82, 83, 86], "02": 70, "slow": 70, "substanti": 70, "speedup": 70, "art": [70, 87], "n_cycl": 70, "depric": 70, "env_don": 70, "combined_arm": 70, "upcom": 70, "robust": 70, "minimap": 70, "transient": 70, "miscellan": 70, "face": 70, "annoi": 70, "save_ob": 70, "usabl": 70, "random_demo": 70, "redid": 70, "readabl": 70, "impact": 70, "tiger_d": 70, "www": 70, "raw": [70, 86], "max_fram": 70, "hopefulli": 70, "cli": [71, 73, 92], "integr": [71, 74, 81], "tensorboard": [71, 72, 73, 90], "wandb": 71, "experi": [71, 73], "mirror": 71, "dev": 71, "ppo_pettingzoo_ma_ataripi": 71, "newli": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "virtual": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "Then": [71, 87], "question": [71, 72, 81, 82, 83, 85, 86, 88, 89, 90, 91, 93], "feel": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "free": [71, 72, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "ask": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "server": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "adapt": [71, 87], "repositori": [71, 75], "vwxyzjn": 71, "blob": 71, "ppo_pettingzoo_ma_atari": 71, "million": 71, "8000": 71, "costa": 71, "noqa": [71, 82], "argpars": [71, 83, 85, 90], "importlib": 71, "distutil": 71, "strtobool": 71, "ss": [71, 85, 88, 89], "summarywrit": [71, 90], "parse_arg": [71, 83, 85], "fmt": 71, "parser": [71, 83, 85, 90], "argumentpars": [71, 83, 85, 90], "add_argu": [71, 83, 85, 90], "basenam": 71, "__file__": 71, "rstrip": 71, "narg": [71, 90], "const": 71, "backend": 71, "cudnn": 71, "bias": [71, 73], "12000": 71, "2000000": 71, "5e": 71, "num": [71, 90], "rollout": [71, 83, 85], "anneal": [71, 83], "discount": 71, "gae": 71, "95": [71, 88], "estim": 71, "minibatch": 71, "mini": 71, "epoch": [71, 90, 93], "adv": 71, "coef": 71, "surrog": 71, "coeffici": 71, "vloss": 71, "ent": 71, "vf": 71, "grad": 71, "diverg": 71, "threshold": 71, "num_env": 71, "minibatch_s": 71, "num_minibatch": 71, "layer_init": 71, 
"stride": [71, 85], "single_action_spac": 71, "clone": 71, "permut": [71, 85], "run_nam": 71, "env_id": 71, "__": 71, "exp_nam": 71, "wandb_project_nam": 71, "wandb_ent": 71, "sync_tensorboard": 71, "monitor_gym": 71, "save_cod": 71, "writer": [71, 90], "add_text": [71, 90], "hyperparamet": [71, 73, 86, 88], "join": [71, 90, 93], "NOT": 71, "TO": 71, "modifi": 71, "manual_se": [71, 90, 93], "torch_determinist": 71, "import_modul": 71, "pettingzoo_env_to_vec_env_v1": [71, 88, 89], "concat_vec_envs_v1": [71, 88, 89], "num_cpu": [71, 88, 89], "base_class": [71, 88, 89], "single_observation_spac": 71, "is_vector_env": 71, "capture_video": 71, "recordvideo": 71, "learning_r": [71, 88, 89], "global_step": 71, "start_tim": 71, "next_termin": 71, "next_trunc": 71, "num_upd": 71, "total_timestep": [71, 86, 88, 89], "anneal_lr": 71, "frac": 71, "lrnow": 71, "param_group": 71, "todo": [71, 83, 86, 88], "idx": 71, "player_idx": 71, "episodic_return": 71, "add_scalar": 71, "chart": 71, "episodic_length": 71, "next_valu": 71, "lastgaelam": 71, "next_don": 71, "nextnontermin": 71, "nextvalu": 71, "gae_lambda": [71, 88], "b_ind": 71, "clipfrac": 71, "update_epoch": 71, "mb_ind": 71, "newvalu": 71, "mb_advantag": 71, "norm_adv": 71, "clip_vloss": 71, "clip_grad_norm_": 71, "max_grad_norm": [71, 88], "target_kl": 71, "record": 71, "plot": 71, "value_loss": 71, "policy_loss": 71, "explained_vari": 71, "sp": 71, "scratch": [73, 79], "modular": [73, 77, 92], "friendli": 73, "tight": 73, "tune": [73, 83, 85, 86], "leaderboard": 73, "task": 73, "welcom": 74, "short": 74, "concept": 74, "deploy": 74, "laid": 74, "custom_environ": [74, 75, 76, 77], "custom_environment_v0": [74, 75, 76], "helper": 74, "complic": 74, "readi": 74, "scope": 74, "entireti": 74, "customenviron": [74, 75, 76, 77], "fun": 75, "guard": [75, 76], "7x7": 75, "door": 75, "middl": 75, "copi": [75, 76, 90], "multidiscret": [75, 76], "escape_i": [75, 76], "escape_x": [75, 76], "guard_i": [75, 76], "guard_x": [75, 76], 
"prisoner_i": [75, 76], "prisoner_x": [75, 76], "randint": [75, 76], "prisoner_act": [75, 76], "guard_act": [75, 76], "overwrit": [75, 76], "p": [75, 76], "imposs": 76, "prisoner_action_mask": 76, "guard_action_mask": 76, "built": 77, "practic": 77, "simplic": 77, "assum": [77, 86], "root": 77, "tutorial3_action_mask": 77, "1_000_000": 77, "anyon": 79, "framework": [81, 83, 85, 92], "manag": 81, "lot": 81, "fetch": 81, "summar": 81, "answer": 81, "decis": 81, "persist": 81, "beta": 81, "notori": 81, "metric": 81, "themselv": 81, "assist": 81, "internet": 82, "databas": 82, "repl": 82, "tenac": 82, "gymnasiumag": 82, "retri": 82, "output_pars": 82, "regexpars": 82, "schema": 82, "humanmessag": 82, "systemmessag": 82, "classmethod": 82, "get_doc": 82, "cl": 82, "__doc__": 82, "termini": 82, "sum_of_reward": 82, "respond": 82, "action_pars": 82, "regex": 82, "output_kei": 82, "default_output_kei": 82, "message_histori": 82, "ret": 82, "random_act": 82, "rew": [82, 90, 93], "obs_messag": 82, "_act": 82, "act_messag": 82, "stop_after_attempt": 82, "wait": 82, "wait_non": 82, "retry_if_exception_typ": 82, "valueerror": [82, 86, 88, 89], "before_sleep": 82, "retry_st": 82, "occur": 82, "outcom": 82, "retryerror": 82, "f841": 82, "pettingzooag": 82, "extend": [82, 90], "gymnasium_ag": 82, "getmodul": 82, "actionmaskag": 82, "subclass": 82, "pettingzoo_ag": 82, "obs_buff": 82, "dequ": 82, "maxlen": 82, "valid_action_instruct": 82, "accord": 82, "int8": 82, "_____": 82, "chat_model": 82, "chatopenai": 82, "action_masking_ag": 82, "rock_paper_scissor": 82, "temperatur": 82, "tic_tac_to": [82, 90], "tensorflow": [83, 85], "rohan": [83, 85], "rohan138": [83, 85], "dqnconfig": 83, "dqn_torch_model": 83, "dqntorchmodel": 83, "pettingzooenv": [83, 85, 90, 91, 93], "modelcatalog": [83, 85], "fcnet": 83, "fullyconnectednetwork": 83, "torchfc": 83, "try_import_torch": 83, "torch_util": 83, "float_max": 83, "registri": [83, 85], "register_env": [83, 85], "torchmaskedact": 83, 
"pytorch": [83, 87, 92], "parametricactionsmodel": 83, "obs_spac": [83, 85], "num_output": [83, 85], "model_config": 83, "kw": 83, "obs_len": 83, "orig_obs_spac": 83, "action_embed_model": 83, "_action_emb": 83, "input_dict": [83, 85], "seq_len": [83, 85], "extract": 83, "embed": 83, "action_logit": 83, "probit": 83, "inf_mask": 83, "1e10": 83, "value_funct": [83, 85], "alg_nam": 83, "register_custom_model": [83, 85], "pa_model": 83, "env_creat": [83, 85], "env_nam": [83, 85], "test_env": [83, 90, 93], "act_spac": [83, 85], "num_rollout_work": [83, 85], "rollout_fragment_length": [83, 85], "train_batch_s": [83, 85], "duel": 83, "custom_model": 83, "multi_ag": 83, "policy_mapping_fn": 83, "agent_id": [83, 90], "num_gpu": [83, 85], "rllib_num_gpu": [83, 85], "log_level": [83, 85], "exploration_config": 83, "epsilongreedi": 83, "initial_epsilon": 83, "final_epsilon": 83, "epsilon_timestep": 83, "epsilon": 83, "timesteps_tot": [83, 85], "10000000": 83, "checkpoint_freq": [83, 85], "to_dict": [83, 85], "leduoc": 83, "pettingzoo_env": [83, 85, 90, 93], "rllib_leduc_holdem": 83, "sdl_videodriv": [83, 85], "pretrain": [83, 85], "checkpoint": [83, 85], "ray_result": [83, 85], "ppo_pistonball_v6_660ce_00000_0_2021": [83, 85], "11_12": [83, 85], "checkpoint_000050": [83, 85], "checkpoint_path": [83, 85], "expandus": [83, 85], "dqnagent": 83, "from_checkpoint": [83, 85], "reward_sum": [83, 85], "get_polici": 83, "batch_ob": 83, "expand_dim": 83, "batched_act": 83, "state_out": 83, "compute_actions_from_input_dict": 83, "single_act": 83, "dqn": [84, 90, 92, 93], "industri": 84, "grade": 84, "independ": 84, "leela": 84, "proxim": [85, 86, 88, 89], "ppoconfig": 85, "parallelpettingzooenv": 85, "torch_modelv2": 85, "torchmodelv2": 85, "cnnmodelv2": 85, "3136": 85, "policy_fn": 85, "value_fn": 85, "model_out": 85, "_value_out": 85, "clip_act": 85, "2e": 85, "lambda_": 85, "use_ga": 85, "clip_param": 85, "grad_clip": 85, "entropy_coeff": 85, "vf_loss_coeff": 85, "sgd_minibatch_s": 
85, "num_sgd_it": 85, "5000000": 85, "local_dir": 85, "pil": 85, "rllib_pistonbal": 85, "ppoagent": 85, "frame_list": 85, "compute_single_act": 85, "img": 85, "fromarrai": 85, "gif": 85, "save_al": 85, "append_imag": 85, "maskabl": 86, "disk": [86, 88, 89], "mlp": [86, 87, 89], "extractor": [86, 88, 89], "sb3actionmaskwrapp": 86, "stabl": [86, 88, 89], "baselines3": [86, 88, 89], "contrib": 86, "readthedoc": [86, 88, 89], "io": [86, 87, 88, 89], "ppo_mask": 86, "glob": [86, 88, 89], "sb3_contrib": 86, "maskableppo": 86, "maskableactorcriticpolici": 86, "actionmask": 86, "strip": 86, "mask_fn": 86, "whatev": 86, "reli": 86, "train_action_mask": 86, "10_000": [86, 88, 89], "env_kwarg": [86, 88, 89], "behav": 86, "action_mask_fn": 86, "did": 86, "earlier": [86, 90], "draft": 86, "verbos": [86, 88, 89], "set_random_se": 86, "strftime": [86, 88, 89], "eval_action_mask": 86, "num_gam": [86, 88, 89], "latest_polici": [86, 88, 89], "getctim": [86, 88, 89], "total_reward": 86, "round_reward": 86, "tie": 86, "winrat": 86, "incl": 86, "10k": 86, "76": 86, "20k": 86, "86": 86, "40k": 86, "7e": 86, "laptop": [86, 88], "20_480": 86, "80": 86, "watch": [86, 88, 89, 90], "sb3": 87, "cnn": [87, 88, 89], "resiz": [87, 88], "iclr": 87, "sub": 87, "consequ": 87, "shelf": 87, "post": 87, "leverag": [88, 89], "multithread": [88, 89], "blank": 88, "__future__": [88, 89], "annot": [88, 89], "stable_baselines3": [88, 89], "mlppolici": [88, 89], "markovvectorenv": 88, "black_death": 88, "black_death_v3": 88, "n_step": [88, 90, 93], "0905168": 88, "00062211": 88, "042202": 88, "n_epoch": 88, "clip_rang": 88, "nstart": [88, 89], "avg_reward": [88, 89], "avg_reward_per_ag": 88, "avg": [88, 89], "seem": 88, "wors": 88, "81_920": 88, "train_butterfly_supersuit": 89, "he": 89, "gpu": 89, "196_608": 89, "significantli": 89, "tic": [90, 93], "tac": [90, 93], "toe": [90, 93], "Will": [90, 91, 93], "git": [90, 91, 93], "deepcopi": 90, "collector": [90, 91, 93], "vectorreplaybuff": [90, 93], 
"dummyvectorenv": [90, 91, 93], "basepolici": [90, 93], "dqnpolici": [90, 93], "multiagentpolicymanag": [90, 91, 93], "randompolici": [90, 91, 93], "trainer": [90, 93], "offpolicy_train": [90, 93], "tensorboardlogg": 90, "get_pars": 90, "1626": 90, "buffer": 90, "smaller": 90, "freq": 90, "320": [90, 93], "logdir": 90, "store_tru": 90, "resum": 90, "pth": [90, 93], "get_arg": 90, "namespac": 90, "parse_known_arg": 90, "get_ag": 90, "agent_learn": [90, 93], "agent_oppon": [90, 93], "get_env": 90, "state_shap": [90, 93], "action_shap": [90, 93], "hidden_s": [90, 93], "target_update_freq": [90, 93], "resume_path": 90, "load_state_dict": 90, "opponent_path": 90, "train_ag": 90, "train_env": [90, 93], "training_num": [90, 93], "test_num": 90, "train_collector": [90, 93], "buffer_s": 90, "exploration_nois": [90, 93], "test_collector": [90, 93], "set_ep": [90, 93], "log_path": 90, "callback": [90, 93], "save_best_fn": [90, 93], "hasattr": 90, "model_save_path": [90, 93], "state_dict": [90, 93], "stop_fn": [90, 93], "mean_reward": [90, 93], "win_rat": 90, "train_fn": [90, 93], "env_step": [90, 93], "eps_train": 90, "test_fn": [90, 93], "eps_test": 90, "reward_metr": [90, 93], "step_per_epoch": [90, 93], "step_per_collect": [90, 93], "update_per_step": [90, 93], "test_in_train": [90, 93], "n_episod": [90, 91], "betwenen": 91, "vectoris": 91, "introduct": 92, "fast": 92, "build": 92, "4000": 92, "boast": 92, "thorough": 92, "comprehens": 92, "_get_ag": 93, "_get_env": 93, "discount_factor": 93, "estimation_step": 93, "callabl": 93, "20_000": 93, "ttt": 93, "makedir": 93, "exist_ok": 93, "max_epoch": 93, "episode_per_test": 93}, "objects": {"": [[8, 0, 1, "", "agent_indicator_v0"], [8, 0, 1, "", "black_death_v2"], [8, 0, 1, "", "clip_actions_v0"], [8, 0, 1, "", "clip_reward_v0"], [8, 0, 1, "", "color_reduction_v0"], [8, 0, 1, "", "delay_observations_v0"], [8, 0, 1, "", "dtype_v0"], [8, 0, 1, "", "flatten_v0"], [8, 0, 1, "", "frame_skip_v0"], [8, 0, 1, "", "frame_stack_v1"], 
[8, 0, 1, "", "max_observation_v0"], [8, 0, 1, "", "nan_noop_v0"], [8, 0, 1, "", "nan_random_v0"], [8, 0, 1, "", "nan_zeros_v0"], [8, 0, 1, "", "normalize_obs_v0"], [8, 0, 1, "", "pad_action_space_v0"], [8, 0, 1, "", "pad_observations_v0"], [8, 0, 1, "", "reshape_v0"], [8, 0, 1, "", "resize_v1"], [8, 0, 1, "", "scale_actions_v0"], [8, 0, 1, "", "sticky_actions_v0"]], "pettingzoo.atari.basketball_pong.basketball_pong": [[14, 1, 1, "", "raw_env"]], "pettingzoo.atari.boxing.boxing": [[15, 1, 1, "", "raw_env"]], "pettingzoo.atari.combat_plane.combat_plane": [[16, 1, 1, "", "raw_env"]], "pettingzoo.atari.combat_tank.combat_tank": [[17, 1, 1, "", "raw_env"]], "pettingzoo.atari.double_dunk.double_dunk": [[18, 1, 1, "", "raw_env"]], "pettingzoo.atari.entombed_competitive.entombed_competitive": [[19, 1, 1, "", "raw_env"]], "pettingzoo.atari.entombed_cooperative.entombed_cooperative": [[20, 1, 1, "", "raw_env"]], "pettingzoo.atari.flag_capture.flag_capture": [[21, 1, 1, "", "raw_env"]], "pettingzoo.atari.foozpong.foozpong": [[22, 1, 1, "", "raw_env"]], "pettingzoo.atari.ice_hockey.ice_hockey": [[23, 1, 1, "", "raw_env"]], "pettingzoo.atari.joust.joust": [[24, 1, 1, "", "raw_env"]], "pettingzoo.atari.mario_bros.mario_bros": [[25, 1, 1, "", "raw_env"]], "pettingzoo.atari.maze_craze.maze_craze": [[26, 1, 1, "", "raw_env"]], "pettingzoo.atari.othello.othello": [[27, 1, 1, "", "raw_env"]], "pettingzoo.atari.pong.pong": [[28, 1, 1, "", "raw_env"]], "pettingzoo.atari.quadrapong.quadrapong": [[29, 1, 1, "", "raw_env"]], "pettingzoo.atari.space_invaders.space_invaders": [[30, 1, 1, "", "raw_env"]], "pettingzoo.atari.space_war.space_war": [[31, 1, 1, "", "raw_env"]], "pettingzoo.atari.surround.surround": [[32, 1, 1, "", "raw_env"]], "pettingzoo.atari.tennis.tennis": [[33, 1, 1, "", "raw_env"]], "pettingzoo.atari.video_checkers.video_checkers": [[34, 1, 1, "", "raw_env"]], "pettingzoo.atari.volleyball_pong.volleyball_pong": [[35, 1, 1, "", "raw_env"]], 
"pettingzoo.atari.warlords.warlords": [[36, 1, 1, "", "raw_env"]], "pettingzoo.atari.wizard_of_wor.wizard_of_wor": [[37, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.cooperative_pong.cooperative_pong": [[39, 1, 1, "", "env"], [39, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env": [[39, 2, 1, "", "action_space"], [39, 2, 1, "", "close"], [39, 2, 1, "", "observation_space"], [39, 2, 1, "", "observe"], [39, 2, 1, "", "render"], [39, 2, 1, "", "reset"], [39, 2, 1, "", "state"], [39, 2, 1, "", "step"]], "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies": [[40, 1, 1, "", "env"], [40, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env": [[40, 2, 1, "", "action_space"], [40, 2, 1, "", "close"], [40, 2, 1, "", "observation_space"], [40, 2, 1, "", "observe"], [40, 2, 1, "", "render"], [40, 2, 1, "", "reset"], [40, 2, 1, "", "state"], [40, 2, 1, "", "step"]], "pettingzoo.butterfly.pistonball.pistonball": [[41, 1, 1, "", "env"], [41, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.pistonball.pistonball.raw_env": [[41, 2, 1, "", "action_space"], [41, 2, 1, "", "close"], [41, 2, 1, "", "observation_space"], [41, 2, 1, "", "observe"], [41, 2, 1, "", "render"], [41, 2, 1, "", "reset"], [41, 2, 1, "", "state"], [41, 2, 1, "", "step"]], "pettingzoo.classic.chess.chess": [[43, 1, 1, "", "env"], [43, 1, 1, "", "raw_env"]], "pettingzoo.classic.chess.chess.raw_env": [[43, 2, 1, "", "action_space"], [43, 2, 1, "", "close"], [43, 2, 1, "", "observation_space"], [43, 2, 1, "", "observe"], [43, 2, 1, "", "render"], [43, 2, 1, "", "reset"], [43, 2, 1, "", "step"]], "pettingzoo.classic.connect_four.connect_four": [[44, 1, 1, "", "env"], [44, 1, 1, "", "raw_env"]], "pettingzoo.classic.connect_four.connect_four.raw_env": [[44, 2, 1, "", "action_space"], [44, 2, 1, "", "close"], [44, 2, 1, "", "observation_space"], [44, 2, 1, "", "observe"], [44, 2, 1, "", "render"], [44, 2, 1, "", "reset"], 
[44, 2, 1, "", "step"]], "pettingzoo.classic.go.go": [[46, 1, 1, "", "env"], [46, 1, 1, "", "raw_env"]], "pettingzoo.classic.go.go.raw_env": [[46, 2, 1, "", "action_space"], [46, 2, 1, "", "close"], [46, 2, 1, "", "observation_space"], [46, 2, 1, "", "observe"], [46, 2, 1, "", "render"], [46, 2, 1, "", "reset"], [46, 2, 1, "", "step"]], "pettingzoo.classic.hanabi.hanabi": [[47, 1, 1, "", "env"], [47, 1, 1, "", "raw_env"]], "pettingzoo.classic.hanabi.hanabi.raw_env": [[47, 2, 1, "", "action_space"], [47, 2, 1, "", "close"], [47, 2, 1, "", "observation_space"], [47, 2, 1, "", "observe"], [47, 2, 1, "", "render"], [47, 2, 1, "", "reset"], [47, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.gin_rummy": [[45, 1, 1, "", "env"], [45, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env": [[45, 2, 1, "", "observe"], [45, 2, 1, "", "render"], [45, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.leduc_holdem": [[48, 1, 1, "", "env"], [48, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env": [[48, 2, 1, "", "render"], [48, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.texas_holdem": [[50, 1, 1, "", "env"], [50, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.texas_holdem.raw_env": [[50, 2, 1, "", "render"], [50, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit": [[51, 1, 1, "", "env"], [51, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env": [[51, 2, 1, "", "render"], [51, 2, 1, "", "step"]], "pettingzoo.classic.rps.rps": [[49, 1, 1, "", "env"], [49, 1, 1, "", "raw_env"]], "pettingzoo.classic.rps.rps.raw_env": [[49, 2, 1, "", "action_space"], [49, 2, 1, "", "close"], [49, 2, 1, "", "observation_space"], [49, 2, 1, "", "observe"], [49, 2, 1, "", "render"], [49, 2, 1, "", "reset"], [49, 2, 1, "", "step"]], "pettingzoo.classic.tictactoe.tictactoe": [[52, 1, 1, "", "env"], [52, 1, 1, "", "raw_env"]], "pettingzoo.classic.tictactoe.tictactoe.raw_env": [[52, 
2, 1, "", "action_space"], [52, 2, 1, "", "close"], [52, 2, 1, "", "observation_space"], [52, 2, 1, "", "observe"], [52, 2, 1, "", "render"], [52, 2, 1, "", "reset"], [52, 2, 1, "", "step"]], "pettingzoo.mpe.simple.simple": [[55, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple.simple.raw_env": [[55, 3, 1, "", "action_spaces"], [55, 3, 1, "", "agent_selection"], [55, 3, 1, "", "agents"], [55, 3, 1, "", "infos"], [55, 3, 1, "", "observation_spaces"], [55, 3, 1, "", "possible_agents"], [55, 3, 1, "", "rewards"], [55, 3, 1, "", "terminations"], [55, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_adversary.simple_adversary": [[56, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env": [[56, 3, 1, "", "action_spaces"], [56, 3, 1, "", "agent_selection"], [56, 3, 1, "", "agents"], [56, 3, 1, "", "infos"], [56, 3, 1, "", "observation_spaces"], [56, 3, 1, "", "possible_agents"], [56, 3, 1, "", "rewards"], [56, 3, 1, "", "terminations"], [56, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_crypto.simple_crypto": [[57, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env": [[57, 3, 1, "", "action_spaces"], [57, 3, 1, "", "agent_selection"], [57, 3, 1, "", "agents"], [57, 3, 1, "", "infos"], [57, 3, 1, "", "observation_spaces"], [57, 3, 1, "", "possible_agents"], [57, 3, 1, "", "rewards"], [57, 3, 1, "", "terminations"], [57, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_push.simple_push": [[58, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_push.simple_push.raw_env": [[58, 3, 1, "", "action_spaces"], [58, 3, 1, "", "agent_selection"], [58, 3, 1, "", "agents"], [58, 3, 1, "", "infos"], [58, 3, 1, "", "observation_spaces"], [58, 3, 1, "", "possible_agents"], [58, 3, 1, "", "rewards"], [58, 3, 1, "", "terminations"], [58, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_reference.simple_reference": [[59, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_reference.simple_reference.raw_env": [[59, 3, 1, "", "action_spaces"], [59, 
3, 1, "", "agent_selection"], [59, 3, 1, "", "agents"], [59, 3, 1, "", "infos"], [59, 3, 1, "", "observation_spaces"], [59, 3, 1, "", "possible_agents"], [59, 3, 1, "", "rewards"], [59, 3, 1, "", "terminations"], [59, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener": [[60, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env": [[60, 3, 1, "", "action_spaces"], [60, 3, 1, "", "agent_selection"], [60, 3, 1, "", "agents"], [60, 3, 1, "", "infos"], [60, 3, 1, "", "observation_spaces"], [60, 3, 1, "", "possible_agents"], [60, 3, 1, "", "rewards"], [60, 3, 1, "", "terminations"], [60, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_spread.simple_spread": [[61, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_spread.simple_spread.raw_env": [[61, 3, 1, "", "action_spaces"], [61, 3, 1, "", "agent_selection"], [61, 3, 1, "", "agents"], [61, 3, 1, "", "infos"], [61, 3, 1, "", "observation_spaces"], [61, 3, 1, "", "possible_agents"], [61, 3, 1, "", "rewards"], [61, 3, 1, "", "terminations"], [61, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_tag.simple_tag": [[62, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_tag.simple_tag.raw_env": [[62, 3, 1, "", "action_spaces"], [62, 3, 1, "", "agent_selection"], [62, 3, 1, "", "agents"], [62, 3, 1, "", "infos"], [62, 3, 1, "", "observation_spaces"], [62, 3, 1, "", "possible_agents"], [62, 3, 1, "", "rewards"], [62, 3, 1, "", "terminations"], [62, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_world_comm.simple_world_comm": [[63, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env": [[63, 3, 1, "", "action_spaces"], [63, 3, 1, "", "agent_selection"], [63, 3, 1, "", "agents"], [63, 3, 1, "", "infos"], [63, 3, 1, "", "observation_spaces"], [63, 3, 1, "", "possible_agents"], [63, 3, 1, "", "rewards"], [63, 3, 1, "", "terminations"], [63, 3, 1, "", "truncations"]], "pettingzoo.sisl.multiwalker.multiwalker": [[65, 1, 1, "", 
"env"], [65, 1, 1, "", "raw_env"]], "pettingzoo.sisl.multiwalker.multiwalker.raw_env": [[65, 2, 1, "", "action_space"], [65, 2, 1, "", "close"], [65, 2, 1, "", "observation_space"], [65, 2, 1, "", "observe"], [65, 2, 1, "", "render"], [65, 2, 1, "", "reset"], [65, 2, 1, "", "step"]], "pettingzoo.sisl.pursuit.pursuit": [[66, 1, 1, "", "env"], [66, 1, 1, "", "raw_env"]], "pettingzoo.sisl.pursuit.pursuit.raw_env": [[66, 2, 1, "", "action_space"], [66, 2, 1, "", "close"], [66, 2, 1, "", "observation_space"], [66, 2, 1, "", "observe"], [66, 2, 1, "", "render"], [66, 2, 1, "", "reset"], [66, 2, 1, "", "step"]], "pettingzoo.sisl.waterworld.waterworld": [[67, 1, 1, "", "env"], [67, 1, 1, "", "raw_env"]], "pettingzoo.sisl.waterworld.waterworld.raw_env": [[67, 2, 1, "", "action_space"], [67, 2, 1, "", "close"], [67, 2, 1, "", "observation_space"], [67, 2, 1, "", "observe"], [67, 2, 1, "", "render"], [67, 2, 1, "", "reset"], [67, 2, 1, "", "step"]], "pettingzoo.utils": [[6, 4, 0, "-", "conversions"]], "pettingzoo.utils.conversions": [[6, 0, 1, "", "aec_to_parallel"], [6, 0, 1, "", "parallel_to_aec"]], "pettingzoo.utils.env": [[2, 1, 1, "", "AECEnv"], [3, 1, 1, "", "ParallelEnv"]], "pettingzoo.utils.env.AECEnv": [[2, 3, 1, "", "action_spaces"], [2, 3, 1, "", "agent_selection"], [2, 3, 1, "", "agents"], [2, 2, 1, "", "close"], [2, 3, 1, "", "infos"], [2, 3, 1, "", "max_num_agents"], [2, 3, 1, "", "num_agents"], [2, 3, 1, "", "observation_spaces"], [2, 2, 1, "", "observe"], [2, 3, 1, "", "possible_agents"], [2, 2, 1, "", "render"], [2, 2, 1, "", "reset"], [2, 3, 1, "", "rewards"], [2, 2, 1, "", "step"], [2, 3, 1, "", "terminations"], [2, 3, 1, "", "truncations"]], "pettingzoo.utils.env.ParallelEnv": [[3, 2, 1, "", "action_space"], [3, 3, 1, "", "action_spaces"], [3, 3, 1, "", "agents"], [3, 2, 1, "", "close"], [3, 3, 1, "", "max_num_agents"], [3, 3, 1, "", "num_agents"], [3, 2, 1, "", "observation_space"], [3, 3, 1, "", "observation_spaces"], [3, 3, 1, "", "possible_agents"], 
[3, 2, 1, "", "render"], [3, 2, 1, "", "reset"], [3, 2, 1, "", "state"], [3, 2, 1, "", "step"]], "pettingzoo.utils.wrappers": [[6, 1, 1, "", "AssertOutOfBoundsWrapper"], [6, 1, 1, "", "BaseWrapper"], [6, 1, 1, "", "CaptureStdoutWrapper"], [6, 1, 1, "", "ClipOutOfBoundsWrapper"], [6, 1, 1, "", "OrderEnforcingWrapper"], [6, 1, 1, "", "TerminateIllegalWrapper"]]}, "objtypes": {"0": "py:function", "1": "py:class", "2": "py:method", "3": "py:attribute", "4": "py:module"}, "objnames": {"0": ["py", "function", "Python function"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "module", "Python module"]}, "titleterms": {"404": 0, "page": [0, 1], "Not": 0, "found": 0, "The": 0, "request": 0, "could": 0, "pettingzoo": [1, 6, 68, 70, 73, 82, 84, 86, 92], "doc": 1, "edit": 1, "an": [1, 9], "environ": [1, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 47, 54, 68, 71, 72, 75, 77, 79, 82, 83, 84, 85, 86, 88, 89, 90, 91, 93], "build": 1, "document": 1, "aec": [2, 6, 68], "api": [2, 3, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 91], "usag": [2, 3, 7, 9, 13, 38, 42, 54, 64, 91], "action": [2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 67, 76, 82, 86], "mask": [2, 43, 44, 45, 46, 47, 48, 50, 51, 52, 76, 82, 86], "aecenv": 2, "attribut": 2, "method": 2, "parallel": [3, 6, 10, 11], "parallelenv": 3, "util": [4, 6, 10], "averag": 4, "total": 4, "reward": [4, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 67], "observ": [4, 11, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 67], "save": [4, 11], "wrapper": [5, 6, 7, 8, 10], "us": [5, 10, 68, 73, 84, 92], "convers": 6, "shimmi": 7, 
"compat": 7, "support": 7, "multi": [7, 8], "agent": [7, 8, 9, 10, 68, 81, 82, 83, 85, 93], "openspiel": 7, "deepmind": 7, "control": [7, 39, 40, 66], "soccer": 7, "melt": 7, "pot": 7, "citat": [7, 8, 13, 54], "supersuit": 8, "includ": 8, "function": 8, "onli": 8, "basic": [9, 91], "instal": [9, 13, 38, 42, 54, 64], "initi": 9, "interact": 9, "With": 9, "addit": [9, 74], "option": [9, 74], "compon": 9, "notabl": 9, "idiom": 9, "check": 9, "entir": 9, "i": 9, "done": 9, "unwrap": 9, "variabl": 9, "number": 9, "death": 9, "raw": 9, "creation": [10, 79], "exampl": [10, 73, 84, 92], "custom": 10, "develop": 10, "selector": 10, "deprec": 10, "modul": 10, "test": [11, 77, 86], "seed": 11, "max": 11, "cycl": 11, "render": [11, 54], "perform": 11, "benchmark": 11, "tutori": [12, 73, 74, 75, 76, 77, 79, 81, 84, 87, 92], "recommend": 12, "start": 12, "atari": 13, "game": [13, 49, 68], "overview": [13, 73, 81, 84, 87, 92], "detail": 13, "preprocess": 13, "common": 13, "paramet": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "basketbal": 14, "pong": [14, 28, 35, 39], "space": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 67], "minim": [14, 21, 22, 27, 28, 29, 30, 32, 34, 35, 36], "version": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 68], "histori": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66], "box": 15, "combat": [16, 17], "plane": 16, "tank": 17, "doubl": 18, "dunk": 18, "emtomb": [19, 20], "competit": 19, "cooper": [20, 39], "flag": 21, "captur": 21, "foozpong": 22, "ic": 23, "hockei": 23, "joust": 24, "mario": 25, "bro": 25, "maze": 26, "craze": 26, "othello": 27, "quadrapong": 29, "invad": 30, 
"war": 31, "surround": 32, "tenni": 33, "video": 34, "checker": 34, "volleybal": 35, "warlord": 36, "wizard": 37, "wor": 37, "butterfli": 38, "manual": [39, 40, 66], "argument": [39, 40, 41, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "knight": [40, 88], "archer": [40, 88], "zombi": [40, 88], "kaz": 40, "vector": 40, "default": 40, "imag": 40, "base": 40, "pistonbal": [41, 85], "classic": [42, 86], "chess": 43, "legal": [43, 44, 45, 46, 47, 48, 50, 51, 52], "connect": [44, 86], "four": [44, 86], "gin": 45, "rummi": 45, "go": 46, "hanabi": 47, "leduc": 48, "hold": [48, 50, 51], "em": [48, 50, 51], "rock": [49, 82], "paper": [49, 82], "scissor": [49, 82], "expand": 49, "texa": [50, 51, 82], "No": [51, 82], "limit": [51, 82], "tic": [52, 82], "tac": [52, 82], "toe": [52, 82], "mpe": 54, "type": 54, "kei": 54, "concept": 54, "termin": 54, "simpl": [55, 56, 57, 58, 59, 60, 61, 62, 63, 83], "adversari": 56, "crypto": 57, "push": 58, "refer": 59, "speaker": 60, "listen": 60, "spread": 61, "tag": 62, "world": 63, "comm": 63, "sisl": 64, "multiwalk": 65, "pursuit": 66, "waterworld": [67, 89], "third": 68, "parti": 68, "latest": 68, "sumo": 68, "rl": [68, 83, 85], "pogema": 68, "racecar": 68, "gym": 68, "teamfight": 68, "tactic": 68, "muzero": 68, "cookingzoo": 68, "crazi": 68, "dilemma": 68, "env": 68, "breakout": 68, "clone": 68, "gobblet": 68, "cathedr": 68, "carla": 68, "older": 68, "neural": 68, "mmo": 68, "sequenti": 68, "social": 68, "kaggl": 68, "cogment": 68, "vers": 68, "stone": 68, "ground": 68, "hearth": 68, "battl": 68, "cyber": 68, "oper": 68, "research": 68, "conflict_rez": 68, "pz": 68, "battlesnak": 68, "bombermanai": 68, "fanorona": 68, "galaga": 68, "ai": 68, "skyjo_rl": 68, "mu": 68, "torer": 68, "releas": 70, "note": 70, "1": 70, "23": 70, "0": 70, "22": 70, "4": 70, "3": 70, "2": 70, "21": 70, "20": 70, "19": 70, "18": 70, "17": 70, "16": 70, "15": 70, "14": 70, "13": 70, "12": 70, "11": 70, "10": 70, "9": 70, "8": 70, 
"7": 70, "6": 70, "5": 70, "cleanrl": [71, 72, 73], "advanc": [71, 74], "ppo": [71, 72, 85, 86, 88, 89], "setup": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "code": [71, 72, 74, 75, 76, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "implement": 72, "wandb": 73, "integr": 73, "repositori": 74, "structur": 74, "introduct": [74, 75, 76, 77], "tree": 74, "file": 74, "skeleton": 74, "logic": 75, "your": 77, "langchain": [81, 82], "llm": [81, 82], "prompt": 81, "chain": 81, "data": 81, "augment": 81, "gener": 81, "memori": 81, "evalu": [81, 86, 88, 89], "creat": 82, "loop": 82, "gymnasium": 82, "holdem": 82, "full": 82, "rllib": [83, 84, 85], "dqn": 83, "poker": 83, "train": [83, 84, 85, 86, 88, 89, 93], "watch": [83, 85], "plai": [83, 85], "rai": 84, "architectur": [84, 92], "sb3": [86, 88, 89], "other": 86, "stabl": 87, "baselines3": 87, "baselin": 87, "tianshou": [90, 91, 92, 93], "cli": 90, "log": 90}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"404 - Page Not Found": [[0, "page-not-found"]], "The requested page could not be found.": [[0, "the-requested-page-could-not-be-found"]], "PettingZoo docs": [[1, "pettingzoo-docs"]], "Editing an environment page": [[1, "editing-an-environment-page"]], "Build the Documentation": [[1, "build-the-documentation"]], "AEC API": [[2, "aec-api"]], "Usage": [[2, "usage"], [3, "usage"], [7, "usage"], [13, "usage"], [38, "usage"], [42, "usage"], [54, "usage"], [64, "usage"]], "Action Masking": [[2, "action-masking"]], "AECEnv": [[2, "aecenv"]], "Attributes": [[2, "attributes"]], "Methods": [[2, "methods"]], "Parallel API": [[3, "parallel-api"]], "ParallelEnv": [[3, "parallelenv"]], "Utils": [[4, "utils"]], "Average 
Total Reward": [[4, "average-total-reward"]], "Observation Saving": [[4, "observation-saving"]], "Wrappers": [[5, "wrappers"]], "Using Wrappers": [[5, "using-wrappers"], [10, "using-wrappers"]], "PettingZoo Wrappers": [[6, "pettingzoo-wrappers"]], "Conversion wrappers": [[6, "conversion-wrappers"]], "AEC to Parallel": [[6, "module-pettingzoo.utils.conversions"]], "Parallel to AEC": [[6, "module-pettingzoo.utils.conversions"]], "Utility Wrappers": [[6, "utility-wrappers"]], "Shimmy Compatibility Wrappers": [[7, "shimmy-compatibility-wrappers"]], "Supported multi-agent environments:": [[7, "supported-multi-agent-environments"]], "OpenSpiel": [[7, "openspiel"]], "DeepMind Control Soccer": [[7, "deepmind-control-soccer"]], "DeepMind Melting Pot": [[7, "deepmind-melting-pot"]], "Multi-Agent Compatibility Wrappers:": [[7, "multi-agent-compatibility-wrappers"]], "Citation": [[7, "citation"], [8, "citation"], [13, "citation"], [54, "citation"]], "Supersuit Wrappers": [[8, "supersuit-wrappers"]], "Included Functions": [[8, "included-functions"]], "Included Multi-Agent Only Functions": [[8, "included-multi-agent-only-functions"]], "Basic Usage": [[9, "basic-usage"]], "Installation": [[9, "installation"], [13, "installation"], [38, "installation"], [42, "installation"], [54, "installation"], [64, "installation"]], "Initializing Environments": [[9, "initializing-environments"]], "Interacting With Environments": [[9, "interacting-with-environments"]], "Additional Environment API": [[9, "additional-environment-api"]], "Optional API Components": [[9, "optional-api-components"]], "Notable Idioms": [[9, "notable-idioms"]], "Checking if the entire environment is done": [[9, "checking-if-the-entire-environment-is-done"]], "Unwrapping an environment": [[9, "unwrapping-an-environment"]], "Variable Numbers of Agents (Death)": [[9, "variable-numbers-of-agents-death"]], "Environment as an Agent": [[9, "environment-as-an-agent"]], "Raw Environments": [[9, "raw-environments"]], "Environment 
Creation": [[10, "environment-creation"]], "Example Custom Environment": [[10, "example-custom-environment"]], "Example Custom Parallel Environment": [[10, "example-custom-parallel-environment"]], "Developer Utils": [[10, "developer-utils"]], "Agent selector": [[10, "agent-selector"]], "Deprecated Module": [[10, "deprecated-module"]], "Testing Environments": [[11, "testing-environments"]], "API Test": [[11, "api-test"]], "Parallel API Test": [[11, "parallel-api-test"]], "Seed Test": [[11, "seed-test"]], "Max Cycles Test": [[11, "max-cycles-test"]], "Render Test": [[11, "render-test"]], "Performance Benchmark Test": [[11, "performance-benchmark-test"]], "Save Observation Test": [[11, "save-observation-test"]], "Tutorials": [[12, "tutorials"]], "Recommended start": [[12, "recommended-start"]], "Atari": [[13, "atari"]], "Games Overview": [[13, "games-overview"]], "Environment Details": [[13, "environment-details"]], "Preprocessing": [[13, "preprocessing"]], "Common Parameters": [[13, "common-parameters"]], "Basketball Pong": [[14, "basketball-pong"]], "Environment parameters": [[14, "environment-parameters"], [15, "environment-parameters"], [16, "environment-parameters"], [17, "environment-parameters"], [18, "environment-parameters"], [19, "environment-parameters"], [20, "environment-parameters"], [21, "environment-parameters"], [22, "environment-parameters"], [23, "environment-parameters"], [24, "environment-parameters"], [25, "environment-parameters"], [26, "environment-parameters"], [27, "environment-parameters"], [28, "environment-parameters"], [29, "environment-parameters"], [30, "environment-parameters"], [31, "environment-parameters"], [32, "environment-parameters"], [33, "environment-parameters"], [34, "environment-parameters"], [35, "environment-parameters"], [36, "environment-parameters"], [37, "environment-parameters"]], "Action Space (Minimal)": [[14, "action-space-minimal"], [21, "action-space-minimal"], [22, "action-space-minimal"], [27, 
"action-space-minimal"], [28, "action-space-minimal"], [29, "action-space-minimal"], [30, "action-space-minimal"], [32, "action-space-minimal"], [34, "action-space-minimal"], [35, "action-space-minimal"], [36, "action-space-minimal"]], "Version History": [[14, "version-history"], [15, "version-history"], [16, "version-history"], [17, "version-history"], [18, "version-history"], [19, "version-history"], [20, "version-history"], [21, "version-history"], [22, "version-history"], [23, "version-history"], [24, "version-history"], [25, "version-history"], [26, "version-history"], [27, "version-history"], [28, "version-history"], [29, "version-history"], [30, "version-history"], [31, "version-history"], [32, "version-history"], [33, "version-history"], [34, "version-history"], [35, "version-history"], [36, "version-history"], [37, "version-history"], [39, "version-history"], [40, "version-history"], [41, "version-history"], [43, "version-history"], [44, "version-history"], [45, "version-history"], [46, "version-history"], [47, "version-history"], [48, "version-history"], [49, "version-history"], [50, "version-history"], [51, "version-history"], [52, "version-history"], [65, "version-history"], [66, "version-history"]], "API": [[14, "api"], [15, "api"], [16, "api"], [17, "api"], [18, "api"], [19, "api"], [20, "api"], [21, "api"], [22, "api"], [23, "api"], [24, "api"], [25, "api"], [26, "api"], [27, "api"], [28, "api"], [29, "api"], [30, "api"], [31, "api"], [32, "api"], [33, "api"], [34, "api"], [35, "api"], [36, "api"], [37, "api"], [39, "api"], [40, "api"], [41, "api"], [43, "api"], [44, "api"], [45, "api"], [46, "api"], [47, "api"], [48, "api"], [49, "api"], [50, "api"], [51, "api"], [52, "api"], [55, "api"], [56, "api"], [57, "api"], [58, "api"], [59, "api"], [60, "api"], [61, "api"], [62, "api"], [63, "api"], [65, "api"], [66, "api"], [67, "api"]], "Boxing": [[15, "boxing"]], "Action Space": [[15, "action-space"], [16, "action-space"], [17, "action-space"], [18, 
"action-space"], [19, "action-space"], [20, "action-space"], [23, "action-space"], [24, "action-space"], [25, "action-space"], [26, "action-space"], [31, "action-space"], [33, "action-space"], [37, "action-space"], [43, "action-space"], [44, "action-space"], [45, "action-space"], [46, "action-space"], [47, "action-space"], [48, "action-space"], [49, "action-space"], [50, "action-space"], [51, "action-space"], [52, "action-space"], [54, "action-space"], [67, "action-space"]], "Combat: Plane": [[16, "combat-plane"]], "Combat: Tank": [[17, "combat-tank"]], "Double Dunk": [[18, "double-dunk"]], "Emtombed: Competitive": [[19, "emtombed-competitive"]], "Emtombed: Cooperative": [[20, "emtombed-cooperative"]], "Flag Capture": [[21, "flag-capture"]], "Foozpong": [[22, "foozpong"]], "Ice Hockey": [[23, "ice-hockey"]], "Joust": [[24, "joust"]], "Mario Bros": [[25, "mario-bros"]], "Maze Craze": [[26, "maze-craze"]], "Othello": [[27, "othello"]], "Pong": [[28, "pong"]], "Quadrapong": [[29, "quadrapong"]], "Space Invaders": [[30, "space-invaders"]], "Space War": [[31, "space-war"]], "Surround": [[32, "surround"]], "Tennis": [[33, "tennis"]], "Video Checkers": [[34, "video-checkers"]], "Volleyball Pong": [[35, "volleyball-pong"]], "Warlords": [[36, "warlords"]], "Wizard of Wor": [[37, "wizard-of-wor"]], "Butterfly": [[38, "butterfly"]], "Cooperative Pong": [[39, "cooperative-pong"]], "Manual Control": [[39, "manual-control"], [40, "manual-control"], [66, "manual-control"]], "Arguments": [[39, "arguments"], [40, "arguments"], [41, "arguments"], [45, "arguments"], [46, "arguments"], [48, "arguments"], [49, "arguments"], [50, "arguments"], [51, "arguments"], [55, "arguments"], [56, "arguments"], [57, "arguments"], [58, "arguments"], [59, "arguments"], [60, "arguments"], [61, "arguments"], [62, "arguments"], [63, "arguments"], [65, "arguments"], [66, "arguments"], [67, "arguments"]], "Knights Archers Zombies (\u2018KAZ\u2019)": [[40, "knights-archers-zombies-kaz"]], "Vectorized 
(Default)": [[40, "vectorized-default"]], "Image-based": [[40, "image-based"]], "Pistonball": [[41, "pistonball"]], "Classic": [[42, "classic"]], "Chess": [[43, "chess"]], "Observation Space": [[43, "observation-space"], [44, "observation-space"], [45, "observation-space"], [46, "observation-space"], [47, "observation-space"], [48, "observation-space"], [49, "observation-space"], [50, "observation-space"], [51, "observation-space"], [52, "observation-space"], [54, "observation-space"], [65, "observation-space"], [67, "observation-space"]], "Legal Actions Mask": [[43, "legal-actions-mask"], [44, "legal-actions-mask"], [45, "legal-actions-mask"], [46, "legal-actions-mask"], [47, "legal-actions-mask"], [48, "legal-actions-mask"], [50, "legal-actions-mask"], [51, "legal-actions-mask"], [52, "legal-actions-mask"]], "Rewards": [[43, "rewards"], [44, "rewards"], [45, "rewards"], [46, "rewards"], [47, "rewards"], [48, "rewards"], [49, "rewards"], [50, "rewards"], [51, "rewards"], [52, "rewards"], [67, "rewards"]], "Connect Four": [[44, "connect-four"]], "Gin Rummy": [[45, "gin-rummy"]], "Go": [[46, "go"]], "Hanabi": [[47, "hanabi"]], "Environment arguments": [[47, "environment-arguments"]], "Leduc Hold\u2019em": [[48, "leduc-hold-em"]], "Rock Paper Scissors": [[49, "rock-paper-scissors"]], "Rock, Paper, Scissors": [[49, "id1"], [49, "id2"]], "Expanded Game": [[49, "expanded-game"], [49, "id3"]], "Texas Hold\u2019em": [[50, "texas-hold-em"]], "Texas Hold\u2019em No Limit": [[51, "texas-hold-em-no-limit"]], "Tic Tac Toe": [[52, "tic-tac-toe"]], "MPE": [[54, "mpe"]], "Types of Environments": [[54, "types-of-environments"]], "Key Concepts": [[54, "key-concepts"]], "Termination": [[54, "termination"]], "Rendering": [[54, "rendering"]], "Simple": [[55, "simple"]], "Simple Adversary": [[56, "simple-adversary"]], "Simple Crypto": [[57, "simple-crypto"]], "Simple Push": [[58, "simple-push"]], "Simple Reference": [[59, "simple-reference"]], "Simple Speaker Listener": [[60, 
"simple-speaker-listener"]], "Simple Spread": [[61, "simple-spread"]], "Simple Tag": [[62, "simple-tag"]], "Simple World Comm": [[63, "simple-world-comm"]], "SISL": [[64, "sisl"]], "Multiwalker": [[65, "multiwalker"]], "Pursuit": [[66, "pursuit"]], "Waterworld": [[67, "waterworld"]], "Third-Party Environments": [[68, "third-party-environments"]], "Environments using the latest versions of PettingZoo": [[68, "environments-using-the-latest-versions-of-pettingzoo"]], "Sumo-RL": [[68, "sumo-rl"]], "POGEMA": [[68, "pogema"]], "Racecar Gym": [[68, "racecar-gym"]], "Teamfight Tactics MuZero Agent": [[68, "teamfight-tactics-muzero-agent"]], "CookingZoo": [[68, "cookingzoo"]], "Crazy-RL": [[68, "crazy-rl"]], "PettingZoo Dilemma Envs": [[68, "pettingzoo-dilemma-envs"]], "Breakout-Clone": [[68, "breakout-clone"]], "Gobblet-RL": [[68, "gobblet-rl"]], "Cathedral-RL": [[68, "cathedral-rl"]], "Carla Gym": [[68, "carla-gym"]], "Environments using older versions of PettingZoo": [[68, "environments-using-older-versions-of-pettingzoo"]], "Neural MMO": [[68, "neural-mmo"]], "Sequential Social Dilemma Games": [[68, "sequential-social-dilemma-games"]], "Kaggle Environments": [[68, "kaggle-environments"]], "cogment-verse": [[68, "cogment-verse"]], "Stone Ground Hearth Battles": [[68, "stone-ground-hearth-battles"]], "Cyber Operations Research Gym": [[68, "cyber-operations-research-gym"]], "conflict_rez": [[68, "conflict-rez"]], "pz-battlesnake": [[68, "pz-battlesnake"]], "BomberManAI": [[68, "bombermanai"]], "Fanorona AEC": [[68, "fanorona-aec"]], "Galaga AI": [[68, "galaga-ai"]], "skyjo_rl": [[68, "skyjo-rl"]], "Mu Torere": [[68, "mu-torere"]], "Release Notes": [[70, "release-notes"]], "PettingZoo 1.23.1": [[70, "release-1-23-1"]], "PettingZoo 1.23.0": [[70, "release-1-23-0"]], "1.22.4": [[70, "release-1-22-4"]], "1.22.3": [[70, "release-1-22-3"]], "1.22.2": [[70, "release-1-22-2"]], "1.22.1": [[70, "release-1-22-1"]], "1.22.0": [[70, "release-1-22-0"]], "1.21.0": [[70, 
"release-1-21-0"]], "1.20.1": [[70, "release-1-20-1"]], "1.20.0": [[70, "release-1-20-0"]], "1.19.1": [[70, "release-1-19-1"]], "1.19.0": [[70, "release-1-19-0"]], "0.18.1: 1.18.1": [[70, "release-0-18-1"]], "1.17.0": [[70, "release-1-17-0"]], "1.16.0": [[70, "release-1-16-0"]], "1.15.0": [[70, "release-1-15-0"]], "1.14.0": [[70, "release-1-14-0"]], "1.13.1": [[70, "release-1-13-1"]], "1.12.0": [[70, "release-1-12-0"]], "1.11.1": [[70, "release-1-11-1"]], "1.11.0": [[70, "release-1-11-0"]], "1.10.0": [[70, "release-1-10-0"]], "1.9.0": [[70, "release-1-9-0"]], "1.8.2": [[70, "release-1-8-2"]], "1.8.1": [[70, "release-1-8-1"]], "1.8.0": [[70, "release-1-8-0"]], "1.7.0": [[70, "release-1-7-0"]], "1.6.1": [[70, "release-1-6-1"]], "1.6.0": [[70, "release-1-6-0"]], "1.5.2": [[70, "release-1-5-2"]], "1.5.1": [[70, "release-1-5-1"]], "1.5.0": [[70, "release-1-5-0"]], "1.4.2": [[70, "release-1-4-2"]], "1.4.0": [[70, "release-1-4-0"]], "CleanRL: Advanced PPO": [[71, "cleanrl-advanced-ppo"]], "Environment Setup": [[71, "environment-setup"], [72, "environment-setup"], [82, "environment-setup"], [83, "environment-setup"], [85, "environment-setup"], [86, "environment-setup"], [88, "environment-setup"], [89, "environment-setup"], [90, "environment-setup"], [91, "environment-setup"], [93, "environment-setup"]], "Code": [[71, "code"], [72, "code"], [75, "code"], [76, "code"], [77, "code"], [83, "code"], [85, "code"], [86, "code"], [88, "code"], [89, "code"], [90, "code"], [91, "code"], [93, "code"]], "CleanRL: Implementing PPO": [[72, "cleanrl-implementing-ppo"]], "CleanRL Tutorial": [[73, "cleanrl-tutorial"]], "CleanRL Overview": [[73, "cleanrl-overview"]], "Examples using PettingZoo:": [[73, "examples-using-pettingzoo"], [84, "examples-using-pettingzoo"]], "WandB Integration": [[73, "wandb-integration"]], "Tutorial: Repository Structure": [[74, "tutorial-repository-structure"]], "Introduction": [[74, "introduction"], [75, "introduction"], [76, "introduction"], [77, 
"introduction"]], "Tree structure": [[74, "tree-structure"]], "Advanced: Additional (optional) files": [[74, "advanced-additional-optional-files"]], "Skeleton code": [[74, "skeleton-code"]], "Tutorial: Environment Logic": [[75, "tutorial-environment-logic"]], "Tutorial: Action Masking": [[76, "tutorial-action-masking"]], "Tutorial: Testing Your Environment": [[77, "tutorial-testing-your-environment"]], "Environment Creation Tutorial": [[79, "environment-creation-tutorial"]], "LangChain Tutorial": [[81, "langchain-tutorial"]], "LangChain Overview": [[81, "langchain-overview"]], "\ud83d\udcc3 LLMs and Prompts:": [[81, "llms-and-prompts"]], "\ud83d\udd17 Chains:": [[81, "chains"]], "\ud83d\udcda Data Augmented Generation:": [[81, "data-augmented-generation"]], "\ud83e\udd16 Agents:": [[81, "agents"]], "\ud83e\udde0 Memory:": [[81, "memory"]], "\ud83e\uddd0 Evaluation:": [[81, "evaluation"]], "LangChain: Creating LLM agents": [[82, "langchain-creating-llm-agents"]], "Environment Loop": [[82, "environment-loop"]], "Gymnasium Agent": [[82, "gymnasium-agent"]], "PettingZoo Agent": [[82, "pettingzoo-agent"]], "Rock-Paper-Scissors": [[82, "rock-paper-scissors"]], "Action Masking Agent": [[82, "action-masking-agent"]], "Tic-Tac-Toe": [[82, "tic-tac-toe"]], "Texas Holdem\u2019 No Limit": [[82, "texas-holdem-no-limit"]], "Full Code": [[82, "full-code"]], "RLlib: DQN for Simple Poker": [[83, "rllib-dqn-for-simple-poker"]], "Training the RL agent": [[83, "training-the-rl-agent"], [85, "training-the-rl-agent"]], "Watching the trained RL agent play": [[83, "watching-the-trained-rl-agent-play"], [85, "watching-the-trained-rl-agent-play"]], "Ray RLlib Tutorial": [[84, "ray-rllib-tutorial"]], "RLlib Overview": [[84, "rllib-overview"]], "Training:": [[84, "training"]], "Environments:": [[84, "environments"]], "Architecture": [[84, "architecture"], [92, "architecture"]], "RLlib: PPO for Pistonball": [[85, "rllib-ppo-for-pistonball"]], "SB3: Action Masked PPO for Connect Four": [[86, 
"sb3-action-masked-ppo-for-connect-four"]], "Training and Evaluation": [[86, "training-and-evaluation"], [88, "training-and-evaluation"], [89, "training-and-evaluation"]], "Testing other PettingZoo Classic environments": [[86, "testing-other-pettingzoo-classic-environments"]], "Stable-Baselines3 Tutorial": [[87, "stable-baselines3-tutorial"]], "Stable-Baselines Overview": [[87, "stable-baselines-overview"]], "SB3: PPO for Knights-Archers-Zombies": [[88, "sb3-ppo-for-knights-archers-zombies"]], "SB3: PPO for Waterworld": [[89, "sb3-ppo-for-waterworld"]], "Tianshou: CLI and Logging": [[90, "tianshou-cli-and-logging"]], "Tianshou: Basic API Usage": [[91, "tianshou-basic-api-usage"]], "Tianshou Tutorial": [[92, "tianshou-tutorial"]], "Tianshou Overview": [[92, "tianshou-overview"]], "Examples using PettingZoo": [[92, "examples-using-pettingzoo"]], "Tianshou: Training Agents": [[93, "tianshou-training-agents"]]}, "indexentries": {"aecenv (class in pettingzoo.utils.env)": [[2, "pettingzoo.utils.env.AECEnv"]], "action_spaces (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.action_spaces"]], "agent_selection (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.agent_selection"]], "agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.agents"]], "close() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.close"]], "infos (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.infos"]], "max_num_agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.max_num_agents"]], "num_agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.num_agents"]], "observation_spaces (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.observation_spaces"]], "observe() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.observe"]], "possible_agents (pettingzoo.utils.env.aecenv 
attribute)": [[2, "pettingzoo.utils.env.AECEnv.possible_agents"]], "render() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.render"]], "reset() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.reset"]], "rewards (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.rewards"]], "step() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.step"]], "terminations (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.terminations"]], "truncations (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.truncations"]], "parallelenv (class in pettingzoo.utils.env)": [[3, "pettingzoo.utils.env.ParallelEnv"]], "action_space() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.action_space"]], "action_spaces (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.action_spaces"]], "agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.agents"]], "close() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.close"]], "max_num_agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.max_num_agents"]], "num_agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.num_agents"]], "observation_space() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.observation_space"]], "observation_spaces (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.observation_spaces"]], "possible_agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.possible_agents"]], "render() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.render"]], "reset() (pettingzoo.utils.env.parallelenv method)": [[3, 
"pettingzoo.utils.env.ParallelEnv.reset"]], "state() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.state"]], "step() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.step"]], "assertoutofboundswrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.AssertOutOfBoundsWrapper"]], "basewrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.BaseWrapper"]], "capturestdoutwrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.CaptureStdoutWrapper"]], "clipoutofboundswrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.ClipOutOfBoundsWrapper"]], "orderenforcingwrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.OrderEnforcingWrapper"]], "terminateillegalwrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.TerminateIllegalWrapper"]], "aec_to_parallel() (in module pettingzoo.utils.conversions)": [[6, "pettingzoo.utils.conversions.aec_to_parallel"]], "module": [[6, "module-pettingzoo.utils.conversions"]], "parallel_to_aec() (in module pettingzoo.utils.conversions)": [[6, "pettingzoo.utils.conversions.parallel_to_aec"]], "pettingzoo.utils.conversions": [[6, "module-pettingzoo.utils.conversions"]], "agent_indicator_v0()": [[8, "agent_indicator_v0"]], "black_death_v2()": [[8, "black_death_v2"]], "built-in function": [[8, "agent_indicator_v0"], [8, "black_death_v2"], [8, "clip_actions_v0"], [8, "clip_reward_v0"], [8, "color_reduction_v0"], [8, "delay_observations_v0"], [8, "dtype_v0"], [8, "flatten_v0"], [8, "frame_skip_v0"], [8, "frame_stack_v1"], [8, "max_observation_v0"], [8, "nan_noop_v0"], [8, "nan_random_v0"], [8, "nan_zeros_v0"], [8, "normalize_obs_v0"], [8, "pad_action_space_v0"], [8, "pad_observations_v0"], [8, "reshape_v0"], [8, "resize_v1"], [8, "scale_actions_v0"], [8, "sticky_actions_v0"]], "clip_actions_v0()": [[8, "clip_actions_v0"]], 
"clip_reward_v0()": [[8, "clip_reward_v0"]], "color_reduction_v0()": [[8, "color_reduction_v0"]], "delay_observations_v0()": [[8, "delay_observations_v0"]], "dtype_v0()": [[8, "dtype_v0"]], "flatten_v0()": [[8, "flatten_v0"]], "frame_skip_v0()": [[8, "frame_skip_v0"]], "frame_stack_v1()": [[8, "frame_stack_v1"]], "max_observation_v0()": [[8, "max_observation_v0"]], "nan_noop_v0()": [[8, "nan_noop_v0"]], "nan_random_v0()": [[8, "nan_random_v0"]], "nan_zeros_v0()": [[8, "nan_zeros_v0"]], "normalize_obs_v0()": [[8, "normalize_obs_v0"]], "pad_action_space_v0()": [[8, "pad_action_space_v0"]], "pad_observations_v0()": [[8, "pad_observations_v0"]], "reshape_v0()": [[8, "reshape_v0"]], "resize_v1()": [[8, "resize_v1"]], "scale_actions_v0()": [[8, "scale_actions_v0"]], "sticky_actions_v0()": [[8, "sticky_actions_v0"]], "raw_env (class in pettingzoo.atari.basketball_pong.basketball_pong)": [[14, "pettingzoo.atari.basketball_pong.basketball_pong.raw_env"]], "raw_env (class in pettingzoo.atari.boxing.boxing)": [[15, "pettingzoo.atari.boxing.boxing.raw_env"]], "raw_env (class in pettingzoo.atari.combat_plane.combat_plane)": [[16, "pettingzoo.atari.combat_plane.combat_plane.raw_env"]], "raw_env (class in pettingzoo.atari.combat_tank.combat_tank)": [[17, "pettingzoo.atari.combat_tank.combat_tank.raw_env"]], "raw_env (class in pettingzoo.atari.double_dunk.double_dunk)": [[18, "pettingzoo.atari.double_dunk.double_dunk.raw_env"]], "raw_env (class in pettingzoo.atari.entombed_competitive.entombed_competitive)": [[19, "pettingzoo.atari.entombed_competitive.entombed_competitive.raw_env"]], "raw_env (class in pettingzoo.atari.entombed_cooperative.entombed_cooperative)": [[20, "pettingzoo.atari.entombed_cooperative.entombed_cooperative.raw_env"]], "raw_env (class in pettingzoo.atari.flag_capture.flag_capture)": [[21, "pettingzoo.atari.flag_capture.flag_capture.raw_env"]], "raw_env (class in pettingzoo.atari.foozpong.foozpong)": [[22, "pettingzoo.atari.foozpong.foozpong.raw_env"]], 
"raw_env (class in pettingzoo.atari.ice_hockey.ice_hockey)": [[23, "pettingzoo.atari.ice_hockey.ice_hockey.raw_env"]], "raw_env (class in pettingzoo.atari.joust.joust)": [[24, "pettingzoo.atari.joust.joust.raw_env"]], "raw_env (class in pettingzoo.atari.mario_bros.mario_bros)": [[25, "pettingzoo.atari.mario_bros.mario_bros.raw_env"]], "raw_env (class in pettingzoo.atari.maze_craze.maze_craze)": [[26, "pettingzoo.atari.maze_craze.maze_craze.raw_env"]], "raw_env (class in pettingzoo.atari.othello.othello)": [[27, "pettingzoo.atari.othello.othello.raw_env"]], "raw_env (class in pettingzoo.atari.pong.pong)": [[28, "pettingzoo.atari.pong.pong.raw_env"]], "raw_env (class in pettingzoo.atari.quadrapong.quadrapong)": [[29, "pettingzoo.atari.quadrapong.quadrapong.raw_env"]], "raw_env (class in pettingzoo.atari.space_invaders.space_invaders)": [[30, "pettingzoo.atari.space_invaders.space_invaders.raw_env"]], "raw_env (class in pettingzoo.atari.space_war.space_war)": [[31, "pettingzoo.atari.space_war.space_war.raw_env"]], "raw_env (class in pettingzoo.atari.surround.surround)": [[32, "pettingzoo.atari.surround.surround.raw_env"]], "raw_env (class in pettingzoo.atari.tennis.tennis)": [[33, "pettingzoo.atari.tennis.tennis.raw_env"]], "raw_env (class in pettingzoo.atari.video_checkers.video_checkers)": [[34, "pettingzoo.atari.video_checkers.video_checkers.raw_env"]], "raw_env (class in pettingzoo.atari.volleyball_pong.volleyball_pong)": [[35, "pettingzoo.atari.volleyball_pong.volleyball_pong.raw_env"]], "raw_env (class in pettingzoo.atari.warlords.warlords)": [[36, "pettingzoo.atari.warlords.warlords.raw_env"]], "raw_env (class in pettingzoo.atari.wizard_of_wor.wizard_of_wor)": [[37, "pettingzoo.atari.wizard_of_wor.wizard_of_wor.raw_env"]], "action_space() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.action_space"]], "close() 
(pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.close"]], "env (class in pettingzoo.butterfly.cooperative_pong.cooperative_pong)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.env"]], "observation_space() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.observation_space"]], "observe() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.observe"]], "raw_env (class in pettingzoo.butterfly.cooperative_pong.cooperative_pong)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env"]], "render() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.render"]], "reset() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.reset"]], "state() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.state"]], "step() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.step"]], "action_space() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.action_space"]], "close() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.close"]], "env (class in pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies)": [[40, 
"pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.env"]], "observation_space() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.observation_space"]], "observe() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.observe"]], "raw_env (class in pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env"]], "render() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.render"]], "reset() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.reset"]], "state() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.state"]], "step() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.step"]], "action_space() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.action_space"]], "close() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.close"]], "env (class in pettingzoo.butterfly.pistonball.pistonball)": [[41, "pettingzoo.butterfly.pistonball.pistonball.env"]], "observation_space() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, 
"pettingzoo.butterfly.pistonball.pistonball.raw_env.observation_space"]], "observe() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.observe"]], "raw_env (class in pettingzoo.butterfly.pistonball.pistonball)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env"]], "render() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.render"]], "reset() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.reset"]], "state() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.state"]], "step() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.step"]], "action_space() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.action_space"]], "close() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.close"]], "env (class in pettingzoo.classic.chess.chess)": [[43, "pettingzoo.classic.chess.chess.env"]], "observation_space() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.observation_space"]], "observe() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.observe"]], "raw_env (class in pettingzoo.classic.chess.chess)": [[43, "pettingzoo.classic.chess.chess.raw_env"]], "render() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.render"]], "reset() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.reset"]], "step() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.step"]], "action_space() 
(pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.action_space"]], "close() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.close"]], "env (class in pettingzoo.classic.connect_four.connect_four)": [[44, "pettingzoo.classic.connect_four.connect_four.env"]], "observation_space() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.observation_space"]], "observe() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.observe"]], "raw_env (class in pettingzoo.classic.connect_four.connect_four)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env"]], "render() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.render"]], "reset() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.reset"]], "step() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.step"]], "env (class in pettingzoo.classic.rlcard_envs.gin_rummy)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.env"]], "observe() (pettingzoo.classic.rlcard_envs.gin_rummy.raw_env method)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env.observe"]], "raw_env (class in pettingzoo.classic.rlcard_envs.gin_rummy)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.gin_rummy.raw_env method)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.gin_rummy.raw_env method)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env.step"]], "action_space() (pettingzoo.classic.go.go.raw_env method)": [[46, 
"pettingzoo.classic.go.go.raw_env.action_space"]], "close() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.close"]], "env (class in pettingzoo.classic.go.go)": [[46, "pettingzoo.classic.go.go.env"]], "observation_space() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.observation_space"]], "observe() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.observe"]], "raw_env (class in pettingzoo.classic.go.go)": [[46, "pettingzoo.classic.go.go.raw_env"]], "render() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.render"]], "reset() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.reset"]], "step() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.step"]], "action_space() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.action_space"]], "close() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.close"]], "env (class in pettingzoo.classic.hanabi.hanabi)": [[47, "pettingzoo.classic.hanabi.hanabi.env"]], "observation_space() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.observation_space"]], "observe() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.observe"]], "raw_env (class in pettingzoo.classic.hanabi.hanabi)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env"]], "render() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.render"]], "reset() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.reset"]], "step() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.step"]], "env (class in 
pettingzoo.classic.rlcard_envs.leduc_holdem)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.env"]], "raw_env (class in pettingzoo.classic.rlcard_envs.leduc_holdem)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env method)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env method)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env.step"]], "action_space() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.action_space"]], "close() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.close"]], "env (class in pettingzoo.classic.rps.rps)": [[49, "pettingzoo.classic.rps.rps.env"]], "observation_space() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.observation_space"]], "observe() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.observe"]], "raw_env (class in pettingzoo.classic.rps.rps)": [[49, "pettingzoo.classic.rps.rps.raw_env"]], "render() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.render"]], "reset() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.reset"]], "step() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.step"]], "env (class in pettingzoo.classic.rlcard_envs.texas_holdem)": [[50, "pettingzoo.classic.rlcard_envs.texas_holdem.env"]], "raw_env (class in pettingzoo.classic.rlcard_envs.texas_holdem)": [[50, "pettingzoo.classic.rlcard_envs.texas_holdem.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.texas_holdem.raw_env method)": [[50, "pettingzoo.classic.rlcard_envs.texas_holdem.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.texas_holdem.raw_env method)": [[50, 
"pettingzoo.classic.rlcard_envs.texas_holdem.raw_env.step"]], "env (class in pettingzoo.classic.rlcard_envs.texas_holdem_no_limit)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.env"]], "raw_env (class in pettingzoo.classic.rlcard_envs.texas_holdem_no_limit)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env method)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env method)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env.step"]], "action_space() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.action_space"]], "close() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.close"]], "env (class in pettingzoo.classic.tictactoe.tictactoe)": [[52, "pettingzoo.classic.tictactoe.tictactoe.env"]], "observation_space() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.observation_space"]], "observe() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.observe"]], "raw_env (class in pettingzoo.classic.tictactoe.tictactoe)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env"]], "render() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.render"]], "reset() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.reset"]], "step() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.step"]], "action_spaces (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.action_spaces"]], "agent_selection 
(pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.agents"]], "infos (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple.simple)": [[55, "pettingzoo.mpe.simple.simple.raw_env"]], "rewards (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.agents"]], "infos (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, 
"pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_adversary.simple_adversary)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env"]], "rewards (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.agents"]], "infos (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_crypto.simple_crypto)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env"]], "rewards (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, 
"pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.agents"]], "infos (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_push.simple_push)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env"]], "rewards (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.agent_selection"]], "agents 
(pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.agents"]], "infos (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_reference.simple_reference)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env"]], "rewards (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.agents"]], "infos (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, 
"pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env"]], "rewards (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.agents"]], "infos (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.observation_spaces"]], 
"possible_agents (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_spread.simple_spread)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env"]], "rewards (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.agents"]], "infos (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_tag.simple_tag)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env"]], "rewards (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.terminations"]], "truncations 
(pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.agents"]], "infos (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_world_comm.simple_world_comm)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env"]], "rewards (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.truncations"]], "action_space() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.action_space"]], "close() (pettingzoo.sisl.multiwalker.multiwalker.raw_env 
method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.close"]], "env (class in pettingzoo.sisl.multiwalker.multiwalker)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.env"]], "observation_space() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.observation_space"]], "observe() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.observe"]], "raw_env (class in pettingzoo.sisl.multiwalker.multiwalker)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env"]], "render() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.render"]], "reset() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.reset"]], "step() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.step"]], "action_space() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.action_space"]], "close() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.close"]], "env (class in pettingzoo.sisl.pursuit.pursuit)": [[66, "pettingzoo.sisl.pursuit.pursuit.env"]], "observation_space() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.observation_space"]], "observe() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.observe"]], "raw_env (class in pettingzoo.sisl.pursuit.pursuit)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env"]], "render() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.render"]], "reset() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.reset"]], "step() (pettingzoo.sisl.pursuit.pursuit.raw_env 
method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.step"]], "action_space() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.action_space"]], "close() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.close"]], "env (class in pettingzoo.sisl.waterworld.waterworld)": [[67, "pettingzoo.sisl.waterworld.waterworld.env"]], "observation_space() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.observation_space"]], "observe() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.observe"]], "raw_env (class in pettingzoo.sisl.waterworld.waterworld)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env"]], "render() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.render"]], "reset() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.reset"]], "step() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.step"]]}})
\ No newline at end of file
+Search.setIndex({"docnames": ["404", "README", "api/aec", "api/parallel", "api/utils", "api/wrappers", "api/wrappers/pz_wrappers", "api/wrappers/shimmy_wrappers", "api/wrappers/supersuit_wrappers", "content/basic_usage", "content/environment_creation", "content/environment_tests", "content/tutorials", "environments/atari", "environments/atari/basketball_pong", "environments/atari/boxing", "environments/atari/combat_plane", "environments/atari/combat_tank", "environments/atari/double_dunk", "environments/atari/entombed_competitive", "environments/atari/entombed_cooperative", "environments/atari/flag_capture", "environments/atari/foozpong", "environments/atari/ice_hockey", "environments/atari/joust", "environments/atari/mario_bros", "environments/atari/maze_craze", "environments/atari/othello", "environments/atari/pong", "environments/atari/quadrapong", "environments/atari/space_invaders", "environments/atari/space_war", "environments/atari/surround", "environments/atari/tennis", "environments/atari/video_checkers", "environments/atari/volleyball_pong", "environments/atari/warlords", "environments/atari/wizard_of_wor", "environments/butterfly", "environments/butterfly/cooperative_pong", "environments/butterfly/knights_archers_zombies", "environments/butterfly/pistonball", "environments/classic", "environments/classic/chess", "environments/classic/connect_four", "environments/classic/gin_rummy", "environments/classic/go", "environments/classic/hanabi", "environments/classic/leduc_holdem", "environments/classic/rps", "environments/classic/texas_holdem", "environments/classic/texas_holdem_no_limit", "environments/classic/tictactoe", "environments/envs", "environments/mpe", "environments/mpe/simple", "environments/mpe/simple_adversary", "environments/mpe/simple_crypto", "environments/mpe/simple_push", "environments/mpe/simple_reference", "environments/mpe/simple_speaker_listener", "environments/mpe/simple_spread", "environments/mpe/simple_tag", 
"environments/mpe/simple_world_comm", "environments/sisl", "environments/sisl/multiwalker", "environments/sisl/pursuit", "environments/sisl/waterworld", "environments/third_party_envs", "index", "release_notes/index", "tutorials/cleanrl/advanced_PPO", "tutorials/cleanrl/implementing_PPO", "tutorials/cleanrl/index", "tutorials/environmentcreation/1-project-structure", "tutorials/environmentcreation/2-environment-logic", "tutorials/environmentcreation/3-action-masking", "tutorials/environmentcreation/4-testing-your-environment", "tutorials/environmentcreation/5-using-your-environment", "tutorials/environmentcreation/index", "tutorials/index", "tutorials/langchain/index", "tutorials/langchain/langchain", "tutorials/rllib/holdem", "tutorials/rllib/index", "tutorials/rllib/pistonball", "tutorials/sb3/connect_four", "tutorials/sb3/index", "tutorials/sb3/kaz", "tutorials/sb3/waterworld", "tutorials/tianshou/advanced", "tutorials/tianshou/beginner", "tutorials/tianshou/index", "tutorials/tianshou/intermediate"], "filenames": ["404.md", "README.md", "api/aec.md", "api/parallel.md", "api/utils.md", "api/wrappers.md", "api/wrappers/pz_wrappers.md", "api/wrappers/shimmy_wrappers.md", "api/wrappers/supersuit_wrappers.md", "content/basic_usage.md", "content/environment_creation.md", "content/environment_tests.md", "content/tutorials.md", "environments/atari.md", "environments/atari/basketball_pong.md", "environments/atari/boxing.md", "environments/atari/combat_plane.md", "environments/atari/combat_tank.md", "environments/atari/double_dunk.md", "environments/atari/entombed_competitive.md", "environments/atari/entombed_cooperative.md", "environments/atari/flag_capture.md", "environments/atari/foozpong.md", "environments/atari/ice_hockey.md", "environments/atari/joust.md", "environments/atari/mario_bros.md", "environments/atari/maze_craze.md", "environments/atari/othello.md", "environments/atari/pong.md", "environments/atari/quadrapong.md", "environments/atari/space_invaders.md", 
"environments/atari/space_war.md", "environments/atari/surround.md", "environments/atari/tennis.md", "environments/atari/video_checkers.md", "environments/atari/volleyball_pong.md", "environments/atari/warlords.md", "environments/atari/wizard_of_wor.md", "environments/butterfly.md", "environments/butterfly/cooperative_pong.md", "environments/butterfly/knights_archers_zombies.md", "environments/butterfly/pistonball.md", "environments/classic.md", "environments/classic/chess.md", "environments/classic/connect_four.md", "environments/classic/gin_rummy.md", "environments/classic/go.md", "environments/classic/hanabi.md", "environments/classic/leduc_holdem.md", "environments/classic/rps.md", "environments/classic/texas_holdem.md", "environments/classic/texas_holdem_no_limit.md", "environments/classic/tictactoe.md", "environments/envs.md", "environments/mpe.md", "environments/mpe/simple.md", "environments/mpe/simple_adversary.md", "environments/mpe/simple_crypto.md", "environments/mpe/simple_push.md", "environments/mpe/simple_reference.md", "environments/mpe/simple_speaker_listener.md", "environments/mpe/simple_spread.md", "environments/mpe/simple_tag.md", "environments/mpe/simple_world_comm.md", "environments/sisl.md", "environments/sisl/multiwalker.md", "environments/sisl/pursuit.md", "environments/sisl/waterworld.md", "environments/third_party_envs.md", "index.md", "release_notes/index.md", "tutorials/cleanrl/advanced_PPO.md", "tutorials/cleanrl/implementing_PPO.md", "tutorials/cleanrl/index.md", "tutorials/environmentcreation/1-project-structure.md", "tutorials/environmentcreation/2-environment-logic.md", "tutorials/environmentcreation/3-action-masking.md", "tutorials/environmentcreation/4-testing-your-environment.md", "tutorials/environmentcreation/5-using-your-environment.md", "tutorials/environmentcreation/index.md", "tutorials/index.md", "tutorials/langchain/index.md", "tutorials/langchain/langchain.md", "tutorials/rllib/holdem.md", "tutorials/rllib/index.md", 
"tutorials/rllib/pistonball.md", "tutorials/sb3/connect_four.md", "tutorials/sb3/index.md", "tutorials/sb3/kaz.md", "tutorials/sb3/waterworld.md", "tutorials/tianshou/advanced.md", "tutorials/tianshou/beginner.md", "tutorials/tianshou/index.md", "tutorials/tianshou/intermediate.md"], "titles": ["404 - Page Not Found", "PettingZoo docs", "AEC API", "Parallel API", "Utils", "Wrappers", "PettingZoo Wrappers", "Shimmy Compatibility Wrappers", "Supersuit Wrappers", "Basic Usage", "Environment Creation", "Testing Environments", "Tutorials", "Atari", "Basketball Pong", "Boxing", "Combat: Plane", "Combat: Tank", "Double Dunk", "Emtombed: Competitive", "Emtombed: Cooperative", "Flag Capture", "Foozpong", "Ice Hockey", "Joust", "Mario Bros", "Maze Craze", "Othello", "Pong", "Quadrapong", "Space Invaders", "Space War", "Surround", "Tennis", "Video Checkers", "Volleyball Pong", "Warlords", "Wizard of Wor", "Butterfly", "Cooperative Pong", "Knights Archers Zombies (\u2018KAZ\u2019)", "Pistonball", "Classic", "Chess", "Connect Four", "Gin Rummy", "Go", "Hanabi", "Leduc Hold\u2019em", "Rock Paper Scissors", "Texas Hold\u2019em", "Texas Hold\u2019em No Limit", "Tic Tac Toe", "&lt;no title&gt;", "MPE", "Simple", "Simple Adversary", "Simple Crypto", "Simple Push", "Simple Reference", "Simple Speaker Listener", "Simple Spread", "Simple Tag", "Simple World Comm", "SISL", "Multiwalker", "Pursuit", "Waterworld", "Third-Party Environments", "&lt;no title&gt;", "Release Notes", "CleanRL: Advanced PPO", "CleanRL: Implementing PPO", "CleanRL Tutorial", "Tutorial: Repository Structure", "Tutorial: Environment Logic", "Tutorial: Action Masking", "Tutorial: Testing Your Environment", "&lt;no title&gt;", "Environment Creation Tutorial", "&lt;no title&gt;", "LangChain Tutorial", "LangChain: Creating LLM agents", "RLlib: DQN for Simple Poker", "Ray RLlib Tutorial", "RLlib: PPO for Pistonball", "SB3: Action Masked PPO for Connect Four", "Stable-Baselines3 Tutorial", "SB3: PPO for 
Knights-Archers-Zombies", "SB3: PPO for Waterworld", "Tianshou: CLI and Logging", "Tianshou: Basic API Usage", "Tianshou Tutorial", "Tianshou: Training Agents"], "terms": {"thi": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "folder": [1, 4, 70, 71], "contain": [1, 9, 10, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 65, 70, 82], "For": [1, 2, 7, 8, 10, 11, 13, 40, 41, 43, 45, 46, 47, 52, 57, 65, 67, 75, 76, 82, 86, 87, 88, 89], "more": [1, 2, 7, 9, 10, 25, 27, 30, 31, 37, 39, 45, 46, 47, 49, 54, 61, 68, 70, 73, 74, 76, 83, 84, 85, 86, 87, 88, 89], "inform": [1, 2, 7, 8, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 73, 82, 83, 84, 85, 86, 87, 88, 89], "about": [1, 6, 41, 47, 54, 65, 67, 70, 74, 75, 81, 86], "how": [1, 2, 11, 19, 20, 24, 25, 26, 30, 36, 37, 39, 40, 41, 46, 54, 55, 56, 61, 65, 70, 71, 72, 73, 76, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93], "contribut": [1, 41, 68, 70], "go": [1, 42, 45, 70, 81], "our": [1, 6, 9, 10, 12, 13, 43, 45, 46, 48, 51, 72, 77], "md": [1, 70, 74], "can": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 57, 60, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 75, 76, 77, 81, 82, 86, 87, 88, 89, 90, 93], "found": [1, 11, 43, 44, 45, 46, 47, 48, 50, 51, 52, 71, 86, 88, 89], "top": [1, 30, 40, 45, 46, 65, 66, 75], "file": [1, 4, 12, 70, 72, 73, 77, 90], "python": [1, 9, 43, 69, 70, 84, 90, 91, 92, 93], "where": [1, 2, 3, 4, 6, 7, 8, 9, 10, 13, 15, 16, 17, 20, 
21, 25, 30, 31, 32, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 59, 63, 64, 66, 67, 70, 74, 75, 82, 87], "i": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93], "declar": [1, 10, 45], "exampl": [1, 2, 6, 11, 13, 43, 45, 46, 47, 49, 67, 70, 71, 75, 76, 81, 82, 83, 86, 87, 88, 90, 91, 93], "chess": [1, 42, 68, 70, 76, 84, 86], "classic": [1, 2, 3, 6, 17, 27, 28, 30, 34, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 68, 70, 82, 83, 85, 90, 91, 93], "py": [1, 10, 38, 70, 71, 74, 75, 76, 77], "To": [1, 2, 6, 7, 8, 9, 10, 11, 13, 22, 27, 32, 34, 38, 39, 42, 54, 64, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "gener": [1, 2, 3, 7, 8, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 71, 76, 82], "you": [1, 2, 3, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 37, 38, 40, 42, 43, 45, 46, 48, 51, 54, 64, 68, 71, 72, 74, 79, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 93], "need": [1, 9, 10, 11, 19, 20, 21, 27, 31, 67, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "execut": [1, 2, 3, 8, 9, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67, 71, 72, 75, 76, 87, 91], "_script": 1, "gen_envs_md": 1, "script": [1, 71, 86, 88, 89, 92], "cd": 1, "instal": [1, 7, 8, 10, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "requir": [1, 8, 9, 10, 11, 20, 25, 38, 49, 66, 70, 74, 86, 88, 90, 91, 93], "packag": [1, 7, 8, 10, 65, 70, 74, 90, 91, 93], "pip": [1, 7, 8, 9, 10, 13, 38, 42, 54, 64], "e": [1, 2, 3, 6, 8, 9, 
30, 40, 41, 43, 54, 65, 75, 76, 77, 82], "r": [1, 8, 10, 71, 82], "txt": [1, 74], "onc": [1, 3, 18, 32, 42, 70], "make": [1, 2, 4, 6, 8, 9, 11, 20, 27, 30, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 54, 65, 66, 67, 70, 77, 81], "dirhtml": 1, "rebuild": 1, "automat": [1, 2, 9, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67, 86], "everi": [1, 2, 3, 6, 9, 13, 41, 63, 66, 67, 89, 91], "time": [1, 2, 6, 8, 9, 10, 14, 16, 18, 22, 23, 24, 26, 27, 28, 29, 30, 33, 34, 35, 37, 38, 39, 40, 41, 42, 44, 48, 49, 51, 63, 65, 66, 67, 70, 71, 76, 82, 86, 88, 89], "chang": [1, 2, 3, 6, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 46, 47, 65, 66, 67, 70, 71, 83, 86], "made": [1, 44, 64, 65, 68, 70, 82], "sphinx": [1, 70], "autobuild": 1, "b": [1, 8, 43, 71, 85, 88], "_build": 1, "By": [2, 4, 47, 62, 63, 65, 66], "default": [2, 3, 4, 8, 9, 10, 13, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 86, 88, 90], "pettingzoo": [2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 71, 72, 74, 75, 76, 77, 79, 81, 83, 85, 87, 88, 89, 90, 91, 93], "model": [2, 9, 81, 82, 83, 85, 86, 87, 88, 89, 90, 93], "game": [2, 3, 4, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 70, 71, 75, 76, 82, 86, 87, 88, 89, 91, 92], "agent": [2, 3, 4, 6, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 76, 84, 86, 87, 88, 89, 90, 91, 
92], "environ": [2, 3, 4, 5, 6, 8, 12, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 73, 74, 76, 81, 87], "cycl": [2, 3, 6, 9, 10, 40, 54, 66, 70], "allow": [2, 3, 5, 7, 8, 9, 10, 11, 13, 22, 25, 47, 49, 52, 54, 70, 73, 86, 87, 88], "support": [2, 3, 8, 9, 10, 11, 13, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 69, 70, 71, 86, 87, 88], "ani": [2, 6, 8, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 74, 75, 77, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "type": [2, 3, 6, 8, 11, 40, 43, 47, 70, 71, 81, 83, 86, 90, 92, 93], "multi": [2, 9, 13, 54, 64, 68, 69, 82, 84, 87, 92], "rl": [2, 9, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70, 71, 73, 86, 87, 92], "consid": [2, 9, 41, 66], "provid": [2, 3, 5, 6, 10, 40, 41, 73, 81, 82, 83, 85, 87, 88, 92, 93], "standard": [2, 8, 11, 13, 46, 47, 49, 51, 69, 70, 81, 92], "turn": [2, 3, 6, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 67, 69, 70, 83], "base": [2, 3, 6, 9, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41, 42, 44, 47, 52, 54, 55, 56, 57, 58, 61, 67, 68, 69, 70, 82], "mani": [2, 8, 11, 40, 42, 68, 70, 73, 76, 82], "which": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 15, 18, 27, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 57, 59, 65, 66, 67, 70, 72, 76, 81, 82, 83, 86, 88, 91], "implement": [2, 3, 4, 6, 7, 8, 9, 11, 13, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 65, 66, 67, 68, 70, 73, 74, 81, 82, 84, 87], "illeg": [2, 6, 9, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70, 82, 86], "we": [2, 3, 6, 9, 10, 11, 12, 13, 43, 64, 70, 72, 74, 75, 76, 77, 82, 86, 87, 88, 89], "tutori": [2, 3, 68, 70, 71, 72, 
82, 83, 85, 86, 88, 89, 90, 91, 93], "creat": [2, 3, 4, 6, 9, 10, 11, 26, 38, 47, 64, 67, 69, 70, 71, 72, 74, 75, 79, 81, 83, 85, 86, 87, 88, 89, 90, 91, 93], "simpl": [2, 3, 4, 5, 6, 8, 9, 10, 39, 41, 47, 52, 54, 69, 73, 82, 84, 87, 91], "rock": [2, 3, 10, 70, 84, 91], "paper": [2, 3, 10, 43, 64, 66, 70, 71, 84, 91], "scissor": [2, 3, 10, 70, 84, 91], "show": [2, 12, 47, 49, 52, 71, 72, 73, 83, 84, 85, 86, 87, 88, 89, 91, 93], "simultan": [2, 3, 49, 59, 69, 74], "also": [2, 8, 9, 10, 14, 15, 18, 22, 27, 28, 29, 30, 33, 34, 35, 40, 47, 54, 62, 65, 66, 67, 70, 82, 86], "repres": [2, 8, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 67, 69], "interact": [2, 3, 4, 7, 38, 54, 68, 69, 81, 82], "follow": [2, 3, 5, 6, 8, 9, 10, 13, 27, 43, 46, 47, 48, 49, 50, 51, 52, 54, 62, 68, 70, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "from": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 79, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "import": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "rps_v2": [2, 6, 10, 49, 82, 91], "env": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 22, 26, 28, 30, 35, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 82, 83, 84, 85, 86, 88, 89, 90, 91, 93], "render_mod": [2, 3, 7, 10, 12, 13, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 72, 82, 85, 86, 
88, 89, 90, 91], "human": [2, 3, 7, 9, 10, 11, 12, 13, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 64, 65, 66, 67, 68, 69, 72, 82, 86, 88, 89, 90, 91], "reset": [2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 18, 20, 22, 27, 28, 29, 33, 34, 35, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 54, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89], "seed": [2, 3, 9, 10, 12, 13, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 86, 88, 89, 90, 93], "42": [2, 3, 13, 38, 42, 69], "agent_it": [2, 6, 7, 9, 13, 38, 42, 54, 64, 69, 82, 83, 85, 86, 88, 89], "observ": [2, 3, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 68, 69, 70, 72, 75, 76, 81, 82, 83, 85, 86, 87, 88, 89, 90, 93], "reward": [2, 3, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89, 90], "termin": [2, 3, 6, 7, 9, 10, 12, 13, 20, 38, 41, 42, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 66, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89], "truncat": [2, 3, 6, 7, 9, 10, 12, 13, 38, 42, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89], "info": [2, 3, 6, 7, 9, 10, 12, 13, 38, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 69, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89], "last": [2, 6, 7, 8, 9, 10, 13, 19, 20, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 52, 54, 64, 65, 66, 67, 69, 82, 83, 85, 86, 88, 89], "none": [2, 3, 4, 6, 7, 9, 10, 12, 13, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "els": [2, 6, 7, 10, 12, 13, 38, 40, 47, 54, 64, 71, 72, 82, 83, 85, 
86, 88, 89, 90, 93], "action_spac": [2, 3, 6, 7, 9, 10, 12, 13, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 74, 75, 76, 82, 83, 86, 88, 90, 93], "sampl": [2, 3, 6, 7, 9, 12, 13, 38, 42, 54, 64, 71, 72, 82, 86, 88], "would": [2, 3, 6, 7, 8, 9, 13, 38, 39, 42, 43, 45, 46, 47, 54, 64, 70, 74], "insert": [2, 3, 6, 7, 9, 13, 38, 42, 54, 64], "your": [2, 3, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 42, 54, 64, 69, 71, 74, 79, 82, 83, 85], "polici": [2, 3, 4, 6, 7, 8, 9, 12, 13, 38, 41, 42, 54, 64, 65, 69, 71, 72, 83, 85, 86, 87, 88, 89, 90, 91, 93], "step": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 25, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "close": [2, 3, 6, 7, 9, 10, 13, 15, 38, 39, 40, 41, 42, 43, 44, 46, 47, 49, 52, 54, 55, 56, 58, 64, 65, 66, 67, 70, 71, 82, 85, 86, 88, 89], "often": [2, 10, 13, 54], "includ": [2, 5, 6, 9, 10, 39, 45, 47, 53, 54, 64, 65, 68, 69, 70, 71, 73, 74, 81], "order": [2, 6, 21, 27, 40, 45, 47, 49, 70, 81, 86], "mark": [2, 7, 52], "valid": [2, 5, 6, 10, 27, 82], "invalid": [2, 76, 86], "us": [2, 4, 6, 7, 8, 9, 11, 12, 13, 19, 20, 21, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 74, 75, 76, 77, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "chess_v6": [2, 43], "option": [2, 3, 10, 11, 13, 30, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 54, 65, 66, 67, 70, 75, 76, 86, 90, 93], "depend": [2, 9, 13, 18, 30, 37, 38, 42, 46, 47, 54, 64, 65, 67, 70, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "action_mask": [2, 7, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70, 76, 82, 83, 86], "elif": [2, 75, 76], "isinst": [2, 11, 71, 90, 93], "dict": [2, 3, 9, 10, 39, 40, 41, 43, 44, 46, 
47, 49, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 88, 90, 93], "note": [2, 6, 8, 9, 10, 17, 19, 20, 25, 26, 27, 34, 37, 40, 42, 43, 45, 54, 63, 64, 66, 68, 71, 77, 83, 85, 86, 87, 88, 89], "either": [2, 14, 21, 22, 25, 28, 35, 39, 40, 41, 44, 46, 47, 49, 52, 70], "store": [2, 9, 10, 47, 74], "shimmi": [2, 5, 47, 68, 70], "": [2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17, 19, 20, 21, 22, 23, 25, 27, 29, 30, 34, 35, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 70, 71, 72, 73, 82, 83, 84, 85, 86, 88, 89, 90], "openspiel": [2, 47, 70], "custom": [2, 3, 69, 70, 74, 75, 76, 77, 79, 86], "see": [2, 7, 8, 10, 13, 19, 20, 38, 47, 54, 55, 56, 63, 70, 71, 73, 81, 83, 84, 85, 86, 87, 88, 89], "creation": [2, 9, 70], "A": [2, 3, 5, 9, 10, 14, 21, 25, 26, 32, 33, 34, 40, 41, 42, 43, 44, 45, 47, 50, 51, 66, 68, 73, 74, 88], "closer": [2, 59], "look": [2, 9, 10, 11, 13, 26, 40, 70], "gradient": [2, 71], "algorithm": [2, 8, 12, 68, 71, 72, 73, 83, 84, 85, 87, 92], "huang": [2, 42], "2022": [2, 7, 70, 87], "class": [2, 3, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 74, 75, 76, 82, 83, 85, 86], "util": [2, 3, 5, 9, 11, 69, 70, 71, 74, 75, 76, 81, 83, 86, 87, 90, 93], "sourc": [2, 3, 6, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 81, 84], "The": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 25, 26, 27, 30, 31, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 58, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 82, 83, 85, 86, 88, 89, 90, 91, 93], "one": [2, 6, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 
31, 32, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 60, 68, 70], "If": [2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 14, 17, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 44, 45, 47, 49, 52, 54, 64, 65, 67, 68, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "ar": [2, 3, 4, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 70, 71, 72, 74, 79, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "unsur": [2, 3], "have": [2, 3, 4, 6, 8, 9, 11, 12, 13, 15, 17, 18, 26, 27, 37, 42, 43, 44, 47, 49, 54, 56, 57, 65, 66, 67, 70, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89, 90, 91, 93], "correctli": [2, 3], "try": [2, 3, 14, 29, 30, 32, 35, 57, 67, 71, 75, 82, 86, 88, 89], "run": [2, 3, 11, 12, 17, 25, 32, 62, 67, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "api_test": [2, 11], "document": [2, 3, 4, 7, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 45, 48, 51, 54, 56, 65, 70, 71, 73, 74, 82, 83, 84, 85, 86, 88, 89, 92], "develop": [2, 3, 4, 13, 68, 70, 81], "websit": [2, 3, 70], "list": [2, 3, 9, 10, 12, 43, 44, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 70, 72, 86, 90, 93], "str": [2, 3, 10, 11, 43, 44, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 71, 86, 88, 89, 90], "name": [2, 3, 4, 8, 9, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 70, 71, 74, 75, 76, 82, 83, 85, 86, 88, 89], "all": [2, 3, 4, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 59, 61, 63, 64, 65, 66, 67, 70, 74, 81, 82, 86, 87], "current": [2, 3, 4, 6, 8, 9, 10, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 68, 
70, 82, 83, 85, 86], "typic": [2, 3, 9, 10, 54, 67], "integ": [2, 3, 9, 43, 44, 47, 49, 50, 86], "These": [2, 3, 9, 10, 42, 43, 47, 54, 67, 68, 79, 81, 84, 87, 92], "mai": [2, 3, 9, 10, 43, 46, 67, 68, 86], "an": [2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 81, 82, 84, 86, 87, 88, 89, 92], "progress": [2, 3, 9], "ad": [2, 3, 8, 9, 11, 39, 41, 49, 65, 66, 67, 70, 77], "remov": [2, 3, 8, 9, 10, 13, 40, 45, 47, 65, 66, 70, 86, 88], "agentid": [2, 3, 55, 56, 57, 58, 59, 60, 61, 62, 63], "num_ag": [2, 3, 9, 12, 72], "length": [2, 3, 8, 9, 12, 46, 47, 65, 67, 72, 90], "possible_ag": [2, 3, 9, 10, 12, 55, 56, 57, 58, 59, 60, 61, 62, 63, 70, 72, 75, 76, 82, 83, 85, 86, 88, 89], "could": [2, 3, 9, 47, 51], "equival": [2, 3, 6, 8, 9], "space": [2, 3, 6, 8, 9, 10, 13, 38, 39, 40, 41, 42, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 68, 70, 71, 75, 76, 83, 86, 88, 89, 90, 93], "cannot": [2, 3, 9, 14, 27, 40, 44, 47, 54, 57, 60, 68, 70], "through": [2, 3, 6, 9, 10, 11, 17, 19, 20, 21, 26, 70, 74, 81], "plai": [2, 3, 9, 10, 18, 25, 32, 39, 43, 47, 49, 52, 68, 75, 84, 86, 88, 89, 90, 91, 93], "max_num_ag": [2, 3, 9], "agent_select": [2, 6, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 86], "correspond": [2, 8, 9, 10, 26, 40, 43, 44, 48, 52, 67], "select": [2, 4, 9, 10, 12, 18, 34, 41, 66, 70, 72, 81, 82], "taken": [2, 9, 31, 47, 49, 76], "bool": [2, 3, 45, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 71], "float": [2, 3, 6, 12, 45, 46, 55, 56, 57, 58, 59, 60, 61, 62, 63, 67, 71, 72, 90], "call": [2, 4, 6, 8, 9, 10, 11, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 81, 86], "kei": [2, 3, 9, 15, 16, 17, 31, 39, 40, 41, 57, 66, 70, 71, 73, 86, 88, 89], "instantan": [2, 9], "after": [2, 6, 9, 10, 11, 16, 17, 18, 20, 
25, 27, 34, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 66, 67, 70, 82, 83, 85, 86, 88, 89], "doe": [2, 6, 8, 9, 11, 14, 18, 22, 27, 28, 29, 33, 34, 35, 47, 82, 86, 87, 88], "directli": [2, 9, 23, 33, 41, 47, 64, 87], "access": [2, 6, 9, 12, 54, 72, 86, 87, 93], "rather": [2, 8, 9, 88, 89], "return": [2, 3, 6, 8, 9, 10, 11, 12, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 62, 65, 66, 67, 70, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "intern": [2, 9, 10, 11, 64], "variabl": [2, 10, 40, 70], "structur": [2, 9, 40, 65, 70, 75, 79, 82], "like": [2, 3, 8, 9, 10, 11, 12, 13, 17, 22, 39, 40, 43, 47, 54, 68, 72, 82, 83, 85, 86], "0": [2, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "first": [2, 4, 6, 8, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 74, 81, 87], "1": [2, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 71, 72, 75, 76, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "second": [2, 9, 11, 14, 18, 22, 27, 28, 29, 33, 34, 35, 43, 46, 52, 54, 66, 86, 87, 88, 91], "n": [2, 8, 9, 12, 40, 43, 46, 49, 56, 61, 71, 72, 75, 76, 83, 86, 90, 93], "nth": [2, 9, 46], "each": [2, 3, 9, 10, 12, 13, 15, 18, 20, 22, 25, 26, 27, 29, 30, 35, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 74, 86, 88, 89, 91], "observation_spac": [2, 3, 9, 10, 12, 39, 40, 41, 43, 44, 46, 47, 49, 52, 55, 56, 57, 58, 59, 60, 61, 62, 
63, 65, 66, 67, 70, 71, 72, 74, 75, 76, 83, 86, 90, 93], "gymnasium": [2, 5, 7, 8, 9, 10, 55, 56, 57, 58, 59, 60, 61, 62, 63, 68, 69, 70, 71, 75, 76, 83, 86, 90, 93], "actiontyp": [2, 3], "accept": [2, 8, 9, 10, 16, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 86], "switch": [2, 9, 23, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70], "control": [2, 8, 9, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 74, 87], "next": [2, 6, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 87], "int": [2, 3, 10, 43, 44, 46, 47, 48, 49, 50, 51, 70, 71, 82, 83, 85, 86, 88, 89, 90], "start": [2, 8, 10, 21, 30, 39, 40, 41, 43, 44, 46, 47, 49, 52, 54, 65, 66, 67, 70, 71, 72, 75, 79, 86, 88, 89], "state": [2, 3, 9, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 81, 83, 85, 87], "obstyp": [2, 3], "function": [2, 4, 5, 6, 9, 10, 11, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 62, 63, 65, 66, 67, 68, 70, 71, 74, 82, 83, 86, 90, 93], "render": [2, 3, 6, 7, 9, 10, 12, 13, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 72, 74, 75, 76, 83, 85, 86, 88, 89, 90, 91], "np": [2, 3, 8, 10, 12, 62, 71, 72, 75, 76, 83, 90, 93], "ndarrai": [2, 3, 47], "specifi": [2, 4, 8, 9, 10, 13, 38, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67, 71], "self": [2, 10, 12, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 71, 72, 74, 75, 76, 82, 83, 84, 85, 86], "mode": [2, 3, 6, 8, 9, 10, 11, 16, 17, 26, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70, 71, 85], "displai": [2, 3, 10, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 54, 65, 66, 67], "window": [2, 3, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 66, 67, 70], "other": [2, 5, 6, 8, 9, 10, 11, 13, 20, 22, 25, 26, 
29, 30, 32, 35, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 59, 60, 61, 63, 65, 66, 67, 70, 81], "rgb_arrai": [2, 3, 9, 11, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70, 72, 85], "numpi": [2, 3, 8, 9, 10, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 71, 72, 75, 76, 83, 90, 93], "arrai": [2, 3, 8, 9, 10, 12, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 72, 82], "outsid": [2, 3, 6, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67, 70, 74], "ansi": [2, 3, 6, 9, 10, 11, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 65, 66, 67], "string": [2, 3, 6, 8, 9, 10, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67], "print": [2, 3, 6, 9, 10, 11, 12, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "specif": [2, 3, 8, 14, 16, 17, 22, 26, 28, 29, 30, 35, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 61, 65, 66, 67, 68, 70, 71, 81], "resourc": [2, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 83, 85], "should": [2, 6, 9, 10, 11, 18, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 71, 72, 74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "releas": [2, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 64, 65, 66, 67, 68], "subprocess": [2, 10, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67], "network": [2, 8, 10, 12, 39, 40, 41, 43, 44, 46, 47, 49, 52, 65, 66, 67, 71, 72, 83, 90, 93], "connect": [2, 10, 39, 40, 41, 42, 43, 46, 47, 49, 52, 65, 66, 67, 70, 84, 87], "In": [3, 6, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 42, 43, 51, 54, 55, 56, 57, 63, 65, 68, 70, 76, 86], "addit": [3, 8, 11, 19, 20, 35, 37, 41, 45, 48, 49, 51, 65, 70], "main": [3, 25, 26, 43, 44, 45, 46, 47, 48, 50, 51, 52, 56, 81, 82], "secondari": 3, "action": [3, 6, 7, 8, 9, 
10, 12, 13, 38, 39, 40, 41, 42, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 69, 70, 71, 72, 74, 75, 79, 81, 83, 85, 87, 90], "via": [3, 8, 9, 10, 13, 38, 42, 54, 64, 65, 68, 70, 74, 93], "parallel_env": [3, 6, 10, 11, 12, 38, 71, 72, 85, 88, 89], "around": [3, 6, 10, 15, 16, 17, 19, 20, 23, 24, 31, 40, 54, 65, 66, 89], "paradigm": [3, 70], "partial": [3, 11, 68, 70], "stochast": 3, "posg": 3, "detail": [3, 45, 48, 51, 54, 87], "similar": [3, 5, 6, 9, 10, 31, 43, 46, 51, 60, 63, 69], "rllib": [3, 70], "multiag": [3, 55, 64, 68], "except": [3, 6, 8, 9, 13, 43, 44, 45, 46, 47, 48, 50, 51, 52, 60, 63, 82, 86, 88, 89], "differ": [3, 6, 11, 24, 27, 42, 45, 47, 49, 54, 58, 59, 66, 81, 82, 88], "between": [3, 5, 6, 8, 10, 22, 23, 27, 30, 37, 40, 41, 54, 70, 81, 82, 92], "convert": [3, 5, 6, 8, 10, 12, 70, 72, 86, 91], "aec": [3, 5, 8, 9, 10, 69, 70, 83, 84, 86, 87, 88, 89], "split": [3, 56], "sequenti": [3, 12, 46, 69, 70, 71, 72, 85], "onli": [3, 6, 10, 11, 13, 17, 18, 19, 20, 26, 30, 40, 42, 45, 47, 49, 54, 59, 65, 70, 71, 86, 87, 92], "previou": [3, 10, 13, 43, 47, 65, 70, 76], "two": [3, 8, 11, 13, 15, 18, 22, 23, 25, 28, 29, 30, 39, 40, 41, 43, 45, 46, 49, 51, 65, 67, 70, 75, 86, 87, 91], "gridworld": 3, "butterfli": [3, 4, 6, 9, 10, 11, 12, 39, 40, 41, 69, 70, 71, 72, 83, 85, 88], "pistonball_v6": [3, 4, 6, 9, 10, 11, 12, 38, 41, 72, 83, 85], "while": [3, 6, 7, 8, 9, 12, 23, 38, 41, 46, 49, 52, 60, 61, 67, 69, 72, 86], "It": [3, 9, 10, 39, 43, 71, 72, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93], "live": [3, 6, 10, 25, 30, 37, 70], "parallel_api_test": [3, 11, 77], "gym": [3, 13, 70, 71], "tupl": [3, 8, 90, 93], "receiv": [3, 9, 14, 20, 22, 26, 27, 28, 29, 30, 33, 35, 36, 39, 45, 47, 48, 49, 51, 52, 54, 62, 65, 66, 67, 82], "dictionari": [3, 9, 10, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70], "obsdict": 3, "And": [3, 10], "frame": [3, 5, 8, 9, 13, 18, 34, 43, 46, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70, 87, 88, 91], "altern": [3, 22], "global": [3, 9, 
39, 40, 41, 59, 61, 67], "view": [3, 39, 47, 71, 73, 83, 85, 92], "appropri": [3, 15, 39], "central": [3, 39], "train": [3, 4, 12, 39, 43, 68, 71, 72, 73, 87, 90, 91, 92], "decentr": [3, 39], "method": [3, 8, 9, 10, 11, 39, 43, 54, 70, 86], "qmix": [3, 39], "take": [3, 4, 5, 6, 8, 9, 10, 11, 13, 18, 27, 34, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 81, 82, 86, 87, 88, 89], "must": [3, 9, 10, 14, 22, 23, 27, 28, 29, 32, 34, 35, 39, 40, 41, 43, 44, 46, 47, 49, 52, 54, 57, 58, 60, 61, 65, 66, 67, 86], "same": [3, 8, 11, 18, 29, 39, 40, 41, 43, 44, 45, 46, 47, 49, 52, 65, 66, 67, 77, 82, 86, 89], "valu": [3, 4, 6, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 86, 88, 89], "ha": [4, 8, 9, 11, 12, 13, 18, 22, 25, 26, 30, 40, 43, 44, 46, 47, 52, 58, 59, 61, 66, 67, 70, 71, 72, 77, 86, 88, 89], "some": [4, 5, 9, 10, 11, 13, 14, 16, 17, 22, 26, 28, 30, 35, 54, 70, 76, 81, 82], "help": [4, 9, 10, 20, 25, 43, 63, 69, 71, 72, 81, 82, 83, 85, 86, 88, 89, 90, 91, 93], "trivial": 4, "design": [4, 10, 12, 20, 68, 71, 72, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "easier": [4, 47, 70, 71], "present": 4, "sum": [4, 14, 18, 22, 24, 25, 27, 28, 29, 33, 34, 35, 47, 54, 61, 82, 86, 88, 89], "over": [4, 8, 10, 13, 25, 27, 34, 37, 39, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 57, 67, 70, 81, 83, 88], "episod": [4, 12, 40, 57, 70, 71, 72, 91], "establish": 4, "simplest": 4, "possibl": [4, 8, 9, 10, 11, 13, 20, 26, 30, 32, 39, 40, 43, 47, 49, 52, 54], "baselin": [4, 8, 13], "random": [4, 8, 11, 12, 13, 38, 39, 41, 42, 47, 54, 64, 67, 70, 71, 72, 75, 76, 82, 86, 88, 89, 90, 91, 92, 93], "average_total_reward": 4, "max_episod": 4, "100": [4, 10, 15, 26, 30, 39, 41, 45, 47, 51, 65, 75, 76, 82, 86, 88, 89], "max_step": 4, "10000000000": 4, "both": [4, 5, 6, 8, 10, 20, 21, 23, 25, 
27, 32, 37, 41, 43, 44, 45, 46, 47, 49, 51, 52, 54, 57, 59, 67, 75, 87], "limit": [4, 42, 48, 67, 70, 86], "number": [4, 6, 8, 10, 11, 12, 13, 14, 15, 22, 26, 27, 28, 30, 32, 33, 35, 40, 41, 43, 46, 47, 48, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 72, 88, 92], "evalu": [4, 8, 11, 13], "when": [4, 6, 8, 9, 10, 11, 12, 13, 16, 17, 20, 23, 24, 26, 27, 30, 31, 33, 36, 37, 39, 40, 44, 47, 54, 63, 65, 66, 67, 70, 72, 86], "hit": [4, 16, 17, 24, 25, 27, 30, 31, 36, 37, 40, 62], "stop": [4, 82, 83, 85], "imag": [4, 8, 13, 41, 43, 70, 85], "along": [4, 8, 39, 43, 74], "chosen": [4, 8, 65, 82], "all_ag": 4, "pass": [4, 8, 10, 11, 22, 23, 26, 40, 46, 70, 74], "true": [4, 8, 9, 11, 13, 16, 17, 30, 39, 40, 41, 45, 47, 54, 65, 66, 67, 70, 71, 75, 76, 82, 85, 86, 88, 89, 90, 93], "work": [4, 8, 20, 25, 30, 45, 47, 48, 51, 54, 65, 67, 70, 77, 81, 86, 87, 88], "directori": [4, 10, 70, 77], "match": [4, 33, 68, 70, 83, 90], "save_dir": 4, "new": [4, 5, 9, 10, 11, 25, 26, 27, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 68, 70, 79, 81, 86], "dure": [4, 9, 11, 70, 90], "desir": 4, "why": 4, "befor": [4, 6, 8, 9, 10, 26, 27, 40, 42, 43, 70, 74], "save_observ": 4, "fals": [4, 8, 9, 10, 11, 12, 13, 17, 30, 39, 40, 41, 45, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 90, 93], "o": [4, 40, 52, 71, 82, 83, 85, 86, 88, 89, 90, 93], "getcwd": 4, "transform": [5, 8, 10, 46], "input": [5, 6, 10, 38, 54, 67], "output": [5, 6, 8, 10, 11, 54, 70, 83], "appli": [5, 6, 8, 10, 41, 59, 61, 65, 66, 67], "convers": [5, 68, 70], "parallel": [5, 8, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 71, 72, 74, 84, 85, 89], "api": [5, 6, 10, 12, 68, 69, 70, 72, 86, 89, 92], "set": [5, 6, 8, 9, 10, 11, 13, 17, 26, 38, 40, 41, 42, 43, 44, 45, 47, 48, 50, 51, 54, 63, 64, 65, 67, 70, 82, 88, 89], 
"conveni": [5, 6, 10, 43], "reusabl": [5, 6, 10], "logic": [5, 6, 10, 12, 47, 70, 71, 72, 74, 79], "supersuit": [5, 10, 12, 13, 70, 71, 72, 83, 85, 87, 88, 89], "commonli": [5, 7, 9], "pre": [5, 8, 70, 87, 88, 90], "process": [5, 8, 13, 25, 54, 70, 74, 87, 88], "stack": [5, 8, 12, 43, 46, 72, 87, 88], "color": [5, 8, 27, 43, 47, 54, 56, 59, 87, 88], "reduct": [5, 87, 88], "compat": [5, 13, 47, 70, 86], "extern": [5, 7, 70, 81], "reinforc": [5, 7, 8, 13, 42, 64, 68, 69, 73, 84, 87, 92], "learn": [5, 7, 8, 12, 13, 27, 38, 39, 41, 42, 56, 57, 58, 61, 64, 68, 69, 71, 72, 73, 84, 86, 87, 88, 89, 90, 92], "enforc": [6, 70], "clip": [6, 8, 12, 71, 72], "out": [6, 11, 12, 15, 32, 39, 54, 68, 71, 72, 74, 85, 86], "bound": [6, 8, 39, 62, 63], "aec_to_parallel": [6, 70], "aec_env": 6, "aecenv": [6, 10], "parallelenv": [6, 10, 74, 75, 76], "case": [6, 9, 65, 74], "exist": [6, 8, 11, 40, 44, 47, 70], "wrap": [6, 9, 10, 45, 48, 51, 86, 91], "parallel_to_aec_wrapp": 6, "origin": [6, 8, 13, 43, 54, 64, 70], "otherwis": [6, 21, 39, 43, 47, 86], "aec_to_parallel_wrapp": 6, "shown": [6, 51, 66, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "below": [6, 10, 11, 13, 24, 25, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "assumpt": [6, 86], "underli": [6, 9], "updat": [6, 8, 10, 70, 71, 90], "end": [6, 12, 13, 15, 19, 20, 30, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 67, 70, 71, 72, 81], "most": [6, 8, 9, 10, 13, 27, 42, 49, 54], "alloc": [6, 10, 65, 67], "scheme": [6, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 70], "within": [6, 8, 9, 13, 14, 22, 28, 29, 33, 35, 39, 45, 47, 67, 74], "timestep": [6, 39, 65, 71, 73, 75, 76, 83], "particular": [6, 9, 10, 20, 26, 40, 63, 70], "parallel_to_aec": [6, 10, 70], "par_env": 6, "aec_to_prallel_wrapp": 6, "effici": 6, "want": [6, 8, 9, 12, 59, 72], "easi": [6, 10, 88], "combin": [6, 18, 41, 43, 45, 54], "manner": [6, 54], "capturestdoutwrapp": [6, 10], "terminateillegalwrapp": 6, 
"illegal_reward": 6, "initi": [6, 10, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 86], "so": [6, 8, 9, 10, 11, 13, 27, 29, 30, 32, 35, 41, 43, 47, 52, 54, 62, 68, 70, 71, 82, 86, 88, 89], "code": [6, 9, 12, 40, 54, 64, 65, 70, 87, 92], "behavior": [6, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 41, 70], "basewrapp": [6, 86], "paramet": [6, 8, 9, 12, 41, 47, 59, 61, 67, 70, 71, 72, 84, 87, 90, 93], "inherit": [6, 10], "player": [6, 10, 13, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 68, 70, 71, 75, 82, 87, 90], "lose": [6, 19, 20, 24, 25, 26, 37, 42, 45, 47, 82], "move": [6, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 60, 66, 67, 70, 75, 76, 82, 86], "give": [6, 10, 11, 14, 21, 22, 28, 29, 35, 41, 42, 47, 59, 63, 65, 70, 82, 87], "captur": [6, 13, 26, 27, 34, 43, 46, 67, 71], "instead": [6, 8, 10, 13, 17, 40, 43, 47, 70], "assertoutofboundswrapp": [6, 10], "assert": [6, 11, 71], "given": [6, 8, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 42, 46, 47, 49, 82], "discret": [6, 8, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 71, 75, 76, 83, 86, 87, 89], "clipoutofboundswrapp": [6, 10], "fit": 6, "continu": [6, 8, 12, 32, 41, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70, 72, 74, 85], "emit": 6, "warn": [6, 8, 10, 11, 70, 82], "orderenforcingwrapp": [6, 10], "check": [6, 11, 21, 43, 48, 50, 51, 57, 71, 75, 76], "attribut": [6, 9, 10, 70], "disallow": 6, "error": [6, 9, 
10, 11, 47, 70, 83, 85], "get": [6, 9, 10, 12, 14, 17, 19, 20, 21, 22, 23, 27, 28, 29, 33, 35, 37, 38, 41, 43, 45, 51, 55, 59, 70, 72, 75, 76, 83, 85, 86, 88, 89, 90], "iter": [6, 9, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52], "without": [6, 9, 10, 20, 65, 70, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "70": [7, 45], "variou": [7, 13, 65, 66, 67, 70], "board": [7, 22, 27, 43, 46, 52, 68, 70, 76], "robot": [7, 65], "team": [7, 18, 22, 29, 35, 54, 57, 71], "compet": 7, "suit": [7, 45, 48, 77], "test": [7, 10, 70, 71, 72, 74, 79, 88, 90, 92], "scenario": [7, 38, 68], "assess": 7, "novel": 7, "social": 7, "situat": 7, "familiar": [7, 18], "unfamiliar": 7, "individu": [7, 12, 65, 72], "cooper": [7, 8, 13, 30, 37, 38, 41, 47, 54, 64, 67, 70, 89], "competit": [7, 13, 14, 23, 26, 28, 30, 31, 32, 33, 37, 42, 54, 67], "decept": 7, "reciproc": 7, "trust": 7, "stubborn": 7, "50": [7, 43, 47, 59, 62, 63, 83, 85, 90, 93], "substrat": 7, "250": [7, 26, 31], "load": [7, 70, 83, 85, 86, 88, 89, 90, 91], "dmcontrolmultiagentcompatibilityv0": 7, "dm_control": 7, "locomot": 7, "dm_soccer": 7, "team_siz": 7, "2": [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 56, 57, 58, 59, 60, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "backgammon": [7, 70], "openspielcompatibilityv0": 7, "pyspiel": 7, "load_gam": 7, "prison": [7, 68, 70, 75, 76], "dilemma": 7, "matrix": [7, 40], "meltingpotcompatibilityv0": 7, "substrate_nam": 7, "prisoners_dilemma_in_the_matrix__arena": 7, "dm_control_multiagent_compat": 7, "openspiel_compat": 7, "meltingpot_compat": 7, "research": [7, 8, 13, 42, 45, 48, 51, 54, 73], "pleas": [7, 8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 72, 
74, 82, 83, 85, 86, 88, 89, 90, 91, 93], "cite": [7, 8, 42, 45, 48, 51, 54, 64], "softwar": [7, 92], "shimmy2022github": 7, "author": [7, 8, 12, 13, 42, 54, 64, 71, 72, 83, 85, 86, 88, 89, 90, 91, 93], "jun": [7, 13], "jet": [7, 12, 16, 72], "tai": 7, "tower": 7, "elliot": [7, 71, 86, 88, 89], "jordan": 7, "terri": [7, 8, 13], "titl": [7, 8, 13, 42, 54, 64], "url": 7, "http": [7, 10, 12, 64, 70, 71, 72, 83, 85, 86, 87, 88, 89, 90, 91, 93], "github": [7, 12, 64, 70, 71, 72, 74, 83, 85, 86, 87, 88, 89, 90, 91, 93], "com": [7, 12, 64, 70, 71, 72, 83, 85, 86, 88, 89, 90, 91, 93], "farama": [7, 10, 13, 38, 68, 70, 86], "foundat": [7, 13, 68, 70], "version": [7, 10, 11, 13, 67, 70, 74, 83, 90, 91, 93], "year": [7, 8, 13, 42, 54, 64], "companion": [8, 10], "collect": [8, 10, 12, 25, 66, 72, 81, 82, 90, 91, 93], "invad": [8, 13], "greyscal": 8, "4": [8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 88, 90, 91, 93], "atari": [8, 9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 70, 71, 72, 73], "space_invaders_v2": [8, 13, 30], "color_reduction_v0": [8, 12, 71, 72, 85], "frame_stack_v1": [8, 12, 13, 71, 72, 85, 88], "full": [8, 10, 13, 44, 47, 51, 66, 67, 70, 71, 88, 90, 92], "clip_reward_v0": [8, 71], "lower_bound": [8, 71], "upper_bound": [8, 71], "popular": [8, 42, 68, 84], "wai": [8, 13, 25, 62, 76, 81], "handl": [8, 9, 10, 13, 19, 20, 23, 24, 25, 26, 36, 37, 40, 65, 70, 74, 76, 88], "signific": [8, 47, 70], "varianc": [8, 12, 72], "magnitud": [8, 67], "especi": [8, 70], "clip_actions_v0": 8, "box": [8, 13, 55, 56, 57, 58, 59, 60, 61, 62, 63, 66, 83], "high": [8, 9, 30, 38, 61, 73, 83, 92], "low": [8, 54, 83], "keep": [8, 16, 25, 39, 54, 65, 67, 74, 77, 86], "simplifi": [8, 68], "graphic": [8, 10, 11], "x": [8, 12, 40, 41, 43, 46, 52, 59, 62, 63, 
65, 66, 67, 71, 72, 82], "y": [8, 13, 40, 43, 46, 65, 67, 86, 88, 89], "3": [8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 45, 46, 47, 48, 49, 50, 51, 52, 56, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 88, 89, 90, 91, 93], "shape": [8, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 83, 90, 93], "fulli": [8, 66, 70], "computation": 8, "intens": 8, "argument": [8, 9, 10, 11, 12, 13, 16, 38, 42, 54, 70, 71, 72, 82, 90], "g": [8, 13, 43, 54, 75, 76, 77], "just": [8, 9, 10, 11, 40], "channel": [8, 12, 43, 54, 57, 66, 70, 72], "much": [8, 30, 41, 70, 86], "faster": [8, 13, 16, 30, 62, 70], "suffici": 8, "dtype_v0": [8, 85], "dtype": [8, 82], "recast": 8, "certain": [8, 9, 54, 70, 76], "uint8": 8, "neural": [8, 43, 54], "float16": 8, "float32": [8, 82, 85], "anyth": 8, "flatten_v0": 8, "flatten": [8, 12, 43, 71, 72, 85], "1d": 8, "frame_skip_v0": [8, 13, 71], "num_fram": 8, "skip": [8, 13, 82], "reappli": 8, "old": [8, 10, 12, 26, 72], "ignor": 8, "accumul": [8, 10, 43], "frameskip": 8, "min_skip": 8, "max_skip": 8, "indic": [8, 11, 12, 44, 47, 52, 66, 67, 72, 82], "rang": [8, 10, 12, 41, 47, 67, 71, 72, 86, 88, 89, 90, 93], "randomli": [8, 26, 54, 57, 66, 75], "singl": [8, 9, 11, 20, 43, 47, 55, 65, 66, 70, 73, 81, 86, 87, 88, 89], "delay_observations_v0": 8, "delai": 8, "been": [8, 9, 13, 26, 30, 43, 44, 47, 52, 66, 67, 70, 71, 86, 88, 89], "zero": [8, 12, 14, 18, 22, 27, 28, 29, 33, 34, 35, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 57, 65, 71, 72, 75, 76, 84, 86], "frame_skip": [8, 13], "prefer": 8, "reaction": 8, "fp": [8, 70], "sticky_actions_v0": [8, 13], "repeat_action_prob": [8, 13], "assign": [8, 86], "probabl": [8, 9, 83, 85], "stick": 8, "request": 8, "prevent": [8, 14, 18, 22, 27, 28, 29, 33, 34, 35, 
63, 76], "predefin": 8, "pattern": 8, "highli": [8, 9, 38, 41, 73], "determinist": [8, 11, 13, 71, 86, 88, 89], "sticki": [8, 27], "cumul": [8, 9], "chanc": 8, "row": [8, 27, 40, 43, 45, 46], "etc": [8, 10, 11, 70, 82], "recommend": [8, 10, 13, 71, 72, 77, 79, 82, 83, 85, 86, 88, 89, 90, 91, 93], "machado": [8, 13], "et": [8, 13], "al": [8, 13], "2018": [8, 13], "revisit": [8, 13], "arcad": [8, 13, 34, 68], "protocol": [8, 13], "open": [8, 10, 13, 17, 68, 73, 84], "problem": [8, 13, 65, 68, 69, 70], "recent": [8, 10, 68], "vector": [8, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 65, 67, 87, 88, 89, 91], "plain": 8, "concaten": 8, "longer": [8, 10, 14, 18, 20, 22, 27, 28, 29, 33, 34, 35, 40, 41, 70], "2d": [8, 46, 67], "3d": 8, "taller": 8, "At": [8, 10, 44, 45, 47, 48, 51, 61, 74], "don": [8, 33, 62, 86], "t": [8, 9, 10, 12, 30, 33, 47, 56, 62, 70, 71, 72, 86], "yet": [8, 49], "fill": [8, 32, 44], "analog": [8, 11], "max_observation_v0": [8, 13, 71], "memori": [8, 21, 26], "result": [8, 10, 11, 40, 54, 64, 67, 70, 71, 86, 88, 90, 91, 93], "becom": [8, 9], "max": [8, 12, 13, 47, 70, 71, 72, 86, 88, 89], "prior": 8, "element": [8, 42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 67, 70], "intermit": 8, "flash": 8, "being": [8, 9, 16, 35, 40, 47, 49, 54, 62, 63, 66, 67, 70, 76], "constant": [8, 10, 11, 40, 88], "due": [8, 11, 13, 41, 43, 44, 46, 47, 49, 52, 68, 70], "peculiar": 8, "consol": [8, 13, 47], "crt": 8, "tv": 8, "openai": [8, 13, 54, 82], "maxandskip": [8, 13], "do": [8, 9, 11, 13, 40, 44, 54, 67, 68, 71, 81, 82, 86, 87], "normalize_obs_v0": [8, 85], "env_min": [8, 85], "env_max": [8, 85], "linearli": 8, "scale": [8, 41, 54, 65, 67, 84], "known": [8, 27, 45, 59], "minimum": [8, 48, 50, 51, 61, 63, 67], "maximum": [8, 13, 40, 47, 65, 67, 70, 71], "defin": [8, 10, 11, 45, 46, 47, 82, 86, 91], "float64": 8, "finit": 8, "wish": [8, 10, 83], "normal": [8, 11, 12, 30, 33, 40, 63, 65, 71, 72], "anoth": [8, 9, 47, 48, 51, 54, 77], "reshape_v0": 8, "reshap": [8, 71], 
"resize_v1": [8, 12, 13, 71, 72, 85, 88], "x_size": [8, 66, 71, 85, 88], "y_size": [8, 66, 71, 85, 88], "linear_interp": 8, "perform": [8, 47, 54, 71, 73, 87, 88, 90], "interpol": 8, "up": [8, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 41, 43, 45, 54, 56, 66, 75, 76, 88, 89], "size": [8, 40, 46, 47, 54, 66, 67, 90, 93], "down": [8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 66, 67, 75, 76], "area": [8, 22, 29, 33, 35, 54, 62, 81], "linear": [8, 12, 71, 72, 85], "avail": [8, 43, 44, 45, 46, 47, 48, 50, 51, 52, 83], "better": [8, 70], "sens": [8, 67], "nan_noop_v0": 8, "nan": [8, 12, 70, 71, 72], "trigger": 8, "oper": [8, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36], "its": [8, 9, 10, 11, 25, 39, 40, 42, 45, 46, 47, 48, 49, 51, 54, 65, 90], "place": [8, 9, 10, 27, 44, 46, 47, 52, 54, 65, 66, 75], "noop": 8, "no_op_act": 8, "nan_zeros_v0": 8, "nan_random_v0": 8, "retriev": [8, 9, 86], "mask": [8, 42, 70, 79, 83, 87], "scale_actions_v0": 8, "__init__": [8, 10, 12, 71, 72, 74, 75, 76, 82, 83, 85], "addition": [8, 26, 41, 64], "agent_indicator_v0": [8, 71], "type_onli": [8, 71], "add": [8, 9, 10, 12, 39, 40, 49, 57, 66, 67, 70, 72, 88, 90], "id": [8, 9, 10, 45, 46, 47, 48, 50, 51, 71, 90], "hot": [8, 47], "append": [8, 9, 47, 82, 85, 86], "increas": [8, 49, 70, 81], "necessari": [8, 10], "treat": [8, 9], "sort": 8, "encod": [8, 43, 46, 47, 48, 49, 52], "madrl": [8, 64], "share": [8, 30, 66, 84, 87], "heterogen": 8, "sinc": [8, 9, 25, 49, 54], "tell": [8, 10, 11, 47, 82, 87], "what": [8, 11, 12, 40, 43, 47, 70, 72], "act": [8, 9, 10, 41, 49, 67, 74, 82, 86, 88, 89], "pars": [8, 82, 90], "_": [8, 12, 71, 72, 83, 86, 88, 89, 90, 93], "identifi": [8, 13, 43, 54, 82], "than": [8, 46, 49, 51, 70, 76, 88, 89], "few": [8, 10, 13, 18, 42, 68, 87], "wa": [8, 10, 12, 13, 20, 41, 47, 64, 68, 70, 72, 76, 82], "introduc": 
[8, 13], "deep": [8, 64, 83, 90, 92, 93], "black_death_v2": 8, "dead": [8, 10, 19, 20, 45, 88], "death": [8, 19, 20, 24, 25, 26, 36, 37, 40, 65, 70, 88], "mechan": 8, "black": [8, 13, 40, 43, 46, 62, 67, 70, 88], "come": [8, 32], "plagu": 8, "fact": [8, 17], "die": [8, 9, 40, 88], "pad_action_space_v0": 8, "pad": [8, 12, 40, 43, 72], "biggest": 8, "per": [8, 13, 18, 24, 47, 67, 71, 88, 90], "pose": 8, "surprisingli": 8, "enabl": [8, 26, 40, 67, 71, 86], "marl": [8, 69, 90, 93], "homogen": 8, "insid": [8, 63], "region": [8, 40], "crop": 8, "pad_observations_v0": 8, "largest": [8, 47, 49, 86], "articl": [8, 13, 42, 54], "microwrapp": 8, "j": [8, 13, 40, 48, 66], "k": [8, 13, 40, 45, 47, 48, 50, 51, 64, 66, 71], "benjamin": [8, 13], "hari": 8, "ananth": 8, "journal": [8, 13, 42, 54], "arxiv": [8, 13, 42, 54], "preprint": [8, 13, 42, 54], "2008": 8, "08932": 8, "2020": [8, 13, 70], "librari": [9, 13, 42, 68, 70, 73, 84, 87], "famili": 9, "problemat": 9, "system": [9, 10, 13, 24, 31, 43, 46, 54, 64], "8": [9, 12, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37, 43, 44, 45, 46, 47, 48, 50, 51, 52, 56, 57, 58, 59, 65, 66, 67, 71, 72, 82, 83, 85, 88, 89, 90, 91, 93], "9": [9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 33, 34, 35, 37, 39, 43, 44, 46, 47, 49, 62, 63, 65, 83, 85, 88, 90, 93], "10": [9, 11, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 34, 39, 40, 46, 47, 54, 56, 59, 62, 65, 67, 83, 85, 88, 89, 90, 91, 93], "11": [9, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 39, 43, 45, 46, 47, 48, 50, 51, 60, 65, 71, 72], "linux": 9, "maco": 9, "pr": [9, 70, 84], "relat": [9, 40, 54, 70], "offici": [9, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 70, 73], "veri": [9, 27, 38, 47, 68, 74], "them": [9, 11, 24, 25, 27, 30, 34, 41, 42, 54, 56, 65, 70, 81, 88], "configur": [9, 38, 47, 88], "cooperative_pong": [9, 39], "ball_spe": [9, 39], "18": [9, 10, 13, 14, 15, 16, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 45, 47, 61], "left_paddle_spe": [9, 39], "25": [9, 13, 45, 47, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 70, 85, 87], "right_paddle_spe": [9, 39], "is_cake_paddl": 9, "max_cycl": [9, 11, 12, 13, 39, 40, 41, 49, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 72, 82, 85, 88], "900": [9, 39, 40, 88], "bounce_random": [9, 39], "interfac": [9, 68, 69, 81, 87, 91], "max_it": 9, "63": [9, 50], "yield": [9, 86], "abl": [9, 11, 16, 21, 26, 35], "comput": [9, 12, 13, 39, 40, 45, 46, 67, 72, 83], "impli": 9, "thu": [9, 41, 58, 90, 91, 93], "fantast": 9, "weird": 9, "becaus": [9, 10, 18, 20, 27, 47, 82], "lower": [9, 40, 41], "level": [9, 30, 32, 37, 61, 77], "won": [9, 10, 33], "Their": 9, "abov": [9, 24, 26, 35, 41, 74, 83], "though": [9, 10, 54, 79], "matter": [9, 11], "factor": [9, 41, 65, 67, 71], "never": [9, 45, 70], "rese": 9, "There": [9, 13, 25, 26, 39, 40, 45, 47, 70, 81], "downstream": 9, "wrapper": [9, 13, 46, 68, 70, 71, 83, 85, 86, 88], "thei": [9, 10, 11, 12, 14, 15, 17, 20, 26, 27, 28, 30, 33, 40, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 57, 61, 62, 65, 66, 67, 70, 72], "special": 9, "circumst": 9, "Not": 9, "featur": [9, 11, 43, 47, 54, 67, 70, 73, 86, 88, 89], "state_spac": [9, 70], "empti": [9, 10, 44, 46], "mean": [9, 12, 14, 18, 20, 22, 27, 28, 29, 33, 34, 35, 37, 40, 41, 44, 49, 54, 56, 65, 66, 70, 71, 72, 74, 90], "condit": [9, 47, 70, 75, 76], "underneath": 9, "layer": [9, 12, 22, 71, 72], "manual": [9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70], "aspect": [9, 30, 32, 37], "alreadi": [9, 10, 26, 66, 76], "itself": [9, 40, 66, 83, 85, 86], "base_env": 9, "knights_archers_zombies_v10": [9, 10, 38, 40, 69, 88], "cours": 9, "di": [9, 19], "entri": [9, 10, 48, 50, 51], "vacuou": 9, "changeabl": 9, "transit": [9, 
12, 72], "point": [9, 14, 15, 16, 17, 18, 21, 22, 24, 25, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 40, 44, 45, 46, 47, 79, 88], "separ": [9, 11, 77, 86], "studi": [9, 13, 43], "encourag": [9, 11, 13, 70], "actor": [9, 12, 54, 68, 71, 72], "hand": [9, 10, 45, 47, 48, 49, 50, 51], "lightweight": [9, 73, 92], "messag": [9, 11, 54, 57, 70], "ensur": [9, 45, 70], "reason": [9, 10, 11, 54], "incorrect": [9, 70], "howev": [9, 24, 29, 30, 47, 49, 82], "small": [9, 39, 43, 47, 63, 65, 70], "amount": [9, 41, 51], "overhead": 9, "raw_env": [9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "constructor": [9, 39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 83], "modul": [9, 12, 71, 72, 85, 86, 88, 89], "overview": 10, "relev": [10, 40], "carefulli": 10, "comment": [10, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "functool": [10, 75, 76], "agent_selector": 10, "num_it": 10, "reward_map": 10, "def": [10, 12, 62, 71, 72, 74, 75, 76, 82, 83, 85, 86, 88, 89, 90, 93], "find": [10, 19, 20, 43], "elsewher": 10, "internal_render_mod": 10, "wide": [10, 40, 68, 69], "vareiti": 10, "user": [10, 11, 12, 38, 72, 73], "strongli": 10, "metadata": [10, 38, 70, 74, 75, 76, 86, 88, 89], "hold": [10, 18, 43, 44, 45, 46, 47, 52, 70, 82, 83, 86], "put": [10, 17], "least": [10, 27, 47, 74, 92], "pretti": 10, "init": [10, 12, 71, 72, 83, 85], "v1": [10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 87], "overridden": 10, "infer": [10, 47], "rais": [10, 41, 48, 50, 51], "player_": 10, "map": [10, 16, 17, 20, 21, 26, 27, 31, 47, 54, 66], "agent_name_map": 10, "zip": [10, 86, 88, 89], "len": [10, 12, 72, 85, 88, 89, 90, 93], "here": [10, 13, 82, 88, 91], "_action_spac": 10, "_observation_spac": 
10, "lru_cach": [10, 75, 76], "memoiz": 10, "reduc": [10, 70], "clock": [10, 38, 43], "line": [10, 12, 40, 52, 67, 70, 72, 87, 92], "disabl": [10, 11, 70], "cach": [10, 11, 70], "maxsiz": [10, 75, 76], "org": [10, 86], "understand": [10, 71, 72, 74, 75, 82, 83, 85, 86, 88, 89, 90, 91, 93], "logger": [10, 90], "agent1": 10, "agent2": 10, "format": [10, 70, 82, 91], "sane": 10, "necessarili": 10, "date": 10, "data": [10, 12, 47, 65, 70, 71, 72, 90, 91, 93], "kept": 10, "_cumulative_reward": 10, "issu": [10, 11, 27, 40, 70, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "num_mov": 10, "cyclic": 10, "_agent_selector": 10, "_was_dead_step": 10, "had": [10, 66], "account": [10, 31], "again": [10, 34], "is_last": 10, "until": [10, 13, 44, 49, 55, 56, 57, 58, 59, 60, 61, 62, 63, 81], "_clear_reward": 10, "_accumulate_reward": 10, "from_parallel": [10, 70], "count": [10, 17, 43, 45, 66], "agent_1": [10, 56, 59, 61, 63], "item_1": 10, "agent_2": [10, 61], "item_2": 10, "env_trunc": 10, "still": [10, 26, 40, 41, 49], "back": [10, 17, 21, 25, 30, 43, 46], "forth": [10, 30], "wrapped_env": 10, "debug": [10, 11, 55, 83, 85], "doc": [10, 70, 71, 74, 82], "agent_3": 10, "deprecatedmodul": 10, "guid": [10, 70, 74], "awai": [10, 29, 35, 58], "obsolet": 10, "toward": [10, 29, 35, 36, 41, 43, 65], "ones": [10, 43, 76], "tri": [10, 11], "knights_archers_zombies_v0": 10, "deprecated_modul": 10, "knights_archers_zombi": [10, 40], "v0": [10, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67], "v10": [10, 40], "deprecatedenv": 10, "now": [10, 40, 68, 70, 75, 77, 82], "complianc": 11, "own": [11, 12, 21, 27, 39, 40, 46, 47, 54, 69, 72, 74], "sure": [11, 77], "consist": [11, 43, 54, 70, 75], "num_cycl": [11, 77], "1000": [11, 37, 50, 51, 90, 93], "verbose_progress": 11, "As": [11, 38, 40, 47, 48, 64, 70], "simpli": [11, 49, 70, 82], "complet": [11, 47, 66, 70, 77, 90], "properli": 11, 
"reproduc": [11, 82], "actual": [11, 82], "seed_test": 11, "parallel_seed_test": 11, "env_fn": [11, 86, 88, 89], "test_kept_st": 11, "parallel_env_fn": 11, "long": [11, 12, 15, 27, 32, 71, 72, 81], "determin": [11, 13, 49], "fail": [11, 65, 70], "physic": [11, 31, 41, 65, 68, 70], "bare": 11, "detect": [11, 67, 86], "enough": 11, "correct": [11, 60, 70], "off": [11, 13, 17, 25, 39, 41, 65, 67, 71, 87], "usag": [11, 70, 92], "max_cycles_test": 11, "crash": [11, 40], "produc": [11, 13, 67, 87], "render_test": [11, 70], "custom_test": 11, "non": [11, 13, 38, 40, 42, 46, 70, 87], "svg": 11, "lambda": [11, 71, 82, 83, 85, 90, 91], "render_result": 11, "regress": 11, "5": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 58, 59, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 82, 83, 85, 88, 90, 91, 93], "inspect": [11, 82], "performance_benchmark": 11, "visual": [11, 38, 41, 87, 88], "intend": [11, 55, 68, 77, 87], "huge": 11, "bug": [11, 39, 40, 43, 44, 45, 46, 47, 48, 50, 51, 52, 64, 65, 66, 67, 70], "good": [11, 17, 54, 56, 57, 58, 62, 63, 70, 86], "test_save_ob": 11, "basic": [12, 26, 72, 75, 92], "ppo": [12, 73, 83, 84, 87], "pistonbal": [12, 38, 70, 72, 73, 83, 84], "inspir": [12, 68, 72], "cleanrl": [12, 70, 90, 93], "exceedingli": [12, 72], "log": [12, 71, 72, 83, 92, 93], "weight": [12, 41, 59, 61, 71, 72, 73], "save": [12, 46, 72, 85, 86, 88, 89, 90, 93], "intent": [12, 72], "rel": [12, 27, 40, 43, 54, 59, 61, 63, 65, 72, 77], "clean": [12, 54, 72, 87], "200": [12, 30, 37, 47, 65, 72, 83], "refer": [12, 45, 48, 51, 54, 65, 67, 69, 70, 72, 81, 86], "jjshoot": [12, 70, 72], "torch": [12, 71, 72, 83, 85, 90, 93], "nn": [12, 71, 72, 83, 85], "optim": [12, 20, 38, 41, 65, 66, 71, 72, 81, 85, 86, 88, 89, 90, 93], "distribut": [12, 65, 66, 67, 71, 72, 84], "categor": [12, 71, 72], "num_act": [12, 49, 72], "super": [12, 71, 72, 82, 86], "_layer_init": [12, 
72], "conv2d": [12, 71, 72, 85], "32": [12, 45, 71, 72, 85], "maxpool2d": [12, 72], "relu": [12, 71, 72, 85], "64": [12, 71, 72, 85, 90, 93], "128": [12, 71, 72, 85, 90, 93], "512": [12, 40, 71, 72, 85], "std": [12, 71, 72], "01": [12, 66, 67, 70, 71, 72], "critic": [12, 41, 54, 71, 72], "sqrt": [12, 71, 72], "bias_const": [12, 71, 72], "orthogonal_": [12, 71, 72], "constant_": [12, 71, 72], "bia": [12, 71, 72], "get_valu": [12, 71, 72], "255": [12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 71, 72], "get_action_and_valu": [12, 71, 72], "hidden": [12, 21, 54, 71, 72, 83, 90], "logit": [12, 71, 72, 83], "prob": [12, 71, 72], "log_prob": [12, 71, 72], "entropi": [12, 71, 72], "batchify_ob": [12, 72], "ob": [12, 71, 72, 82, 83, 85, 86, 88, 89], "devic": [12, 71, 72, 90, 93], "pz": [12, 72], "style": [12, 34, 43, 46, 72], "batch": [12, 71, 72, 87, 90, 93], "axi": [12, 72], "transpos": [12, 72], "height": [12, 40, 41, 72], "width": [12, 40, 72], "tensor": [12, 71, 72, 83], "batchifi": [12, 72], "unbatchifi": [12, 72], "cpu": [12, 71, 72, 86, 88, 90, 93], "enumer": [12, 65, 67, 71, 72], "__name__": [12, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "__main__": [12, 71, 72, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "algo": [12, 71, 72], "param": [12, 71, 72], "cuda": [12, 71, 72, 90, 93], "is_avail": [12, 71, 72, 90, 93], "ent_coef": [12, 71, 72, 88], "vf_coef": [12, 71, 72, 88], "clip_coef": [12, 71, 72], "gamma": [12, 71, 72, 85, 88, 90], "99": [12, 71, 72, 85, 88], "batch_siz": [12, 71, 72, 88, 89, 90, 93], "stack_siz": [12, 72], "frame_s": [12, 72], "125": [12, 41, 47, 72, 85], "total_episod": [12, 72], "setup": [12, 70, 74], "observation_s": [12, 72], "learner": [12, 72], "adam": [12, 71, 72, 90, 93], "lr": [12, 71, 72, 85, 90, 93], "001": [12, 72], "ep": [12, 71, 72, 90], "1e": [12, 65, 71, 72, 86, 89, 90, 93], "storag": [12, 71, 72], "end_step": [12, 72], "total_episodic_return": [12, 72], "rb_ob": 
[12, 72], "rb_action": [12, 72], "rb_logprob": [12, 72], "rb_reward": [12, 72], "rb_term": [12, 72], "rb_valu": [12, 72], "no_grad": [12, 71, 72], "next_ob": [12, 71, 72], "num_step": [12, 71, 72], "rollov": [12, 72], "logprob": [12, 71, 72], "term": [12, 27, 43, 72, 82], "trunc": [12, 72, 82], "reach": [12, 40, 44, 68, 72], "break": [12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 42, 70, 71, 72, 86, 88, 89], "bootstrap": [12, 71, 72], "done": [12, 25, 26, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67, 70, 71, 72, 81, 91], "rb_advantag": [12, 72], "zeros_lik": [12, 71, 72], "revers": [12, 71, 72], "delta": [12, 71, 72], "rb_return": [12, 72], "b_ob": [12, 71, 72], "start_dim": [12, 72], "end_dim": [12, 72], "b_logprob": [12, 71, 72], "b_action": [12, 71, 72], "b_return": [12, 71, 72], "b_valu": [12, 71, 72], "b_advantag": [12, 71, 72], "b_index": [12, 72], "arang": [12, 71, 72], "clip_frac": [12, 72], "repeat": [12, 27, 47, 72, 81], "shuffl": [12, 71, 72], "batch_index": [12, 72], "newlogprob": [12, 71, 72], "logratio": [12, 71, 72], "ratio": [12, 41, 70, 71, 72], "exp": [12, 62, 71, 72], "calcul": [12, 47, 70, 71, 72], "approx_kl": [12, 71, 72], "joschu": [12, 71, 72], "net": [12, 35, 71, 72, 90, 93], "blog": [12, 71, 72, 87], "kl": [12, 71, 72], "approx": [12, 71, 72], "html": [12, 53, 71, 72, 86, 88, 89], "old_approx_kl": [12, 71, 72], "ab": [12, 71, 72], "item": [12, 67, 71, 72, 82], "advantaeg": [12, 72], "advantag": [12, 71, 72], "loss": [12, 42, 47, 71, 72, 86], "pg_loss1": [12, 71, 72], "pg_loss2": [12, 71, 72], "clamp": [12, 71, 72, 83], "pg_loss": [12, 71, 72], "v_loss_unclip": [12, 71, 72], "v_clip": [12, 71, 72], "v_loss_clip": [12, 71, 72], "v_loss_max": [12, 71, 72], "v_loss": [12, 71, 72], "entropy_loss": [12, 71, 72], "zero_grad": [12, 71, 72], "backward": [12, 40, 71, 72], "y_pred": [12, 71, 72], "y_true": [12, 71, 72], "var_i": [12, 71, 72], "var": [12, 71, 72], 
"explained_var": [12, 71, 72], "f": [12, 40, 43, 71, 72, 75, 76, 82, 86, 88, 89, 90, 93], "fraction": [12, 72], "explain": [12, 72], "THE": [12, 72], "eval": [12, 72, 88, 89, 90], "instrument": 13, "modern": [13, 68], "hope": 13, "basketbal": [13, 18], "pong": [13, 38, 70], "combat": [13, 31], "plane": [13, 43, 44, 45, 46, 52], "tank": 13, "doubl": 13, "dunk": 13, "entomb": [13, 19, 20], "flag": [13, 65, 82], "foozpong": [13, 70], "ic": 13, "hockei": 13, "joust": 13, "mario": 13, "bro": 13, "maze": [13, 17, 19, 20], "craze": 13, "othello": 13, "quadrapong": [13, 70], "war": 13, "surround": [13, 41, 46, 66], "tenni": 13, "video": [13, 14, 22, 28, 29, 35, 68, 70, 71, 73], "checker": [13, 70], "volleybal": 13, "warlord": [13, 37], "wizard": 13, "Of": 13, "wor": 13, "uniqu": [13, 38, 42, 54, 64], "rom": [13, 70, 71], "autorom": [13, 71], "path": [13, 16, 32, 40, 70, 71, 77, 83, 85, 86, 88, 89, 90, 93], "rom_path": [13, 71], "launch": [13, 38, 42, 54, 64, 86, 88, 89], "coupl": 13, "variat": [13, 48, 51], "four": [13, 22, 29, 35, 36, 40, 42, 54, 70, 74, 75, 84, 87], "extens": 13, "notabl": 13, "theoret": [13, 68], "memor": 13, "precis": [13, 15, 18], "sequenc": [13, 40, 44, 45, 81], "maxim": [13, 27, 30, 64, 82], "score": [13, 14, 15, 16, 17, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 33, 35, 37, 45, 47, 86], "ideal": 13, "sticky_act": 13, "approach": [13, 43], "flicker": 13, "sprite": 13, "hardwar": 13, "restrict": 13, "knight": [13, 38, 43, 70, 87], "sometim": [13, 51, 54], "even": 13, "pixel": [13, 39, 40, 41, 54, 70], "wise": 13, "space_invaders_v1": 13, "deal": 13, "less": [13, 30, 88], "downscal": 13, "84": [13, 45, 71, 85, 88], "everyth": [13, 40], "screen": [13, 20, 30, 32, 39, 40, 41, 67, 70], "despit": 13, "atari_gam": 13, "obs_typ": 13, "rgb_imag": 13, "full_action_spac": 13, "100000": [13, 83], "auto_rom_install_path": 13, "three": [13, 43, 51, 64], "rgb": [13, 41, 70], "grayscale_imag": 13, "grayscal": 13, "ram": 13, "1024": 13, "bit": [13, 47, 57], 
"compris": 13, "duplic": 13, "leav": [13, 26, 30, 32], "tool": [13, 69, 81], "re": [13, 70, 86], "bin": 13, "multiplay": [13, 68], "were": [13, 39, 40, 46, 47, 54, 70], "terry2020arcad": 13, "2009": 13, "09341": 13, "bellemare13arcad": 13, "bellemar": 13, "m": [13, 40, 51, 86, 88, 89], "naddaf": 13, "veness": 13, "bowl": 13, "platform": [13, 92], "artifici": 13, "intellig": [13, 64], "2013": 13, "month": 13, "volum": 13, "47": 13, "page": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70], "253": 13, "279": [13, 47], "machado2018revisit": 13, "marlo": 13, "c": [13, 43], "marc": 13, "talviti": 13, "erik": 13, "joel": 13, "hausknecht": 13, "matthew": 13, "michael": 13, "61": [13, 50], "523": 13, "562": 13, "part": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 70, 75, 84], "read": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70], "basketball_pong_v3": 14, "ye": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "No": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 67, 70, 86, 91], "first_0": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "second_0": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "210": [14, 15, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 47], "160": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "ball": [14, 22, 28, 29, 33, 35, 36, 39, 41, 63, 70], "oppon": [14, 15, 16, 17, 19, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 44, 45, 46, 48, 49, 66, 90], "hoop": 14, "But": [14, 32, 54], "side": [14, 20, 22, 27], "court": 14, "serv": [14, 22, 28, 29, 33, 35, 70], "timer": [14, 18, 22, 27, 28, 29, 33, 34, 35, 70], "indefinit": [14, 18, 22, 27, 28, 29, 33, 34, 35], "stall": [14, 18, 22, 27, 28, 29, 33, 34, 35, 70], "pure": [14, 18, 22, 27, 28, 29, 33, 34, 35, 92], "olymp": [14, 22, 28, 29, 35], "common": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 47, 49, 71, 81, 86, 90, 93], "describ": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 63, 66, 74], "basketball_pong": [14, 70], "num_play": [14, 22, 28, 35, 48, 50, 51, 70, 82], "choos": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 49, 54, 67, 81], "6": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 40, 41, 43, 44, 45, 46, 47, 48, 49, 52, 63, 65, 67, 71, 75, 76, 82, 88, 90, 91, 93], "fire": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40], "right": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 42, 43, 54, 65, 66, 75, 76], "left": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 46, 47, 54, 65, 66, 70, 75, 76], "v3": [14, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 66, 67], "v2": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 
37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 67], "entir": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 45, 70], "kwarg": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 83, 85, 86], "boxing_v2": 15, "17": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 40, 47], "adversari": [15, 16, 17, 18, 54, 57, 58, 62, 63], "respons": 15, "minut": [15, 88, 89], "1200": 15, "duke": 15, "ring": 15, "punch": 15, "success": [15, 18, 54], "jab": 15, "power": [15, 19, 20, 81], "ko": 15, "whenev": [15, 16, 17, 19, 31], "penal": [15, 16, 17, 21, 31, 45, 61, 62, 67, 86], "upright": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "7": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37, 43, 44, 45, 46, 47, 48, 49, 52, 65, 66, 67, 71, 75, 76, 90], "upleft": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "downright": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "downleft": [15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 37], "12": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 39, 43, 45, 47, 50, 51, 65], "13": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 43, 45, 47, 50, 51, 65, 71, 72, 83, 85, 88], "14": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 39, 40, 45, 47, 48, 50, 51, 60, 62, 65, 66], "15": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 47, 49, 65], "16": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 40, 47, 62, 66, 71], "minim": [15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 31, 33, 37, 47, 70, 74, 91, 93], "combat_jet_v1": 16, "256": [16, 71, 88, 89], "posit": [16, 17, 24, 31, 33, 40, 41, 43, 47, 52, 54, 55, 56, 63, 65, 67, 70, 86, 89], "track": [16, 47, 71, 73, 74, 86, 87], "complex": [16, 47, 81], "movement": [16, 41, 54, 76], "fly": 16, "flight": 16, "direct": [16, 21, 39, 40, 41, 43, 
54, 67, 75], "speed": [16, 39, 65, 67, 88, 89, 92], "bullet": [16, 17, 31, 37, 68], "combat_plane_v2": 16, "game_vers": [16, 26, 28], "guided_missil": 16, "bi": 16, "whether": [16, 40, 43, 44, 45, 46, 47, 48, 50, 51, 52, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 71, 81], "missil": 16, "fix": [16, 19, 20, 24, 25, 26, 27, 29, 34, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 50, 51, 52, 54, 64, 65, 66, 67, 70, 71, 83, 85], "combat_plan": 16, "combat_tank_v3": 17, "predict": [17, 31, 33, 83, 86, 88, 89], "blast": 17, "obstacl": [17, 32, 54, 62, 63, 66, 67], "potenti": [17, 25, 70], "combat_tank_v2": 17, "has_maz": 17, "is_invis": 17, "billiard_hit": 17, "field": 17, "invis": [17, 20, 30], "unless": [17, 26, 86, 88], "wall": [17, 20, 26, 32, 41, 66, 76], "bounc": [17, 41], "billiard": 17, "combat_tank": 17, "double_dunk_v3": 18, "stage": [18, 20], "difficult": [18, 30, 68, 86], "strategi": [18, 27, 30, 32, 34, 52], "choic": [18, 49, 90], "120": [18, 41], "begin": [18, 47, 48, 51, 57], "fan": 18, "shot": 18, "double_dunk": 18, "entombed_competitive_v3": 19, "race": [19, 26, 68], "longest": [19, 39], "quickli": [19, 20, 21, 25], "navig": [19, 20, 60, 67], "constantli": [19, 20], "stuck": [19, 20], "easili": [19, 20], "yourself": [19, 20, 26], "escap": [19, 20, 75], "rare": [19, 20], "danger": [19, 20], "zombi": [19, 20, 38, 70, 87], "lurk": [19, 20], "avoid": [19, 20, 30, 32, 61, 67, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "prematur": [19, 20, 24, 25, 26, 36, 37, 40, 65], "entombed_competit": 19, "entombed_cooperative_v3": 20, "averag": [20, 56, 59, 65, 67, 70, 89], "total": [20, 39, 40, 46, 47, 59, 63, 65, 70, 71, 86], "23": [20, 43, 47, 65, 71, 72, 83, 85, 86, 88, 89, 90, 91, 93], "explor": [20, 83], "teammat": 20, "far": [20, 47, 58, 61, 82], "powerup": 20, "coordin": [20, 22, 29, 35, 38, 41, 43, 46, 63, 66, 67], "opposit": 20, "appear": [20, 41], "symmetr": 20, "effect": [20, 21, 76], "halv": 20, "ident": [20, 26], "divid": [20, 41, 86], "section": [20, 43, 44, 
45, 46, 47, 48, 50, 51, 52, 66], "immedi": [20, 65], "life": [20, 24, 25, 37, 47], "entombed_coop": [20, 70], "flag_capture_v2": 21, "battl": [21, 22, 29, 35, 37], "travel": 21, "squar": [21, 40, 43], "bomb": [21, 30], "sent": [21, 57], "locat": [21, 27, 44, 46, 63, 67, 77], "hint": [21, 47, 70, 92], "distanc": [21, 40, 54, 55, 56, 58, 59, 61, 63, 67], "search": 21, "narrow": 21, "flag_captur": 21, "foozpong_v3": 22, "third_0": [22, 29, 35, 36], "fourth_0": [22, 29, 35, 36], "past": [22, 23, 28, 33, 47], "defend": [22, 29, 36], "tradit": [22, 81], "foozbal": 22, "paddl": [22, 29, 39], "goal": [22, 25, 27, 32, 33, 41, 54, 82], "succe": 22, "ice_hockey_v2": 23, "offens": 23, "puck": 23, "defens": 23, "On": 23, "front": [23, 40, 65, 76], "rapid": 23, "maneuv": 23, "ice_hockei": 23, "joust_v3": 24, "mix": [24, 25, 54], "involv": [24, 51, 81], "unforgiv": 24, "world": [24, 40, 54, 66], "care": [24, 26, 37], "essenti": [24, 37], "well": [24, 37, 40, 47, 49, 65, 88], "awar": [24, 37], "npc": [24, 37], "varieti": [24, 69], "wave": 24, "enemi": 24, "expect": [24, 86, 90], "earn": [24, 25], "3000": 24, "mario_bros_v3": 25, "plan": [25, 26, 32], "kick": 25, "pest": 25, "floor": [25, 35], "flip": [25, 27], "knock": [25, 45], "onto": [25, 35], "800": [25, 41, 43, 46, 49, 70], "opportun": [25, 37], "collabor": 25, "steal": 25, "activ": [25, 41, 88], "firebal": 25, "gain": 25, "20000": [25, 90], "bonu": [25, 30], "coin": 25, "wafer": 25, "mario_bro": 25, "maze_craze_v3": 26, "Its": 26, "win": [26, 27, 34, 42, 48, 49, 90], "robber": 26, "travers": 26, "exit": [26, 33, 62, 86, 88, 89], "confus": 26, "block": [26, 54, 62, 76], "disappear": 26, "tabl": [26, 49, 65, 67], "inaccuraci": 26, "blockad": 26, "maze_craz": 26, "visibilty_level": 26, "visibl": [26, 54], "othello_v3": 27, "piec": [27, 34, 43, 46, 70, 81], "diagon": [27, 43, 44, 52], "trap": 27, "rule": [27, 43, 82], "greedi": 27, "heurist": 27, "poor": 27, "interest": 27, "cursor": [27, 34], "fairli": 27, "awhil": 27, 
"regist": [27, 34, 83], "plu": [27, 41, 50, 67], "greedili": 27, "bad": [27, 70], "successfulli": [27, 44], "solv": 27, "think": [27, 74, 75], "token": [27, 44, 47], "talli": 27, "noth": [27, 34, 40, 54, 82], "auto": [27, 68], "pong_v3": [28, 71], "quadrapong_v4": 29, "belong": [29, 67], "v4": [29, 34, 39, 40, 41, 43, 45, 46, 47, 48, 50, 51, 65, 66, 67], "ship": 30, "togeth": [30, 47, 54, 67], "clear": [30, 37], "alien": 30, "30": [30, 65, 66, 67, 83, 85], "fli": 30, "across": [30, 41, 70, 73], "worth": 30, "sabotag": 30, "somehow": 30, "alternating_control": 30, "moving_shield": 30, "zigzaging_bomb": 30, "fast_bomb": 30, "invisible_invad": 30, "hoard": 30, "abil": 30, "forev": [30, 67, 70], "eventu": 30, "shift": 30, "anywai": 30, "shield": 30, "reliabl": [30, 70, 87], "protect": 30, "space_invad": 30, "space_war_v2": 31, "advanc": [31, 47, 73], "acceler": [31, 67, 70], "momentum": [31, 41], "space_war": 31, "surround_v2": 32, "trail": 32, "behind": 32, "slowli": 32, "conserv": 32, "master": [32, 70, 71, 86, 88, 89], "higher": [32, 35, 40, 41], "liter": 32, "room": 32, "forc": [32, 65], "dummi": [32, 75, 76, 83, 85], "tennis_v3": 33, "let": [33, 87], "unlik": [33, 43], "video_checkers_v4": 34, "jump": [34, 35], "hover": 34, "press": [34, 40], "button": 34, "held": 34, "multipl": [34, 67, 68, 70, 82], "video_check": 34, "volleyball_pong_v2": 35, "affect": [35, 47, 54], "motion": [35, 54], "volleyball_pong_v3": 35, "volleyball_pong": [35, 70], "warlords_v3": 36, "man": 36, "stand": [36, 44], "fortress": 36, "fall": [36, 44, 65], "wizard_of_wor": [36, 37], "wizard_of_wor_v3": 37, "against": [37, 86, 88], "2500": 37, "ncp": 37, "benefit": 37, "attack": [37, 40], "archer": [38, 70, 87], "challeng": [38, 39, 68], "pygam": [38, 40, 41, 68, 70], "degre": 38, "emerg": [38, 41, 54], "achiev": [38, 41, 54, 65], "respect": [38, 40, 43, 44, 45, 46, 48, 59, 67], "manual_polici": [38, 70], "manualpolici": 38, "tick": 38, "render_fp": [38, 70], "wasd": 38, "cooperative_pong_v4": 
39, "paddle_0": 39, "paddle_1": 39, "280": [39, 47], "480": 39, "560": [39, 41], "960": [39, 70], "object": [39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "goe": [39, 45, 65], "edg": [39, 40, 43, 65], "collis": [39, 61, 62, 63, 65, 70], "elast": [39, 41], "alwai": [39, 40, 41, 43, 54, 59, 61, 70], "center": [39, 40, 66], "littl": 39, "tier": 39, "cake": 39, "half": [39, 49, 51], "stai": [39, 41, 66, 88], "max_reward": 39, "off_screen_penalti": 39, "w": [39, 40, 41, 43], "arrow": [39, 40, 66], "cake_paddl": 39, "wed": 39, "angl": [39, 40, 65], "remain": [39, 47], "unchang": 39, "neg": [39, 45, 56, 62, 67, 86], "penalti": [39, 45, 65, 70], "v5": [39, 40, 41, 43, 46, 47, 51, 65, 70], "teleport": 39, "glitch": 39, "occasion": 39, "redesign": [39, 70], "misc": [39, 40, 41, 65, 66, 67, 70], "arg": [39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70, 71, 83, 85, 90], "pickl": [39, 40, 41, 43, 44, 45, 46, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 70], "archer_0": [40, 88], "archer_1": 40, "knight_0": 40, "knight_1": 40, "720": 40, "1280": 40, "walk": [40, 79], "border": [40, 41, 54], "bottom": [40, 44, 54, 75, 77], "unpredict": 40, "rotat": [40, 65], "clockwis": 40, "counter": 40, "forward": [40, 76, 83, 85], "kill": [40, 88], "swing": 40, "mace": 40, "arc": 40, "head": [40, 65], "straight": 40, "collid": [40, 61, 67], "vector_st": 40, "x5": 40, "num_arch": 40, "num_knight": 40, "num_sword": 40, "max_arrow": 40, "max_zombi": [40, 88], "someth": [40, 83, 85], "sword": 40, "entiti": [40, 67, 71], "breakdown": 40, "corner": [40, 46, 75], "final": [40, 43, 47, 49, 86, 90], "unit": [40, 41, 54, 66], "absolut": [40, 54, 77], "typemask": 40, "prepend": 40, "use_typemask": 40, "whole": 40, "x11": 40, "experiment": 40, "sequence_spac": 40, "bodi": 40, "512x512": 40, "word": 40, "16x16": 40, "d": [40, 41, 43, 68, 86, 88, 89], "shoot": 40, "q": [40, 48, 83, 90, 93], "spawn": [40, 41, 
66, 87, 88], "l": [40, 71], "stab": 40, "u": 40, "spawn_rat": 40, "20": [40, 41, 43, 47, 48, 57, 63, 67, 82, 85, 86], "killable_knight": 40, "killable_arch": 40, "pad_observ": 40, "line_death": 40, "rate": [40, 71, 90], "form": [40, 47, 54, 66, 67, 70], "40x40": 40, "grid": [40, 44, 52, 66, 68, 75, 76], "touch": [40, 66, 67], "soon": 40, "vectoriz": 40, "v9": 40, "rewrit": [40, 70], "numer": [40, 70], "v8": [40, 65], "cleanup": [40, 70], "sever": [40, 54], "v7": [40, 65], "minor": [40, 41, 54, 70], "v6": [40, 41, 43, 51, 65, 70], "bump": [40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 70, 76], "pymunk": [40, 41, 70, 89], "piston_0": 41, "piston_1": 41, "piston_19": 41, "457": 41, "880": [41, 70], "vertic": [41, 44, 52, 67], "piston": [41, 70], "proport": [41, 66, 67], "accordingli": 41, "overal": 41, "balanc": [41, 46, 49], "local": [41, 59, 61, 67, 70], "time_penalti": [41, 85], "local_ratio": [41, 59, 61, 65, 67, 70], "local_reward": 41, "global_reward": 41, "chipmunk": 41, "engin": [41, 68, 70, 87, 92], "realist": 41, "angri": 41, "bird": 41, "rightmost": [41, 65], "n_piston": [41, 85], "random_drop": [41, 85], "random_rot": [41, 85], "ball_mass": [41, 85], "75": [41, 47, 65, 67, 85], "ball_frict": [41, 85], "ball_elast": [41, 85], "real": [41, 82], "angular": [41, 65], "mass": 41, "friction": 41, "column": [41, 43, 44, 45, 70], "imprecis": [41, 70], "refactor": [41, 67, 70], "gin": [42, 86], "rummi": [42, 86], "hanabi": [42, 70, 86], "leduc": [42, 83], "holdem": [42, 70, 83], "rp": [42, 49, 70], "texa": [42, 48, 70, 86], "tictacto": [42, 52, 70], "mostli": [42, 54], "texas_holdem_v4": [42, 50], "sole": 42, "commun": [42, 47, 48, 50, 51, 54, 57, 59, 60, 61, 63], "legal": [42, 82], "binari": [42, 43, 44, 45, 46, 47, 48, 50, 51, 52, 70], "equal": [42, 45, 47, 65], "rlcard": [42, 45, 48, 50, 51, 70], "zha2019rlcard": 42, "toolkit": 42, "card": [42, 45, 47, 48, 50, 51, 68], "zha": 42, "daochen": 42, "lai": 42, "kwei": 42, "herng": 42, "cao": 42, "yuanpu": 42, 
"songyi": 42, "wei": 42, "ruzh": 42, "guo": 42, "junyu": 42, "hu": 42, "xia": 42, "1910": 42, "04376": 42, "2019": 42, "player_0": [43, 44, 45, 47, 48, 49, 50, 51, 83], "player_1": [43, 44, 45, 47, 48, 49, 50, 51, 52, 83], "4672": 43, "111": 43, "oldest": 43, "ai": 43, "alphazero": [43, 46], "usual": [43, 44, 45, 46, 47, 48, 50, 51, 52, 74], "8x8": 43, "castl": 43, "white": [43, 46, 47, 66, 70], "queensid": 43, "kingsid": 43, "th": [43, 90], "convolut": 43, "One": [43, 47, 56, 81], "index": [43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 67], "spot": 43, "leelachesszero": 43, "en": [43, 86, 88, 89], "passant": 43, "vulner": 43, "pawn": [43, 76], "8th": 43, "5th": 43, "19": [43, 45, 46, 47, 48, 58, 83, 85], "seen": [43, 63], "fold": [43, 48, 50, 51], "repetit": 43, "latest": 43, "occupi": 43, "orient": [43, 54, 65], "vari": [43, 70, 88, 89], "board_histori": 43, "maintain": [43, 68, 70], "king": [43, 45, 48], "1st": [43, 46, 47], "simpler": [43, 86], "layout": 43, "nevertheless": 43, "incorpor": 43, "facilit": 43, "capabl": [43, 69], "profici": 43, "whose": [43, 44, 45, 46, 47, 48, 50, 51, 52, 67], "alphachesszero": 43, "8x8x73": 43, "dimension": [43, 86, 88, 89], "pick": [43, 45], "56": [43, 50], "queen": [43, 48], "eight": 43, "compass": 43, "ne": 43, "se": 43, "sw": 43, "nw": 43, "underpromot": 43, "bishop": 43, "rook": 43, "seventh": 43, "rank": [43, 45, 47], "promot": 43, "73": 43, "express": 43, "notat": 43, "signifi": 43, "g1": 43, "h": [43, 86, 88, 89], "winner": [43, 45, 46, 48, 49, 50, 51, 52, 86], "loser": [43, 45, 46, 48, 49, 50, 51, 52], "draw": [43, 44, 45, 49, 52], "wrong": 43, "insuffici": 43, "materi": 43, "proper": [43, 46, 70], "arbitrari": [43, 44, 45, 46, 47, 48, 50, 51, 52, 70], "replac": [43, 44, 45, 46, 47, 48, 50, 51, 52, 65, 70, 82], "adopt": [43, 44, 45, 46, 47, 48, 49, 50, 51, 52], "screen_height": [43, 46, 49, 50, 51], "connect_four_v3": [44, 86], "horizont": [44, 52, 65, 67], "drop": [44, 47, 70], "6x7": 44, "placement": [44, 52], "cell": [44, 
52], "inclus": 44, "award": [44, 45, 47], "connect_four": 44, "screen_scal": 44, "gin_rummy_v4": 45, "110": 45, "52": [45, 50, 51], "deck": [45, 47, 48, 51], "who": 45, "knock_reward": 45, "gin_reward": 45, "opponents_hand_vis": 45, "unknown": 45, "5x52": 45, "spade": [45, 50, 51], "heart": [45, 50, 51], "diamond": [45, 50, 51], "club": [45, 50, 51], "ac": 45, "descript": [45, 46, 47, 48, 50, 51, 65, 67, 83, 85], "discard": [45, 47], "pile": [45, 47], "exclud": 45, "26": [45, 50, 51, 65, 70], "38": [45, 50, 51], "27": [45, 50, 51, 58, 65, 70], "39": [45, 50, 51], "51": [45, 50, 51], "40": [45, 50, 51], "57": [45, 50, 83, 85], "31": [45, 65], "44": 45, "45": 45, "58": [45, 50], "109": 45, "59": 45, "71": [45, 50], "83": [45, 64], "96": 45, "97": 45, "deadwood": 45, "deadwood_count": 45, "slightli": 45, "those": [45, 47, 64], "upgrad": [45, 48, 50, 51, 70], "rlcard_env": [45, 48, 50, 51], "gin_rummi": 45, "go_v5": 46, "black_0": 46, "white_0": 46, "362": 46, "stone": 46, "intersect": 46, "aim": 46, "territori": 46, "decid": 46, "minigo": 46, "komi": 46, "compens": 46, "board_siz": 46, "disadvantag": 46, "inher": 46, "chines": 46, "tournament": 46, "perfectli": [46, 54], "third": [46, 66, 70], "gtp": 46, "2nd": [46, 47], "2n": 46, "hanabi_v5": 47, "658": 47, "firework": 47, "satisfi": 47, "drawn": 47, "hand_siz": 47, "max_information_token": 47, "max_life_token": 47, "observation_typ": 47, "reveal": [47, 48, 49], "card_knowledg": 47, "knowledg": 47, "red": [47, 54, 56, 62, 66, 67], "seer": 47, "regardless": 47, "construct": [47, 91], "serial": [47, 70], "unari": 47, "thermomet": 47, "That": 47, "lowest": 47, "11111": 47, "explicitli": 47, "10000": 47, "24": [47, 65, 70], "49": 47, "74": 47, "124": 47, "174": 47, "175": 47, "179": 47, "180": 47, "184": 47, "yellow": 47, "185": 47, "189": 47, "green": [47, 54, 56, 62, 67], "190": 47, "195": 47, "199": 47, "blue": [47, 63, 66], "207": 47, "208": 47, "211": 47, "260": 47, "261": 47, "262": 47, "263": 47, "266": 47, "267": 
47, "268": 47, "target": [47, 56, 59, 67, 71, 90], "269": 47, "273": 47, "274": 47, "278": 47, "281": 47, "282": 47, "283": 47, "307": 47, "308": 47, "342": 47, "0th": 47, "343": 47, "377": 47, "378": 47, "412": 47, "413": 47, "447": 47, "3rd": 47, "445": 47, "482": 47, "4th": 47, "483": 47, "517": 47, "518": 47, "552": 47, "553": 47, "587": 47, "588": 47, "622": 47, "663": 47, "657": 47, "scalar": [47, 49], "uuid": 47, "lost": [47, 67], "negat": 47, "took": [47, 70], "ordinari": 47, "futur": 47, "random_start_play": 47, "agent_nam": [47, 82], "observation_vector_dim": 47, "as_vector": 47, "legal_mov": 47, "throw": [47, 70], "leduc_holdem_v4": [48, 83], "36": 48, "round": [48, 50, 51, 86], "six": [48, 70, 81], "jack": 48, "bet": [48, 51], "public": [48, 57, 73], "best": [48, 77], "33": 48, "chip": [48, 50, 51], "21": [48, 90, 91, 93], "35": 48, "22": [48, 67, 90, 91, 93], "leduc_holdem": 48, "beat": 49, "extra": 49, "pair": 49, "odd": [49, 70], "exactli": 49, "defeat": 49, "expans": 49, "lizard": [49, 70], "spock": [49, 70], "applic": [49, 81, 82, 84], "greater": [49, 51], "pariti": 49, "therefor": [49, 67], "action_6": 49, "action_7": 49, "merg": [49, 70], "72": 50, "boolean": 50, "depict": [50, 66, 67], "53": [50, 51], "62": [50, 62], "66": [50, 64], "67": 50, "68": 50, "texas_holdem": 50, "texas_holdem_no_limit_v6": [51, 82], "54": [51, 61], "poker": [51, 84], "regular": 51, "union": 51, "pot": 51, "acpc": 51, "texas_holdem_no_limit": [51, 82], "tictactoe_v3": [52, 82, 90, 93], "player_2": 52, "3x3": 52, "similarli": [52, 67], "swap": 52, "_________": 52, "timelin": 53, "crypto": 54, "push": 54, "speaker": [54, 59], "listen": [54, 59], "spread": 54, "tag": [54, 66, 70], "comm": 54, "particl": 54, "landmark": [54, 55, 56, 58, 59, 60, 61], "codebas": 54, "simple_tag_v3": [54, 62], "punish": 54, "vice": 54, "versa": 54, "natur": [54, 76], "mixtur": 54, "static": [54, 70], "circular": 54, "destin": 54, "veloc": [54, 65, 67, 70], "temporarili": [54, 70], "broadcast": 
54, "transmit": 54, "signal": [54, 57, 65, 70], "bob": [54, 57], "ev": [54, 57], "reconstruct": [54, 57], "circl": [54, 62], "uniformli": 54, "apart": 54, "mind": 54, "compos": [54, 65, 81], "exact": 54, "cardin": [54, 75], "hear": 54, "continuous_act": [54, 55, 56, 57, 58, 59, 60, 61, 62, 63], "oppos": 54, "scene": 54, "grow": 54, "wander": 54, "beyond": [54, 65, 67, 81], "mordatch2017emerg": 54, "ground": [54, 65], "composit": 54, "languag": [54, 81], "popul": 54, "mordatch": 54, "igor": 54, "abbeel": 54, "pieter": 54, "1703": 54, "04908": 54, "2017": [54, 64], "lowe2017multi": 54, "ryan": 54, "wu": 54, "yi": 54, "tamar": 54, "aviv": 54, "harb": 54, "jean": 54, "nip": 54, "mpe": [55, 56, 57, 58, 59, 60, 61, 62, 63, 70], "simple_v3": 55, "agent_0": [55, 56, 58, 59, 61, 62, 63], "inf": [55, 56, 57, 58, 59, 60, 61, 62, 63, 65], "euclidean": [55, 56, 67], "primarili": 55, "purpos": [55, 71, 87], "self_vel": [55, 56, 58, 59, 60, 61, 62, 63], "landmark_rel_posit": [55, 56, 61, 62, 63], "simple_adversary_v3": 56, "adversary_0": [56, 58, 59, 62, 63], "28": [56, 59, 63, 65, 70], "closest": [56, 61], "doesn": [56, 70, 86], "know": 56, "unscal": 56, "cover": [56, 61], "deceiv": 56, "self_po": [56, 61, 62, 63], "goal_rel_posit": [56, 58], "other_agent_rel_posit": [56, 58, 61, 62, 63], "other_agents_rel_posit": 56, "no_act": [56, 58, 59, 60, 61, 62, 63], "move_left": [56, 58, 59, 60, 61, 62, 63], "move_right": [56, 58, 59, 60, 61, 62, 63], "move_down": [56, 58, 59, 60, 61, 62, 63], "move_up": [56, 58, 59, 60, 61, 62, 63], "simple_adversari": 56, "simple_crypto_v3": 57, "eve_0": 57, "bob_0": 57, "alice_0": 57, "alic": 57, "privat": 57, "encrypt": 57, "private_kei": 57, "alices_comm": 57, "say_0": [57, 59, 60, 63], "say_1": [57, 59, 60, 63], "say_2": [57, 59, 60, 63], "say_3": [57, 59, 60, 63], "convei": 57, "simple_crypto": 57, "simple_push_v3": 58, "goal_landmark_id": 58, "all_landmark_rel_posit": [58, 59, 60], "landmark_id": [58, 59], "simple_push": 58, 
"simple_reference_v3": 59, "goal_id": [59, 60], "say_4": [59, 60], "say_5": [59, 60], "say_6": [59, 60], "say_7": [59, 60], "say_8": [59, 60], "say_9": [59, 60], "cartesian": [59, 63], "product": [59, 63], "simple_refer": [59, 60], "simple_speaker_listener_v4": 60, "speaker_0": 60, "listener_0": 60, "grai": 60, "speak": 60, "simple_speaker_listen": 60, "simple_spread_v3": 61, "simple_spread": [61, 70], "adversary_1": [62, 63], "adversary_2": 62, "predat": 62, "prei": 62, "slower": 62, "larg": [62, 65, 67, 70, 92], "infin": 62, "min": 62, "other_agent_veloc": [62, 63], "num_good": [62, 63], "num_adversari": [62, 63], "num_obstacl": [62, 63], "simple_tag": [62, 63], "simple_world_comm_v3": 63, "leadadversary_0": 63, "adversary_3": 63, "34": 63, "192": 63, "food": [63, 67], "forest": 63, "hide": 63, "leader": 63, "chase": 63, "05": [63, 70, 90, 93], "self_in_forest": 63, "leader_comm": 63, "simple_world_comm": 63, "num_food": 63, "num_forest": 63, "multiwalk": [64, 70], "pursuit": [64, 70], "waterworld": [64, 70, 84, 87], "benchmark": [64, 73], "stanford": 64, "laboratori": 64, "waterworld_v4": [64, 67, 70, 89], "ve": 64, "major": [64, 67, 68, 70], "discourag": 64, "compar": 64, "inproceed": 64, "gupta2017coop": 64, "gupta": 64, "jayesh": 64, "egorov": 64, "kochenderf": 64, "mykel": 64, "booktitl": 64, "confer": 64, "autonom": [64, 68], "organ": 64, "springer": 64, "sisl": [65, 66, 67, 70, 89], "multiwalker_v9": 65, "walker_0": 65, "walker_1": 65, "walker_2": 65, "biped": 65, "attempt": [65, 67, 82], "carri": 65, "walker": 65, "multipli": [65, 67], "forward_reward": 65, "terrain": 65, "under": [65, 70], "300": 65, "terminate_on_fal": 65, "remove_on_fal": 65, "fallen": 65, "shared_reward": [65, 66], "exert": 65, "joint": 65, "leg": 65, "simul": [65, 67, 68, 82], "noisi": 65, "lidar": 65, "neighbor": [65, 67], "durat": [65, 85], "cap": 65, "500": [65, 66, 67], "properti": 65, "neighbour": 65, "nois": 65, "position_nois": 65, "angle_nois": 65, "hull": 65, "pi": 65, 
"hip": 65, "knee": 65, "contact": 65, "sensor": [65, 67], "leftmost": 65, "29": [65, 70], "n_walker": 65, "terminate_reward": 65, "fall_reward": 65, "terrain_legth": 65, "among": [65, 66, 67, 82], "caus": [65, 70], "rest": 65, "terrain_length": 65, "improv": [65, 70], "qualiti": [65, 70, 73, 92], "pursuit_v4": 66, "pursuer_0": [66, 67], "pursuer_1": [66, 67], "pursuer_7": 66, "evad": 66, "pursuer": [66, 67, 70], "orang": 66, "caught": 66, "prune": 66, "obs_rang": 66, "alli": 66, "n_evad": [66, 67], "n_pursuer": [66, 67], "n_catch": 66, "freeze_evad": 66, "tag_reward": 66, "catch_reward": 66, "urgency_reward": 66, "constraint_window": 66, "toggl": [66, 67, 71], "term_pursuit": 66, "catch": [66, 70, 75], "anywher": 66, "pursuer_4": 67, "242": [67, 89], "archea": 67, "surviv": 67, "consum": 67, "poison": 67, "radiu": 67, "dynam": 67, "barrier": 67, "thrust": 67, "evenli": 67, "report": 67, "speed_featur": 67, "n_sensor": 67, "purpl": 67, "pursuer_max_accel": 67, "exce": 67, "horizontal_thrust": 67, "vertical_thrust": 67, "n_coop": 67, "food_reward": 67, "destroi": 67, "encounter_reward": 67, "poison_reward": 67, "thrust_penalti": 67, "norm": [67, 71], "n_poison": 67, "sensor_rang": 67, "015": 67, "obstacle_radiu": 67, "obstacle_coord": 67, "evader_spe": 67, "poison_spe": 67, "pursu": 67, "dendrit": 67, "pursuer_spe": 67, "v": [67, 83, 85, 86, 88, 89, 93], "guarante": 68, "discord": [68, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "traffic": 68, "flexibl": 68, "tunabl": 68, "scalabl": 68, "miniatur": 68, "f1tenth": 68, "pybullet": 68, "googl": [68, 70], "deepmind": 68, "riot": 68, "fork": 68, "too": 68, "cook": 68, "crazyfli": 68, "drone": 68, "theori": 68, "samaritan": 68, "stag": 68, "hunt": 68, "chicken": 68, "penni": 68, "uniti": 68, "onlin": 68, "playabl": 68, "webgl": 68, "ml": [68, 70, 84, 90, 91, 93], "link": [68, 70], "demo": [68, 70, 86, 88, 89], "webassembli": 68, "drive": 68, "depreci": [68, 74], "massiv": [68, 70], "role": 68, "ssd": 68, "meltingpot": 68, 
"machin": 68, "ui": 68, "loop": [68, 70, 85], "blizzard": 68, "hearthston": 68, "bot": 68, "secur": [68, 70], "conflict": [68, 71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "resolut": 68, "vehicl": 68, "confin": 68, "bomberman": 68, "skyjo": 68, "m\u016b": 68, "t\u014drere": 68, "zealand": 68, "2023": 70, "pypi": 70, "hotfix": 70, "test_action_flex": 70, "986": 70, "parrellenv": 70, "987": 70, "992": 70, "broken": [70, 83, 85], "rai": [70, 83, 85], "project": [70, 71, 79, 83, 85], "34696": [70, 83, 85], "32999": 70, "homepag": 70, "985": 70, "citat": 70, "cff": 70, "990": 70, "carla": 70, "third_party_env": 70, "991": 70, "changelog": 70, "finish": [70, 86, 88, 89], "deprec": 70, "favor": [70, 90], "return_info": 70, "written": [70, 92], "pyproject": 70, "toml": 70, "compli": 70, "pep": 70, "621": 70, "parti": 70, "langchain": 70, "tianshou": 70, "toreset": 70, "953": 70, "875": 70, "890": 70, "964": 70, "aec_wrapper_fn": 70, "parallel_wrapper_fn": 70, "879": 70, "smoother": 70, "882": 70, "894": 70, "970": 70, "renam": 70, "baseparallelwrap": 70, "baseparallelwrapp": 70, "typo": 70, "876": 70, "908": 70, "cast": [70, 86], "975": 70, "980": 70, "874": 70, "ci": 70, "workflow": 70, "886": 70, "lint": 70, "commit": 70, "hook": 70, "835": 70, "resolv": 70, "pytest": [70, 86], "897": 70, "unnecessari": 70, "891": 70, "984": 70, "906": 70, "gobblet": 70, "cathedr": 70, "907": 70, "904": 70, "942": 70, "979": 70, "environmentcr": 70, "903": 70, "972": 70, "readm": [70, 74], "950": 70, "instruct": [70, 71, 82], "968": 70, "883": 70, "demonstr": [70, 82, 87, 91], "text": [70, 81, 82], "logo": 70, "954": 70, "03": [70, 86, 87], "yank": 70, "hard": [70, 81], "address": 70, "pseudo": 70, "rnd": 70, "thought": 70, "reginald": 70, "mclean": 70, "mikcnt": 70, "cibeah": 70, "sushant1212": 70, "kallinteri": 70, "andrea": 70, "mgoulao": 70, "elliottow": [70, 71, 86, 88, 89], "favicon": 70, "themikeste1": 70, "855": 70, "diagram": 70, "856": 70, "magent2": 70, "standalon": 70, 
"dsctt": 70, "857": 70, "flake8": [70, 71], "gitlab": 70, "redtachyon": 70, "858": 70, "willdudlei": [70, 90, 91, 93], "867": 70, "865": 70, "pillow": [70, 83, 85], "dependabot": 70, "859": 70, "869": 70, "to_parallel": 70, "andrewrwilliam": 70, "870": 70, "872": 70, "contributor": 70, "840": 70, "bolundai0216": 70, "841": 70, "845": 70, "846": 70, "propos": 70, "844": 70, "tobirohr": 70, "848": 70, "843": 70, "enh": 70, "gui": 70, "younik": 70, "842": 70, "847": 70, "magent": 70, "artifact": 70, "850": 70, "tut": 70, "852": 70, "overhaul": 70, "838": 70, "853": 70, "raffaelegalliera": 70, "854": 70, "817": 70, "821": 70, "core": 70, "822": 70, "analyt": 70, "825": 70, "content": [70, 82], "823": 70, "thing": 70, "829": 70, "wd": 70, "831": 70, "832": 70, "833": 70, "818": 70, "836": 70, "837": 70, "ref": 70, "839": 70, "07": 70, "gt": 70, "5cat": 70, "802": 70, "unwrap": [70, 82, 86, 88, 89], "808": 70, "theme": 70, "804": 70, "permiss": 70, "andrewtanj": 70, "809": 70, "807": 70, "810": 70, "dep": 70, "814": 70, "09": 70, "singular": 70, "dictat": 70, "meet": 70, "exceed": 70, "prospector": 70, "mahjong": 70, "doudizhu": 70, "pull": [70, 83, 85], "qol": 70, "pyright": 70, "pydocstyl": 70, "concatvecenv": 70, "763": 70, "776": 70, "777": 70, "779": 70, "bunch": 70, "781": 70, "hour": 70, "782": 70, "kir0ul": 70, "737": 70, "config": [70, 71, 83, 85], "787": 70, "780": 70, "domain": 70, "789": 70, "767": 70, "788": 70, "796": 70, "simple_env": 70, "filipinogambino": 70, "798": 70, "autom": 70, "790": 70, "08": 70, "technic": 70, "publish": 70, "734": 70, "731": 70, "pyglet": 70, "732": 70, "738": 70, "buggi": 70, "739": 70, "ezpickl": 70, "741": 70, "bkrl": 70, "743": 70, "749": 70, "manu": 70, "hoffmann": 70, "747": 70, "750": 70, "reflect": 70, "748": 70, "06": [70, 83, 85, 86], "streamlin": 70, "04": [70, 86], "manual_control": 70, "frames_per_second": 70, "autodepr": 70, "kaz": 70, "capabililti": 70, "battlefield": 70, "repo": [70, 82], "gather": 70, 
"is_paralleliz": 70, "misus": 70, "2021": 70, "learnabl": 70, "upstream": 70, "retreiv": 70, "resurect": 70, "unexpect": 70, "isort": [70, 82], "scipi": 70, "em": [70, 82, 83, 86], "02": 70, "slow": 70, "substanti": 70, "speedup": 70, "art": [70, 87], "n_cycl": 70, "depric": 70, "env_don": 70, "combined_arm": 70, "upcom": 70, "robust": 70, "minimap": 70, "transient": 70, "miscellan": 70, "face": 70, "annoi": 70, "save_ob": 70, "usabl": 70, "random_demo": 70, "redid": 70, "readabl": 70, "impact": 70, "tiger_d": 70, "www": 70, "raw": [70, 86], "max_fram": 70, "hopefulli": 70, "cli": [71, 73, 92], "integr": [71, 74, 81], "tensorboard": [71, 72, 73, 90], "wandb": 71, "experi": [71, 73], "mirror": 71, "dev": 71, "ppo_pettingzoo_ma_ataripi": 71, "newli": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "virtual": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "Then": [71, 87], "question": [71, 72, 81, 82, 83, 85, 86, 88, 89, 90, 91, 93], "feel": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "free": [71, 72, 82, 83, 85, 86, 87, 88, 89, 90, 91, 93], "ask": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "server": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "adapt": [71, 87], "repositori": [71, 75], "vwxyzjn": 71, "blob": 71, "ppo_pettingzoo_ma_atari": 71, "million": 71, "8000": 71, "costa": 71, "noqa": [71, 82], "argpars": [71, 83, 85, 90], "importlib": 71, "distutil": 71, "strtobool": 71, "ss": [71, 85, 88, 89], "summarywrit": [71, 90], "parse_arg": [71, 83, 85], "fmt": 71, "parser": [71, 83, 85, 90], "argumentpars": [71, 83, 85, 90], "add_argu": [71, 83, 85, 90], "basenam": 71, "__file__": 71, "rstrip": 71, "narg": [71, 90], "const": 71, "backend": 71, "cudnn": 71, "bias": [71, 73], "12000": 71, "2000000": 71, "5e": 71, "num": [71, 90], "rollout": [71, 83, 85], "anneal": [71, 83], "discount": 71, "gae": 71, "95": [71, 88], "estim": 71, "minibatch": 71, "mini": 71, "epoch": [71, 90, 93], "adv": 71, "coef": 71, "surrog": 71, "coeffici": 71, "vloss": 71, "ent": 71, "vf": 71, 
"grad": 71, "diverg": 71, "threshold": 71, "num_env": 71, "minibatch_s": 71, "num_minibatch": 71, "layer_init": 71, "stride": [71, 85], "single_action_spac": 71, "clone": 71, "permut": [71, 85], "run_nam": 71, "env_id": 71, "__": 71, "exp_nam": 71, "wandb_project_nam": 71, "wandb_ent": 71, "sync_tensorboard": 71, "monitor_gym": 71, "save_cod": 71, "writer": [71, 90], "add_text": [71, 90], "hyperparamet": [71, 73, 86, 88], "join": [71, 90, 93], "NOT": 71, "TO": 71, "modifi": 71, "manual_se": [71, 90, 93], "torch_determinist": 71, "import_modul": 71, "pettingzoo_env_to_vec_env_v1": [71, 88, 89], "concat_vec_envs_v1": [71, 88, 89], "num_cpu": [71, 88, 89], "base_class": [71, 88, 89], "single_observation_spac": 71, "is_vector_env": 71, "capture_video": 71, "recordvideo": 71, "learning_r": [71, 88, 89], "global_step": 71, "start_tim": 71, "next_termin": 71, "next_trunc": 71, "num_upd": 71, "total_timestep": [71, 86, 88, 89], "anneal_lr": 71, "frac": 71, "lrnow": 71, "param_group": 71, "todo": [71, 83, 86, 88], "idx": 71, "player_idx": 71, "episodic_return": 71, "add_scalar": 71, "chart": 71, "episodic_length": 71, "next_valu": 71, "lastgaelam": 71, "next_don": 71, "nextnontermin": 71, "nextvalu": 71, "gae_lambda": [71, 88], "b_ind": 71, "clipfrac": 71, "update_epoch": 71, "mb_ind": 71, "newvalu": 71, "mb_advantag": 71, "norm_adv": 71, "clip_vloss": 71, "clip_grad_norm_": 71, "max_grad_norm": [71, 88], "target_kl": 71, "record": 71, "plot": 71, "value_loss": 71, "policy_loss": 71, "explained_vari": 71, "sp": 71, "scratch": [73, 79], "modular": [73, 77, 92], "friendli": 73, "tight": 73, "tune": [73, 83, 85, 86], "leaderboard": 73, "task": 73, "welcom": 74, "short": 74, "concept": 74, "deploy": 74, "laid": 74, "custom_environ": [74, 75, 76, 77], "custom_environment_v0": [74, 75, 76], "helper": 74, "complic": 74, "readi": 74, "scope": 74, "entireti": 74, "customenviron": [74, 75, 76, 77], "fun": 75, "guard": [75, 76], "7x7": 75, "door": 75, "middl": 75, "copi": [75, 76, 
90], "multidiscret": [75, 76], "escape_i": [75, 76], "escape_x": [75, 76], "guard_i": [75, 76], "guard_x": [75, 76], "prisoner_i": [75, 76], "prisoner_x": [75, 76], "randint": [75, 76], "prisoner_act": [75, 76], "guard_act": [75, 76], "overwrit": [75, 76], "p": [75, 76], "imposs": 76, "prisoner_action_mask": 76, "guard_action_mask": 76, "built": 77, "practic": 77, "simplic": 77, "assum": [77, 86], "root": 77, "tutorial3_action_mask": 77, "1_000_000": 77, "anyon": 79, "framework": [81, 83, 85, 92], "manag": 81, "lot": 81, "fetch": 81, "summar": 81, "answer": 81, "decis": 81, "persist": 81, "beta": 81, "notori": 81, "metric": 81, "themselv": 81, "assist": 81, "internet": 82, "databas": 82, "repl": 82, "tenac": 82, "gymnasiumag": 82, "retri": 82, "output_pars": 82, "regexpars": 82, "schema": 82, "humanmessag": 82, "systemmessag": 82, "classmethod": 82, "get_doc": 82, "cl": 82, "__doc__": 82, "termini": 82, "sum_of_reward": 82, "respond": 82, "action_pars": 82, "regex": 82, "output_kei": 82, "default_output_kei": 82, "message_histori": 82, "ret": 82, "random_act": 82, "rew": [82, 90, 93], "obs_messag": 82, "_act": 82, "act_messag": 82, "stop_after_attempt": 82, "wait": 82, "wait_non": 82, "retry_if_exception_typ": 82, "valueerror": [82, 86, 88, 89], "before_sleep": 82, "retry_st": 82, "occur": 82, "outcom": 82, "retryerror": 82, "f841": 82, "pettingzooag": 82, "extend": [82, 90], "gymnasium_ag": 82, "getmodul": 82, "actionmaskag": 82, "subclass": 82, "pettingzoo_ag": 82, "obs_buff": 82, "dequ": 82, "maxlen": 82, "valid_action_instruct": 82, "accord": 82, "int8": 82, "_____": 82, "chat_model": 82, "chatopenai": 82, "action_masking_ag": 82, "rock_paper_scissor": 82, "temperatur": 82, "tic_tac_to": [82, 90], "tensorflow": [83, 85], "rohan": [83, 85], "rohan138": [83, 85], "dqnconfig": 83, "dqn_torch_model": 83, "dqntorchmodel": 83, "pettingzooenv": [83, 85, 90, 91, 93], "modelcatalog": [83, 85], "fcnet": 83, "fullyconnectednetwork": 83, "torchfc": 83, "try_import_torch": 
83, "torch_util": 83, "float_max": 83, "registri": [83, 85], "register_env": [83, 85], "torchmaskedact": 83, "pytorch": [83, 87, 92], "parametricactionsmodel": 83, "obs_spac": [83, 85], "num_output": [83, 85], "model_config": 83, "kw": 83, "obs_len": 83, "orig_obs_spac": 83, "action_embed_model": 83, "_action_emb": 83, "input_dict": [83, 85], "seq_len": [83, 85], "extract": 83, "embed": 83, "action_logit": 83, "probit": 83, "inf_mask": 83, "1e10": 83, "value_funct": [83, 85], "alg_nam": 83, "register_custom_model": [83, 85], "pa_model": 83, "env_creat": [83, 85], "env_nam": [83, 85], "test_env": [83, 90, 93], "act_spac": [83, 85], "num_rollout_work": [83, 85], "rollout_fragment_length": [83, 85], "train_batch_s": [83, 85], "duel": 83, "custom_model": 83, "multi_ag": 83, "policy_mapping_fn": 83, "agent_id": [83, 90], "num_gpu": [83, 85], "rllib_num_gpu": [83, 85], "log_level": [83, 85], "exploration_config": 83, "epsilongreedi": 83, "initial_epsilon": 83, "final_epsilon": 83, "epsilon_timestep": 83, "epsilon": 83, "timesteps_tot": [83, 85], "10000000": 83, "checkpoint_freq": [83, 85], "to_dict": [83, 85], "leduoc": 83, "pettingzoo_env": [83, 85, 90, 93], "rllib_leduc_holdem": 83, "sdl_videodriv": [83, 85], "pretrain": [83, 85], "checkpoint": [83, 85], "ray_result": [83, 85], "ppo_pistonball_v6_660ce_00000_0_2021": [83, 85], "11_12": [83, 85], "checkpoint_000050": [83, 85], "checkpoint_path": [83, 85], "expandus": [83, 85], "dqnagent": 83, "from_checkpoint": [83, 85], "reward_sum": [83, 85], "get_polici": 83, "batch_ob": 83, "expand_dim": 83, "batched_act": 83, "state_out": 83, "compute_actions_from_input_dict": 83, "single_act": 83, "dqn": [84, 90, 92, 93], "industri": 84, "grade": 84, "independ": 84, "leela": 84, "proxim": [85, 86, 88, 89], "ppoconfig": 85, "parallelpettingzooenv": 85, "torch_modelv2": 85, "torchmodelv2": 85, "cnnmodelv2": 85, "3136": 85, "policy_fn": 85, "value_fn": 85, "model_out": 85, "_value_out": 85, "clip_act": 85, "2e": 85, "lambda_": 85, 
"use_ga": 85, "clip_param": 85, "grad_clip": 85, "entropy_coeff": 85, "vf_loss_coeff": 85, "sgd_minibatch_s": 85, "num_sgd_it": 85, "5000000": 85, "local_dir": 85, "pil": 85, "rllib_pistonbal": 85, "ppoagent": 85, "frame_list": 85, "compute_single_act": 85, "img": 85, "fromarrai": 85, "gif": 85, "save_al": 85, "append_imag": 85, "maskabl": 86, "disk": [86, 88, 89], "mlp": [86, 87, 89], "extractor": [86, 88, 89], "sb3actionmaskwrapp": 86, "stabl": [86, 88, 89], "baselines3": [86, 88, 89], "contrib": 86, "readthedoc": [86, 88, 89], "io": [86, 87, 88, 89], "ppo_mask": 86, "glob": [86, 88, 89], "sb3_contrib": 86, "maskableppo": 86, "maskableactorcriticpolici": 86, "actionmask": 86, "strip": 86, "mask_fn": 86, "whatev": 86, "reli": 86, "train_action_mask": 86, "10_000": [86, 88, 89], "env_kwarg": [86, 88, 89], "behav": 86, "action_mask_fn": 86, "did": 86, "earlier": [86, 90], "draft": 86, "verbos": [86, 88, 89], "set_random_se": 86, "strftime": [86, 88, 89], "eval_action_mask": 86, "num_gam": [86, 88, 89], "latest_polici": [86, 88, 89], "getctim": [86, 88, 89], "total_reward": 86, "round_reward": 86, "tie": 86, "winrat": 86, "incl": 86, "10k": 86, "76": 86, "20k": 86, "86": 86, "40k": 86, "7e": 86, "laptop": [86, 88], "20_480": 86, "80": 86, "watch": [86, 88, 89, 90], "sb3": 87, "cnn": [87, 88, 89], "resiz": [87, 88], "iclr": 87, "sub": 87, "consequ": 87, "shelf": 87, "post": 87, "leverag": [88, 89], "multithread": [88, 89], "blank": 88, "__future__": [88, 89], "annot": [88, 89], "stable_baselines3": [88, 89], "mlppolici": [88, 89], "markovvectorenv": 88, "black_death": 88, "black_death_v3": 88, "n_step": [88, 90, 93], "0905168": 88, "00062211": 88, "042202": 88, "n_epoch": 88, "clip_rang": 88, "nstart": [88, 89], "avg_reward": [88, 89], "avg_reward_per_ag": 88, "avg": [88, 89], "seem": 88, "wors": 88, "81_920": 88, "train_butterfly_supersuit": 89, "he": 89, "gpu": 89, "196_608": 89, "significantli": 89, "tic": [90, 93], "tac": [90, 93], "toe": [90, 93], "Will": [90, 
91, 93], "git": [90, 91, 93], "deepcopi": 90, "collector": [90, 91, 93], "vectorreplaybuff": [90, 93], "dummyvectorenv": [90, 91, 93], "basepolici": [90, 93], "dqnpolici": [90, 93], "multiagentpolicymanag": [90, 91, 93], "randompolici": [90, 91, 93], "trainer": [90, 93], "offpolicy_train": [90, 93], "tensorboardlogg": 90, "get_pars": 90, "1626": 90, "buffer": 90, "smaller": 90, "freq": 90, "320": [90, 93], "logdir": 90, "store_tru": 90, "resum": 90, "pth": [90, 93], "get_arg": 90, "namespac": 90, "parse_known_arg": 90, "get_ag": 90, "agent_learn": [90, 93], "agent_oppon": [90, 93], "get_env": 90, "state_shap": [90, 93], "action_shap": [90, 93], "hidden_s": [90, 93], "target_update_freq": [90, 93], "resume_path": 90, "load_state_dict": 90, "opponent_path": 90, "train_ag": 90, "train_env": [90, 93], "training_num": [90, 93], "test_num": 90, "train_collector": [90, 93], "buffer_s": 90, "exploration_nois": [90, 93], "test_collector": [90, 93], "set_ep": [90, 93], "log_path": 90, "callback": [90, 93], "save_best_fn": [90, 93], "hasattr": 90, "model_save_path": [90, 93], "state_dict": [90, 93], "stop_fn": [90, 93], "mean_reward": [90, 93], "win_rat": 90, "train_fn": [90, 93], "env_step": [90, 93], "eps_train": 90, "test_fn": [90, 93], "eps_test": 90, "reward_metr": [90, 93], "step_per_epoch": [90, 93], "step_per_collect": [90, 93], "update_per_step": [90, 93], "test_in_train": [90, 93], "n_episod": [90, 91], "betwenen": 91, "vectoris": 91, "introduct": 92, "fast": 92, "build": 92, "4000": 92, "boast": 92, "thorough": 92, "comprehens": 92, "_get_ag": 93, "_get_env": 93, "discount_factor": 93, "estimation_step": 93, "callabl": 93, "20_000": 93, "ttt": 93, "makedir": 93, "exist_ok": 93, "max_epoch": 93, "episode_per_test": 93}, "objects": {"": [[8, 0, 1, "", "agent_indicator_v0"], [8, 0, 1, "", "black_death_v2"], [8, 0, 1, "", "clip_actions_v0"], [8, 0, 1, "", "clip_reward_v0"], [8, 0, 1, "", "color_reduction_v0"], [8, 0, 1, "", "delay_observations_v0"], [8, 0, 1, "", 
"dtype_v0"], [8, 0, 1, "", "flatten_v0"], [8, 0, 1, "", "frame_skip_v0"], [8, 0, 1, "", "frame_stack_v1"], [8, 0, 1, "", "max_observation_v0"], [8, 0, 1, "", "nan_noop_v0"], [8, 0, 1, "", "nan_random_v0"], [8, 0, 1, "", "nan_zeros_v0"], [8, 0, 1, "", "normalize_obs_v0"], [8, 0, 1, "", "pad_action_space_v0"], [8, 0, 1, "", "pad_observations_v0"], [8, 0, 1, "", "reshape_v0"], [8, 0, 1, "", "resize_v1"], [8, 0, 1, "", "scale_actions_v0"], [8, 0, 1, "", "sticky_actions_v0"]], "pettingzoo.atari.basketball_pong.basketball_pong": [[14, 1, 1, "", "raw_env"]], "pettingzoo.atari.boxing.boxing": [[15, 1, 1, "", "raw_env"]], "pettingzoo.atari.combat_plane.combat_plane": [[16, 1, 1, "", "raw_env"]], "pettingzoo.atari.combat_tank.combat_tank": [[17, 1, 1, "", "raw_env"]], "pettingzoo.atari.double_dunk.double_dunk": [[18, 1, 1, "", "raw_env"]], "pettingzoo.atari.entombed_competitive.entombed_competitive": [[19, 1, 1, "", "raw_env"]], "pettingzoo.atari.entombed_cooperative.entombed_cooperative": [[20, 1, 1, "", "raw_env"]], "pettingzoo.atari.flag_capture.flag_capture": [[21, 1, 1, "", "raw_env"]], "pettingzoo.atari.foozpong.foozpong": [[22, 1, 1, "", "raw_env"]], "pettingzoo.atari.ice_hockey.ice_hockey": [[23, 1, 1, "", "raw_env"]], "pettingzoo.atari.joust.joust": [[24, 1, 1, "", "raw_env"]], "pettingzoo.atari.mario_bros.mario_bros": [[25, 1, 1, "", "raw_env"]], "pettingzoo.atari.maze_craze.maze_craze": [[26, 1, 1, "", "raw_env"]], "pettingzoo.atari.othello.othello": [[27, 1, 1, "", "raw_env"]], "pettingzoo.atari.pong.pong": [[28, 1, 1, "", "raw_env"]], "pettingzoo.atari.quadrapong.quadrapong": [[29, 1, 1, "", "raw_env"]], "pettingzoo.atari.space_invaders.space_invaders": [[30, 1, 1, "", "raw_env"]], "pettingzoo.atari.space_war.space_war": [[31, 1, 1, "", "raw_env"]], "pettingzoo.atari.surround.surround": [[32, 1, 1, "", "raw_env"]], "pettingzoo.atari.tennis.tennis": [[33, 1, 1, "", "raw_env"]], "pettingzoo.atari.video_checkers.video_checkers": [[34, 1, 1, "", "raw_env"]], 
"pettingzoo.atari.volleyball_pong.volleyball_pong": [[35, 1, 1, "", "raw_env"]], "pettingzoo.atari.warlords.warlords": [[36, 1, 1, "", "raw_env"]], "pettingzoo.atari.wizard_of_wor.wizard_of_wor": [[37, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.cooperative_pong.cooperative_pong": [[39, 1, 1, "", "env"], [39, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env": [[39, 2, 1, "", "action_space"], [39, 2, 1, "", "close"], [39, 2, 1, "", "observation_space"], [39, 2, 1, "", "observe"], [39, 2, 1, "", "render"], [39, 2, 1, "", "reset"], [39, 2, 1, "", "state"], [39, 2, 1, "", "step"]], "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies": [[40, 1, 1, "", "env"], [40, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env": [[40, 2, 1, "", "action_space"], [40, 2, 1, "", "close"], [40, 2, 1, "", "observation_space"], [40, 2, 1, "", "observe"], [40, 2, 1, "", "render"], [40, 2, 1, "", "reset"], [40, 2, 1, "", "state"], [40, 2, 1, "", "step"]], "pettingzoo.butterfly.pistonball.pistonball": [[41, 1, 1, "", "env"], [41, 1, 1, "", "raw_env"]], "pettingzoo.butterfly.pistonball.pistonball.raw_env": [[41, 2, 1, "", "action_space"], [41, 2, 1, "", "close"], [41, 2, 1, "", "observation_space"], [41, 2, 1, "", "observe"], [41, 2, 1, "", "render"], [41, 2, 1, "", "reset"], [41, 2, 1, "", "state"], [41, 2, 1, "", "step"]], "pettingzoo.classic.chess.chess": [[43, 1, 1, "", "env"], [43, 1, 1, "", "raw_env"]], "pettingzoo.classic.chess.chess.raw_env": [[43, 2, 1, "", "action_space"], [43, 2, 1, "", "close"], [43, 2, 1, "", "observation_space"], [43, 2, 1, "", "observe"], [43, 2, 1, "", "render"], [43, 2, 1, "", "reset"], [43, 2, 1, "", "step"]], "pettingzoo.classic.connect_four.connect_four": [[44, 1, 1, "", "env"], [44, 1, 1, "", "raw_env"]], "pettingzoo.classic.connect_four.connect_four.raw_env": [[44, 2, 1, "", "action_space"], [44, 2, 1, "", "close"], [44, 2, 1, "", 
"observation_space"], [44, 2, 1, "", "observe"], [44, 2, 1, "", "render"], [44, 2, 1, "", "reset"], [44, 2, 1, "", "step"]], "pettingzoo.classic.go.go": [[46, 1, 1, "", "env"], [46, 1, 1, "", "raw_env"]], "pettingzoo.classic.go.go.raw_env": [[46, 2, 1, "", "action_space"], [46, 2, 1, "", "close"], [46, 2, 1, "", "observation_space"], [46, 2, 1, "", "observe"], [46, 2, 1, "", "render"], [46, 2, 1, "", "reset"], [46, 2, 1, "", "step"]], "pettingzoo.classic.hanabi.hanabi": [[47, 1, 1, "", "env"], [47, 1, 1, "", "raw_env"]], "pettingzoo.classic.hanabi.hanabi.raw_env": [[47, 2, 1, "", "action_space"], [47, 2, 1, "", "close"], [47, 2, 1, "", "observation_space"], [47, 2, 1, "", "observe"], [47, 2, 1, "", "render"], [47, 2, 1, "", "reset"], [47, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.gin_rummy": [[45, 1, 1, "", "env"], [45, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env": [[45, 2, 1, "", "observe"], [45, 2, 1, "", "render"], [45, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.leduc_holdem": [[48, 1, 1, "", "env"], [48, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env": [[48, 2, 1, "", "render"], [48, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.texas_holdem": [[50, 1, 1, "", "env"], [50, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.texas_holdem.raw_env": [[50, 2, 1, "", "render"], [50, 2, 1, "", "step"]], "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit": [[51, 1, 1, "", "env"], [51, 1, 1, "", "raw_env"]], "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env": [[51, 2, 1, "", "render"], [51, 2, 1, "", "step"]], "pettingzoo.classic.rps.rps": [[49, 1, 1, "", "env"], [49, 1, 1, "", "raw_env"]], "pettingzoo.classic.rps.rps.raw_env": [[49, 2, 1, "", "action_space"], [49, 2, 1, "", "close"], [49, 2, 1, "", "observation_space"], [49, 2, 1, "", "observe"], [49, 2, 1, "", "render"], [49, 2, 1, "", "reset"], [49, 2, 1, "", "step"]], "pettingzoo.classic.tictactoe.tictactoe": [[52, 
1, 1, "", "env"], [52, 1, 1, "", "raw_env"]], "pettingzoo.classic.tictactoe.tictactoe.raw_env": [[52, 2, 1, "", "action_space"], [52, 2, 1, "", "close"], [52, 2, 1, "", "observation_space"], [52, 2, 1, "", "observe"], [52, 2, 1, "", "render"], [52, 2, 1, "", "reset"], [52, 2, 1, "", "step"]], "pettingzoo.mpe.simple.simple": [[55, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple.simple.raw_env": [[55, 3, 1, "", "action_spaces"], [55, 3, 1, "", "agent_selection"], [55, 3, 1, "", "agents"], [55, 3, 1, "", "infos"], [55, 3, 1, "", "observation_spaces"], [55, 3, 1, "", "possible_agents"], [55, 3, 1, "", "rewards"], [55, 3, 1, "", "terminations"], [55, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_adversary.simple_adversary": [[56, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env": [[56, 3, 1, "", "action_spaces"], [56, 3, 1, "", "agent_selection"], [56, 3, 1, "", "agents"], [56, 3, 1, "", "infos"], [56, 3, 1, "", "observation_spaces"], [56, 3, 1, "", "possible_agents"], [56, 3, 1, "", "rewards"], [56, 3, 1, "", "terminations"], [56, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_crypto.simple_crypto": [[57, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env": [[57, 3, 1, "", "action_spaces"], [57, 3, 1, "", "agent_selection"], [57, 3, 1, "", "agents"], [57, 3, 1, "", "infos"], [57, 3, 1, "", "observation_spaces"], [57, 3, 1, "", "possible_agents"], [57, 3, 1, "", "rewards"], [57, 3, 1, "", "terminations"], [57, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_push.simple_push": [[58, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_push.simple_push.raw_env": [[58, 3, 1, "", "action_spaces"], [58, 3, 1, "", "agent_selection"], [58, 3, 1, "", "agents"], [58, 3, 1, "", "infos"], [58, 3, 1, "", "observation_spaces"], [58, 3, 1, "", "possible_agents"], [58, 3, 1, "", "rewards"], [58, 3, 1, "", "terminations"], [58, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_reference.simple_reference": [[59, 1, 1, "", 
"raw_env"]], "pettingzoo.mpe.simple_reference.simple_reference.raw_env": [[59, 3, 1, "", "action_spaces"], [59, 3, 1, "", "agent_selection"], [59, 3, 1, "", "agents"], [59, 3, 1, "", "infos"], [59, 3, 1, "", "observation_spaces"], [59, 3, 1, "", "possible_agents"], [59, 3, 1, "", "rewards"], [59, 3, 1, "", "terminations"], [59, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener": [[60, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env": [[60, 3, 1, "", "action_spaces"], [60, 3, 1, "", "agent_selection"], [60, 3, 1, "", "agents"], [60, 3, 1, "", "infos"], [60, 3, 1, "", "observation_spaces"], [60, 3, 1, "", "possible_agents"], [60, 3, 1, "", "rewards"], [60, 3, 1, "", "terminations"], [60, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_spread.simple_spread": [[61, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_spread.simple_spread.raw_env": [[61, 3, 1, "", "action_spaces"], [61, 3, 1, "", "agent_selection"], [61, 3, 1, "", "agents"], [61, 3, 1, "", "infos"], [61, 3, 1, "", "observation_spaces"], [61, 3, 1, "", "possible_agents"], [61, 3, 1, "", "rewards"], [61, 3, 1, "", "terminations"], [61, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_tag.simple_tag": [[62, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_tag.simple_tag.raw_env": [[62, 3, 1, "", "action_spaces"], [62, 3, 1, "", "agent_selection"], [62, 3, 1, "", "agents"], [62, 3, 1, "", "infos"], [62, 3, 1, "", "observation_spaces"], [62, 3, 1, "", "possible_agents"], [62, 3, 1, "", "rewards"], [62, 3, 1, "", "terminations"], [62, 3, 1, "", "truncations"]], "pettingzoo.mpe.simple_world_comm.simple_world_comm": [[63, 1, 1, "", "raw_env"]], "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env": [[63, 3, 1, "", "action_spaces"], [63, 3, 1, "", "agent_selection"], [63, 3, 1, "", "agents"], [63, 3, 1, "", "infos"], [63, 3, 1, "", "observation_spaces"], [63, 3, 1, "", "possible_agents"], [63, 3, 1, "", "rewards"], [63, 3, 1, 
"", "terminations"], [63, 3, 1, "", "truncations"]], "pettingzoo.sisl.multiwalker.multiwalker": [[65, 1, 1, "", "env"], [65, 1, 1, "", "raw_env"]], "pettingzoo.sisl.multiwalker.multiwalker.raw_env": [[65, 2, 1, "", "action_space"], [65, 2, 1, "", "close"], [65, 2, 1, "", "observation_space"], [65, 2, 1, "", "observe"], [65, 2, 1, "", "render"], [65, 2, 1, "", "reset"], [65, 2, 1, "", "step"]], "pettingzoo.sisl.pursuit.pursuit": [[66, 1, 1, "", "env"], [66, 1, 1, "", "raw_env"]], "pettingzoo.sisl.pursuit.pursuit.raw_env": [[66, 2, 1, "", "action_space"], [66, 2, 1, "", "close"], [66, 2, 1, "", "observation_space"], [66, 2, 1, "", "observe"], [66, 2, 1, "", "render"], [66, 2, 1, "", "reset"], [66, 2, 1, "", "step"]], "pettingzoo.sisl.waterworld.waterworld": [[67, 1, 1, "", "env"], [67, 1, 1, "", "raw_env"]], "pettingzoo.sisl.waterworld.waterworld.raw_env": [[67, 2, 1, "", "action_space"], [67, 2, 1, "", "close"], [67, 2, 1, "", "observation_space"], [67, 2, 1, "", "observe"], [67, 2, 1, "", "render"], [67, 2, 1, "", "reset"], [67, 2, 1, "", "step"]], "pettingzoo.utils": [[6, 4, 0, "-", "conversions"]], "pettingzoo.utils.conversions": [[6, 0, 1, "", "aec_to_parallel"], [6, 0, 1, "", "parallel_to_aec"]], "pettingzoo.utils.env": [[2, 1, 1, "", "AECEnv"], [3, 1, 1, "", "ParallelEnv"]], "pettingzoo.utils.env.AECEnv": [[2, 3, 1, "", "action_spaces"], [2, 3, 1, "", "agent_selection"], [2, 3, 1, "", "agents"], [2, 2, 1, "", "close"], [2, 3, 1, "", "infos"], [2, 3, 1, "", "max_num_agents"], [2, 3, 1, "", "num_agents"], [2, 3, 1, "", "observation_spaces"], [2, 2, 1, "", "observe"], [2, 3, 1, "", "possible_agents"], [2, 2, 1, "", "render"], [2, 2, 1, "", "reset"], [2, 3, 1, "", "rewards"], [2, 2, 1, "", "step"], [2, 3, 1, "", "terminations"], [2, 3, 1, "", "truncations"]], "pettingzoo.utils.env.ParallelEnv": [[3, 2, 1, "", "action_space"], [3, 3, 1, "", "action_spaces"], [3, 3, 1, "", "agents"], [3, 2, 1, "", "close"], [3, 3, 1, "", "max_num_agents"], [3, 3, 1, "", 
"num_agents"], [3, 2, 1, "", "observation_space"], [3, 3, 1, "", "observation_spaces"], [3, 3, 1, "", "possible_agents"], [3, 2, 1, "", "render"], [3, 2, 1, "", "reset"], [3, 2, 1, "", "state"], [3, 2, 1, "", "step"]], "pettingzoo.utils.wrappers": [[6, 1, 1, "", "AssertOutOfBoundsWrapper"], [6, 1, 1, "", "BaseWrapper"], [6, 1, 1, "", "CaptureStdoutWrapper"], [6, 1, 1, "", "ClipOutOfBoundsWrapper"], [6, 1, 1, "", "OrderEnforcingWrapper"], [6, 1, 1, "", "TerminateIllegalWrapper"]]}, "objtypes": {"0": "py:function", "1": "py:class", "2": "py:method", "3": "py:attribute", "4": "py:module"}, "objnames": {"0": ["py", "function", "Python function"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "module", "Python module"]}, "titleterms": {"404": 0, "page": [0, 1], "Not": 0, "found": 0, "The": 0, "request": 0, "could": 0, "pettingzoo": [1, 6, 68, 70, 73, 82, 84, 86, 92], "doc": 1, "edit": 1, "an": [1, 9], "environ": [1, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 47, 54, 68, 71, 72, 75, 77, 79, 82, 83, 84, 85, 86, 88, 89, 90, 91, 93], "build": 1, "document": 1, "aec": [2, 6, 68], "api": [2, 3, 9, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 91], "usag": [2, 3, 7, 9, 13, 38, 42, 54, 64, 91], "action": [2, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 67, 76, 82, 86], "mask": [2, 43, 44, 45, 46, 47, 48, 50, 51, 52, 76, 82, 86], "aecenv": 2, "attribut": 2, "method": 2, "parallel": [3, 6, 10, 11], "parallelenv": 3, "util": [4, 6, 10], "averag": 4, "total": 4, "reward": [4, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 67], "observ": [4, 11, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 
54, 65, 67], "save": [4, 11], "wrapper": [5, 6, 7, 8, 10], "us": [5, 10, 68, 73, 84, 92], "convers": 6, "shimmi": 7, "compat": 7, "support": 7, "multi": [7, 8], "agent": [7, 8, 9, 10, 68, 81, 82, 83, 85, 93], "openspiel": 7, "deepmind": 7, "control": [7, 39, 40, 66], "soccer": 7, "melt": 7, "pot": 7, "citat": [7, 8, 13, 54], "supersuit": 8, "includ": 8, "function": 8, "onli": 8, "basic": [9, 91], "instal": [9, 13, 38, 42, 54, 64], "initi": 9, "interact": 9, "With": 9, "addit": [9, 74], "option": [9, 74], "compon": 9, "notabl": 9, "idiom": 9, "check": 9, "entir": 9, "i": 9, "done": 9, "unwrap": 9, "variabl": 9, "number": 9, "death": 9, "raw": 9, "creation": [10, 79], "exampl": [10, 73, 84, 92], "custom": 10, "develop": 10, "selector": 10, "deprec": 10, "modul": 10, "test": [11, 77, 86], "seed": 11, "max": 11, "cycl": 11, "render": [11, 54], "perform": 11, "benchmark": 11, "tutori": [12, 73, 74, 75, 76, 77, 79, 81, 84, 87, 92], "recommend": 12, "start": 12, "atari": 13, "game": [13, 49, 68], "overview": [13, 73, 81, 84, 87, 92], "detail": 13, "preprocess": 13, "common": 13, "paramet": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37], "basketbal": 14, "pong": [14, 28, 35, 39], "space": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 65, 67], "minim": [14, 21, 22, 27, 28, 29, 30, 32, 34, 35, 36], "version": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66, 68], "histori": [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 65, 66], "box": 15, "combat": [16, 17], "plane": 16, "tank": 17, "doubl": 18, "dunk": 18, "emtomb": [19, 20], "competit": 19, "cooper": [20, 39], "flag": 21, "captur": 21, "foozpong": 22, "ic": 23, "hockei": 
23, "joust": 24, "mario": 25, "bro": 25, "maze": 26, "craze": 26, "othello": 27, "quadrapong": 29, "invad": 30, "war": 31, "surround": 32, "tenni": 33, "video": 34, "checker": 34, "volleybal": 35, "warlord": 36, "wizard": 37, "wor": 37, "butterfli": 38, "manual": [39, 40, 66], "argument": [39, 40, 41, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67], "knight": [40, 88], "archer": [40, 88], "zombi": [40, 88], "kaz": 40, "vector": 40, "default": 40, "imag": 40, "base": 40, "pistonbal": [41, 85], "classic": [42, 86], "chess": 43, "legal": [43, 44, 45, 46, 47, 48, 50, 51, 52], "connect": [44, 86], "four": [44, 86], "gin": 45, "rummi": 45, "go": 46, "hanabi": 47, "leduc": 48, "hold": [48, 50, 51], "em": [48, 50, 51], "rock": [49, 82], "paper": [49, 82], "scissor": [49, 82], "expand": 49, "texa": [50, 51, 82], "No": [51, 82], "limit": [51, 82], "tic": [52, 82], "tac": [52, 82], "toe": [52, 82], "mpe": 54, "type": 54, "kei": 54, "concept": 54, "termin": 54, "simpl": [55, 56, 57, 58, 59, 60, 61, 62, 63, 83], "adversari": 56, "crypto": 57, "push": 58, "refer": 59, "speaker": 60, "listen": 60, "spread": 61, "tag": 62, "world": 63, "comm": 63, "sisl": 64, "multiwalk": 65, "pursuit": 66, "waterworld": [67, 89], "third": 68, "parti": 68, "latest": 68, "sumo": 68, "rl": [68, 83, 85], "pogema": 68, "racecar": 68, "gym": 68, "teamfight": 68, "tactic": 68, "muzero": 68, "cookingzoo": 68, "crazi": 68, "dilemma": 68, "env": 68, "breakout": 68, "clone": 68, "gobblet": 68, "cathedr": 68, "carla": 68, "older": 68, "neural": 68, "mmo": 68, "sequenti": 68, "social": 68, "kaggl": 68, "cogment": 68, "vers": 68, "stone": 68, "ground": 68, "hearth": 68, "battl": 68, "cyber": 68, "oper": 68, "research": 68, "conflict_rez": 68, "pz": 68, "battlesnak": 68, "bombermanai": 68, "fanorona": 68, "galaga": 68, "ai": 68, "skyjo_rl": 68, "mu": 68, "torer": 68, "releas": 70, "note": 70, "1": 70, "23": 70, "0": 70, "22": 70, "4": 70, "3": 70, "2": 70, "21": 70, "20": 70, "19": 
70, "18": 70, "17": 70, "16": 70, "15": 70, "14": 70, "13": 70, "12": 70, "11": 70, "10": 70, "9": 70, "8": 70, "7": 70, "6": 70, "5": 70, "cleanrl": [71, 72, 73], "advanc": [71, 74], "ppo": [71, 72, 85, 86, 88, 89], "setup": [71, 72, 82, 83, 85, 86, 88, 89, 90, 91, 93], "code": [71, 72, 74, 75, 76, 77, 82, 83, 85, 86, 88, 89, 90, 91, 93], "implement": 72, "wandb": 73, "integr": 73, "repositori": 74, "structur": 74, "introduct": [74, 75, 76, 77], "tree": 74, "file": 74, "skeleton": 74, "logic": 75, "your": 77, "langchain": [81, 82], "llm": [81, 82], "prompt": 81, "chain": 81, "data": 81, "augment": 81, "gener": 81, "memori": 81, "evalu": [81, 86, 88, 89], "creat": 82, "loop": 82, "gymnasium": 82, "holdem": 82, "full": 82, "rllib": [83, 84, 85], "dqn": 83, "poker": 83, "train": [83, 84, 85, 86, 88, 89, 93], "watch": [83, 85], "plai": [83, 85], "rai": 84, "architectur": [84, 92], "sb3": [86, 88, 89], "other": 86, "stabl": 87, "baselines3": 87, "baselin": 87, "tianshou": [90, 91, 92, 93], "cli": 90, "log": 90}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"404 - Page Not Found": [[0, "page-not-found"]], "The requested page could not be found.": [[0, "the-requested-page-could-not-be-found"]], "PettingZoo docs": [[1, "pettingzoo-docs"]], "Editing an environment page": [[1, "editing-an-environment-page"]], "Build the Documentation": [[1, "build-the-documentation"]], "AEC API": [[2, "aec-api"]], "Usage": [[2, "usage"], [3, "usage"], [7, "usage"], [13, "usage"], [38, "usage"], [42, "usage"], [54, "usage"], [64, "usage"]], "Action Masking": [[2, "action-masking"]], "AECEnv": [[2, "aecenv"]], "Attributes": [[2, "attributes"]], "Methods": [[2, "methods"]], 
"Parallel API": [[3, "parallel-api"]], "ParallelEnv": [[3, "parallelenv"]], "Utils": [[4, "utils"]], "Average Total Reward": [[4, "average-total-reward"]], "Observation Saving": [[4, "observation-saving"]], "Wrappers": [[5, "wrappers"]], "Using Wrappers": [[5, "using-wrappers"], [10, "using-wrappers"]], "PettingZoo Wrappers": [[6, "pettingzoo-wrappers"]], "Conversion wrappers": [[6, "conversion-wrappers"]], "AEC to Parallel": [[6, "module-pettingzoo.utils.conversions"]], "Parallel to AEC": [[6, "module-pettingzoo.utils.conversions"]], "Utility Wrappers": [[6, "utility-wrappers"]], "Shimmy Compatibility Wrappers": [[7, "shimmy-compatibility-wrappers"]], "Supported multi-agent environments:": [[7, "supported-multi-agent-environments"]], "OpenSpiel": [[7, "openspiel"]], "DeepMind Control Soccer": [[7, "deepmind-control-soccer"]], "DeepMind Melting Pot": [[7, "deepmind-melting-pot"]], "Multi-Agent Compatibility Wrappers:": [[7, "multi-agent-compatibility-wrappers"]], "Citation": [[7, "citation"], [8, "citation"], [13, "citation"], [54, "citation"]], "Supersuit Wrappers": [[8, "supersuit-wrappers"]], "Included Functions": [[8, "included-functions"]], "Included Multi-Agent Only Functions": [[8, "included-multi-agent-only-functions"]], "Basic Usage": [[9, "basic-usage"]], "Installation": [[9, "installation"], [13, "installation"], [38, "installation"], [42, "installation"], [54, "installation"], [64, "installation"]], "Initializing Environments": [[9, "initializing-environments"]], "Interacting With Environments": [[9, "interacting-with-environments"]], "Additional Environment API": [[9, "additional-environment-api"]], "Optional API Components": [[9, "optional-api-components"]], "Notable Idioms": [[9, "notable-idioms"]], "Checking if the entire environment is done": [[9, "checking-if-the-entire-environment-is-done"]], "Unwrapping an environment": [[9, "unwrapping-an-environment"]], "Variable Numbers of Agents (Death)": [[9, "variable-numbers-of-agents-death"]], 
"Environment as an Agent": [[9, "environment-as-an-agent"]], "Raw Environments": [[9, "raw-environments"]], "Environment Creation": [[10, "environment-creation"]], "Example Custom Environment": [[10, "example-custom-environment"]], "Example Custom Parallel Environment": [[10, "example-custom-parallel-environment"]], "Developer Utils": [[10, "developer-utils"]], "Agent selector": [[10, "agent-selector"]], "Deprecated Module": [[10, "deprecated-module"]], "Testing Environments": [[11, "testing-environments"]], "API Test": [[11, "api-test"]], "Parallel API Test": [[11, "parallel-api-test"]], "Seed Test": [[11, "seed-test"]], "Max Cycles Test": [[11, "max-cycles-test"]], "Render Test": [[11, "render-test"]], "Performance Benchmark Test": [[11, "performance-benchmark-test"]], "Save Observation Test": [[11, "save-observation-test"]], "Tutorials": [[12, "tutorials"]], "Recommended start": [[12, "recommended-start"]], "Atari": [[13, "atari"]], "Games Overview": [[13, "games-overview"]], "Environment Details": [[13, "environment-details"]], "Preprocessing": [[13, "preprocessing"]], "Common Parameters": [[13, "common-parameters"]], "Basketball Pong": [[14, "basketball-pong"]], "Environment parameters": [[14, "environment-parameters"], [15, "environment-parameters"], [16, "environment-parameters"], [17, "environment-parameters"], [18, "environment-parameters"], [19, "environment-parameters"], [20, "environment-parameters"], [21, "environment-parameters"], [22, "environment-parameters"], [23, "environment-parameters"], [24, "environment-parameters"], [25, "environment-parameters"], [26, "environment-parameters"], [27, "environment-parameters"], [28, "environment-parameters"], [29, "environment-parameters"], [30, "environment-parameters"], [31, "environment-parameters"], [32, "environment-parameters"], [33, "environment-parameters"], [34, "environment-parameters"], [35, "environment-parameters"], [36, "environment-parameters"], [37, "environment-parameters"]], "Action Space 
(Minimal)": [[14, "action-space-minimal"], [21, "action-space-minimal"], [22, "action-space-minimal"], [27, "action-space-minimal"], [28, "action-space-minimal"], [29, "action-space-minimal"], [30, "action-space-minimal"], [32, "action-space-minimal"], [34, "action-space-minimal"], [35, "action-space-minimal"], [36, "action-space-minimal"]], "Version History": [[14, "version-history"], [15, "version-history"], [16, "version-history"], [17, "version-history"], [18, "version-history"], [19, "version-history"], [20, "version-history"], [21, "version-history"], [22, "version-history"], [23, "version-history"], [24, "version-history"], [25, "version-history"], [26, "version-history"], [27, "version-history"], [28, "version-history"], [29, "version-history"], [30, "version-history"], [31, "version-history"], [32, "version-history"], [33, "version-history"], [34, "version-history"], [35, "version-history"], [36, "version-history"], [37, "version-history"], [39, "version-history"], [40, "version-history"], [41, "version-history"], [43, "version-history"], [44, "version-history"], [45, "version-history"], [46, "version-history"], [47, "version-history"], [48, "version-history"], [49, "version-history"], [50, "version-history"], [51, "version-history"], [52, "version-history"], [65, "version-history"], [66, "version-history"]], "API": [[14, "api"], [15, "api"], [16, "api"], [17, "api"], [18, "api"], [19, "api"], [20, "api"], [21, "api"], [22, "api"], [23, "api"], [24, "api"], [25, "api"], [26, "api"], [27, "api"], [28, "api"], [29, "api"], [30, "api"], [31, "api"], [32, "api"], [33, "api"], [34, "api"], [35, "api"], [36, "api"], [37, "api"], [39, "api"], [40, "api"], [41, "api"], [43, "api"], [44, "api"], [45, "api"], [46, "api"], [47, "api"], [48, "api"], [49, "api"], [50, "api"], [51, "api"], [52, "api"], [55, "api"], [56, "api"], [57, "api"], [58, "api"], [59, "api"], [60, "api"], [61, "api"], [62, "api"], [63, "api"], [65, "api"], [66, "api"], [67, "api"]], "Boxing": 
[[15, "boxing"]], "Action Space": [[15, "action-space"], [16, "action-space"], [17, "action-space"], [18, "action-space"], [19, "action-space"], [20, "action-space"], [23, "action-space"], [24, "action-space"], [25, "action-space"], [26, "action-space"], [31, "action-space"], [33, "action-space"], [37, "action-space"], [43, "action-space"], [44, "action-space"], [45, "action-space"], [46, "action-space"], [47, "action-space"], [48, "action-space"], [49, "action-space"], [50, "action-space"], [51, "action-space"], [52, "action-space"], [54, "action-space"], [67, "action-space"]], "Combat: Plane": [[16, "combat-plane"]], "Combat: Tank": [[17, "combat-tank"]], "Double Dunk": [[18, "double-dunk"]], "Emtombed: Competitive": [[19, "emtombed-competitive"]], "Emtombed: Cooperative": [[20, "emtombed-cooperative"]], "Flag Capture": [[21, "flag-capture"]], "Foozpong": [[22, "foozpong"]], "Ice Hockey": [[23, "ice-hockey"]], "Joust": [[24, "joust"]], "Mario Bros": [[25, "mario-bros"]], "Maze Craze": [[26, "maze-craze"]], "Othello": [[27, "othello"]], "Pong": [[28, "pong"]], "Quadrapong": [[29, "quadrapong"]], "Space Invaders": [[30, "space-invaders"]], "Space War": [[31, "space-war"]], "Surround": [[32, "surround"]], "Tennis": [[33, "tennis"]], "Video Checkers": [[34, "video-checkers"]], "Volleyball Pong": [[35, "volleyball-pong"]], "Warlords": [[36, "warlords"]], "Wizard of Wor": [[37, "wizard-of-wor"]], "Butterfly": [[38, "butterfly"]], "Cooperative Pong": [[39, "cooperative-pong"]], "Manual Control": [[39, "manual-control"], [40, "manual-control"], [66, "manual-control"]], "Arguments": [[39, "arguments"], [40, "arguments"], [41, "arguments"], [45, "arguments"], [46, "arguments"], [48, "arguments"], [49, "arguments"], [50, "arguments"], [51, "arguments"], [55, "arguments"], [56, "arguments"], [57, "arguments"], [58, "arguments"], [59, "arguments"], [60, "arguments"], [61, "arguments"], [62, "arguments"], [63, "arguments"], [65, "arguments"], [66, "arguments"], [67, 
"arguments"]], "Knights Archers Zombies (\u2018KAZ\u2019)": [[40, "knights-archers-zombies-kaz"]], "Vectorized (Default)": [[40, "vectorized-default"]], "Image-based": [[40, "image-based"]], "Pistonball": [[41, "pistonball"]], "Classic": [[42, "classic"]], "Chess": [[43, "chess"]], "Observation Space": [[43, "observation-space"], [44, "observation-space"], [45, "observation-space"], [46, "observation-space"], [47, "observation-space"], [48, "observation-space"], [49, "observation-space"], [50, "observation-space"], [51, "observation-space"], [52, "observation-space"], [54, "observation-space"], [65, "observation-space"], [67, "observation-space"]], "Legal Actions Mask": [[43, "legal-actions-mask"], [44, "legal-actions-mask"], [45, "legal-actions-mask"], [46, "legal-actions-mask"], [47, "legal-actions-mask"], [48, "legal-actions-mask"], [50, "legal-actions-mask"], [51, "legal-actions-mask"], [52, "legal-actions-mask"]], "Rewards": [[43, "rewards"], [44, "rewards"], [45, "rewards"], [46, "rewards"], [47, "rewards"], [48, "rewards"], [49, "rewards"], [50, "rewards"], [51, "rewards"], [52, "rewards"], [67, "rewards"]], "Connect Four": [[44, "connect-four"]], "Gin Rummy": [[45, "gin-rummy"]], "Go": [[46, "go"]], "Hanabi": [[47, "hanabi"]], "Environment arguments": [[47, "environment-arguments"]], "Leduc Hold\u2019em": [[48, "leduc-hold-em"]], "Rock Paper Scissors": [[49, "rock-paper-scissors"]], "Rock, Paper, Scissors": [[49, "id1"], [49, "id2"]], "Expanded Game": [[49, "expanded-game"], [49, "id3"]], "Texas Hold\u2019em": [[50, "texas-hold-em"]], "Texas Hold\u2019em No Limit": [[51, "texas-hold-em-no-limit"]], "Tic Tac Toe": [[52, "tic-tac-toe"]], "MPE": [[54, "mpe"]], "Types of Environments": [[54, "types-of-environments"]], "Key Concepts": [[54, "key-concepts"]], "Termination": [[54, "termination"]], "Rendering": [[54, "rendering"]], "Simple": [[55, "simple"]], "Simple Adversary": [[56, "simple-adversary"]], "Simple Crypto": [[57, "simple-crypto"]], "Simple Push": 
[[58, "simple-push"]], "Simple Reference": [[59, "simple-reference"]], "Simple Speaker Listener": [[60, "simple-speaker-listener"]], "Simple Spread": [[61, "simple-spread"]], "Simple Tag": [[62, "simple-tag"]], "Simple World Comm": [[63, "simple-world-comm"]], "SISL": [[64, "sisl"]], "Multiwalker": [[65, "multiwalker"]], "Pursuit": [[66, "pursuit"]], "Waterworld": [[67, "waterworld"]], "Third-Party Environments": [[68, "third-party-environments"]], "Environments using the latest versions of PettingZoo": [[68, "environments-using-the-latest-versions-of-pettingzoo"]], "Sumo-RL": [[68, "sumo-rl"]], "POGEMA": [[68, "pogema"]], "Racecar Gym": [[68, "racecar-gym"]], "Teamfight Tactics MuZero Agent": [[68, "teamfight-tactics-muzero-agent"]], "CookingZoo": [[68, "cookingzoo"]], "Crazy-RL": [[68, "crazy-rl"]], "PettingZoo Dilemma Envs": [[68, "pettingzoo-dilemma-envs"]], "Breakout-Clone": [[68, "breakout-clone"]], "Gobblet-RL": [[68, "gobblet-rl"]], "Cathedral-RL": [[68, "cathedral-rl"]], "Carla Gym": [[68, "carla-gym"]], "Environments using older versions of PettingZoo": [[68, "environments-using-older-versions-of-pettingzoo"]], "Neural MMO": [[68, "neural-mmo"]], "Sequential Social Dilemma Games": [[68, "sequential-social-dilemma-games"]], "Kaggle Environments": [[68, "kaggle-environments"]], "cogment-verse": [[68, "cogment-verse"]], "Stone Ground Hearth Battles": [[68, "stone-ground-hearth-battles"]], "Cyber Operations Research Gym": [[68, "cyber-operations-research-gym"]], "conflict_rez": [[68, "conflict-rez"]], "pz-battlesnake": [[68, "pz-battlesnake"]], "BomberManAI": [[68, "bombermanai"]], "Fanorona AEC": [[68, "fanorona-aec"]], "Galaga AI": [[68, "galaga-ai"]], "skyjo_rl": [[68, "skyjo-rl"]], "Mu Torere": [[68, "mu-torere"]], "Release Notes": [[70, "release-notes"]], "PettingZoo 1.23.1": [[70, "release-1-23-1"]], "PettingZoo 1.23.0": [[70, "release-1-23-0"]], "1.22.4": [[70, "release-1-22-4"]], "1.22.3": [[70, "release-1-22-3"]], "1.22.2": [[70, "release-1-22-2"]], 
"1.22.1": [[70, "release-1-22-1"]], "1.22.0": [[70, "release-1-22-0"]], "1.21.0": [[70, "release-1-21-0"]], "1.20.1": [[70, "release-1-20-1"]], "1.20.0": [[70, "release-1-20-0"]], "1.19.1": [[70, "release-1-19-1"]], "1.19.0": [[70, "release-1-19-0"]], "0.18.1: 1.18.1": [[70, "release-0-18-1"]], "1.17.0": [[70, "release-1-17-0"]], "1.16.0": [[70, "release-1-16-0"]], "1.15.0": [[70, "release-1-15-0"]], "1.14.0": [[70, "release-1-14-0"]], "1.13.1": [[70, "release-1-13-1"]], "1.12.0": [[70, "release-1-12-0"]], "1.11.1": [[70, "release-1-11-1"]], "1.11.0": [[70, "release-1-11-0"]], "1.10.0": [[70, "release-1-10-0"]], "1.9.0": [[70, "release-1-9-0"]], "1.8.2": [[70, "release-1-8-2"]], "1.8.1": [[70, "release-1-8-1"]], "1.8.0": [[70, "release-1-8-0"]], "1.7.0": [[70, "release-1-7-0"]], "1.6.1": [[70, "release-1-6-1"]], "1.6.0": [[70, "release-1-6-0"]], "1.5.2": [[70, "release-1-5-2"]], "1.5.1": [[70, "release-1-5-1"]], "1.5.0": [[70, "release-1-5-0"]], "1.4.2": [[70, "release-1-4-2"]], "1.4.0": [[70, "release-1-4-0"]], "CleanRL: Advanced PPO": [[71, "cleanrl-advanced-ppo"]], "Environment Setup": [[71, "environment-setup"], [72, "environment-setup"], [82, "environment-setup"], [83, "environment-setup"], [85, "environment-setup"], [86, "environment-setup"], [88, "environment-setup"], [89, "environment-setup"], [90, "environment-setup"], [91, "environment-setup"], [93, "environment-setup"]], "Code": [[71, "code"], [72, "code"], [75, "code"], [76, "code"], [77, "code"], [83, "code"], [85, "code"], [86, "code"], [88, "code"], [89, "code"], [90, "code"], [91, "code"], [93, "code"]], "CleanRL: Implementing PPO": [[72, "cleanrl-implementing-ppo"]], "CleanRL Tutorial": [[73, "cleanrl-tutorial"]], "CleanRL Overview": [[73, "cleanrl-overview"]], "Examples using PettingZoo:": [[73, "examples-using-pettingzoo"], [84, "examples-using-pettingzoo"]], "WandB Integration": [[73, "wandb-integration"]], "Tutorial: Repository Structure": [[74, "tutorial-repository-structure"]], 
"Introduction": [[74, "introduction"], [75, "introduction"], [76, "introduction"], [77, "introduction"]], "Tree structure": [[74, "tree-structure"]], "Advanced: Additional (optional) files": [[74, "advanced-additional-optional-files"]], "Skeleton code": [[74, "skeleton-code"]], "Tutorial: Environment Logic": [[75, "tutorial-environment-logic"]], "Tutorial: Action Masking": [[76, "tutorial-action-masking"]], "Tutorial: Testing Your Environment": [[77, "tutorial-testing-your-environment"]], "Environment Creation Tutorial": [[79, "environment-creation-tutorial"]], "LangChain Tutorial": [[81, "langchain-tutorial"]], "LangChain Overview": [[81, "langchain-overview"]], "\ud83d\udcc3 LLMs and Prompts:": [[81, "llms-and-prompts"]], "\ud83d\udd17 Chains:": [[81, "chains"]], "\ud83d\udcda Data Augmented Generation:": [[81, "data-augmented-generation"]], "\ud83e\udd16 Agents:": [[81, "agents"]], "\ud83e\udde0 Memory:": [[81, "memory"]], "\ud83e\uddd0 Evaluation:": [[81, "evaluation"]], "LangChain: Creating LLM agents": [[82, "langchain-creating-llm-agents"]], "Environment Loop": [[82, "environment-loop"]], "Gymnasium Agent": [[82, "gymnasium-agent"]], "PettingZoo Agent": [[82, "pettingzoo-agent"]], "Rock-Paper-Scissors": [[82, "rock-paper-scissors"]], "Action Masking Agent": [[82, "action-masking-agent"]], "Tic-Tac-Toe": [[82, "tic-tac-toe"]], "Texas Holdem\u2019 No Limit": [[82, "texas-holdem-no-limit"]], "Full Code": [[82, "full-code"]], "RLlib: DQN for Simple Poker": [[83, "rllib-dqn-for-simple-poker"]], "Training the RL agent": [[83, "training-the-rl-agent"], [85, "training-the-rl-agent"]], "Watching the trained RL agent play": [[83, "watching-the-trained-rl-agent-play"], [85, "watching-the-trained-rl-agent-play"]], "Ray RLlib Tutorial": [[84, "ray-rllib-tutorial"]], "RLlib Overview": [[84, "rllib-overview"]], "Training:": [[84, "training"]], "Environments:": [[84, "environments"]], "Architecture": [[84, "architecture"], [92, "architecture"]], "RLlib: PPO for Pistonball": 
[[85, "rllib-ppo-for-pistonball"]], "SB3: Action Masked PPO for Connect Four": [[86, "sb3-action-masked-ppo-for-connect-four"]], "Training and Evaluation": [[86, "training-and-evaluation"], [88, "training-and-evaluation"], [89, "training-and-evaluation"]], "Testing other PettingZoo Classic environments": [[86, "testing-other-pettingzoo-classic-environments"]], "Stable-Baselines3 Tutorial": [[87, "stable-baselines3-tutorial"]], "Stable-Baselines Overview": [[87, "stable-baselines-overview"]], "SB3: PPO for Knights-Archers-Zombies": [[88, "sb3-ppo-for-knights-archers-zombies"]], "SB3: PPO for Waterworld": [[89, "sb3-ppo-for-waterworld"]], "Tianshou: CLI and Logging": [[90, "tianshou-cli-and-logging"]], "Tianshou: Basic API Usage": [[91, "tianshou-basic-api-usage"]], "Tianshou Tutorial": [[92, "tianshou-tutorial"]], "Tianshou Overview": [[92, "tianshou-overview"]], "Examples using PettingZoo": [[92, "examples-using-pettingzoo"]], "Tianshou: Training Agents": [[93, "tianshou-training-agents"]]}, "indexentries": {"aecenv (class in pettingzoo.utils.env)": [[2, "pettingzoo.utils.env.AECEnv"]], "action_spaces (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.action_spaces"]], "agent_selection (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.agent_selection"]], "agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.agents"]], "close() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.close"]], "infos (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.infos"]], "max_num_agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.max_num_agents"]], "num_agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.num_agents"]], "observation_spaces (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.observation_spaces"]], "observe() (pettingzoo.utils.env.aecenv method)": [[2, 
"pettingzoo.utils.env.AECEnv.observe"]], "possible_agents (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.possible_agents"]], "render() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.render"]], "reset() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.reset"]], "rewards (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.rewards"]], "step() (pettingzoo.utils.env.aecenv method)": [[2, "pettingzoo.utils.env.AECEnv.step"]], "terminations (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.terminations"]], "truncations (pettingzoo.utils.env.aecenv attribute)": [[2, "pettingzoo.utils.env.AECEnv.truncations"]], "parallelenv (class in pettingzoo.utils.env)": [[3, "pettingzoo.utils.env.ParallelEnv"]], "action_space() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.action_space"]], "action_spaces (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.action_spaces"]], "agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.agents"]], "close() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.close"]], "max_num_agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.max_num_agents"]], "num_agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.num_agents"]], "observation_space() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.observation_space"]], "observation_spaces (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.observation_spaces"]], "possible_agents (pettingzoo.utils.env.parallelenv attribute)": [[3, "pettingzoo.utils.env.ParallelEnv.possible_agents"]], "render() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.render"]], "reset() 
(pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.reset"]], "state() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.state"]], "step() (pettingzoo.utils.env.parallelenv method)": [[3, "pettingzoo.utils.env.ParallelEnv.step"]], "assertoutofboundswrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.AssertOutOfBoundsWrapper"]], "basewrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.BaseWrapper"]], "capturestdoutwrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.CaptureStdoutWrapper"]], "clipoutofboundswrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.ClipOutOfBoundsWrapper"]], "orderenforcingwrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.OrderEnforcingWrapper"]], "terminateillegalwrapper (class in pettingzoo.utils.wrappers)": [[6, "pettingzoo.utils.wrappers.TerminateIllegalWrapper"]], "aec_to_parallel() (in module pettingzoo.utils.conversions)": [[6, "pettingzoo.utils.conversions.aec_to_parallel"]], "module": [[6, "module-pettingzoo.utils.conversions"]], "parallel_to_aec() (in module pettingzoo.utils.conversions)": [[6, "pettingzoo.utils.conversions.parallel_to_aec"]], "pettingzoo.utils.conversions": [[6, "module-pettingzoo.utils.conversions"]], "agent_indicator_v0()": [[8, "agent_indicator_v0"]], "black_death_v2()": [[8, "black_death_v2"]], "built-in function": [[8, "agent_indicator_v0"], [8, "black_death_v2"], [8, "clip_actions_v0"], [8, "clip_reward_v0"], [8, "color_reduction_v0"], [8, "delay_observations_v0"], [8, "dtype_v0"], [8, "flatten_v0"], [8, "frame_skip_v0"], [8, "frame_stack_v1"], [8, "max_observation_v0"], [8, "nan_noop_v0"], [8, "nan_random_v0"], [8, "nan_zeros_v0"], [8, "normalize_obs_v0"], [8, "pad_action_space_v0"], [8, "pad_observations_v0"], [8, "reshape_v0"], [8, "resize_v1"], [8, "scale_actions_v0"], [8, "sticky_actions_v0"]], 
"clip_actions_v0()": [[8, "clip_actions_v0"]], "clip_reward_v0()": [[8, "clip_reward_v0"]], "color_reduction_v0()": [[8, "color_reduction_v0"]], "delay_observations_v0()": [[8, "delay_observations_v0"]], "dtype_v0()": [[8, "dtype_v0"]], "flatten_v0()": [[8, "flatten_v0"]], "frame_skip_v0()": [[8, "frame_skip_v0"]], "frame_stack_v1()": [[8, "frame_stack_v1"]], "max_observation_v0()": [[8, "max_observation_v0"]], "nan_noop_v0()": [[8, "nan_noop_v0"]], "nan_random_v0()": [[8, "nan_random_v0"]], "nan_zeros_v0()": [[8, "nan_zeros_v0"]], "normalize_obs_v0()": [[8, "normalize_obs_v0"]], "pad_action_space_v0()": [[8, "pad_action_space_v0"]], "pad_observations_v0()": [[8, "pad_observations_v0"]], "reshape_v0()": [[8, "reshape_v0"]], "resize_v1()": [[8, "resize_v1"]], "scale_actions_v0()": [[8, "scale_actions_v0"]], "sticky_actions_v0()": [[8, "sticky_actions_v0"]], "raw_env (class in pettingzoo.atari.basketball_pong.basketball_pong)": [[14, "pettingzoo.atari.basketball_pong.basketball_pong.raw_env"]], "raw_env (class in pettingzoo.atari.boxing.boxing)": [[15, "pettingzoo.atari.boxing.boxing.raw_env"]], "raw_env (class in pettingzoo.atari.combat_plane.combat_plane)": [[16, "pettingzoo.atari.combat_plane.combat_plane.raw_env"]], "raw_env (class in pettingzoo.atari.combat_tank.combat_tank)": [[17, "pettingzoo.atari.combat_tank.combat_tank.raw_env"]], "raw_env (class in pettingzoo.atari.double_dunk.double_dunk)": [[18, "pettingzoo.atari.double_dunk.double_dunk.raw_env"]], "raw_env (class in pettingzoo.atari.entombed_competitive.entombed_competitive)": [[19, "pettingzoo.atari.entombed_competitive.entombed_competitive.raw_env"]], "raw_env (class in pettingzoo.atari.entombed_cooperative.entombed_cooperative)": [[20, "pettingzoo.atari.entombed_cooperative.entombed_cooperative.raw_env"]], "raw_env (class in pettingzoo.atari.flag_capture.flag_capture)": [[21, "pettingzoo.atari.flag_capture.flag_capture.raw_env"]], "raw_env (class in pettingzoo.atari.foozpong.foozpong)": [[22, 
"pettingzoo.atari.foozpong.foozpong.raw_env"]], "raw_env (class in pettingzoo.atari.ice_hockey.ice_hockey)": [[23, "pettingzoo.atari.ice_hockey.ice_hockey.raw_env"]], "raw_env (class in pettingzoo.atari.joust.joust)": [[24, "pettingzoo.atari.joust.joust.raw_env"]], "raw_env (class in pettingzoo.atari.mario_bros.mario_bros)": [[25, "pettingzoo.atari.mario_bros.mario_bros.raw_env"]], "raw_env (class in pettingzoo.atari.maze_craze.maze_craze)": [[26, "pettingzoo.atari.maze_craze.maze_craze.raw_env"]], "raw_env (class in pettingzoo.atari.othello.othello)": [[27, "pettingzoo.atari.othello.othello.raw_env"]], "raw_env (class in pettingzoo.atari.pong.pong)": [[28, "pettingzoo.atari.pong.pong.raw_env"]], "raw_env (class in pettingzoo.atari.quadrapong.quadrapong)": [[29, "pettingzoo.atari.quadrapong.quadrapong.raw_env"]], "raw_env (class in pettingzoo.atari.space_invaders.space_invaders)": [[30, "pettingzoo.atari.space_invaders.space_invaders.raw_env"]], "raw_env (class in pettingzoo.atari.space_war.space_war)": [[31, "pettingzoo.atari.space_war.space_war.raw_env"]], "raw_env (class in pettingzoo.atari.surround.surround)": [[32, "pettingzoo.atari.surround.surround.raw_env"]], "raw_env (class in pettingzoo.atari.tennis.tennis)": [[33, "pettingzoo.atari.tennis.tennis.raw_env"]], "raw_env (class in pettingzoo.atari.video_checkers.video_checkers)": [[34, "pettingzoo.atari.video_checkers.video_checkers.raw_env"]], "raw_env (class in pettingzoo.atari.volleyball_pong.volleyball_pong)": [[35, "pettingzoo.atari.volleyball_pong.volleyball_pong.raw_env"]], "raw_env (class in pettingzoo.atari.warlords.warlords)": [[36, "pettingzoo.atari.warlords.warlords.raw_env"]], "raw_env (class in pettingzoo.atari.wizard_of_wor.wizard_of_wor)": [[37, "pettingzoo.atari.wizard_of_wor.wizard_of_wor.raw_env"]], "action_space() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.action_space"]], "close() 
(pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.close"]], "env (class in pettingzoo.butterfly.cooperative_pong.cooperative_pong)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.env"]], "observation_space() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.observation_space"]], "observe() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.observe"]], "raw_env (class in pettingzoo.butterfly.cooperative_pong.cooperative_pong)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env"]], "render() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.render"]], "reset() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.reset"]], "state() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.state"]], "step() (pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env method)": [[39, "pettingzoo.butterfly.cooperative_pong.cooperative_pong.raw_env.step"]], "action_space() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.action_space"]], "close() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.close"]], "env (class in pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies)": [[40, 
"pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.env"]], "observation_space() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.observation_space"]], "observe() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.observe"]], "raw_env (class in pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env"]], "render() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.render"]], "reset() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.reset"]], "state() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.state"]], "step() (pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env method)": [[40, "pettingzoo.butterfly.knights_archers_zombies.knights_archers_zombies.raw_env.step"]], "action_space() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.action_space"]], "close() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.close"]], "env (class in pettingzoo.butterfly.pistonball.pistonball)": [[41, "pettingzoo.butterfly.pistonball.pistonball.env"]], "observation_space() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, 
"pettingzoo.butterfly.pistonball.pistonball.raw_env.observation_space"]], "observe() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.observe"]], "raw_env (class in pettingzoo.butterfly.pistonball.pistonball)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env"]], "render() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.render"]], "reset() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.reset"]], "state() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.state"]], "step() (pettingzoo.butterfly.pistonball.pistonball.raw_env method)": [[41, "pettingzoo.butterfly.pistonball.pistonball.raw_env.step"]], "action_space() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.action_space"]], "close() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.close"]], "env (class in pettingzoo.classic.chess.chess)": [[43, "pettingzoo.classic.chess.chess.env"]], "observation_space() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.observation_space"]], "observe() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.observe"]], "raw_env (class in pettingzoo.classic.chess.chess)": [[43, "pettingzoo.classic.chess.chess.raw_env"]], "render() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.render"]], "reset() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.reset"]], "step() (pettingzoo.classic.chess.chess.raw_env method)": [[43, "pettingzoo.classic.chess.chess.raw_env.step"]], "action_space() 
(pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.action_space"]], "close() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.close"]], "env (class in pettingzoo.classic.connect_four.connect_four)": [[44, "pettingzoo.classic.connect_four.connect_four.env"]], "observation_space() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.observation_space"]], "observe() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.observe"]], "raw_env (class in pettingzoo.classic.connect_four.connect_four)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env"]], "render() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.render"]], "reset() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.reset"]], "step() (pettingzoo.classic.connect_four.connect_four.raw_env method)": [[44, "pettingzoo.classic.connect_four.connect_four.raw_env.step"]], "env (class in pettingzoo.classic.rlcard_envs.gin_rummy)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.env"]], "observe() (pettingzoo.classic.rlcard_envs.gin_rummy.raw_env method)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env.observe"]], "raw_env (class in pettingzoo.classic.rlcard_envs.gin_rummy)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.gin_rummy.raw_env method)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.gin_rummy.raw_env method)": [[45, "pettingzoo.classic.rlcard_envs.gin_rummy.raw_env.step"]], "action_space() (pettingzoo.classic.go.go.raw_env method)": [[46, 
"pettingzoo.classic.go.go.raw_env.action_space"]], "close() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.close"]], "env (class in pettingzoo.classic.go.go)": [[46, "pettingzoo.classic.go.go.env"]], "observation_space() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.observation_space"]], "observe() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.observe"]], "raw_env (class in pettingzoo.classic.go.go)": [[46, "pettingzoo.classic.go.go.raw_env"]], "render() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.render"]], "reset() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.reset"]], "step() (pettingzoo.classic.go.go.raw_env method)": [[46, "pettingzoo.classic.go.go.raw_env.step"]], "action_space() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.action_space"]], "close() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.close"]], "env (class in pettingzoo.classic.hanabi.hanabi)": [[47, "pettingzoo.classic.hanabi.hanabi.env"]], "observation_space() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.observation_space"]], "observe() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.observe"]], "raw_env (class in pettingzoo.classic.hanabi.hanabi)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env"]], "render() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.render"]], "reset() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.reset"]], "step() (pettingzoo.classic.hanabi.hanabi.raw_env method)": [[47, "pettingzoo.classic.hanabi.hanabi.raw_env.step"]], "env (class in 
pettingzoo.classic.rlcard_envs.leduc_holdem)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.env"]], "raw_env (class in pettingzoo.classic.rlcard_envs.leduc_holdem)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env method)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env method)": [[48, "pettingzoo.classic.rlcard_envs.leduc_holdem.raw_env.step"]], "action_space() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.action_space"]], "close() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.close"]], "env (class in pettingzoo.classic.rps.rps)": [[49, "pettingzoo.classic.rps.rps.env"]], "observation_space() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.observation_space"]], "observe() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.observe"]], "raw_env (class in pettingzoo.classic.rps.rps)": [[49, "pettingzoo.classic.rps.rps.raw_env"]], "render() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.render"]], "reset() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.reset"]], "step() (pettingzoo.classic.rps.rps.raw_env method)": [[49, "pettingzoo.classic.rps.rps.raw_env.step"]], "env (class in pettingzoo.classic.rlcard_envs.texas_holdem)": [[50, "pettingzoo.classic.rlcard_envs.texas_holdem.env"]], "raw_env (class in pettingzoo.classic.rlcard_envs.texas_holdem)": [[50, "pettingzoo.classic.rlcard_envs.texas_holdem.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.texas_holdem.raw_env method)": [[50, "pettingzoo.classic.rlcard_envs.texas_holdem.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.texas_holdem.raw_env method)": [[50, 
"pettingzoo.classic.rlcard_envs.texas_holdem.raw_env.step"]], "env (class in pettingzoo.classic.rlcard_envs.texas_holdem_no_limit)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.env"]], "raw_env (class in pettingzoo.classic.rlcard_envs.texas_holdem_no_limit)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env"]], "render() (pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env method)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env.render"]], "step() (pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env method)": [[51, "pettingzoo.classic.rlcard_envs.texas_holdem_no_limit.raw_env.step"]], "action_space() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.action_space"]], "close() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.close"]], "env (class in pettingzoo.classic.tictactoe.tictactoe)": [[52, "pettingzoo.classic.tictactoe.tictactoe.env"]], "observation_space() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.observation_space"]], "observe() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.observe"]], "raw_env (class in pettingzoo.classic.tictactoe.tictactoe)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env"]], "render() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.render"]], "reset() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.reset"]], "step() (pettingzoo.classic.tictactoe.tictactoe.raw_env method)": [[52, "pettingzoo.classic.tictactoe.tictactoe.raw_env.step"]], "action_spaces (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.action_spaces"]], "agent_selection 
(pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.agents"]], "infos (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple.simple)": [[55, "pettingzoo.mpe.simple.simple.raw_env"]], "rewards (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple.simple.raw_env attribute)": [[55, "pettingzoo.mpe.simple.simple.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.agents"]], "infos (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, 
"pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_adversary.simple_adversary)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env"]], "rewards (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_adversary.simple_adversary.raw_env attribute)": [[56, "pettingzoo.mpe.simple_adversary.simple_adversary.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.agents"]], "infos (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_crypto.simple_crypto)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env"]], "rewards (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, 
"pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_crypto.simple_crypto.raw_env attribute)": [[57, "pettingzoo.mpe.simple_crypto.simple_crypto.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.agents"]], "infos (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_push.simple_push)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env"]], "rewards (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_push.simple_push.raw_env attribute)": [[58, "pettingzoo.mpe.simple_push.simple_push.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.agent_selection"]], "agents 
(pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.agents"]], "infos (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_reference.simple_reference)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env"]], "rewards (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_reference.simple_reference.raw_env attribute)": [[59, "pettingzoo.mpe.simple_reference.simple_reference.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.agents"]], "infos (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, 
"pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env"]], "rewards (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env attribute)": [[60, "pettingzoo.mpe.simple_speaker_listener.simple_speaker_listener.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.agents"]], "infos (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.observation_spaces"]], 
"possible_agents (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_spread.simple_spread)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env"]], "rewards (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_spread.simple_spread.raw_env attribute)": [[61, "pettingzoo.mpe.simple_spread.simple_spread.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.agents"]], "infos (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_tag.simple_tag)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env"]], "rewards (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.terminations"]], "truncations 
(pettingzoo.mpe.simple_tag.simple_tag.raw_env attribute)": [[62, "pettingzoo.mpe.simple_tag.simple_tag.raw_env.truncations"]], "action_spaces (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.action_spaces"]], "agent_selection (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.agent_selection"]], "agents (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.agents"]], "infos (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.infos"]], "observation_spaces (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.observation_spaces"]], "possible_agents (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.possible_agents"]], "raw_env (class in pettingzoo.mpe.simple_world_comm.simple_world_comm)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env"]], "rewards (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.rewards"]], "terminations (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.terminations"]], "truncations (pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env attribute)": [[63, "pettingzoo.mpe.simple_world_comm.simple_world_comm.raw_env.truncations"]], "action_space() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.action_space"]], "close() (pettingzoo.sisl.multiwalker.multiwalker.raw_env 
method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.close"]], "env (class in pettingzoo.sisl.multiwalker.multiwalker)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.env"]], "observation_space() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.observation_space"]], "observe() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.observe"]], "raw_env (class in pettingzoo.sisl.multiwalker.multiwalker)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env"]], "render() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.render"]], "reset() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.reset"]], "step() (pettingzoo.sisl.multiwalker.multiwalker.raw_env method)": [[65, "pettingzoo.sisl.multiwalker.multiwalker.raw_env.step"]], "action_space() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.action_space"]], "close() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.close"]], "env (class in pettingzoo.sisl.pursuit.pursuit)": [[66, "pettingzoo.sisl.pursuit.pursuit.env"]], "observation_space() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.observation_space"]], "observe() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.observe"]], "raw_env (class in pettingzoo.sisl.pursuit.pursuit)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env"]], "render() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.render"]], "reset() (pettingzoo.sisl.pursuit.pursuit.raw_env method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.reset"]], "step() (pettingzoo.sisl.pursuit.pursuit.raw_env 
method)": [[66, "pettingzoo.sisl.pursuit.pursuit.raw_env.step"]], "action_space() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.action_space"]], "close() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.close"]], "env (class in pettingzoo.sisl.waterworld.waterworld)": [[67, "pettingzoo.sisl.waterworld.waterworld.env"]], "observation_space() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.observation_space"]], "observe() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.observe"]], "raw_env (class in pettingzoo.sisl.waterworld.waterworld)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env"]], "render() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.render"]], "reset() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.reset"]], "step() (pettingzoo.sisl.waterworld.waterworld.raw_env method)": [[67, "pettingzoo.sisl.waterworld.waterworld.raw_env.step"]]}})
\ No newline at end of file