Created testing sender for Dagger

jestjest · jestjest · commit ab23701b6f82 · 2017-07-31T19:06:49.000Z
diff --git a/a3c/run_sender.py b/a3c/run_sender.py
@@ -7,7 +7,7 @@
 from os import path
 from env.sender import Sender
 from models import ActorCriticNetwork
-from a3c import ewma
+from helpers.helpers import ewma
 
 
 class Learner(object):
diff --git a/dagger/dagger.py b/dagger/dagger.py
@@ -294,7 +294,8 @@ def sample_action(self, step_state_buf):
         Appends to the state/action buffers the state and the
         "correct" action to take according to the expert.
         """
-        start_time = time.time()
+        if self.is_chief:
+            start_time = time.time()
 
         # For ewma delay, only want first component, the one-way delay
         # For the cwnd, try only the most recent cwnd
diff --git a/dagger/run_sender.py b/dagger/run_sender.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import argparse
+import project_root
+import numpy as np
+import tensorflow as tf
+from os import path
+from env.sender import Sender
+from models import DaggerNetwork
+from helpers.helpers import ewma
+
+
+def softmax(x):
+    """Return the softmax of x, normalised along axis 0.
+
+    The maximum of x is subtracted before exponentiating so that large
+    inputs do not overflow in np.exp; the result is unchanged by the shift.
+    """
+    shifted = x - np.max(x)
+    exps = np.exp(shifted)
+    return exps / np.sum(exps, axis=0)
+
+
+class Learner(object):
+    """Restores a trained DaggerNetwork and samples actions from it.
+
+    Args:
+        state_dim: forwarded to DaggerNetwork as ``state_dim``.
+        action_cnt: forwarded to DaggerNetwork as ``action_cnt``.
+        restore_vars: checkpoint path handed to ``tf.train.Saver.restore``.
+    """
+
+    def __init__(self, state_dim, action_cnt, restore_vars):
+        with tf.variable_scope('local'):
+            self.pi = DaggerNetwork(state_dim=state_dim, action_cnt=action_cnt)
+
+        self.ewma_window = 3        # alpha = 2 / (window + 1)
+        self.session = tf.Session()
+
+        # restore saved variables
+        saver = tf.train.Saver(self.pi.trainable_vars)
+        saver.restore(self.session, restore_vars)
+
+        # init the remaining vars, especially those created by optimizer
+        uninit_vars = set(tf.global_variables()) - set(self.pi.trainable_vars)
+        self.session.run(tf.variables_initializer(uninit_vars))
+
+    def sample_action(self, step_state_buf):
+        """Sample an action index for the step described by step_state_buf.
+
+        Args:
+            step_state_buf: non-empty sequence of per-packet states; element
+                0 of each state is used as the delay sample and element 1 of
+                the last state as the most recent cwnd.
+
+        Returns:
+            The index of the sampled action.
+        """
+        # For ewma delay, only want first component, the one-way delay
+        # For the cwnd, try only the most recent cwnd
+        owd_buf = np.asarray([state[0] for state in step_state_buf])
+        ewma_delay = ewma(owd_buf, self.ewma_window)
+        last_cwnd = step_state_buf[-1][1]
+
+        # Get probability of each action from the restored network.
+        # Use the attributes __init__ actually creates (self.pi and
+        # self.session); the previous self.local_network / self.sess
+        # were never assigned and raised AttributeError on first call.
+        pi = self.pi
+        action_probs = self.session.run(
+            pi.action_probs,
+            feed_dict={pi.states: [[ewma_delay, last_cwnd]]})
+
+        # Re-normalise through a softmax; temperature 1.0 leaves the
+        # network output unscaled before the softmax is applied.
+        temperature = 1.0
+        temp_probs = softmax(action_probs[0] / temperature)
+
+        # NOTE(review): the 1e-5 subtracted from every entry presumably keeps
+        # the probabilities from summing to slightly more than 1, which
+        # np.random.multinomial rejects -- confirm.
+        action = np.argmax(np.random.multinomial(1, temp_probs - 1e-5))
+        return action
+
+
+def main():
+    """Restore the trained policy and run a Sender that is driven by it.
+
+    Takes a single positional command-line argument, the integer port
+    passed to Sender.
+    """
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument('port', type=int)
+    port = arg_parser.parse_args().port
+
+    sender = Sender(port)
+
+    # Checkpoint to restore, located under the project root.
+    checkpoint = path.join(
+        project_root.DIR, 'dagger', 'logs',
+        '2017-07-31--06-32-01-true-expert-2', 'checkpoint-1100')
+
+    learner = Learner(state_dim=Sender.state_dim,
+                      action_cnt=Sender.action_cnt,
+                      restore_vars=checkpoint)
+
+    # Register the learner's policy as the sender's action callback.
+    sender.set_sample_action(learner.sample_action)
+
+    try:
+        sender.handshake()
+        sender.run()
+    except KeyboardInterrupt:
+        # Ctrl-C is treated as a normal way to stop the sender.
+        pass
+    finally:
+        sender.cleanup()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/env/sender.py b/env/sender.py
@@ -32,6 +32,8 @@ class Sender(object):
     action_cnt = len(action_mapping)
 
     def __init__(self, port=0, train=False, debug=False):
+        self.step_time_file = open('/tmp/step_time', 'a')
+
         self.train = train
         self.debug = debug
 
@@ -183,6 +185,7 @@ def recv(self):
             self.step_start_ms = curr_ts_ms()
 
         if curr_ts_ms() - self.step_start_ms > self.step_len_ms:  # step's end
+            self.step_time_file.write('step length: %f ms\n' % (curr_ts_ms() - self.step_start_ms))
             action = self.sample_action(self.step_state_buf)
             self.take_action(action)