
Commit f1c2b78

Author: yvchen (committed)
Add all user simulator code
1 parent 4c5b157 commit f1c2b78


52 files changed: +114492 −0 lines changed

README.md

+82
@@ -1,2 +1,84 @@
# UserSimulator

User Simulation for Task-Completion Dialogues

These instructions describe how to run the simulation and the agents (rule, command-line, RL).


1. Some Datasets:

under this folder: ./src/deep_dialog/data (a loading sketch follows this list)

[movie_kb]
movie_kb.1k.p: 94% success rate (for user_goals_first_turn_template_subsets.v1.p)
movie_kb.v2.p: 36% success rate (for user_goals_first_turn_template_subsets.v1.p)

[user goal files]
first turn: user_goals_first_turn_template.v2.p
user_goals_first_turn_template.part.movie.v1.p: a subset of user goals. [Please use this one; the upper-bound success rate on movie_kb.1k.json is 0.9765.]

[NLG rule template]
dia_act_nl_pairs.v6.json: predefined NLG rule templates for both the user simulator and the agent.

[Dialog Act Intent]
dia_acts.txt

[Dialog Act Slot]
slot_set.txt

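If you want to peek at the pickled data files before running anything, here is a minimal sketch. It assumes the .p files are ordinary Python 2-era pickles, so `encoding='latin1'` is passed when loading them under Python 3; the exact structure of each object depends on how the file was built, so inspect it interactively.

```python
# Minimal sketch: load and inspect the pickled data files listed above.
# Assumption: Python 2-era pickles, hence encoding='latin1' under Python 3.
import pickle

def peek(path):
    with open(path, 'rb') as f:
        obj = pickle.load(f, encoding='latin1')
    size = len(obj) if hasattr(obj, '__len__') else 'n/a'
    print('%s -> %s (len: %s)' % (path, type(obj).__name__, size))
    return obj

movie_kb = peek('./src/deep_dialog/data/movie_kb.1k.p')
user_goals = peek('./src/deep_dialog/data/user_goals_first_turn_template.part.movie.v1.p')
```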
2. Some Parameters:

--agt: the agent id
--usr: the user (simulator) id
--max_turn: maximum number of turns
--episodes: how many dialogues you want to run
--slot_err_prob: slot-level error probability
--slot_err_mode: which kind of slot error mode
--intent_err_prob: intent-level error probability

--movie_kb_path: the movie KB path for the agent side
--goal_file_path: the user goal file path for the user simulator side

--dqn_hidden_size: hidden size for the RL (DQN) agent
--batch_size: batch size for DQN training
--simulation_epoch_size: how many dialogues are simulated in one epoch

--warm_start: use the rule policy to fill the experience replay buffer at the beginning (a conceptual sketch follows this list)
--warm_start_epochs: how many dialogues to run during warm start

--run_mode: 0 for display mode (NL); 1 for debug mode (dia_act); 2 for debug mode (dia_act and NL); >=3 for no display (i.e. training)
--auto_suggest: 0 for no auto_suggest; 1 for auto_suggest
--act_level: 0 if the user simulator works at the dia_act level; 1 if it works at the NL level
--cmd_input_mode: 0 for NL input; 1 for Dia_Act input (AgentCmd only)

--write_model_dir: the directory to write the models to
--trained_model_path: the trained RL agent model; load the trained model for prediction

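For readers unfamiliar with warm starting: the idea behind --warm_start / --warm_start_epochs is to roll out a hand-written rule policy first, so the DQN's experience replay buffer already holds transitions before the first gradient update. The sketch below is purely conceptual; the names (ReplayBuffer, warm_start, env.reset, env.step, rule_policy) are hypothetical and do not mirror this repository's API.

```python
# Conceptual sketch of warm start (hypothetical names, not this repo's API):
# a rule policy generates (s, a, r, s_next, done) tuples so the DQN replay
# buffer is non-empty before Q-learning updates begin.
import random
from collections import deque

class ReplayBuffer:
    def __init__(self, capacity=1000):
        self.pool = deque(maxlen=capacity)

    def add(self, s, a, r, s_next, done):
        self.pool.append((s, a, r, s_next, done))

    def sample(self, batch_size=16):
        return random.sample(self.pool, min(batch_size, len(self.pool)))

def warm_start(buffer, rule_policy, env, epochs=120):
    """Fill the buffer by rolling out the rule policy for `epochs` dialogues."""
    for _ in range(epochs):
        state, done = env.reset(), False
        while not done:
            action = rule_policy(state)
            next_state, reward, done = env.step(action)
            buffer.add(state, action, reward, next_state, done)
            state = next_state
```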
3. Commands to run the different agents and user simulators

Rule Agent:
python run.py --agt 5 --usr 1 --max_turn 40 --movie_kb_path .\deep_dialog\data\movie_kb.1k.p --goal_file_path .\deep_dialog\data\user_goals_first_turn_template.part.movie.v1.p --intent_err_prob 0.00 --slot_err_prob 0.00 --episodes 500 --act_level 0


Cmd Agent:
NL Input: python run.py --agt 0 --usr 1 --max_turn 40 --movie_kb_path .\deep_dialog\data\movie_kb.1k.p --goal_file_path .\deep_dialog\data\user_goals_first_turn_template.part.movie.v1.p --intent_err_prob 0.00 --slot_err_prob 0.00 --episodes 500 --act_level 0 --run_mode 0 --cmd_input_mode 0
Dia_Act Input: python run.py --agt 0 --usr 1 --max_turn 40 --movie_kb_path .\deep_dialog\data\movie_kb.1k.p --goal_file_path .\deep_dialog\data\user_goals_first_turn_template.part.movie.v1.p --intent_err_prob 0.00 --slot_err_prob 0.00 --episodes 500 --act_level 0 --run_mode 0 --cmd_input_mode 1


Train RL Agent:
[End2End without NLU and NLG, with simulated noise in NLU]
RL: python run.py --agt 9 --usr 1 --max_turn 40 --movie_kb_path .\deep_dialog\data\movie_kb.1k.p --dqn_hidden_size 80 --experience_replay_pool_size 1000 --episodes 500 --simulation_epoch_size 100 --write_model_dir .\deep_dialog\checkpoints\rl_agent\ --run_mode 3 --act_level 0 --slot_err_prob 0.00 --intent_err_prob 0.00 --batch_size 16 --goal_file_path .\deep_dialog\data\user_goals_first_turn_template.part.movie.v1.p --warm_start 1 --warm_start_epochs 120

[End2End with NLU and NLG]
RL: python run.py --agt 9 --usr 1 --max_turn 40 --movie_kb_path .\deep_dialog\data\movie_kb.1k.p --dqn_hidden_size 80 --experience_replay_pool_size 1000 --episodes 500 --simulation_epoch_size 100 --write_model_dir .\deep_dialog\checkpoints\rl_agent\ --run_mode 3 --act_level 1 --slot_err_prob 0.00 --intent_err_prob 0.00 --batch_size 16 --goal_file_path .\deep_dialog\data\user_goals_first_turn_template.part.movie.v1.p --warm_start 1 --warm_start_epochs 120


Test RL Agent with N dialogues:
RL: python run.py --agt 9 --usr 1 --max_turn 40 --movie_kb_path .\deep_dialog\data\movie_kb.1k.p --dqn_hidden_size 80 --experience_replay_pool_size 1000 --episodes 300 --simulation_epoch_size 100 --write_model_dir .\deep_dialog\checkpoints\rl_agent\ --slot_err_prob 0.00 --intent_err_prob 0.00 --batch_size 16 --goal_file_path .\deep_dialog\data\user_goals_first_turn_template.part.movie.v1.p --trained_model_path .\deep_dialog\checkpoints\rl_agent\noe2e\agt_9_400_420_0.90000.p --run_mode 3

4. Learning Curves:

1) python draw_learning_curve.py --result_file ./deep_dialog/checkpoints/rl_agent/noe2e/agt_9_performance_records.json

2) Or pull the numbers out and draw the curves in Excel (a plotting sketch follows).
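If you prefer to pull the numbers out yourself rather than use draw_learning_curve.py or Excel, a rough sketch is below. The key name 'success_rate' and the epoch-to-value layout are assumptions about agt_9_performance_records.json; open the file and adjust the keys if they differ.

```python
# Rough sketch: plot success rate per simulation epoch from the records file.
# Assumption: the JSON holds something like {"success_rate": {"0": 0.12, ...}}.
import json
import matplotlib.pyplot as plt

path = './deep_dialog/checkpoints/rl_agent/noe2e/agt_9_performance_records.json'
with open(path) as f:
    records = json.load(f)

success = records['success_rate']   # assumed key name
epochs = sorted(success, key=int)   # epoch indices stored as strings
plt.plot([int(e) for e in epochs], [success[e] for e in epochs])
plt.xlabel('Simulation epoch')
plt.ylabel('Success rate')
plt.title('RL agent learning curve')
plt.show()
```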

imgs/noe2e_learning_curve.png

54.4 KB

instructions

+81
(identical in content to the README.md above)

src/deep_dialog/__init__.py

+1
@@ -0,0 +1 @@
#

src/deep_dialog/agents/__init__.py

+3
@@ -0,0 +1,3 @@
from .agent_cmd import *
from .agent_baselines import *
from .agent_dqn import *

src/deep_dialog/agents/agent.py

+92
@@ -0,0 +1,92 @@
"""
Created on May 17, 2016

@author: xiul, t-zalipt
"""

from deep_dialog import dialog_config


class Agent:
    """ Prototype for all agent classes, defining the interface they must uphold """

    def __init__(self, movie_dict=None, act_set=None, slot_set=None, params=None):
        """ Constructor for the Agent class

        Arguments:
        movie_dict -- This is here now but doesn't belong - the agent doesn't know about movies
        act_set -- The set of acts. #### Shouldn't this be more abstract? Don't we want our agent to be more broadly usable?
        slot_set -- The set of available slots
        """
        self.movie_dict = movie_dict
        self.act_set = act_set
        self.slot_set = slot_set
        self.act_cardinality = len(act_set.keys())
        self.slot_cardinality = len(slot_set.keys())

        self.epsilon = params['epsilon']
        self.agent_run_mode = params['agent_run_mode']
        self.agent_act_level = params['agent_act_level']

    def initialize_episode(self):
        """ Initialize a new episode. This function is called every time a new episode is run. """
        self.current_action = {}  # TODO Changed this variable's name to current_action
        self.current_action['diaact'] = None  # TODO Does it make sense to call it a state if it has an act? Which act? The most recent?
        self.current_action['inform_slots'] = {}
        self.current_action['request_slots'] = {}
        self.current_action['turn'] = 0

    def state_to_action(self, state, available_actions):
        """ Take the current state and return an action according to the current exploration/exploitation policy

        We define the agents flexibly so that they can either operate on act_slot representations or act_slot_value representations.
        We also define the responses flexibly, returning a dictionary with keys [act_slot_response, act_slot_value_response]. This way the command-line agent can continue to operate with values.

        Arguments:
        state -- A tuple of (history, kb_results) where history is a sequence of previous actions and kb_results contains information on the number of results matching the current constraints.
        user_action -- A legacy representation used to run the command-line agent. We should remove this ASAP, but not just yet.
        available_actions -- A list of the allowable actions in the current state

        Returns:
        act_slot_action -- An action consisting of one act and >= 0 slots, as well as which slots are informed vs. requested.
        act_slot_value_action -- An action consisting of acts, slots, and values in the legacy format. This can be used in the future for training agents that take values into account and interact directly with the database.
        """
        act_slot_response = None
        act_slot_value_response = None
        return {"act_slot_response": act_slot_response, "act_slot_value_response": act_slot_value_response}

    def register_experience_replay_tuple(self, s_t, a_t, reward, s_tplus1, episode_over):
        """ Register feedback from the environment, to be stored as future training data

        Arguments:
        s_t -- The state in which the last action was taken
        a_t -- The previous agent action
        reward -- The reward received immediately following the action
        s_tplus1 -- The state following the latest action
        episode_over -- A boolean value representing whether this is the final action

        Returns:
        None
        """
        pass

    def set_nlg_model(self, nlg_model):
        self.nlg_model = nlg_model

    def set_nlu_model(self, nlu_model):
        self.nlu_model = nlu_model

    def add_nl_to_action(self, agent_action):
        """ Add NL to Agent Dia_Act """

        if agent_action['act_slot_response']:
            agent_action['act_slot_response']['nl'] = ""
            user_nlg_sentence = self.nlg_model.convert_diaact_to_nl(agent_action['act_slot_response'], 'agt')  # NLG
            agent_action['act_slot_response']['nl'] = user_nlg_sentence
        elif agent_action['act_slot_value_response']:
            agent_action['act_slot_value_response']['nl'] = ""
            user_nlg_sentence = self.nlg_model.convert_diaact_to_nl(agent_action['act_slot_value_response'], 'agt')  # NLG
            # Fix: write the generated NL back to the value-level response;
            # act_slot_response is None in this branch.
            agent_action['act_slot_value_response']['nl'] = user_nlg_sentence

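Agent above is only an interface: concrete agents override state_to_action (and, for learning agents, register_experience_replay_tuple). As a purely illustrative example, not part of this commit, a trivial subclass that always requests the moviename slot might look like the sketch below; the 'request'/'UNK' act convention and the import path are assumptions.

```python
# Illustrative only: a minimal concrete agent built on the Agent interface.
# The 'request'/'UNK' dialogue-act convention is assumed from the README's
# dia_acts.txt / slot_set.txt description, not verified against the code.
from deep_dialog.agents.agent import Agent

class AlwaysRequestAgent(Agent):
    def state_to_action(self, state, available_actions):
        act_slot_response = {
            'diaact': 'request',
            'inform_slots': {},
            'request_slots': {'moviename': 'UNK'},
            'turn': self.current_action['turn'],
        }
        self.current_action = act_slot_response
        return {'act_slot_response': act_slot_response,
                'act_slot_value_response': None}
```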