Speedup ut && format. #57

Merged (2 commits, Sep 5, 2024)
2 changes: 1 addition & 1 deletion tests/run_tests.sh
@@ -43,7 +43,7 @@ function run_test {
while [[ $attempts -lt 3 ]]; do
rm -rf core*
ray stop
"$@"
time "$@"
if [[ $? -eq 0 ]]; then
echo "$@ success"
break
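Prefixing each test command with time makes every retry attempt print its wall-clock runtime next to its exit status, so the effect of the smaller test configurations introduced below shows up directly in the test output.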
1 change: 0 additions & 1 deletion tests/test_args.py
@@ -30,7 +30,6 @@ def test_args():
assert args0.models['policy'].args_dict['test'] == 123
assert args0.models['policy'].args_dict['generate_config']['eos_token_id'] == 103


def test_args2():
os.environ["num_training_epoch"] = "2"
args0 = parse_args()
34 changes: 18 additions & 16 deletions tests/test_data_dp.py
@@ -10,7 +10,9 @@
from chatlearn import TorchModule
from chatlearn.utils import future


class CustomDataset(Dataset):

def __init__(self, data):
self.data = data
self.collate_fn = None
@@ -22,8 +24,6 @@ def __getitem__(self, idx):
return {"query": self.data[idx]}


chatlearn.init()

class PolicyModel(TorchModule):

def forward_step(self, data, iteration):
@@ -38,7 +38,6 @@ def build_dataset(self, prompts, is_eval=False):
return dataset



class ReferenceModel(TorchModule):

def forward_step(self, data, iteration):
@@ -50,12 +49,12 @@ def forward_step(self, data, iteration):

class RewardModel(TorchModule):


def forward_step(self, data, iteration):
print("reward forward =========", flush=True)
data["reward_out"] = data["ref_out"].cuda() + data["policy_out"].cuda()
return data


class ValueModel(TorchModule):

def forward_step(self, data, iteration):
@@ -93,6 +92,7 @@ def train_step(self, data, iteration):
def get_data(self):
return self.data


class PPOValue(TorchModule):

@property
@@ -110,6 +110,8 @@ def train_step(self, data, iteration):
num_mb = len(data)
return num_mb


chatlearn.init()
for _, model_config in chatlearn.get_args().models.items():
model_config.num_gpu = 8

@@ -128,7 +130,7 @@ def train_step(self, data, iteration):
chatlearn.get_args().runtime_args.train_global_batch_size = 16
chatlearn.get_args().runtime_args.generation_batch_size = 8
chatlearn.get_args().runtime_args.max_relay_episode = 1
chatlearn.get_args().runtime_args.sample_per_episode = 1024
chatlearn.get_args().runtime_args.sample_per_episode = 256
policy = PolicyModel("policy")
reference = ReferenceModel("reference")
reward = RewardModel("reward")
@@ -140,9 +142,9 @@ def train_step(self, data, iteration):
def relay_sample_fn(episode_relay_buffers):
buffer = episode_relay_buffers[-1].buffer
episode_id = episode_relay_buffers[-1]._episode_id
assert len(buffer) == 1024
assert len(buffer) == 256
for i in range(len(buffer)):
assert int(buffer[i]['query'][0].item()) == i + episode_id * 1024
assert int(buffer[i]['query'][0].item()) == i + episode_id * 256
return buffer

engine.set_relay_sample_fn(relay_sample_fn)
@@ -152,7 +154,7 @@ def relay_sample_fn(episode_relay_buffers):
assert value.num_replica == 2
assert ppo_policy.num_replica == 1
assert ppo_value.num_replica == 1
data = [torch.ones([1024]) * i for i in range(2048)]
data = [torch.ones([1024]) * i for i in range(512)]
engine.set_dataset(data)
engine.learn()
assert engine.named_models['policy'].replicas[0].data_parallel_size == 1
@@ -175,21 +177,21 @@ def relay_sample_fn(episode_relay_buffers):
for batch in item:
all_data.extend([i for i in batch['query'][:, 0].numpy()])

assert len(all_data) == 2048
assert len(all_data) == 512
distinct_data = set(all_data)
assert len(distinct_data) == 2048
assert len(distinct_data) == 512
assert min(distinct_data) == 0.0
assert max(distinct_data) == 2047.0
assert max(distinct_data) == 511.0

dp_rank_to_actors = engine.named_models['ppo_value'].replicas[0].dp_rank_to_actors
assert len(dp_rank_to_actors) == 2
assert len(dp_rank_to_actors[0]) == 4
assert len(dp_rank_to_actors[1]) == 4

assert engine.env.batch_per_episode == 256
assert engine.env.num_iteration == 256
assert engine.trainer.batch_per_episode == 64
assert engine.trainer.num_iteration == 64
assert engine.env.batch_per_episode == 64
assert engine.env.num_iteration == 64
assert engine.trainer.batch_per_episode == 16
assert engine.trainer.num_iteration == 16
assert engine.trainer.num_micro_batch_per_dp == 2

assert len(engine.env._dataset) == 2048, len(engine.env._dataset)
assert len(engine.env._dataset) == 512, len(engine.env._dataset)
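Besides the 4x reduction in data size (2048 to 512 samples, 1024 to 256 samples per episode), this file moves chatlearn.init() from import time to just before the runtime configuration loop and normalizes blank lines, which is the "format" half of the PR title. A minimal sanity check of the rescaled assertions, inferred from the values visible in this diff rather than from any documented ChatLearn formula:

# Inferred relationships; illustrative only, not ChatLearn's implementation.
num_samples = 512                 # len(data) passed to engine.set_dataset
sample_per_episode = 256          # runtime_args.sample_per_episode
train_global_batch_size = 16      # runtime_args.train_global_batch_size

num_episodes = num_samples // sample_per_episode                  # 2 relay episodes
trainer_batches = sample_per_episode // train_global_batch_size   # 16

assert num_episodes == 2          # relay_sample_fn sees episode_id 0 and 1
assert trainer_batches == 16      # matches trainer.batch_per_episode above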
32 changes: 17 additions & 15 deletions tests/test_data_dp_zero.py
@@ -12,6 +12,7 @@


class CustomDataset(Dataset):

def __init__(self, data):
self.data = data
self.collate_fn = None
@@ -23,8 +24,6 @@ def __getitem__(self, idx):
return {"query": self.data[idx]}


chatlearn.init()

class PolicyModel(TorchModule):

@property
@@ -47,7 +46,6 @@ def build_dataset(self, prompts, is_eval=False):
return dataset



class ReferenceModel(TorchModule):

@property
@@ -80,6 +78,7 @@ def forward_step(self, data, iteration):
data["reward_out"] = data["ref_out"].cuda() + data["policy_out"].cuda()
return data


class ValueModel(TorchModule):

@property
@@ -119,6 +118,7 @@ def train_step(self, data, iteration):
def get_data(self):
return self.data


class PPOValue(TorchModule):

@property
@@ -134,6 +134,8 @@ def train_step(self, data, iteration):
num_mb = len(data)
return num_mb


chatlearn.init()
for _, model_config in chatlearn.get_args().models.items():
model_config.num_gpu = 8

@@ -150,7 +152,7 @@ def train_step(self, data, iteration):
chatlearn.get_args().runtime_args.train_global_batch_size = 32
chatlearn.get_args().runtime_args.generation_batch_size = 8
chatlearn.get_args().runtime_args.max_relay_episode = 1
chatlearn.get_args().runtime_args.sample_per_episode = 1024
chatlearn.get_args().runtime_args.sample_per_episode = 256
policy = PolicyModel("policy")
reference = ReferenceModel("reference")
reward = RewardModel("reward")
@@ -163,9 +165,9 @@ def train_step(self, data, iteration):
def relay_sample_fn(episode_relay_buffers):
buffer = episode_relay_buffers[-1].buffer
episode_id = episode_relay_buffers[-1]._episode_id
assert len(buffer) == 1024
assert len(buffer) == 256
for i in range(len(buffer)):
assert int(buffer[i]['query'][0].item()) == i + episode_id * 1024
assert int(buffer[i]['query'][0].item()) == i + episode_id * 256
return buffer

engine.set_relay_sample_fn(relay_sample_fn)
@@ -175,7 +177,7 @@ def relay_sample_fn(episode_relay_buffers):
assert value.num_replica == 1
assert ppo_policy.num_replica == 1
assert ppo_value.num_replica == 1
data = [torch.ones([1024]) * i for i in range(2048)]
data = [torch.ones([1024]) * i for i in range(512)]
engine.set_dataset(data)
engine.learn()
assert engine.named_models['policy'].replicas[0].data_parallel_size == 8
@@ -197,21 +199,21 @@ def relay_sample_fn(episode_relay_buffers):
for batch in item:
all_data.extend([i for i in batch['query'][:, 0].numpy()])

assert len(all_data) == 2048
assert len(all_data) == 512
distinct_data = set(all_data)
assert len(distinct_data) == 2048
assert len(distinct_data) == 512
assert min(distinct_data) == 0.0
assert max(distinct_data) == 2047.0
assert max(distinct_data) == 511.0

dp_rank_to_actors = engine.named_models['ppo_value'].replicas[0].dp_rank_to_actors
assert len(dp_rank_to_actors) == 8
assert len(dp_rank_to_actors[0]) == 1
assert len(dp_rank_to_actors[1]) == 1

assert engine.env.batch_per_episode == 256
assert engine.env.num_iteration == 32
assert engine.trainer.batch_per_episode == 32
assert engine.trainer.num_iteration == 32
assert engine.env.batch_per_episode == 64
assert engine.env.num_iteration == 8
assert engine.trainer.batch_per_episode == 8
assert engine.trainer.num_iteration == 8
assert engine.trainer.num_micro_batch_per_dp == 1

assert len(engine.env._dataset) == 2048, len(engine.env._dataset)
assert len(engine.env._dataset) == 512, len(engine.env._dataset)
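test_data_dp_zero.py gets the same 4x reduction; with train_global_batch_size = 32, the updated assertions follow the same arithmetic as above: 256 / 32 = 8 trainer batches (and iterations) per episode, and 512 / 256 = 2 episodes.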
7 changes: 2 additions & 5 deletions tests/test_distactor.py
@@ -7,7 +7,7 @@
from chatlearn.runtime.engine import BaseEngine
from chatlearn import TorchModule

chatlearn.init()

class PolicyModel(TorchModule):

def setup(self):
@@ -19,6 +19,7 @@ def forward_step(self, data, iteration=0):
return data


chatlearn.init()
model = PolicyModel('policy')

engine = BaseEngine(model)
@@ -39,7 +40,3 @@ def forward_step(self, data, iteration=0):
engine.logging_summary()

print(res0)




6 changes: 3 additions & 3 deletions tests/test_dynamic_data.py
@@ -11,6 +11,7 @@


class CustomDataset(Dataset):

def __init__(self, data):
self.data = data
self.collate_fn = None
@@ -21,12 +22,14 @@ def __len__(self):
def __getitem__(self, idx):
return {"query": self.data[idx]}


chatlearn.init()

chatlearn.get_args().runtime_args.dynamic_train_samples = True
chatlearn.get_args().runtime_args.stream_data_loader_type = "dynamic"
sample_per_episode = chatlearn.get_args().runtime_args.sample_per_episode


class PolicyModel(TorchModule):

def setup(self):
@@ -44,10 +47,8 @@ def build_dataset(self, prompts, is_eval=False):
return dataset



class ReferenceModel(TorchModule):


def forward_step(self, data, iteration):
print("reference forward =========", flush=True)
query = data["policy_out"].cuda()
@@ -57,7 +58,6 @@ def forward_step(self, data, iteration):

class RewardModel(TorchModule):


def forward_step(self, data, iteration):
print("reward forward =========", flush=True)
data["reward_out"] = data["ref_out"].cuda() + data["policy_out"].cuda()
3 changes: 2 additions & 1 deletion tests/test_evaluator.py
@@ -13,6 +13,7 @@


class CustomDataset(Dataset):

def __init__(self, data):
self.data = data
self.collate_fn = None
@@ -24,7 +25,6 @@ def __getitem__(self, idx):
return {"query": self.data[idx]}



class PolicyModel(TorchModule):

def setup(self):
@@ -42,6 +42,7 @@ def build_dataset(self, prompts, is_eval=False):
chatlearn.init()
chatlearn.get_args().models["policy"].num_gpu = 3
policy = PolicyModel("policy")

def eval_flow(b):
r0 = policy.forward_step(b)
return r0
3 changes: 3 additions & 0 deletions tests/test_evaluator2.py
@@ -8,6 +8,7 @@


class CustomDataset(Dataset):

def __init__(self, data):
self.data = data
self.collate_fn = None
@@ -50,10 +51,12 @@ def eval_step(self, data):
policy = PolicyModel("policy")

reward = RewardModel("reward")

def eval_flow(b):
r0 = policy.forward_step(b)
r1 = reward.eval_step(r0)
return r1

engine = EvalEngine(eval_flow)

assert policy.num_replica == 3, policy.num_replica
5 changes: 5 additions & 0 deletions tests/test_evaluator_multi.py
@@ -8,6 +8,7 @@


class CustomDataset(Dataset):

def __init__(self, data):
self.data = data
self.collate_fn = None
@@ -44,6 +45,7 @@ def eval_step(self, data):
new_data['reward'] = ['reward_' + item for item in data['policy']]
return new_data


class RewardModel2(TorchModule):

def setup(self):
@@ -54,12 +56,14 @@ def eval_step(self, data):
new_data['reward2'] = ['reward2_' + item for item in data['policy']]
return new_data


chatlearn.init()
chatlearn.get_args().models["policy"].num_gpu = 3
policy = PolicyModel("policy")
reward = RewardModel("reward")
reward2 = RewardModel2("reward2")


class CustomEngine(EvalEngine):

def __init__(self, models):
@@ -71,6 +75,7 @@ def eval_flow(batch):
evaluator = Evaluator(eval_flow)
super().__init__(models, evaluator=evaluator)


engine = CustomEngine([policy, reward, reward2])

assert policy.num_replica == 3, policy.num_replica