-
Notifications
You must be signed in to change notification settings - Fork 3
/
monitoring.py
82 lines (62 loc) · 1.67 KB
/
monitoring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import wandb
import numpy as np
def wandb_inference_init():
    """Start a Weights & Biases run for inference logging.

    Initializes a run under the fixed entity "charred" in the
    "charred-inference" project, tagged with job type "inference".
    """
    run_settings = {
        "entity": "charred",
        "project": "charred-inference",
        "job_type": "inference",
    }
    wandb.init(**run_settings)
    print("WandB inference init...")
def wandb_inference_log(log: list):
    """Log a batch of inference results to the active W&B run.

    Parameters
    ----------
    log : list
        Each entry is a mapping with an "image" key (image data accepted
        by ``wandb.Image``) and a "prompt" key used as its caption.
    """
    # Idiomatic comprehension instead of a manual append loop (PERF401).
    wandb_log = [
        wandb.Image(entry["image"], caption=entry["prompt"]) for entry in log
    ]
    wandb.log({"inference": wandb_log})
    print("WandB inference log...")
def wandb_init(args, num_devices):
    """Start the W&B training run and wire up its step metrics.

    Parameters
    ----------
    args
        Training configuration stored as the run config (forwarded to
        ``wandb.init(config=...)``; presumably an argparse Namespace or
        dict — confirm against the caller).
    num_devices
        Device count, appended to the run config after initialization.
    """
    wandb.init(entity="charred", project="charred", job_type="train", config=args)
    wandb.config.update({"num_devices": num_devices})
    # Every metric advances with "step"; "step" itself advances with "walltime".
    wandb.define_metric("*", step_metric="step")
    wandb.define_metric("step", step_metric="walltime")
    print("WandB setup...")
def wandb_close():
    """Finish the active Weights & Biases run, flushing any pending data."""
    wandb.finish()
    print("WandB closed...")
def get_wandb_log_batch_lambda(
    get_predictions,
):
    """Build a per-batch W&B logging callback.

    Parameters
    ----------
    get_predictions : callable or None
        Invoked as ``get_predictions(unet_params)`` on milestone batches;
        must yield ``(prompt, image)`` pairs for the validation gallery.
        When ``None``, no validation images are ever logged.

    Returns
    -------
    callable
        A closure taking ``(global_walltime, global_training_steps,
        delta_time, epoch, loss, unet_params, is_milestone)`` that pushes
        one committed log entry to the active W&B run.
    """

    def __wandb_log_batch(
        global_walltime,
        global_training_steps,
        delta_time,
        epoch,
        loss,
        unet_params,
        is_milestone,
    ):
        # Scalar metrics recorded on every batch.
        payload = dict(
            walltime=global_walltime,
            step=global_training_steps,
            batch_delta_time=delta_time,
            epoch=epoch,
            loss=loss.mean(),
        )
        # On milestone batches (and only when a predictor was supplied),
        # attach a gallery of captioned validation images.
        if get_predictions is not None and is_milestone:
            gallery = []
            for prompt, image in get_predictions(unet_params):
                gallery.append(wandb.Image(image, caption=prompt))
            payload["validation"] = gallery
        wandb.log(
            data=payload,
            commit=True,
        )

    return __wandb_log_batch