Skip to content

Commit

Permalink
Feat/allow test set=none (#97)
Browse files: browse the repository at this point in the history
  • Loading branch information
L-M-Sherlock authored Mar 25, 2024
1 parent 2ce4924 commit 5fb95e1
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 33 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "FSRS-Optimizer"
version = "4.26.8"
version = "4.27.0"
readme = "README.md"
dependencies = [
"matplotlib>=3.7.0",
Expand Down
59 changes: 27 additions & 32 deletions src/fsrs_optimizer/fsrs_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class Trainer:
def __init__(
self,
train_set: pd.DataFrame,
test_set: pd.DataFrame,
test_set: Optional[pd.DataFrame],
init_w: List[float],
n_epoch: int = 1,
lr: float = 1e-2,
Expand All @@ -267,7 +267,7 @@ def __init__(
self.avg_eval_losses = []
self.loss_fn = nn.BCELoss(reduction="none")

def build_dataset(self, train_set: pd.DataFrame, test_set: pd.DataFrame):
def build_dataset(self, train_set: pd.DataFrame, test_set: Optional[pd.DataFrame]):
pre_train_set = train_set[train_set["i"] == 2]
self.pre_train_set = BatchDataset(pre_train_set, batch_size=self.batch_size)
self.pre_train_data_loader = BatchLoader(self.pre_train_set)
Expand All @@ -279,8 +279,11 @@ def build_dataset(self, train_set: pd.DataFrame, test_set: pd.DataFrame):
self.train_set = BatchDataset(train_set, batch_size=self.batch_size)
self.train_data_loader = BatchLoader(self.train_set)

self.test_set = BatchDataset(test_set, batch_size=self.batch_size)
self.test_data_loader = BatchLoader(self.test_set)
self.test_set = (
[]
if test_set is None
else BatchDataset(test_set, batch_size=self.batch_size)
)

def train(self, verbose: bool = True):
self.verbose = verbose
Expand Down Expand Up @@ -333,33 +336,25 @@ def train(self, verbose: bool = True):
def eval(self):
self.model.eval()
with torch.no_grad():
sequences, delta_ts, labels, seq_lens = (
self.train_set.x_train,
self.train_set.t_train,
self.train_set.y_train,
self.train_set.seq_len,
)
real_batch_size = seq_lens.shape[0]
outputs, _ = self.model(sequences.transpose(0, 1))
stabilities = outputs[seq_lens - 1, torch.arange(real_batch_size), 0]
retentions = power_forgetting_curve(delta_ts, stabilities)
train_loss = self.loss_fn(retentions, labels).mean()
if self.verbose:
tqdm.write(f"train loss: {train_loss:.6f}")
self.avg_train_losses.append(train_loss)

sequences, delta_ts, labels, seq_lens = (
self.test_set.x_train,
self.test_set.t_train,
self.test_set.y_train,
self.test_set.seq_len,
)
real_batch_size = seq_lens.shape[0]
outputs, _ = self.model(sequences.transpose(0, 1))
stabilities = outputs[seq_lens - 1, torch.arange(real_batch_size), 0]
retentions = power_forgetting_curve(delta_ts, stabilities)
test_loss = self.loss_fn(retentions, labels).mean()
self.avg_eval_losses.append(test_loss)
losses = []
for dataset in (self.train_set, self.test_set):
if len(dataset) == 0:
losses.append(0)
continue
sequences, delta_ts, labels, seq_lens = (
dataset.x_train,
dataset.t_train,
dataset.y_train,
dataset.seq_len,
)
real_batch_size = seq_lens.shape[0]
outputs, _ = self.model(sequences.transpose(0, 1))
stabilities = outputs[seq_lens - 1, torch.arange(real_batch_size), 0]
retentions = power_forgetting_curve(delta_ts, stabilities)
loss = self.loss_fn(retentions, labels).mean()
losses.append(loss)
self.avg_train_losses.append(losses[0])
self.avg_eval_losses.append(losses[1])

w = list(
map(
Expand All @@ -369,7 +364,7 @@ def eval(self):
)

weighted_loss = (
train_loss * len(self.train_set) + test_loss * len(self.test_set)
losses[0] * len(self.train_set) + losses[1] * len(self.test_set)
) / (len(self.train_set) + len(self.test_set))

return weighted_loss, w
Expand Down

0 comments on commit 5fb95e1

Please sign in to comment.