Skip to content

Commit 214a3cc

Browse files
T2T Team authored and Copybara-Service committed
preventing datasets from batching between videos.
PiperOrigin-RevId: 200934714
1 parent eb11883 commit 214a3cc

File tree

3 files changed

+66
-17
lines changed

3 files changed

+66
-17
lines changed

tensor2tensor/data_generators/video_generated.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
import numpy as np
2424

25-
from tensor2tensor.data_generators import problem
2625
from tensor2tensor.data_generators import video_utils
2726
from tensor2tensor.utils import metrics
2827
from tensor2tensor.utils import registry
@@ -54,7 +53,8 @@ def frame_width(self):
5453

5554
@property
5655
def total_number_of_frames(self):
57-
return 10000
56+
# 10k videos
57+
return 10000 * self.video_length
5858

5959
@property
6060
def video_length(self):
@@ -69,17 +69,6 @@ def eval_metrics(self):
6969
metrics.Metrics.IMAGE_RMSE]
7070
return eval_metrics
7171

72-
@property
73-
def dataset_splits(self):
74-
"""Splits of data to produce and number of output shards for each."""
75-
return [{
76-
"split": problem.DatasetSplit.TRAIN,
77-
"shards": 1,
78-
}, {
79-
"split": problem.DatasetSplit.EVAL,
80-
"shards": 1,
81-
}]
82-
8372
@property
8473
def extra_reading_spec(self):
8574
"""Additional data fields to store on disk and their decoders."""

tensor2tensor/data_generators/video_utils.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ def dataset_splits(self):
100100
"shards": 1,
101101
}]
102102

103+
@property
104+
def only_keep_videos_from_0th_frame(self):
105+
return True
106+
107+
@property
108+
def use_not_breaking_batching(self):
109+
return False
110+
103111
def preprocess_example(self, example, mode, hparams):
104112
"""Runtime preprocessing, e.g., resize example["frame"]."""
105113
return example
@@ -192,15 +200,64 @@ def features_from_batch(batched_prefeatures):
192200
# Batch and construct features.
193201
def _preprocess(example):
194202
return self.preprocess_example(example, mode, hparams)
203+
204+
def avoid_break_batching(dataset):
205+
"""Smart preprocessing to avoid break between videos!
206+
207+
Simple batching of images into videos may result into broken videos
208+
with two parts from two different videos. This preprocessing avoids
209+
this using the frame number.
210+
211+
Args:
212+
dataset: raw not-batched dataset.
213+
214+
Returns:
215+
batched not-broken videos.
216+
217+
"""
218+
def check_integrity_and_batch(*datasets):
219+
"""Checks whether a sequence of frames are from the same video.
220+
221+
Args:
222+
*datasets: datasets each skipping 1 frame from the previous one.
223+
224+
Returns:
225+
batched data and the integrity flag.
226+
"""
227+
frame_numbers = [dataset["frame_number"][0] for dataset in datasets]
228+
229+
not_broken = tf.equal(
230+
frame_numbers[-1] - frame_numbers[0], num_frames-1)
231+
if self.only_keep_videos_from_0th_frame:
232+
not_broken = tf.logical_and(not_broken, tf.equal(frame_numbers[0], 0))
233+
234+
features = {}
235+
for key in datasets[0].keys():
236+
values = [dataset[key] for dataset in datasets]
237+
batch = tf.stack(values)
238+
features[key] = batch
239+
return features, not_broken
240+
241+
ds = [dataset.skip(i) for i in range(num_frames)]
242+
dataset = tf.data.Dataset.zip(tuple(ds))
243+
dataset = dataset.map(check_integrity_and_batch)
244+
dataset = dataset.filter(lambda _, not_broken: not_broken)
245+
dataset = dataset.map(lambda features, _: features)
246+
247+
return dataset
248+
195249
preprocessed_dataset = dataset.map(_preprocess)
196250
num_frames = (hparams.video_num_input_frames +
197251
hparams.video_num_target_frames)
198252
# We jump by a random position at the beginning to add variety.
199253
if self.random_skip:
200254
random_skip = tf.random_uniform([], maxval=num_frames, dtype=tf.int64)
201255
preprocessed_dataset = preprocessed_dataset.skip(random_skip)
202-
batch_dataset = preprocessed_dataset.apply(
203-
tf.contrib.data.batch_and_drop_remainder(num_frames))
256+
if self.use_not_breaking_batching:
257+
batch_dataset = avoid_break_batching(preprocessed_dataset)
258+
else:
259+
batch_dataset = preprocessed_dataset.apply(
260+
tf.contrib.data.batch_and_drop_remainder(num_frames))
204261
dataset = batch_dataset.map(features_from_batch).shuffle(8)
205262
return dataset
206263

tensor2tensor/models/research/next_frame.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -658,10 +658,13 @@ def body(self, features):
658658

659659
all_actions = input_actions + target_actions
660660
all_rewards = input_rewards + target_rewards
661+
all_frames = input_frames + target_frames
662+
663+
tf.summary.image("full_video", tf.concat(all_frames, axis=1))
661664

662665
is_training = self.hparams.mode == tf.estimator.ModeKeys.TRAIN
663666
gen_images, gen_rewards, latent_mean, latent_std = self.construct_model(
664-
images=input_frames + target_frames,
667+
images=all_frames,
665668
actions=all_actions,
666669
rewards=all_rewards,
667670
k=900.0 if is_training else -1.0,
@@ -730,7 +733,7 @@ def next_frame():
730733
def next_frame_stochastic():
731734
"""SV2P model."""
732735
hparams = next_frame()
733-
hparams.video_num_input_frames = 4
736+
hparams.video_num_input_frames = 2
734737
hparams.video_num_target_frames = 1
735738
hparams.batch_size = 8
736739
hparams.target_modality = "video:l2raw"

0 commit comments

Comments (0)