main #33

Open · wants to merge 7 commits into master from main
10 changes: 10 additions & 0 deletions README.md
@@ -220,6 +220,10 @@ Train a VQGAN with
```
python main.py --base configs/faceshq_vqgan.yaml -t True --gpus 0,
```

There are two kinds of dataset classes in `taming/data/faceshq.py`: `CelebAHQ{split}` reads `.npy` files, while `FFHQ{split}` reads image files, where `{split}` is one of `Train`/`Validation`.
In the same file, in the classes `FacesHQTrain` and `FacesHQValidation`, adjust `d1` and `d2` to select the datasets you want to use, as sketched below.
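For orientation, this is roughly what the training class looks like (a paraphrase from memory of `taming/data/faceshq.py`, not a verbatim copy; `CelebAHQTrain`, `FFHQTrain`, and `ConcatDatasetWithIndex` are the names the repo uses):

```python
from torch.utils.data import Dataset

from taming.data.base import ConcatDatasetWithIndex
from taming.data.faceshq import CelebAHQTrain, FFHQTrain

class FacesHQTrain(Dataset):
    """CelebA-HQ (d1) concatenated with FFHQ (d2)."""

    def __init__(self, size, keys=None, crop_size=None, coord=False):
        d1 = CelebAHQTrain(size=size, keys=keys)  # reads .npy files
        d2 = FFHQTrain(size=size, keys=keys)      # reads image files
        # To train on a single dataset, drop d1 or d2 from this list.
        self.data = ConcatDatasetWithIndex([d1, d2])

    def __len__(self):
        return len(self.data)
```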

Then, adjust the checkpoint path of the config key
`model.params.first_stage_config.params.ckpt_path` in
`configs/faceshq_transformer.yaml` (or download
@@ -229,6 +233,12 @@ corresponds to the preconfigured checkpoint path), then run
```
python main.py --base configs/faceshq_transformer.yaml -t True --gpus 0,
```

Running the command above raises an error:
`yaml.parser.ParserError: expected '<document start>', but found '<block mapping start>'`

Solution: switch to the new `configs/faceshq_transformer.yaml` below.
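A config copy can be syntax-checked before training by round-tripping it through PyYAML; a minimal sketch (`yaml.parser.ParserError` is exactly the exception quoted above):

```python
import yaml

# safe_load raises yaml.parser.ParserError on malformed input,
# e.g. stray tokens before the top-level mapping.
with open("configs/faceshq_transformer.yaml") as f:
    cfg = yaml.safe_load(f)

# Spot-check the key that must point at your trained VQGAN checkpoint.
print(cfg["model"]["params"]["first_stage_config"]["params"]["ckpt_path"])
```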


### D-RIN

Train a VQGAN on ImageNet with
84 changes: 42 additions & 42 deletions configs/faceshq_transformer.yaml
@@ -1,61 +1,61 @@
The file is re-serialized with its keys sorted alphabetically; the config tree itself is unchanged except for two edits: `ckpt_path` now points to `logs/2021-02-24T18-54-51_faceshq_vqgan/checkpoints/last.ckpt` instead of the preconfigured `logs/2020-11-09T13-33-36_faceshq_vqgan/checkpoints/last.ckpt`, and `coord: true` is written as `coord: True` (YAML parses both as the same boolean). The new file:

```yaml
model:
  base_learning_rate: 4.5e-06
  params:
    cond_stage_config:
      params:
        down_factor: 16
        n_embed: 1024
      target: taming.modules.misc.coord.CoordStage
    cond_stage_key: coord
    first_stage_config:
      params:
        ckpt_path: logs/2021-02-24T18-54-51_faceshq_vqgan/checkpoints/last.ckpt
        ddconfig:
          attn_resolutions:
          - 16
          ch: 128
          ch_mult:
          - 1
          - 1
          - 2
          - 2
          - 4
          double_z: false
          dropout: 0.0
          in_channels: 3
          num_res_blocks: 2
          out_ch: 3
          resolution: 256
          z_channels: 256
        embed_dim: 256
        lossconfig:
          target: taming.modules.losses.DummyLoss
        n_embed: 1024
      target: taming.models.vqgan.VQModel
    transformer_config:
      params:
        block_size: 512
        n_embd: 1024
        n_head: 16
        n_layer: 24
        vocab_size: 1024
      target: taming.modules.transformer.mingpt.GPT
  target: taming.models.cond_transformer.Net2NetTransformer

data:
  params:
    batch_size: 2
    num_workers: 8
    train:
      params:
        coord: True
        crop_size: 256
        size: 256
      target: taming.data.faceshq.FacesHQTrain
    validation:
      params:
        coord: True
        crop_size: 256
        size: 256
      target: taming.data.faceshq.FacesHQValidation
  target: main.DataModuleFromConfig
```
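Since `main.py` loads these files with OmegaConf, a quick way to confirm that the rewritten config resolves to the expected tree is to load and inspect it; a minimal sketch:

```python
from omegaconf import OmegaConf

# Key order in a YAML mapping does not affect the parsed structure, so the
# alphabetically re-dumped file yields the same config tree as the original.
config = OmegaConf.load("configs/faceshq_transformer.yaml")
print(config.model.target)  # taming.models.cond_transformer.Net2NetTransformer
print(config.model.params.first_stage_config.params.ckpt_path)
```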