diff --git a/Dockerfile b/Dockerfile
new file mode 100755
index 0000000..ab45a55
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,11 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+FROM tensorflow/tensorflow:1.15.0-gpu-py3
+
+# Install the pinned Python dependencies in a single layer. --no-cache-dir
+# keeps pip's download cache out of the resulting image.
+RUN pip install --no-cache-dir \
+    Pillow==6.2.1 \
+    requests==2.22.0 \
+    scipy==1.3.3
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100755
index 0000000..d7e8507
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,101 @@
+Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+
+
+Nvidia Source Code License-NC
+
+=======================================================================
+
+1. Definitions
+
+"Licensor" means any person or entity that distributes its Work.
+
+"Software" means the original work of authorship made available under
+this License.
+
+"Work" means the Software and any additions to or derivative works of
+the Software that are made available under this License.
+
+"Nvidia Processors" means any central processing unit (CPU), graphics
+processing unit (GPU), field-programmable gate array (FPGA),
+application-specific integrated circuit (ASIC) or any combination
+thereof designed, made, sold, or provided by Nvidia or its affiliates.
+
+The terms "reproduce," "reproduction," "derivative works," and
+"distribution" have the meaning as provided under U.S. copyright law;
+provided, however, that for the purposes of this License, derivative
+works shall not include works that remain separable from, or merely
+link (or bind by name) to the interfaces of, the Work.
+
+Works, including the Software, are "made available" under this License
+by including in or with the Work either (a) a copyright notice
+referencing the applicability of this License to the Work, or (b) a
+copy of this License.
+
+2. License Grants
+
+ 2.1 Copyright Grant. Subject to the terms and conditions of this
+ License, each Licensor grants to you a perpetual, worldwide,
+ non-exclusive, royalty-free, copyright license to reproduce,
+ prepare derivative works of, publicly display, publicly perform,
+ sublicense and distribute its Work and any resulting derivative
+ works in any form.
+
+3. Limitations
+
+ 3.1 Redistribution. You may reproduce or distribute the Work only
+ if (a) you do so under this License, (b) you include a complete
+ copy of this License with your distribution, and (c) you retain
+ without modification any copyright, patent, trademark, or
+ attribution notices that are present in the Work.
+
+ 3.2 Derivative Works. You may specify that additional or different
+ terms apply to the use, reproduction, and distribution of your
+ derivative works of the Work ("Your Terms") only if (a) Your Terms
+ provide that the use limitation in Section 3.3 applies to your
+ derivative works, and (b) you identify the specific derivative
+ works that are subject to Your Terms. Notwithstanding Your Terms,
+ this License (including the redistribution requirements in Section
+ 3.1) will continue to apply to the Work itself.
+
+ 3.3 Use Limitation. The Work and any derivative works thereof only
+ may be used or intended for use non-commercially. The Work or
+ derivative works thereof may be used or intended for use by Nvidia
+ or its affiliates commercially or non-commercially. As used herein,
+ "non-commercially" means for research or evaluation purposes only.
+
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim
+ against any Licensor (including any claim, cross-claim or
+ counterclaim in a lawsuit) to enforce any patents that you allege
+ are infringed by any Work, then your rights under this License from
+ such Licensor (including the grants in Sections 2.1 and 2.2) will
+ terminate immediately.
+
+ 3.5 Trademarks. This License does not grant any rights to use any
+ Licensor's or its affiliates' names, logos, or trademarks, except
+ as necessary to reproduce the notices described in this License.
+
+ 3.6 Termination. If you violate any term of this License, then your
+ rights under this License (including the grants in Sections 2.1 and
+ 2.2) will terminate immediately.
+
+4. Disclaimer of Warranty.
+
+THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
+NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
+THIS LICENSE.
+
+5. Limitation of Liability.
+
+EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
+THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
+SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
+INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
+(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
+LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
+COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGES.
+
+=======================================================================
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..74114ea
--- /dev/null
+++ b/README.md
@@ -0,0 +1,221 @@
+## StyleGAN2 — Official TensorFlow Implementation
+
+
+
+**Analyzing and Improving the Image Quality of StyleGAN**
+Tero Karras, Samuli Laine, Miika Aittala, Janne Hellsten, Jaakko Lehtinen, Timo Aila
+
+Paper: http://arxiv.org/abs/1912.04958
+Video: https://youtu.be/c-NJtV9Jvp0
+
+Abstract: *The style-based GAN architecture (StyleGAN) yields state-of-the-art results in data-driven unconditional generative image modeling. We expose and analyze several of its characteristic artifacts, and propose changes in both model architecture and training methods to address them. In particular, we redesign generator normalization, revisit progressive growing, and regularize the generator to encourage good conditioning in the mapping from latent vectors to images. In addition to improving image quality, this path length regularizer yields the additional benefit that the generator becomes significantly easier to invert. This makes it possible to reliably detect if an image is generated by a particular network. We furthermore visualize how well the generator utilizes its output resolution, and identify a capacity problem, motivating us to train larger models for additional quality improvements. Overall, our improved model redefines the state of the art in unconditional image modeling, both in terms of existing distribution quality metrics as well as perceived image quality.*
+
+For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)
+For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)
+
+| Additional material | &nbsp;
+| :--- | :----------
+| [StyleGAN2](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7) | Main Google Drive folder
+| ├ [stylegan2-paper.pdf](https://drive.google.com/open?id=1fnF-QsiQeKaxF-HbvFiGtzHF_Bf3CzJu) | High-quality version of the paper
+| ├ [stylegan2-video.mp4](https://drive.google.com/open?id=1f_gbKW6FUUHKkUxciJ_lQx29mCq_fSBy) | High-quality version of the video
+| ├ [images](https://drive.google.com/open?id=1Sak157_DLX84ytqHHqZaH_59HoEWzfB7) | Example images produced using our method
+| │ ├ [curated-images](https://drive.google.com/open?id=1ydWb8xCHzDKMTW9kQ7sL-B1R0zATHVHp) | Hand-picked images showcasing our results
+| │ └ [100k-generated-images](https://drive.google.com/open?id=1BA2OZ1GshdfFZGYZPob5QWOGBuJCdu5q) | Random images with and without truncation
+| ├ [videos](https://drive.google.com/open?id=1yXDV96SFXoUiZKU7AyE6DyKgDpIk4wUZ) | Individual clips of the video as high-quality MP4
+| └ [networks](https://drive.google.com/open?id=1yanUI9m4b4PWzR0eurKNq6JR1Bbfbh6L) | Pre-trained networks
+| &ensp;&ensp; ├ [stylegan2-ffhq-config-f.pkl](https://drive.google.com/open?id=1Mgh-jglZjgksupF0XLl0KzuOqd1LXcoE) | StyleGAN2 for FFHQ dataset at 1024×1024
+| &ensp;&ensp; ├ [stylegan2-car-config-f.pkl](https://drive.google.com/open?id=1MutzVf8XjNo6TUg03a6CUU_2Vlc0ltbV) | StyleGAN2 for LSUN Car dataset at 512×384
+| &ensp;&ensp; ├ [stylegan2-cat-config-f.pkl](https://drive.google.com/open?id=1MyowTZGvMDJCWuT7Yg2e_GnTLIzcSPCy) | StyleGAN2 for LSUN Cat dataset at 256×256
+| &ensp;&ensp; ├ [stylegan2-church-config-f.pkl](https://drive.google.com/open?id=1N3iaujGpwa6vmKCqRSHcD6GZ2HVV8h1f) | StyleGAN2 for LSUN Church dataset at 256×256
+| &ensp;&ensp; ├ [stylegan2-horse-config-f.pkl](https://drive.google.com/open?id=1N55ZtBhEyEbDn6uKBjCNAew1phD5ZAh-) | StyleGAN2 for LSUN Horse dataset at 256×256
+| &ensp;&ensp; └ ⋯ | Other training configurations used in the paper
+
+## Requirements
+
+* Both Linux and Windows are supported. Linux is recommended for performance and compatibility reasons.
+* 64-bit Python 3.6 installation. We recommend Anaconda3 with numpy 1.14.3 or newer.
+* TensorFlow 1.15 with GPU support. The code does not support TensorFlow 2.0.
+* One or more high-end NVIDIA GPUs, NVIDIA drivers, CUDA 10.0 toolkit and cuDNN 7.5. To reproduce the results reported in the paper, you need an NVIDIA GPU with at least 16 GB of DRAM.
+* Docker users: use the [provided Dockerfile](./Dockerfile) to build an image with the required library dependencies.
+
+StyleGAN2 relies on custom TensorFlow ops that are compiled on the fly using [NVCC](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html). To test that your NVCC installation is working correctly, run:
+
+```.bash
+nvcc test_nvcc.cu -o test_nvcc -run
+| test_nvcc.cu
+| Creating library test_nvcc.lib and object test_nvcc.exp
+| CPU says hello!
+| GPU says hello!
+```
+
+On Windows, the compilation requires Microsoft Visual Studio to be in `PATH`. We recommend installing [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/) and adding it to `PATH` using `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"`.
+
+## Preparing datasets
+
+Datasets are stored as multi-resolution TFRecords, similar to the [original StyleGAN](https://github.com/NVlabs/stylegan). Each dataset consists of multiple `*.tfrecords` files stored under a common directory, e.g., `~/datasets/ffhq/ffhq-r*.tfrecords`. In the following sections, the datasets are referenced using a combination of `--dataset` and `--data-dir` arguments, e.g., `--dataset=ffhq --data-dir=~/datasets`.
+
+**FFHQ**. To download the [Flickr-Faces-HQ](https://github.com/NVlabs/ffhq-dataset) dataset as multi-resolution TFRecords, run:
+
+```.bash
+pushd ~
+git clone https://github.com/NVlabs/ffhq-dataset.git
+cd ffhq-dataset
+python download_ffhq.py --tfrecords
+popd
+python dataset_tool.py display ~/ffhq-dataset/tfrecords/ffhq
+```
+
+**LSUN**. Download the desired LSUN categories in LMDB format from the [LSUN project page](https://www.yf.io/p/lsun). To convert the data to multi-resolution TFRecords, run:
+
+```.bash
+python dataset_tool.py create_lsun_wide ~/datasets/car ~/lsun/car_lmdb --width=512 --height=384
+python dataset_tool.py create_lsun ~/datasets/cat ~/lsun/cat_lmdb --resolution=256
+python dataset_tool.py create_lsun ~/datasets/church ~/lsun/church_outdoor_train_lmdb --resolution=256
+python dataset_tool.py create_lsun ~/datasets/horse ~/lsun/horse_lmdb --resolution=256
+```
+
+**Custom**. Create custom datasets by placing all training images under a single directory. The images must be square-shaped and they must all have the same power-of-two dimensions. To convert the images to multi-resolution TFRecords, run:
+
+```.bash
+python dataset_tool.py create_from_images ~/datasets/my-custom-dataset ~/my-custom-images
+python dataset_tool.py display ~/datasets/my-custom-dataset
+```
+
+## Using pre-trained networks
+
+Pre-trained networks are stored as `*.pkl` files on the [StyleGAN2 Google Drive folder](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7). Below, you can either reference them directly using the syntax `gdrive:networks/<filename>.pkl`, or download them manually and reference by filename.
+
+**Generating images**:
+
+```.bash
+# Generate uncurated ffhq images (matches paper Figure 12)
+python run_generator.py generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \
+ --seeds=6600-6625 --truncation-psi=0.5
+
+# Generate curated ffhq images (matches paper Figure 11)
+python run_generator.py generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \
+ --seeds=66,230,389,1518 --truncation-psi=1.0
+
+# Generate uncurated car images
+python run_generator.py generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl \
+ --seeds=6000-6025 --truncation-psi=0.5
+
+# Example of style mixing (matches the corresponding video clip)
+python run_generator.py style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \
+ --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0
+```
+
+The results are placed in `results/<RUNNING_ID>-<DESCRIPTION>/*.png`. You can change the location with `--result-dir`. For example, `--result-dir=~/my-stylegan2-results`.
+
+**Projecting images to latent space**:
+
+```.bash
+# Project generated images
+python run_projector.py project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl \
+ --seeds=0,1,5
+
+# Project real images
+python run_projector.py project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl \
+ --dataset=car --data-dir=~/datasets
+```
+
+You can import the networks in your own Python code using `pickle.load()`. For this to work, you need to include the `dnnlib` source directory in `PYTHONPATH` and create a default TensorFlow session by calling `dnnlib.tflib.init_tf()`. See [run_generator.py](./run_generator.py) and [pretrained_networks.py](./pretrained_networks.py) for examples.
+
+## Training networks
+
+To reproduce the training runs for config F in Tables 1 and 3, run:
+
+```.bash
+python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \
+ --dataset=ffhq --mirror-augment=true
+python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \
+ --dataset=car --total-kimg=57000
+python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \
+ --dataset=cat --total-kimg=88000
+python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \
+ --dataset=church --total-kimg=88000 --gamma=100
+python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \
+ --dataset=horse --total-kimg=100000 --gamma=100
+```
+
+For other configurations, see `python run_training.py --help`.
+
+We have verified that the results match the paper when training with 1, 2, 4, or 8 GPUs. Note that training FFHQ at 1024×1024 resolution requires GPU(s) with at least 16 GB of memory. The following table lists typical training times using NVIDIA DGX-1 with 8 Tesla V100 GPUs:
+
+| Configuration | Resolution | Total kimg | 1 GPU | 2 GPUs | 4 GPUs | 8 GPUs | GPU mem |
+| :------------ | :-------------: | :--------: | :-----: | :-----: | :-----: | :----: | :-----: |
+| `config-f` | 1024×1024 | 25000 | 69d 23h | 36d 4h | 18d 14h | 9d 18h | 13.3 GB |
+| `config-f` | 1024×1024 | 10000 | 27d 23h | 14d 11h | 7d 10h | 3d 22h | 13.3 GB |
+| `config-e` | 1024×1024 | 25000 | 35d 11h | 18d 15h | 9d 15h | 5d 6h | 8.6 GB |
+| `config-e` | 1024×1024 | 10000 | 14d 4h | 7d 11h | 3d 20h | 2d 3h | 8.6 GB |
+| `config-f` | 256×256 | 25000 | 32d 13h | 16d 23h | 8d 21h | 4d 18h | 6.4 GB |
+| `config-f` | 256×256 | 10000 | 13d 0h | 6d 19h | 3d 13h | 1d 22h | 6.4 GB |
+
+Training curves for FFHQ config F (StyleGAN2) compared to original StyleGAN using 8 GPUs:
+
+
+
+After training, the resulting networks can be used the same way as the official pre-trained networks:
+
+```.bash
+# Generate 1000 random images without truncation
+python run_generator.py generate-images --seeds=0-999 --truncation-psi=1.0 \
+ --network=results/00006-stylegan2-ffhq-8gpu-config-f/networks-final.pkl
+```
+
+## Evaluation metrics
+
+To reproduce the numbers for config F in Tables 1 and 3, run:
+
+```.bash
+python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \
+ --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true
+python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-car-config-f.pkl \
+ --metrics=fid50k,ppl2_wend --dataset=car
+python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-cat-config-f.pkl \
+ --metrics=fid50k,ppl2_wend --dataset=cat
+python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-church-config-f.pkl \
+ --metrics=fid50k,ppl2_wend --dataset=church
+python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-horse-config-f.pkl \
+ --metrics=fid50k,ppl2_wend --dataset=horse
+```
+
+For other configurations, see the [StyleGAN2 Google Drive folder](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7).
+
+Note that the metrics are evaluated using a different random seed each time, so the results will vary between runs. In the paper, we reported the average result of running each metric 10 times. The following table lists the available metrics along with their expected runtimes and random variation:
+
+| Metric | FFHQ config F | 1 GPU | 2 GPUs | 4 GPUs | Description |
+| :---------- | :------------: | :----: | :-----: | :----: | :---------- |
+| `fid50k` | 2.84 ± 0.03 | 22 min | 14 min | 10 min | [Fréchet Inception Distance](https://arxiv.org/abs/1706.08500)
+| `is50k` | 5.13 ± 0.02 | 23 min | 14 min | 8 min | [Inception Score](https://arxiv.org/abs/1606.03498)
+| `ppl_zfull` | 348.0 ± 3.8 | 41 min | 22 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in Z, full paths
+| `ppl_wfull` | 126.9 ± 0.2 | 42 min | 22 min | 13 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in W, full paths
+| `ppl_zend` | 348.6 ± 3.0 | 41 min | 22 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in Z, path endpoints
+| `ppl_wend` | 129.4 ± 0.8 | 40 min | 23 min | 13 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in W, path endpoints
+| `ppl2_wend` | 145.0 ± 0.5 | 41 min | 23 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) without center crop
+| `ls` | 154.2 / 4.27 | 10 hrs | 6 hrs | 4 hrs | [Linear Separability](https://arxiv.org/abs/1812.04948)
+| `pr50k3` | 0.689 / 0.492 | 26 min | 17 min | 12 min | [Precision and Recall](https://arxiv.org/abs/1904.06991)
+
+Note that some of the metrics cache dataset-specific data on the disk, and they will take somewhat longer when run for the first time.
+
+## License
+
+Copyright © 2019, NVIDIA Corporation. All rights reserved.
+
+This work is made available under the Nvidia Source Code License-NC. To view a copy of this license, visit https://nvlabs.github.io/stylegan2/license.html
+
+## Citation
+
+```
+@article{Karras2019stylegan2,
+ title = {Analyzing and Improving the Image Quality of {StyleGAN}},
+ author = {Tero Karras and Samuli Laine and Miika Aittala and Janne Hellsten and Jaakko Lehtinen and Timo Aila},
+ journal = {CoRR},
+ volume = {abs/1912.04958},
+ year = {2019},
+}
+```
+
+## Acknowledgements
+
+We thank Ming-Yu Liu for an early review, Timo Viitanen for his help with code release, and Tero Kuosmanen for compute infrastructure.
diff --git a/dataset_tool.py b/dataset_tool.py
new file mode 100755
index 0000000..d8c4dc2
--- /dev/null
+++ b/dataset_tool.py
@@ -0,0 +1,644 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Tool for creating multi-resolution TFRecords datasets."""
+
+# pylint: disable=too-many-lines
+import os
+import sys
+import glob
+import argparse
+import threading
+import six.moves.queue as Queue # pylint: disable=import-error
+import traceback
+import numpy as np
+import tensorflow as tf
+import PIL.Image
+import dnnlib.tflib as tflib
+
+from training import dataset
+
+#----------------------------------------------------------------------------
+
+def error(msg):
+    """Print an error message and terminate the process with exit status 1.
+
+    Args:
+        msg: Text printed after the 'Error: ' prefix.
+
+    Raises:
+        SystemExit: Always, with code 1.
+    """
+    print('Error: ' + msg)
+    # sys.exit() is always available, whereas the bare exit() builtin is
+    # injected by the site module and is absent under `python -S`.
+    sys.exit(1)
+
+#----------------------------------------------------------------------------
+
+class TFRecordExporter:
+    """Writes images (and optional labels) as multi-resolution TFRecords.
+
+    One '<prefix>-rNN.tfrecords' file is written per level of detail, from the
+    full resolution down to 4x4, where <prefix> is the dataset directory joined
+    with its own basename. Usable as a context manager; leaving the 'with'
+    block calls close().
+    """
+
+    def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10):
+        """Remember the export settings and create the output directory.
+
+        Args:
+            tfrecord_dir: Directory that receives the output files.
+            expected_images: Image count used for progress output and for
+                choose_shuffled_order().
+            print_progress: Whether to print progress messages.
+            progress_interval: Print the running count every this many images.
+        """
+        self.tfrecord_dir = tfrecord_dir
+        self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir))
+        self.expected_images = expected_images
+        self.cur_images = 0
+        self.shape = None
+        self.resolution_log2 = None
+        self.tfr_writers = []
+        self.print_progress = print_progress
+        self.progress_interval = progress_interval
+
+        if self.print_progress:
+            print('Creating dataset "%s"' % tfrecord_dir)
+        # exist_ok avoids the check-then-create race of a separate isdir() test.
+        os.makedirs(self.tfrecord_dir, exist_ok=True)
+        assert os.path.isdir(self.tfrecord_dir)
+
+    def close(self):
+        """Close all TFRecord writers and, if enabled, print the final image count."""
+        if self.print_progress:
+            print('%-40s\r' % 'Flushing data...', end='', flush=True)
+        for tfr_writer in self.tfr_writers:
+            tfr_writer.close()
+        self.tfr_writers = []
+        if self.print_progress:
+            print('%-40s\r' % '', end='', flush=True)
+            print('Added %d images.' % self.cur_images)
+
+    def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order.
+        """Return a permutation of range(expected_images) drawn with a fixed seed (123)."""
+        order = np.arange(self.expected_images)
+        np.random.RandomState(123).shuffle(order)
+        return order
+
+    def add_image(self, img):
+        """Append one image to every level-of-detail file.
+
+        The first image fixes the dataset shape and opens the writers. It must
+        be channels-first with 1 or 3 channels and a square, power-of-two
+        resolution; every later image must have exactly the same shape.
+        """
+        if self.print_progress and self.cur_images % self.progress_interval == 0:
+            print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True)
+        if self.shape is None:
+            self.shape = img.shape
+            self.resolution_log2 = int(np.log2(self.shape[1]))
+            assert self.shape[0] in [1, 3]
+            assert self.shape[1] == self.shape[2]
+            assert self.shape[1] == 2**self.resolution_log2
+            tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE)
+            for lod in range(self.resolution_log2 - 1):
+                tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod)
+                self.tfr_writers.append(tf.python_io.TFRecordWriter(tfr_file, tfr_opt))
+        assert img.shape == self.shape
+        for lod, tfr_writer in enumerate(self.tfr_writers):
+            if lod:
+                # Each further level halves the resolution by averaging 2x2 pixel blocks.
+                img = img.astype(np.float32)
+                img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25
+            quant = np.rint(img).clip(0, 255).astype(np.uint8)
+            ex = tf.train.Example(features=tf.train.Features(feature={
+                'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)),
+                # tobytes() yields the same bytes as the deprecated tostring().
+                'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tobytes()]))}))
+            tfr_writer.write(ex.SerializeToString())
+        self.cur_images += 1
+
+    def add_labels(self, labels):
+        """Save labels as float32 to '<prefix>-rxx.labels' in NumPy's .npy format.
+
+        The first dimension of labels must equal the number of images added so far.
+        """
+        if self.print_progress:
+            print('%-40s\r' % 'Saving labels...', end='', flush=True)
+        assert labels.shape[0] == self.cur_images
+        with open(self.tfr_prefix + '-rxx.labels', 'wb') as f:
+            np.save(f, labels.astype(np.float32))
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        self.close()
+
+#----------------------------------------------------------------------------
+
+class ExceptionInfo(object):
+    """Snapshot of the exception that is currently being handled.
+
+    Intended to be constructed inside an 'except' block: 'value' holds the
+    exception object and 'traceback' holds the formatted traceback text.
+    """
+
+    def __init__(self):
+        _exc_type, exc_value, _exc_tb = sys.exc_info()
+        self.value = exc_value
+        self.traceback = traceback.format_exc()
+
+#----------------------------------------------------------------------------
+
+class WorkerThread(threading.Thread):
+    """Thread that executes (func, args, result_queue) tasks taken from a queue.
+
+    The outcome of each task is put on that task's result queue as
+    (result, args). If the task raises, the result is an ExceptionInfo that
+    describes the failure. A task whose func is None makes the thread stop.
+    """
+
+    def __init__(self, task_queue):
+        super().__init__()
+        self.task_queue = task_queue
+
+    def run(self):
+        while True:
+            task = self.task_queue.get()
+            func, args, result_queue = task
+            if func is None:
+                return
+            try:
+                outcome = func(*args)
+            except: # catch everything so that the failure reaches the consumer
+                outcome = ExceptionInfo()
+            result_queue.put((outcome, args))
+
+#----------------------------------------------------------------------------
+
+class ThreadPool(object):
+    """Fixed-size pool of daemon WorkerThreads fed from one shared task queue.
+
+    Results are delivered through one result queue per task function, so
+    get_result(func) only returns results of tasks submitted with that func.
+    Usable as a context manager; leaving the 'with' block calls finish().
+    """
+
+    def __init__(self, num_threads):
+        """Start num_threads daemon worker threads (num_threads must be >= 1)."""
+        assert num_threads >= 1
+        self.task_queue = Queue.Queue()
+        self.result_queues = dict()
+        self.num_threads = num_threads
+        for _idx in range(self.num_threads):
+            thread = WorkerThread(self.task_queue)
+            thread.daemon = True
+            thread.start()
+
+    def add_task(self, func, args=()):
+        """Queue func(*args) for execution on a worker thread."""
+        assert hasattr(func, '__call__') # must be a function
+        if func not in self.result_queues:
+            self.result_queues[func] = Queue.Queue()
+        self.task_queue.put((func, args, self.result_queues[func]))
+
+    def get_result(self, func): # returns (result, args)
+        """Block until a task submitted with func has finished and return (result, args).
+
+        If the task raised, the worker's traceback is printed and the exception
+        is re-raised in the calling thread.
+        """
+        result, args = self.result_queues[func].get()
+        if isinstance(result, ExceptionInfo):
+            print('\n\nWorker thread caught an exception:\n' + result.traceback)
+            raise result.value
+        return result, args
+
+    def finish(self):
+        """Queue one stop marker per worker thread; does not wait for the threads."""
+        for _idx in range(self.num_threads):
+            self.task_queue.put((None, (), None))
+
+    def __enter__(self): # for 'with' statement
+        return self
+
+    def __exit__(self, *excinfo):
+        self.finish()
+
+    def process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None):
+        """Generator that maps items through pre_func -> process_func -> post_func.
+
+        pre_func and post_func are called from this generator, process_func is
+        run through the pool's task queue. Outputs are yielded in the order of
+        the inputs.
+
+        Args:
+            item_iterator: Iterable of input items.
+            process_func: Applied to each prepared item by a worker thread.
+            pre_func: Applied to each item before it is submitted.
+            post_func: Applied to each processed result before it is yielded.
+            max_items_in_flight: Bound on submitted-but-not-yet-yielded items;
+                defaults to four times the number of threads.
+        """
+        if max_items_in_flight is None: max_items_in_flight = self.num_threads * 4
+        assert max_items_in_flight >= 1
+        # A private marker for "no result yet". Using None for this would make
+        # a process_func that legitimately returns None block forever.
+        pending = object()
+        results = []
+        retire_idx = [0]
+
+        def task_func(prepared, _idx):
+            return process_func(prepared)
+
+        def retire_result():
+            processed, (_prepared, idx) = self.get_result(task_func)
+            results[idx] = processed
+            while retire_idx[0] < len(results) and results[retire_idx[0]] is not pending:
+                yield post_func(results[retire_idx[0]])
+                results[retire_idx[0]] = None # drop the reference once it has been yielded
+                retire_idx[0] += 1
+
+        for idx, item in enumerate(item_iterator):
+            prepared = pre_func(item)
+            results.append(pending)
+            self.add_task(func=task_func, args=(prepared, idx))
+            while retire_idx[0] < idx - max_items_in_flight + 2:
+                for res in retire_result(): yield res
+        while retire_idx[0] < len(results):
+            for res in retire_result(): yield res
+
+#----------------------------------------------------------------------------
+
+def display(tfrecord_dir):
+    """Show the images of a TFRecords dataset one at a time in an OpenCV window.
+
+    The index and label of each image are printed. Pressing ESC (key code 27)
+    stops; any other key advances to the next image.
+    """
+    print('Loading dataset "%s"' % tfrecord_dir)
+    tflib.init_tf({'gpu_options.allow_growth': True})
+    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0)
+    tflib.init_uninitialized_vars()
+    import cv2 # pip install opencv-python
+
+    window_name = 'dataset_tool'
+    escape_key = 27
+    num_shown = 0
+    keep_going = True
+    while keep_going:
+        try:
+            images, labels = dset.get_minibatch_np(1)
+        except tf.errors.OutOfRangeError:
+            break
+        if num_shown == 0:
+            print('Displaying images')
+            cv2.namedWindow(window_name)
+            print('Press SPACE or ENTER to advance, ESC to exit')
+        print('\nidx = %-8d\nlabel = %s' % (num_shown, labels[0].tolist()))
+        frame_bgr = images[0].transpose(1, 2, 0)[:, :, ::-1] # CHW => HWC, RGB => BGR
+        cv2.imshow(window_name, frame_bgr)
+        num_shown += 1
+        keep_going = cv2.waitKey() != escape_key
+    print('\nDisplayed %d images.' % num_shown)
+
+#----------------------------------------------------------------------------
+
+def extract(tfrecord_dir, output_dir):
+    """Write every image of a TFRecords dataset to output_dir as 'imgNNNNNNNN.png'.
+
+    Single-channel images are saved in PIL mode 'L', all others in mode 'RGB'.
+    The output directory is created if it does not exist.
+    """
+    print('Loading dataset "%s"' % tfrecord_dir)
+    tflib.init_tf({'gpu_options.allow_growth': True})
+    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0)
+    tflib.init_uninitialized_vars()
+
+    print('Extracting images to "%s"' % output_dir)
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+    num_saved = 0
+    while True:
+        if num_saved % 10 == 0:
+            print('%d\r' % num_saved, end='', flush=True)
+        try:
+            images, _labels = dset.get_minibatch_np(1)
+        except tf.errors.OutOfRangeError:
+            break
+        is_grayscale = (images.shape[1] == 1)
+        if is_grayscale:
+            pixels, mode = images[0][0], 'L'
+        else:
+            pixels, mode = images[0].transpose(1, 2, 0), 'RGB'
+        out_path = os.path.join(output_dir, 'img%08d.png' % num_saved)
+        PIL.Image.fromarray(pixels, mode).save(out_path)
+        num_saved += 1
+    print('Extracted %d images.' % num_saved)
+
+#----------------------------------------------------------------------------
+
+def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels):
+    """Compare two TFRecords datasets item by item and print a summary.
+
+    Both datasets are read in order, one item at a time. Every differing image
+    or label is reported by index. Reading stops as soon as either dataset is
+    exhausted, with a message if the other one still has items.
+    """
+    max_label_size = 0 if ignore_labels else 'full'
+    print('Loading dataset "%s"' % tfrecord_dir_a)
+    tflib.init_tf({'gpu_options.allow_growth': True})
+    dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
+    print('Loading dataset "%s"' % tfrecord_dir_b)
+    dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
+    tflib.init_uninitialized_vars()
+
+    def next_item_or_none(dset):
+        # Returns (images, labels), or (None, None) once the dataset is exhausted.
+        try:
+            return dset.get_minibatch_np(1)
+        except tf.errors.OutOfRangeError:
+            return None, None
+
+    def same_array(lhs, rhs):
+        return lhs.shape == rhs.shape and np.all(lhs == rhs)
+
+    print('Comparing datasets')
+    num_compared = 0
+    identical_images = 0
+    identical_labels = 0
+    while True:
+        if num_compared % 100 == 0:
+            print('%d\r' % num_compared, end='', flush=True)
+        images_a, labels_a = next_item_or_none(dset_a)
+        images_b, labels_b = next_item_or_none(dset_b)
+        a_done = images_a is None
+        b_done = images_b is None
+        if a_done or b_done:
+            if not (a_done and b_done):
+                print('Datasets contain different number of images')
+            break
+        if same_array(images_a, images_b):
+            identical_images += 1
+        else:
+            print('Image %d is different' % num_compared)
+        if same_array(labels_a, labels_b):
+            identical_labels += 1
+        else:
+            print('Label %d is different' % num_compared)
+        num_compared += 1
+    print('Identical images: %d / %d' % (identical_images, num_compared))
+    if not ignore_labels:
+        print('Identical labels: %d / %d' % (identical_labels, num_compared))
+
+#----------------------------------------------------------------------------
+
+def create_mnist(tfrecord_dir, mnist_dir):
+    """Convert the MNIST training set into a labelled TFRecords dataset.
+
+    Reads 'train-images-idx3-ubyte.gz' and 'train-labels-idx1-ubyte.gz' from
+    mnist_dir, zero-pads every 28x28 digit to 32x32, and writes the images in
+    the exporter's shuffled order together with one-hot float32 labels.
+    """
+    print('Loading MNIST from "%s"' % mnist_dir)
+    import gzip
+
+    def read_payload(filename, header_bytes):
+        # Decompress the file and skip its fixed-size header.
+        with gzip.open(os.path.join(mnist_dir, filename), 'rb') as stream:
+            return np.frombuffer(stream.read(), np.uint8, offset=header_bytes)
+
+    images = read_payload('train-images-idx3-ubyte.gz', 16)
+    labels = read_payload('train-labels-idx1-ubyte.gz', 8)
+    images = images.reshape(-1, 1, 28, 28)
+    border = (2, 2)
+    images = np.pad(images, [(0,0), (0,0), border, border], 'constant', constant_values=0)
+    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
+    assert labels.shape == (60000,) and labels.dtype == np.uint8
+    assert np.min(images) == 0 and np.max(images) == 255
+    assert np.min(labels) == 0 and np.max(labels) == 9
+    num_classes = np.max(labels) + 1
+    onehot = np.zeros((labels.size, num_classes), dtype=np.float32)
+    onehot[np.arange(labels.size), labels] = 1.0
+
+    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
+        order = tfr.choose_shuffled_order()
+        for src_idx in order:
+            tfr.add_image(images[src_idx])
+        tfr.add_labels(onehot[order])
+
+#----------------------------------------------------------------------------
+
+def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
+    """Create an unlabelled three-channel dataset from random MNIST digits.
+
+    Reads 'train-images-idx3-ubyte.gz' from mnist_dir and zero-pads the digits
+    to 32x32. Each of the num_images output images stacks three randomly
+    chosen digits as its channels; the choice is seeded by random_seed.
+    """
+    print('Loading MNIST from "%s"' % mnist_dir)
+    import gzip
+    archive_path = os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz')
+    with gzip.open(archive_path, 'rb') as stream:
+        raw = stream.read()
+    images = np.frombuffer(raw, np.uint8, offset=16).reshape(-1, 28, 28)
+    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
+    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
+    assert np.min(images) == 0 and np.max(images) == 255
+
+    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
+        rnd = np.random.RandomState(random_seed)
+        num_digits = images.shape[0]
+        remaining = num_images
+        while remaining > 0:
+            channel_picks = rnd.randint(num_digits, size=3)
+            tfr.add_image(images[channel_picks])
+            remaining -= 1
+
+#----------------------------------------------------------------------------
+
+def create_cifar10(tfrecord_dir, cifar10_dir):
+    """Convert the CIFAR-10 training batches into a labelled TFRecords dataset.
+
+    Reads the pickled files 'data_batch_1' to 'data_batch_5' from cifar10_dir
+    and writes the 50000 images in the exporter's shuffled order together
+    with one-hot float32 labels.
+    """
+    print('Loading CIFAR-10 from "%s"' % cifar10_dir)
+    import pickle
+    images = []
+    labels = []
+    for batch in range(1, 6):
+        with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file:
+            # NOTE: unpickling can execute arbitrary code; only use trusted dataset files.
+            data = pickle.load(file, encoding='latin1')
+        images.append(data['data'].reshape(-1, 3, 32, 32))
+        labels.append(data['labels'])
+    images = np.concatenate(images)
+    labels = np.concatenate(labels)
+    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
+    # NumPy's default integer width is platform dependent, so accept any
+    # integer dtype instead of requiring int32.
+    assert labels.shape == (50000,) and np.issubdtype(labels.dtype, np.integer)
+    assert np.min(images) == 0 and np.max(images) == 255
+    assert np.min(labels) == 0 and np.max(labels) == 9
+    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
+    onehot[np.arange(labels.size), labels] = 1.0
+
+    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
+        order = tfr.choose_shuffled_order()
+        for idx in range(order.size):
+            tfr.add_image(images[order[idx]])
+        tfr.add_labels(onehot[order])
+
+#----------------------------------------------------------------------------
+
+def create_cifar100(tfrecord_dir, cifar100_dir):
+    """Export the CIFAR-100 training set through TFRecordExporter.
+
+    Loads the pickled 'train' file from cifar100_dir, validates the image and
+    fine-label arrays, converts the labels to one-hot float32 vectors, and
+    writes images and labels in the shuffled order chosen by the exporter.
+
+    Args:
+        tfrecord_dir: Passed to TFRecordExporter as the output location.
+        cifar100_dir: Directory containing the pickled CIFAR-100 'train' file.
+    """
+    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
+    import pickle
+    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
+        # NOTE(security): pickle.load can execute arbitrary code; only point this at trusted CIFAR-100 archives.
+        data = pickle.load(file, encoding='latin1')
+    images = data['data'].reshape(-1, 3, 32, 32)
+    labels = np.array(data['fine_labels'])
+    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
+    # Accept any integer dtype: np.array() over plain Python ints yields the
+    # platform's default integer width, which is not int32 everywhere.
+    assert labels.shape == (50000,) and np.issubdtype(labels.dtype, np.integer)
+    assert np.min(images) == 0 and np.max(images) == 255
+    assert np.min(labels) == 0 and np.max(labels) == 99
+    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
+    onehot[np.arange(labels.size), labels] = 1.0
+
+    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
+        order = tfr.choose_shuffled_order()
+        for idx in range(order.size):
+            tfr.add_image(images[order[idx]])
+        # Labels are written in the same shuffled order as the images.
+        tfr.add_labels(onehot[order])
+
+#----------------------------------------------------------------------------
+
+def create_svhn(tfrecord_dir, svhn_dir):
+    """Export the SVHN training set through TFRecordExporter.
+
+    Loads 'train_1.pkl' .. 'train_3.pkl' from svhn_dir (item 0 of each pickle
+    holds images, item 1 holds labels), validates the combined arrays, and
+    writes the images plus one-hot float32 labels in the exporter's shuffled
+    order.
+    """
+    print('Loading SVHN from "%s"' % svhn_dir)
+    import pickle
+    image_parts = []
+    label_parts = []
+    for part in (1, 2, 3):
+        part_path = os.path.join(svhn_dir, 'train_%d.pkl' % part)
+        with open(part_path, 'rb') as file:
+            contents = pickle.load(file, encoding='latin1')
+        image_parts.append(contents[0])
+        label_parts.append(contents[1])
+    images = np.concatenate(image_parts)
+    labels = np.concatenate(label_parts)
+    assert images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8
+    assert labels.shape == (73257,) and labels.dtype == np.uint8
+    assert np.min(images) == 0 and np.max(images) == 255
+    assert np.min(labels) == 0 and np.max(labels) == 9
+
+    # One row per sample with a single 1.0 at the label's column.
+    num_classes = np.max(labels) + 1
+    onehot = np.zeros((labels.size, num_classes), dtype=np.float32)
+    onehot[np.arange(labels.size), labels] = 1.0
+
+    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
+        order = tfr.choose_shuffled_order()
+        for position in range(order.size):
+            source_index = order[position]
+            tfr.add_image(images[source_index])
+        tfr.add_labels(onehot[order])
+
+#----------------------------------------------------------------------------
+
+def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None):
+    """Export center-cropped, resized images from an LSUN LMDB database.
+
+    Every database value is decoded with OpenCV (falling back to PIL when
+    cv2.imdecode fails), cropped to a centered square, resized to
+    resolution x resolution, and written through TFRecordExporter as CHW.
+    Entries that fail to process are reported on stdout and skipped.
+
+    Args:
+        tfrecord_dir: Passed to TFRecordExporter as the output location.
+        lmdb_dir: Directory containing the LMDB database.
+        resolution: Edge length of the exported square images.
+        max_images: Stop once this many images were written; None means the
+            number of entries reported by the database.
+    """
+    print('Loading LSUN dataset from "%s"' % lmdb_dir)
+    import lmdb # pip install lmdb # pylint: disable=import-error
+    import cv2 # pip install opencv-python
+    import io
+    env = lmdb.open(lmdb_dir, readonly=True)
+    try:
+        with env.begin(write=False) as txn:
+            total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter
+            if max_images is None:
+                max_images = total_images
+            with TFRecordExporter(tfrecord_dir, max_images) as tfr:
+                for _idx, (_key, value) in enumerate(txn.cursor()):
+                    try:
+                        try:
+                            # np.frombuffer replaces the deprecated binary mode of np.fromstring.
+                            img = cv2.imdecode(np.frombuffer(value, dtype=np.uint8), 1)
+                            if img is None:
+                                raise IOError('cv2.imdecode failed')
+                            img = img[:, :, ::-1] # BGR => RGB
+                        except IOError:
+                            img = np.asarray(PIL.Image.open(io.BytesIO(value)))
+                        crop = np.min(img.shape[:2])
+                        img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2]
+                        img = PIL.Image.fromarray(img, 'RGB')
+                        img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS)
+                        img = np.asarray(img)
+                        img = img.transpose([2, 0, 1]) # HWC => CHW
+                        tfr.add_image(img)
+                    except Exception as exc:
+                        # Best effort: report the broken entry and keep going, but let
+                        # KeyboardInterrupt / SystemExit propagate.
+                        print(exc)
+                    if tfr.cur_images == max_images:
+                        break
+    finally:
+        # Release the LMDB environment even when the export fails.
+        env.close()
+
+#----------------------------------------------------------------------------
+
+def create_lsun_wide(tfrecord_dir, lmdb_dir, width=512, height=384, max_images=None):
+    """Export non-square LSUN images placed on a square, zero-filled canvas.
+
+    Every database value is decoded with OpenCV (falling back to PIL when
+    cv2.imdecode fails). Images narrower than width, or whose scaled height
+    would be below height, are skipped. The remaining ones are cropped
+    vertically around the center, resized to width x height, and pasted into
+    the vertical middle of a 3 x width x width uint8 canvas before being
+    written through TFRecordExporter. Entries that fail to process are
+    reported on stdout and skipped.
+
+    Args:
+        tfrecord_dir: Passed to TFRecordExporter as the output location.
+        lmdb_dir: Directory containing the LMDB database.
+        width: Output width; must be a power of two.
+        height: Output height; must not exceed width.
+        max_images: Stop once this many images were written; None means the
+            number of entries reported by the database.
+    """
+    assert width == 2 ** int(np.round(np.log2(width)))
+    assert height <= width
+    print('Loading LSUN dataset from "%s"' % lmdb_dir)
+    import lmdb # pip install lmdb # pylint: disable=import-error
+    import cv2 # pip install opencv-python
+    import io
+    env = lmdb.open(lmdb_dir, readonly=True)
+    try:
+        with env.begin(write=False) as txn:
+            total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter
+            if max_images is None:
+                max_images = total_images
+            with TFRecordExporter(tfrecord_dir, max_images, print_progress=False) as tfr:
+                for idx, (_key, value) in enumerate(txn.cursor()):
+                    try:
+                        try:
+                            # np.frombuffer replaces the deprecated binary mode of np.fromstring.
+                            img = cv2.imdecode(np.frombuffer(value, dtype=np.uint8), 1)
+                            if img is None:
+                                raise IOError('cv2.imdecode failed')
+                            img = img[:, :, ::-1] # BGR => RGB
+                        except IOError:
+                            img = np.asarray(PIL.Image.open(io.BytesIO(value)))
+
+                        ch = int(np.round(width * img.shape[0] / img.shape[1]))
+                        if img.shape[1] < width or ch < height:
+                            continue
+
+                        img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2]
+                        img = PIL.Image.fromarray(img, 'RGB')
+                        img = img.resize((width, height), PIL.Image.ANTIALIAS)
+                        img = np.asarray(img)
+                        img = img.transpose([2, 0, 1]) # HWC => CHW
+
+                        canvas = np.zeros([3, width, width], dtype=np.uint8)
+                        canvas[:, (width - height) // 2 : (width + height) // 2] = img
+                        tfr.add_image(canvas)
+                        print('\r%d / %d => %d ' % (idx + 1, total_images, tfr.cur_images), end='')
+
+                    except Exception as exc:
+                        # Best effort: report the broken entry and keep going, but let
+                        # KeyboardInterrupt / SystemExit propagate.
+                        print(exc)
+                    if tfr.cur_images == max_images:
+                        break
+    finally:
+        # Release the LMDB environment even when the export fails.
+        env.close()
+    print()
+
+#----------------------------------------------------------------------------
+
+def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121):
+    """Export 128x128 crops of the aligned CelebA PNG images.
+
+    Looks for '*.png' under <celeba_dir>/img_align_celeba_png, calls error()
+    (defined elsewhere in this file) unless exactly 202599 files are found,
+    and writes a crop centered at (cx, cy) of every image, in the exporter's
+    shuffled order, through TFRecordExporter as CHW.
+    """
+    print('Loading CelebA from "%s"' % celeba_dir)
+    image_filenames = sorted(glob.glob(os.path.join(celeba_dir, 'img_align_celeba_png', '*.png')))
+    expected_images = 202599
+    if len(image_filenames) != expected_images:
+        error('Expected to find %d images' % expected_images)
+
+    # 64 pixels on each side of the center => 128x128 crop.
+    rows = slice(cy - 64, cy + 64)
+    cols = slice(cx - 64, cx + 64)
+    with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr:
+        order = tfr.choose_shuffled_order()
+        for position in range(order.size):
+            filename = image_filenames[order[position]]
+            pixels = np.asarray(PIL.Image.open(filename))
+            assert pixels.shape == (218, 178, 3)
+            cropped = pixels[rows, cols]
+            tfr.add_image(cropped.transpose(2, 0, 1)) # HWC => CHW
+
+#----------------------------------------------------------------------------
+
+def create_from_images(tfrecord_dir, image_dir, shuffle):
+    """Export every file found directly inside image_dir through TFRecordExporter.
+
+    The first image (in sorted filename order) determines the expected layout:
+    it must be square, have a power-of-two resolution, and have one or three
+    channels; violations are reported via error() (defined elsewhere in this
+    file). Images are written as CHW, in shuffled order when shuffle is truthy
+    and in sorted filename order otherwise.
+    """
+    print('Loading images from "%s"' % image_dir)
+    image_filenames = sorted(glob.glob(os.path.join(image_dir, '*')))
+    if not image_filenames:
+        error('No input images found')
+
+    first = np.asarray(PIL.Image.open(image_filenames[0]))
+    resolution = first.shape[0]
+    if first.ndim == 3:
+        channels = first.shape[2]
+    else:
+        channels = 1
+    if first.shape[1] != resolution:
+        error('Input images must have the same width and height')
+    if resolution != 2 ** int(np.floor(np.log2(resolution))):
+        error('Input image resolution must be a power-of-two')
+    if channels not in [1, 3]:
+        error('Input images must be stored as RGB or grayscale')
+
+    with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr:
+        if shuffle:
+            order = tfr.choose_shuffled_order()
+        else:
+            order = np.arange(len(image_filenames))
+        for position in range(order.size):
+            pixels = np.asarray(PIL.Image.open(image_filenames[order[position]]))
+            if channels == 1:
+                pixels = pixels[np.newaxis, :, :] # HW => CHW
+            else:
+                pixels = pixels.transpose([2, 0, 1]) # HWC => CHW
+            tfr.add_image(pixels)
+
+#----------------------------------------------------------------------------
+
+def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle):
+    """Export the images of a legacy HDF5 archive through TFRecordExporter.
+
+    Among the archive entries whose key starts with 'data', the one with the
+    largest shape[3] is exported, in shuffled order when shuffle is truthy and
+    in stored order otherwise. If '<archive name without extension>-labels.npy'
+    exists, its rows are written as labels in the same order.
+    """
+    print('Loading HDF5 archive from "%s"' % hdf5_filename)
+    import h5py # conda install h5py
+    with h5py.File(hdf5_filename, 'r') as hdf5_file:
+        candidates = [value for key, value in hdf5_file.items() if key.startswith('data')]
+        hdf5_data = max(candidates, key=lambda lod: lod.shape[3])
+        num_images = hdf5_data.shape[0]
+        with TFRecordExporter(tfrecord_dir, num_images) as tfr:
+            if shuffle:
+                order = tfr.choose_shuffled_order()
+            else:
+                order = np.arange(num_images)
+            for position in range(order.size):
+                tfr.add_image(hdf5_data[order[position]])
+            npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy'
+            if os.path.isfile(npy_filename):
+                tfr.add_labels(np.load(npy_filename)[order])
+
+#----------------------------------------------------------------------------
+
+def execute_cmdline(argv):
+    """Parse a command line and dispatch to the module-level function named by the sub-command.
+
+    Args:
+        argv: Full argument vector. argv[0] is used as the program name and the
+            rest is parsed; when nothing follows argv[0], '-h' is parsed
+            instead so that the help text is shown.
+
+    The selected sub-command name is looked up in globals() and called with
+    the remaining parsed arguments as keyword arguments.
+    """
+    prog = argv[0]
+    parser = argparse.ArgumentParser(
+        prog=prog,
+        description='Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.',
+        epilog='Type "%s -h" for more information.' % prog)
+
+    subparsers = parser.add_subparsers(dest='command')
+    subparsers.required = True
+
+    def positional(name, text):
+        return name, dict(help=text)
+
+    def int_option(flag, text, default):
+        return flag, dict(help=text, type=int, default=default)
+
+    # Argument specs shared by several sub-commands.
+    existing_dataset = positional('tfrecord_dir', 'Directory containing dataset')
+    new_dataset = positional('tfrecord_dir', 'New dataset directory to be created')
+    lmdb_database = positional('lmdb_dir', 'Directory containing LMDB database')
+    max_images = int_option('--max_images', 'Maximum number of images (default: none)', None)
+    shuffle = int_option('--shuffle', 'Randomize image order (default: 1)', 1)
+
+    # (command, description, example, argument specs), registered in this order.
+    command_table = [
+        ('display', 'Display images in dataset.',
+         'display datasets/mnist',
+         [existing_dataset]),
+        ('extract', 'Extract images from dataset.',
+         'extract datasets/mnist mnist-images',
+         [existing_dataset,
+          positional('output_dir', 'Directory to extract the images into')]),
+        ('compare', 'Compare two datasets.',
+         'compare datasets/mydataset datasets/mnist',
+         [positional('tfrecord_dir_a', 'Directory containing first dataset'),
+          positional('tfrecord_dir_b', 'Directory containing second dataset'),
+          int_option('--ignore_labels', 'Ignore labels (default: 0)', 0)]),
+        ('create_mnist', 'Create dataset for MNIST.',
+         'create_mnist datasets/mnist ~/downloads/mnist',
+         [new_dataset,
+          positional('mnist_dir', 'Directory containing MNIST')]),
+        ('create_mnistrgb', 'Create dataset for MNIST-RGB.',
+         'create_mnistrgb datasets/mnistrgb ~/downloads/mnist',
+         [new_dataset,
+          positional('mnist_dir', 'Directory containing MNIST'),
+          int_option('--num_images', 'Number of composite images to create (default: 1000000)', 1000000),
+          int_option('--random_seed', 'Random seed (default: 123)', 123)]),
+        ('create_cifar10', 'Create dataset for CIFAR-10.',
+         'create_cifar10 datasets/cifar10 ~/downloads/cifar10',
+         [new_dataset,
+          positional('cifar10_dir', 'Directory containing CIFAR-10')]),
+        ('create_cifar100', 'Create dataset for CIFAR-100.',
+         'create_cifar100 datasets/cifar100 ~/downloads/cifar100',
+         [new_dataset,
+          positional('cifar100_dir', 'Directory containing CIFAR-100')]),
+        ('create_svhn', 'Create dataset for SVHN.',
+         'create_svhn datasets/svhn ~/downloads/svhn',
+         [new_dataset,
+          positional('svhn_dir', 'Directory containing SVHN')]),
+        ('create_lsun', 'Create dataset for single LSUN category.',
+         'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000',
+         [new_dataset,
+          lmdb_database,
+          int_option('--resolution', 'Output resolution (default: 256)', 256),
+          max_images]),
+        ('create_lsun_wide', 'Create LSUN dataset with non-square aspect ratio.',
+         'create_lsun_wide datasets/lsun-car-512x384 ~/downloads/lsun/car_lmdb --width 512 --height 384',
+         [new_dataset,
+          lmdb_database,
+          int_option('--width', 'Output width (default: 512)', 512),
+          int_option('--height', 'Output height (default: 384)', 384),
+          max_images]),
+        ('create_celeba', 'Create dataset for CelebA.',
+         'create_celeba datasets/celeba ~/downloads/celeba',
+         [new_dataset,
+          positional('celeba_dir', 'Directory containing CelebA'),
+          int_option('--cx', 'Center X coordinate (default: 89)', 89),
+          int_option('--cy', 'Center Y coordinate (default: 121)', 121)]),
+        ('create_from_images', 'Create dataset from a directory full of images.',
+         'create_from_images datasets/mydataset myimagedir',
+         [new_dataset,
+          positional('image_dir', 'Directory containing the images'),
+          shuffle]),
+        ('create_from_hdf5', 'Create dataset from legacy HDF5 archive.',
+         'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5',
+         [new_dataset,
+          positional('hdf5_filename', 'HDF5 archive containing the images'),
+          shuffle]),
+    ]
+
+    for cmd, desc, example, arguments in command_table:
+        epilog = 'Example: %s %s' % (prog, example)
+        sub = subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog)
+        for name, options in arguments:
+            sub.add_argument(name, **options)
+
+    args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h'])
+    # The sub-command name selects the function; everything else becomes its kwargs.
+    kwargs = vars(args)
+    func = globals()[kwargs.pop('command')]
+    func(**kwargs)
+
+#----------------------------------------------------------------------------
+
+# Script entry point: hand the process argument vector to the command-line dispatcher.
+if __name__ == "__main__":
+ execute_cmdline(sys.argv)
+
+#----------------------------------------------------------------------------
diff --git a/dnnlib/__init__.py b/dnnlib/__init__.py
new file mode 100755
index 0000000..e34112b
--- /dev/null
+++ b/dnnlib/__init__.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+from . import submission
+
+from .submission.run_context import RunContext
+
+from .submission.submit import SubmitTarget
+from .submission.submit import PathType
+from .submission.submit import SubmitConfig
+from .submission.submit import submit_run
+from .submission.submit import get_path_from_template
+from .submission.submit import convert_path
+from .submission.submit import make_run_dir_path
+
+from .util import EasyDict
+
+# Package-level SubmitConfig of the current run. Starts out as None and is only valid while inside the run function.
+submit_config: SubmitConfig = None # Package level variable for SubmitConfig which is only valid when inside the run function.
diff --git a/dnnlib/submission/__init__.py b/dnnlib/submission/__init__.py
new file mode 100755
index 0000000..acf2fbe
--- /dev/null
+++ b/dnnlib/submission/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+from . import run_context
+from . import submit
diff --git a/dnnlib/submission/internal/__init__.py b/dnnlib/submission/internal/__init__.py
new file mode 100755
index 0000000..0f11279
--- /dev/null
+++ b/dnnlib/submission/internal/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+from . import local
diff --git a/dnnlib/submission/internal/local.py b/dnnlib/submission/internal/local.py
new file mode 100755
index 0000000..c03c79e
--- /dev/null
+++ b/dnnlib/submission/internal/local.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+class TargetOptions:
+    """Options that apply to the local submit target."""
+
+    def __init__(self):
+        # When True, source files are not copied into the run dir.
+        self.do_not_copy_source_files = False
+
+class Target:
+    """Local submit target: the run function is invoked directly via run_wrapper."""
+
+    def finalize_submit_config(self, submit_config, host_run_dir):
+        """Announce the local submit and use the host run dir as the run dir."""
+        print('Local submit ', end='', flush=True)
+        submit_config.run_dir = host_run_dir
+
+    def submit(self, submit_config, host_run_dir):
+        """Print the run dir and execute the run, returning run_wrapper's result."""
+        # Imported here rather than at module level (kept as in the original code).
+        from ..submit import run_wrapper, convert_path
+        shown_dir = convert_path(submit_config.run_dir)
+        print('- run_dir: %s' % shown_dir, flush=True)
+        return run_wrapper(submit_config)
diff --git a/dnnlib/submission/run_context.py b/dnnlib/submission/run_context.py
new file mode 100755
index 0000000..62fbb1a
--- /dev/null
+++ b/dnnlib/submission/run_context.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Helpers for managing the run/training loop."""
+
+import datetime
+import json
+import os
+import pprint
+import time
+import types
+
+from typing import Any
+
+from . import submit
+
+# The single live RunContext, or None when no context is active.
+_run_context = None
+
+class RunContext(object):
+    """Helper class for managing the run/training loop.
+
+    The context hides the bookkeeping of a basic run/training loop: it records
+    the start of the run in "run.txt" inside the run dir, reports whether the
+    run should stop, and records the stop time when closed. Callers should
+    invoke update() periodically and poll should_stop().
+
+    Args:
+        submit_config: The SubmitConfig that is used for the current run.
+        config_module: (deprecated) The whole config module that is used for the current run.
+    """
+
+    def __init__(self, submit_config: submit.SubmitConfig, config_module: types.ModuleType = None):
+        global _run_context
+        # Only a single RunContext can be alive
+        assert _run_context is None
+        _run_context = self
+
+        self.submit_config = submit_config
+        self.should_stop_flag = False
+        self.has_closed = False
+        self.start_time = time.time()
+        self.last_update_time = time.time()
+        self.last_update_interval = 0.0
+        self.progress_monitor_file_path = None
+
+        # vestigial config_module support just prints a warning
+        if config_module is not None:
+            print("RunContext.config_module parameter support has been removed.")
+
+        # write out details about the run to a text file
+        self.run_txt_data = {
+            "task_name": submit_config.task_name,
+            "host_name": submit_config.host_name,
+            "start_time": datetime.datetime.now().isoformat(sep=" "),
+        }
+        self._write_run_txt()
+
+    def _write_run_txt(self) -> None:
+        """Overwrite "run.txt" in the run dir with the current run_txt_data."""
+        run_txt_path = os.path.join(self.submit_config.run_dir, "run.txt")
+        with open(run_txt_path, "w") as f:
+            pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False)
+
+    def __enter__(self) -> "RunContext":
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.close()
+
+    def update(self, loss: Any = 0, cur_epoch: Any = 0, max_epoch: Any = None) -> None:
+        """Do general housekeeping and keep the state of the context up-to-date.
+        Should be called often enough but not in a tight loop.
+
+        The loss, cur_epoch and max_epoch arguments are accepted but not used here."""
+        assert not self.has_closed
+
+        self.last_update_interval = time.time() - self.last_update_time
+        self.last_update_time = time.time()
+
+        # An "abort.txt" file in the run dir requests the run to stop.
+        abort_path = os.path.join(self.submit_config.run_dir, "abort.txt")
+        if os.path.exists(abort_path):
+            self.should_stop_flag = True
+
+    def should_stop(self) -> bool:
+        """Tell whether a stopping condition has been triggered one way or another."""
+        return self.should_stop_flag
+
+    def get_time_since_start(self) -> float:
+        """How much time has passed since the creation of the context."""
+        return time.time() - self.start_time
+
+    def get_time_since_last_update(self) -> float:
+        """How much time has passed since the last call to update."""
+        return time.time() - self.last_update_time
+
+    def get_last_update_interval(self) -> float:
+        """How much time passed between the previous two calls to update."""
+        return self.last_update_interval
+
+    def close(self) -> None:
+        """Close the context and clean up.
+        Should only be called once; later calls only detach the singleton."""
+        global _run_context
+
+        if not self.has_closed:
+            # update the run.txt with stopping time
+            self.run_txt_data["stop_time"] = datetime.datetime.now().isoformat(sep=" ")
+            self._write_run_txt()
+            self.has_closed = True
+
+        # detach the global singleton
+        if _run_context is self:
+            _run_context = None
+
+    @staticmethod
+    def get():
+        """Return the live RunContext, creating one from dnnlib.submit_config if there is none."""
+        import dnnlib
+        if _run_context is None:
+            return RunContext(dnnlib.submit_config)
+        return _run_context
diff --git a/dnnlib/submission/submit.py b/dnnlib/submission/submit.py
new file mode 100755
index 0000000..514647d
--- /dev/null
+++ b/dnnlib/submission/submit.py
@@ -0,0 +1,343 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Submit a function to be run either locally or in a computing cluster."""
+
+import copy
+import inspect
+import os
+import pathlib
+import pickle
+import platform
+import pprint
+import re
+import shutil
+import sys
+import time
+import traceback
+
+from enum import Enum
+
+from .. import util
+from ..util import EasyDict
+
+from . import internal
+
+class SubmitTarget(Enum):
+    """Where a submitted function is executed.
+
+    Members:
+        LOCAL: Run it locally.
+    """
+
+    LOCAL = 1
+
+
+class PathType(Enum):
+    """Selects the style a path is formatted in.
+
+    Members:
+        WINDOWS: Format with Windows style.
+        LINUX: Format with Linux/Posix style.
+        AUTO: Use current OS type to select either WINDOWS or LINUX.
+    """
+
+    WINDOWS = 1
+    LINUX = 2
+    AUTO = 3
+
+
+class PlatformExtras:
+    """A mixed bag of values used by dnnlib heuristics.
+
+    Attributes:
+        data_reader_buffer_size: Used by DataReader to size internal shared memory buffers.
+        data_reader_process_count: Number of worker processes to spawn (zero for single thread operation)
+    """
+
+    def __init__(self):
+        one_gigabyte = 2 ** 30
+        self.data_reader_buffer_size = one_gigabyte
+        # zero => single threaded default
+        self.data_reader_process_count = 0
+
+
+# User name forced via set_user_name_override(); None means get_user_name() detects it from the OS.
+_user_name_override = None
+
+class SubmitConfig(util.EasyDict):
+ """Strongly typed config dict needed to submit runs.
+
+ The constructor fills in defaults for every field; callers set the "run" and
+ "submit" groups, and the remaining fields are populated during submit.
+
+ Attributes:
+ run_dir_root: Path to the run dir root. Can be optionally templated with tags. Needs to always be run through get_path_from_template.
+ run_desc: Description of the run. Will be used in the run dir and task name.
+ run_dir_ignore: List of file patterns used to ignore files when copying files to the run dir.
+ run_dir_extra_files: List of (abs_path, rel_path) tuples of file paths. rel_path root will be the src directory inside the run dir.
+ submit_target: Submit target enum value. Used to select where the run is actually launched.
+ num_gpus: Number of GPUs used/requested for the run.
+ print_info: Whether to print debug information when submitting.
+ nvprof: Defaults to False. Not read anywhere in the visible part of this module (presumably toggles NVIDIA profiler use -- TODO confirm).
+ local.do_not_copy_source_files: Do not copy source files from the working directory to the run dir.
+ datasets: Defaults to an empty list. Not read anywhere in the visible part of this module.
+ run_id: Automatically populated value during submit.
+ run_name: Automatically populated value during submit.
+ run_dir: Automatically populated value during submit.
+ run_func_name: Automatically populated value during submit.
+ run_func_kwargs: Automatically populated value during submit.
+ user_name: Automatically populated value during submit. Can be set by the user which will then override the automatic value.
+ task_name: Automatically populated value during submit.
+ host_name: Automatically populated value during submit.
+ platform_extras: Automatically populated values during submit. Used by various dnnlib libraries such as the DataReader class.
+ """
+
+ def __init__(self):
+ super().__init__()
+
+ # run (set these)
+ self.run_dir_root = "" # should always be passed through get_path_from_template
+ self.run_desc = ""
+ self.run_dir_ignore = ["__pycache__", "*.pyproj", "*.sln", "*.suo", ".cache", ".idea", ".vs", ".vscode", "_cudacache"]
+ self.run_dir_extra_files = []
+
+ # submit (set these)
+ self.submit_target = SubmitTarget.LOCAL
+ self.num_gpus = 1
+ self.print_info = False
+ self.nvprof = False
+ self.local = internal.local.TargetOptions()
+ self.datasets = []
+
+ # (automatically populated)
+ self.run_id = None
+ self.run_name = None
+ self.run_dir = None
+ self.run_func_name = None
+ self.run_func_kwargs = None
+ self.user_name = None
+ self.task_name = None
+ self.host_name = "localhost"
+ self.platform_extras = PlatformExtras()
+
+
+def get_path_from_template(path_template: str, path_type: PathType = PathType.AUTO) -> str:
+    """Replace tags in the given path template and return either Windows or Linux formatted path.
+
+    Args:
+        path_template: Path that may contain the "<USERNAME>" tag.
+        path_type: Output format; AUTO selects WINDOWS or LINUX from the running OS.
+
+    Returns:
+        The path with the tag expanded, formatted in the requested style.
+
+    Raises:
+        RuntimeError: If path_type is AUTO on an OS other than Windows or Linux,
+            or if path_type is not a known PathType.
+    """
+    # automatically select path type depending on running OS
+    if path_type == PathType.AUTO:
+        system = platform.system()
+        if system == "Windows":
+            path_type = PathType.WINDOWS
+        elif system == "Linux":
+            path_type = PathType.LINUX
+        else:
+            raise RuntimeError("Unknown platform")
+
+    # The tag must be a non-empty literal: str.replace("", name) would insert the
+    # user name before every character of the path instead of expanding a tag.
+    path_template = path_template.replace("<USERNAME>", get_user_name())
+
+    # return correctly formatted path
+    if path_type == PathType.WINDOWS:
+        return str(pathlib.PureWindowsPath(path_template))
+    elif path_type == PathType.LINUX:
+        return str(pathlib.PurePosixPath(path_template))
+    else:
+        raise RuntimeError("Unknown platform")
+
+
+def get_template_from_path(path: str) -> str:
+    """Convert a normal path back to its template representation.
+
+    Every backslash is replaced with a forward slash; nothing else changes.
+    """
+    return path.replace("\\", "/")
+
+
+def convert_path(path: str, path_type: PathType = PathType.AUTO) -> str:
+    """Convert a normal path to a template and then back to a normal path of the given path type."""
+    return get_path_from_template(get_template_from_path(path), path_type)
+
+
+def set_user_name_override(name: str) -> None:
+    """Set the global username override value.
+
+    While the override is not None, get_user_name() returns it instead of
+    querying the operating system.
+    """
+    global _user_name_override
+    _user_name_override = name
+
+
+def get_user_name():
+    """Get the current user name.
+
+    Returns the value set through set_user_name_override() when there is one.
+    Otherwise uses os.getlogin() on Windows and, on Linux, the password
+    database entry of the effective uid, falling back to "unknown" if that
+    lookup fails.
+
+    Raises:
+        RuntimeError: On any other platform when no override is set.
+    """
+    if _user_name_override is not None:
+        return _user_name_override
+
+    system = platform.system()
+    if system == "Windows":
+        return os.getlogin()
+    if system == "Linux":
+        try:
+            import pwd
+            return pwd.getpwuid(os.geteuid()).pw_name
+        except Exception:
+            # Best effort (e.g. the uid has no passwd entry); unlike a bare
+            # except, KeyboardInterrupt and SystemExit still propagate.
+            return "unknown"
+    raise RuntimeError("Unknown platform")
+
+
+def make_run_dir_path(*paths):
+    """Make a path/filename that resides under the current submit run_dir.
+
+    Args:
+        *paths: Path components to be passed to os.path.join
+
+    Returns:
+        A file/dirname rooted at submit_config.run_dir. If there's no
+        submit_config or run_dir, the base directory is the current
+        working directory.
+
+    E.g., `os.path.join(dnnlib.submit_config.run_dir, "output.txt")`
+    """
+    import dnnlib
+    config = dnnlib.submit_config
+    if config is not None and config.run_dir is not None:
+        base_dir = config.run_dir
+    else:
+        base_dir = os.getcwd()
+    return os.path.join(base_dir, *paths)
+
+
+def _create_run_dir_local(submit_config: SubmitConfig) -> str:
+    """Create a new run dir with increasing ID number at the start.
+
+    Populates submit_config.run_id and submit_config.run_name as a side effect.
+
+    Returns:
+        The path of the newly created run dir.
+
+    Raises:
+        RuntimeError: If the run dir already exists.
+    """
+    run_dir_root = get_path_from_template(submit_config.run_dir_root, PathType.AUTO)
+
+    # exist_ok avoids failing when another process creates the root between a check and the creation.
+    os.makedirs(run_dir_root, exist_ok=True)
+
+    submit_config.run_id = _get_next_run_id_local(run_dir_root)
+    submit_config.run_name = "{0:05d}-{1}".format(submit_config.run_id, submit_config.run_desc)
+    run_dir = os.path.join(run_dir_root, submit_config.run_name)
+
+    # Create first and react to the failure instead of checking beforehand, so a
+    # concurrent submit that picked the same ID cannot slip in between.
+    try:
+        os.makedirs(run_dir)
+    except FileExistsError:
+        raise RuntimeError("The run dir already exists! ({0})".format(run_dir))
+
+    return run_dir
+
+
+def _get_next_run_id_local(run_dir_root: str) -> int:
+    """Return the next (increasing) run id for the given directory.
+
+    Looks at the names of the immediate sub-directories (non-recursive) and
+    treats the digits at the start of a name as its ID. The result is one more
+    than the largest ID found, or 0 when no sub-directory starts with digits.
+    """
+    leading_digits = re.compile("^\\d+") # match one or more digits at the start of the string
+    used_ids = []
+    for entry in os.listdir(run_dir_root):
+        if not os.path.isdir(os.path.join(run_dir_root, entry)):
+            continue
+        found = leading_digits.match(entry)
+        if found is not None:
+            used_ids.append(int(found.group()))
+    return max(used_ids) + 1 if used_ids else 0
+
+
+def _populate_run_dir(submit_config: SubmitConfig, run_dir: str) -> None:
+    """Copy all necessary files into the run dir. Assumes that the dir exists, is local, and is writable.
+
+    Always writes "submit_config.pkl" and "submit_config.txt". Unless the local
+    target is configured with do_not_copy_source_files, it also copies the
+    run function's source tree, the dnnlib package, and
+    submit_config.run_dir_extra_files into "<run_dir>/src", plus dnnlib's
+    internal "run.py" into the run dir itself.
+    """
+    # Use a context manager so the pickle file is flushed and closed right away.
+    with open(os.path.join(run_dir, "submit_config.pkl"), "wb") as f:
+        pickle.dump(submit_config, f)
+    with open(os.path.join(run_dir, "submit_config.txt"), "w") as f:
+        pprint.pprint(submit_config, stream=f, indent=4, width=200, compact=False)
+
+    if (submit_config.submit_target == SubmitTarget.LOCAL) and submit_config.local.do_not_copy_source_files:
+        return
+
+    files = []
+
+    run_func_module_dir_path = util.get_module_dir_by_obj_name(submit_config.run_func_name)
+    assert '.' in submit_config.run_func_name
+    # Walk up one directory for every dot beyond the first in the function name.
+    for _idx in range(submit_config.run_func_name.count('.') - 1):
+        run_func_module_dir_path = os.path.dirname(run_func_module_dir_path)
+    files += util.list_dir_recursively_with_ignore(run_func_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=False)
+
+    dnnlib_module_dir_path = util.get_module_dir_by_obj_name("dnnlib")
+    files += util.list_dir_recursively_with_ignore(dnnlib_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=True)
+
+    files += submit_config.run_dir_extra_files
+
+    # Re-root every relative destination under "<run_dir>/src".
+    files = [(f[0], os.path.join(run_dir, "src", f[1])) for f in files]
+    files += [(os.path.join(dnnlib_module_dir_path, "submission", "internal", "run.py"), os.path.join(run_dir, "run.py"))]
+
+    util.copy_files_and_create_dirs(files)
+
+
+
+def run_wrapper(submit_config: SubmitConfig) -> None:
+ """Wrap the actual run function call for handling logging, exceptions, typing, etc.
+
+ Resolves submit_config.run_func_name to a callable and invokes it with
+ submit_config.run_func_kwargs, passing submit_config as well when the
+ callable declares a parameter of that name. dnnlib.submit_config is set for
+ the duration of the call and reset to None afterwards.
+
+ Note: despite the "-> None" annotation, the function returns submit_config.
+ For non-local targets a failing run ends in sys.exit(1) instead of a raise.
+ """
+ is_local = submit_config.submit_target == SubmitTarget.LOCAL
+
+ # when running locally, redirect stderr to stdout, log stdout to a file, and force flushing
+ if is_local:
+ logger = util.Logger(file_name=os.path.join(submit_config.run_dir, "log.txt"), file_mode="w", should_flush=True)
+ else: # when running in a cluster, redirect stderr to stdout, and just force flushing (log writing is handled by run.sh)
+ logger = util.Logger(file_name=None, should_flush=True)
+
+ # Publish the config package-wide so code inside the run function can reach it.
+ import dnnlib
+ dnnlib.submit_config = submit_config
+
+ exit_with_errcode = False
+ try:
+ print("dnnlib: Running {0}() on {1}...".format(submit_config.run_func_name, submit_config.host_name))
+ start_time = time.time()
+
+ run_func_obj = util.get_obj_by_name(submit_config.run_func_name)
+ assert callable(run_func_obj)
+ # Only pass submit_config to run functions that declare a parameter for it.
+ sig = inspect.signature(run_func_obj)
+ if 'submit_config' in sig.parameters:
+ run_func_obj(submit_config=submit_config, **submit_config.run_func_kwargs)
+ else:
+ run_func_obj(**submit_config.run_func_kwargs)
+
+ print("dnnlib: Finished {0}() in {1}.".format(submit_config.run_func_name, util.format_time(time.time() - start_time)))
+ except:
+ # Local runs re-raise to the caller; other targets print the traceback and
+ # copy the log next to the run dirs as "<run_name>-error.txt".
+ if is_local:
+ raise
+ else:
+ traceback.print_exc()
+
+ log_src = os.path.join(submit_config.run_dir, "log.txt")
+ log_dst = os.path.join(get_path_from_template(submit_config.run_dir_root), "{0}-error.txt".format(submit_config.run_name))
+ shutil.copyfile(log_src, log_dst)
+
+ # Defer sys.exit(1) to happen after we close the logs and create a _finished.txt
+ exit_with_errcode = True
+ finally:
+ # Runs on success and on failure: create the empty "_finished.txt" marker first.
+ open(os.path.join(submit_config.run_dir, "_finished.txt"), "w").close()
+
+ # RunContext.get() creates a context if none exists, so close() always has a target.
+ dnnlib.RunContext.get().close()
+ dnnlib.submit_config = None
+ logger.close()
+
+ # If we hit an error, get out of the script now and signal the error
+ # to whatever process that started this script.
+ if exit_with_errcode:
+ sys.exit(1)
+
+ return submit_config
+
+
+def submit_run(submit_config: SubmitConfig, run_func_name: str, **run_func_kwargs) -> None:
+    """Create a run dir, gather files related to the run, copy files to the run dir, and launch the run in appropriate place.
+
+    Works on a deep copy of submit_config, so the caller's object is left
+    untouched. The value returned by the target's submit() is passed through.
+
+    Raises:
+        RuntimeError: If num_gpus is None or zero, or if the generated task name
+            does not match the allowed pattern.
+    """
+    submit_config = copy.deepcopy(submit_config)
+
+    # Pick the object that performs the actual submission.
+    farm = internal.local.Target() if submit_config.submit_target == SubmitTarget.LOCAL else None
+    assert farm is not None # unknown target
+
+    # Disallow submitting jobs with zero num_gpus.
+    gpu_count = submit_config.num_gpus
+    if gpu_count is None or gpu_count == 0:
+        raise RuntimeError("submit_config.num_gpus must be set to a non-zero value")
+
+    if submit_config.user_name is None:
+        submit_config.user_name = get_user_name()
+
+    submit_config.run_func_name = run_func_name
+    submit_config.run_func_kwargs = run_func_kwargs
+
+    #--------------------------------------------------------------------
+    # Prepare submission by populating the run dir
+    #--------------------------------------------------------------------
+    host_run_dir = _create_run_dir_local(submit_config)
+
+    task_name = "{0}-{1:05d}-{2}".format(submit_config.user_name, submit_config.run_id, submit_config.run_desc)
+    submit_config.task_name = task_name
+    docker_valid_name_regex = "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$"
+    if re.match(docker_valid_name_regex, task_name) is None:
+        raise RuntimeError("Invalid task name.  Probable reason: unacceptable characters in your submit_config.run_desc.  Task name must be accepted by the following regex: " + docker_valid_name_regex + ", got " + task_name)
+
+    # Farm specific preparations for a submit
+    farm.finalize_submit_config(submit_config, host_run_dir)
+    _populate_run_dir(submit_config, host_run_dir)
+    return farm.submit(submit_config, host_run_dir)
diff --git a/dnnlib/tflib/__init__.py b/dnnlib/tflib/__init__.py
new file mode 100755
index 0000000..02c2517
--- /dev/null
+++ b/dnnlib/tflib/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+from . import autosummary
+from . import network
+from . import optimizer
+from . import tfutil
+from . import custom_ops
+
+from .tfutil import *
+from .network import Network
+
+from .optimizer import Optimizer
+
+from .custom_ops import get_plugin
diff --git a/dnnlib/tflib/autosummary.py b/dnnlib/tflib/autosummary.py
new file mode 100755
index 0000000..6b0d80b
--- /dev/null
+++ b/dnnlib/tflib/autosummary.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Helper for adding automatically tracked values to Tensorboard.
+
+Autosummary creates an identity op that internally keeps track of the input
+values and automatically shows up in TensorBoard. The reported value
+represents an average over input components. The average is accumulated
+constantly over time and flushed when save_summaries() is called.
+
+Notes:
+- The output tensor must be used as an input for something else in the
+ graph. Otherwise, the autosummary op will not get executed, and the average
+ value will not get accumulated.
+- It is perfectly fine to include autosummaries with the same name in
+ several places throughout the graph, even if they are executed concurrently.
+- It is ok to also pass in a python scalar or numpy array. In this case, it
+ is added to the average immediately.
+"""
+
+from collections import OrderedDict
+import numpy as np
+import tensorflow as tf
+from tensorboard import summary as summary_lib
+from tensorboard.plugins.custom_scalar import layout_pb2
+
+from . import tfutil
+from .tfutil import TfExpression
+from .tfutil import TfExpressionEx
+
+# Enable "Custom scalars" tab in TensorBoard for advanced formatting.
+# Disabled by default to reduce tfevents file size.
+enable_custom_scalars = False
+
+# Module-level bookkeeping shared by the functions in this module.
+_dtype = tf.float64 # dtype of the accumulator variables and of the immediate-update placeholders
+_vars = OrderedDict() # name => [var, ...]
+_immediate = OrderedDict() # name => update_op, update_value
+_finalized = False # set by finalize_autosummaries(); _create_var() asserts it is still False
+_merge_op = None # merged summary op, created on the first save_summaries() call
+
+
+def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
+    """Create an accumulator variable for `name` and return the op that adds `value_expr` to it.
+
+    The accumulator stores three running sums: element count, sum of the
+    values and sum of the squared values. An update whose value sum is not
+    finite contributes zeros instead. The new variable is appended to the
+    list kept under `name` in the module-level `_vars` table.
+    """
+    assert not _finalized
+    scope_id = name.replace("/", "_")
+    x = tf.cast(value_expr, _dtype)
+
+    # Element count: known in Python for static shapes, otherwise computed in the graph.
+    if x.shape.is_fully_defined():
+        count = np.prod(x.shape.as_list())
+        count_expr = tf.constant(count, dtype=_dtype)
+    else:
+        count = None
+        count_expr = tf.reduce_prod(tf.cast(tf.shape(x), _dtype))
+
+    if count == 1:
+        # Single element: no reduction needed, just collapse to a scalar.
+        if x.shape.ndims != 0:
+            x = tf.reshape(x, [])
+        sums = [count_expr, x, tf.square(x)]
+    else:
+        sums = [count_expr, tf.reduce_sum(x), tf.reduce_sum(tf.square(x))]
+    delta = tf.cond(tf.is_finite(sums[1]), lambda: tf.stack(sums), lambda: tf.zeros(3, dtype=_dtype))
+
+    with tfutil.absolute_name_scope("Autosummary/" + scope_id), tf.control_dependencies(None):
+        accum = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)]
+    # Overwrite on the first update if the variable has not been initialized yet, otherwise add.
+    update_op = tf.cond(tf.is_variable_initialized(accum), lambda: tf.assign_add(accum, delta), lambda: tf.assign(accum, delta))
+
+    _vars.setdefault(name, []).append(accum)
+    return update_op
+
+
+def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
+    """Create a new autosummary.
+
+    Args:
+        name:     Name to use in TensorBoard
+        value:    TensorFlow expression or python value to track
+        passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
+        condition: The update is recorded only when this evaluates to true. May be a
+                  TensorFlow expression when `value` is one; must be a plain python value otherwise.
+
+    Returns:
+        `passthru` if it was given, otherwise `value`. For TensorFlow inputs the
+        result is wrapped in an identity op that depends on the update.
+
+    Example use of the passthru mechanism:
+
+    n = autosummary('l2loss', loss, passthru=n)
+
+    This is a shorthand for the following code:
+
+    with tf.control_dependencies([autosummary('l2loss', loss)]):
+        n = tf.identity(n)
+    """
+    tfutil.assert_tf_initialized()
+    name_id = name.replace("/", "_")
+    result = value if passthru is None else passthru
+
+    if not tfutil.is_tf_expression(value):
+        # python scalar or numpy array: feed it through a cached placeholder right away.
+        assert not tfutil.is_tf_expression(passthru)
+        assert not tfutil.is_tf_expression(condition)
+        if condition:
+            if name not in _immediate:
+                with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
+                    placeholder = tf.placeholder(_dtype)
+                    _immediate[name] = _create_var(name, placeholder), placeholder
+            update_op, update_value = _immediate[name]
+            tfutil.run(update_op, {update_value: value})
+        return result
+
+    # TensorFlow expression: attach the update as a control dependency of the returned node.
+    with tf.name_scope("summary_" + name_id), tf.device(value.device):
+        condition = tf.convert_to_tensor(condition, name='condition')
+        update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
+        with tf.control_dependencies([update_op]):
+            return tf.identity(result)
+
+
+def finalize_autosummaries() -> None:
+ """Create the necessary ops to include autosummaries in TensorBoard report.
+ Note: This should be done only once per graph.
+
+ A repeated call does nothing and returns None. The first call creates one
+ scalar summary per name registered in _vars and resets the accumulators
+ whenever those summaries are evaluated.
+
+ Returns:
+ Despite the "-> None" annotation, the first call returns the custom
+ scalars layout when enable_custom_scalars is set, and None otherwise.
+ """
+ global _finalized
+ tfutil.assert_tf_initialized()
+
+ if _finalized:
+ return None
+
+ # Flag first: _create_var() asserts that no accumulators are added after this point.
+ _finalized = True
+ tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])
+
+ # Create summary ops.
+ with tf.device(None), tf.control_dependencies(None):
+ for name, vars_list in _vars.items():
+ name_id = name.replace("/", "_")
+ with tfutil.absolute_name_scope("Autosummary/" + name_id):
+ # Sum the [sum(1), sum(x), sum(x**2)] accumulators of every variable sharing this name.
+ moments = tf.add_n(vars_list)
+ # Divide by the element count => [1, mean(x), mean(x**2)].
+ moments /= moments[0]
+ with tf.control_dependencies([moments]): # read before resetting
+ reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
+ with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting
+ mean = moments[1]
+ # Standard deviation as sqrt(mean(x**2) - mean(x)**2).
+ std = tf.sqrt(moments[2] - tf.square(moments[1]))
+ tf.summary.scalar(name, mean)
+ if enable_custom_scalars:
+ tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
+ tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)
+
+ # Setup layout for custom scalars.
+ layout = None
+ if enable_custom_scalars:
+ # Group series by category and chart, both derived from the "/"-separated name:
+ # "a/b/c/d" => category "a", chart "b/c"; "a/b" => category "a", chart "b"; "a" => category "", chart "a".
+ cat_dict = OrderedDict()
+ for series_name in sorted(_vars.keys()):
+ p = series_name.split("/")
+ cat = p[0] if len(p) >= 2 else ""
+ chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
+ if cat not in cat_dict:
+ cat_dict[cat] = OrderedDict()
+ if chart not in cat_dict[cat]:
+ cat_dict[cat][chart] = []
+ cat_dict[cat][chart].append(series_name)
+ # Build one margin chart per chart title, pairing each series with its margin_lo/margin_hi summaries.
+ categories = []
+ for cat_name, chart_dict in cat_dict.items():
+ charts = []
+ for chart_name, series_names in chart_dict.items():
+ series = []
+ for series_name in series_names:
+ series.append(layout_pb2.MarginChartContent.Series(
+ value=series_name,
+ lower="xCustomScalars/" + series_name + "/margin_lo",
+ upper="xCustomScalars/" + series_name + "/margin_hi"))
+ margin = layout_pb2.MarginChartContent(series=series)
+ charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
+ categories.append(layout_pb2.Category(title=cat_name, chart=charts))
+ layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
+ return layout
+
+def save_summaries(file_writer, global_step=None):
+    """Evaluate the merged summary op and pass the result to `file_writer.add_summary()`.
+
+    On the first call the autosummaries are finalized, the custom scalars
+    layout (if one was produced) is written, and the merged summary op is
+    created and cached for later calls.
+    """
+    global _merge_op
+    tfutil.assert_tf_initialized()
+
+    first_call = _merge_op is None
+    if first_call:
+        custom_layout = finalize_autosummaries()
+        if custom_layout is not None:
+            file_writer.add_summary(custom_layout)
+        with tf.device(None), tf.control_dependencies(None):
+            _merge_op = tf.summary.merge_all()
+
+    summary_data = _merge_op.eval()
+    file_writer.add_summary(summary_data, global_step)
diff --git a/dnnlib/tflib/custom_ops.py b/dnnlib/tflib/custom_ops.py
new file mode 100755
index 0000000..e6c3e52
--- /dev/null
+++ b/dnnlib/tflib/custom_ops.py
@@ -0,0 +1,169 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""TensorFlow custom ops builder.
+"""
+
+import os
+import re
+import uuid
+import hashlib
+import tempfile
+import shutil
+import tensorflow as tf
+from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
+
+#----------------------------------------------------------------------------
+# Global options.
+
+# Directory for compiled plugin binaries, located next to this module.
+cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
+# Mixed into the cache hash by get_plugin(), so changing it stops previously cached binaries from being reused.
+cuda_cache_version_tag = 'v1'
+do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
+verbose = True # Print status messages to stdout.
+
+# Candidate host-compiler directories, probed in order by _find_compiler_bindir().
+compiler_bindir_search_path = [
+ 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.14.26428/bin/Hostx64/x64',
+ 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
+ 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
+]
+
+#----------------------------------------------------------------------------
+# Internal helper funcs.
+
+def _find_compiler_bindir():
+    """Return the first entry of `compiler_bindir_search_path` that is an existing directory, or None if there is none."""
+    existing_dirs = (candidate for candidate in compiler_bindir_search_path if os.path.isdir(candidate))
+    return next(existing_dirs, None)
+
+def _get_compute_cap(device):
+    """Extract the CUDA compute capability from a device description.
+
+    Args:
+        device: Object whose `physical_device_desc` string contains a
+            fragment of the form "compute capability: <major>.<minor>".
+
+    Returns:
+        Tuple `(major, minor)` of digit strings.
+
+    Raises:
+        RuntimeError: If the description contains no such fragment.
+    """
+    caps_str = device.physical_device_desc
+    # The dot is escaped so that only a literal "<major>.<minor>" separator is
+    # accepted; an unescaped "." would split e.g. "750" into ('7', '0').
+    m = re.search('compute capability: (\\d+)\\.(\\d+)', caps_str)
+    if m is None:
+        raise RuntimeError('Could not determine compute capability from device description: %r' % caps_str)
+    return (m.group(1), m.group(2))
+
+def _get_cuda_gpu_arch_string():
+    """Return the "sm_<major><minor>" architecture string for the first local GPU device.
+
+    Raises:
+        RuntimeError: If the local device list contains no device of type 'GPU'.
+    """
+    gpu_devices = [dev for dev in device_lib.list_local_devices() if dev.device_type == 'GPU']
+    if not gpu_devices:
+        raise RuntimeError('No GPU devices found')
+    major, minor = _get_compute_cap(gpu_devices[0])
+    return 'sm_%s%s' % (major, minor)
+
+def _run_cmd(cmd):
+    """Run `cmd` through `os.popen` and capture its output.
+
+    Raises:
+        RuntimeError: If closing the pipe yields a status other than None; the
+            message includes the command line and everything the command printed.
+    """
+    with os.popen(cmd) as pipe:
+        captured = pipe.read()
+        exit_status = pipe.close()
+    if exit_status is None:
+        return
+    raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, captured))
+
+def _prepare_nvcc_cli(opts):
+    """Assemble the nvcc command line for the given option string.
+
+    Adds warning suppression, the TensorFlow include directories, the host
+    compiler directory (when one is found) and a stderr-to-stdout redirect.
+
+    Raises:
+        RuntimeError: On Windows (`os.name == 'nt'`) when no host compiler directory is found.
+    """
+    tf_include = tf.sysconfig.get_include()
+    include_dirs = [
+        tf_include,
+        os.path.join(tf_include, 'external', 'protobuf_archive', 'src'),
+        os.path.join(tf_include, 'external', 'com_google_absl'),
+        os.path.join(tf_include, 'external', 'eigen_archive'),
+    ]
+
+    parts = ['nvcc ' + opts.strip(), ' --disable-warnings']
+    parts.extend(' --include-path "%s"' % include_dir for include_dir in include_dirs)
+
+    compiler_bindir = _find_compiler_bindir()
+    if compiler_bindir is not None:
+        parts.append(' --compiler-bindir "%s"' % compiler_bindir)
+    elif os.name == 'nt':
+        # Require that _find_compiler_bindir succeeds on Windows. Allow
+        # nvcc to use whatever is the default on Linux.
+        raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
+
+    parts.append(' 2>&1')
+    return ''.join(parts)
+
+#----------------------------------------------------------------------------
+# Main entry point.
+
+_plugin_cache = dict() # cuda_file => loaded plugin module
+
+def get_plugin(cuda_file):
+    """Compile (if necessary) and load the TensorFlow op library built from a CUDA source file.
+
+    The binary is stored under `cuda_cache_path`, with a file name that contains
+    an MD5 digest of the CUDA source, its preprocessed form (unless
+    `do_not_hash_included_headers` is set), the nvcc command line, `tf.VERSION`
+    and `cuda_cache_version_tag`. Loaded plugins are memoized per `cuda_file`
+    in `_plugin_cache`.
+
+    Args:
+        cuda_file: Path to the CUDA source file.
+
+    Returns:
+        The object returned by `tf.load_op_library()` for the compiled binary.
+
+    Raises:
+        RuntimeError: If an nvcc invocation reports failure (see `_run_cmd`).
+        Any other exception raised along the way is re-raised unchanged, after
+        printing 'Failed!' when `verbose` is set.
+    """
+    cuda_file_base = os.path.basename(cuda_file)
+    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
+
+    # Already in cache?
+    if cuda_file in _plugin_cache:
+        return _plugin_cache[cuda_file]
+
+    # Setup plugin.
+    if verbose:
+        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
+    try:
+        # Hash CUDA source.
+        md5 = hashlib.md5()
+        with open(cuda_file, 'rb') as f:
+            md5.update(f.read())
+        md5.update(b'\n')
+
+        # Hash headers included by the CUDA code by running it through the preprocessor.
+        if not do_not_hash_included_headers:
+            if verbose:
+                print('Preprocessing... ', end='', flush=True)
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
+                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
+                with open(tmp_file, 'rb') as f:
+                    # Replace the full source path by its base name so the hash does not depend on the file's location.
+                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros
+                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
+                    for ln in f:
+                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas
+                            ln = ln.replace(bad_file_str, good_file_str)
+                            md5.update(ln)
+                    md5.update(b'\n')
+
+        # Select compiler options.
+        compile_opts = ''
+        if os.name == 'nt':
+            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
+        elif os.name == 'posix':
+            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
+            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
+        else:
+            assert False # not Windows or Linux, w00t?
+        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
+        compile_opts += ' --use_fast_math'
+        nvcc_cmd = _prepare_nvcc_cli(compile_opts)
+
+        # Hash build configuration.
+        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
+        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
+        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
+
+        # Compile if not already compiled.
+        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
+        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
+        if not os.path.isfile(bin_file):
+            if verbose:
+                print('Compiling... ', end='', flush=True)
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
+                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
+                os.makedirs(cuda_cache_path, exist_ok=True)
+                # Copy under a unique temporary name first, then move into place in one step.
+                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
+                shutil.copyfile(tmp_file, intermediate_file)
+                # os.replace() overwrites an existing destination on every platform, whereas
+                # os.rename() fails on Windows if another process has already produced bin_file.
+                os.replace(intermediate_file, bin_file) # atomic
+
+        # Load.
+        if verbose:
+            print('Loading... ', end='', flush=True)
+        plugin = tf.load_op_library(bin_file)
+
+        # Add to cache.
+        _plugin_cache[cuda_file] = plugin
+        if verbose:
+            print('Done.', flush=True)
+        return plugin
+
+    except:
+        # Deliberately catches everything: the status line is completed, then the error propagates.
+        if verbose:
+            print('Failed!', flush=True)
+        raise
+
+#----------------------------------------------------------------------------
diff --git a/dnnlib/tflib/network.py b/dnnlib/tflib/network.py
new file mode 100755
index 0000000..409babb
--- /dev/null
+++ b/dnnlib/tflib/network.py
@@ -0,0 +1,590 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Helper for managing networks."""
+
+import types
+import inspect
+import re
+import uuid
+import sys
+import numpy as np
+import tensorflow as tf
+
+from collections import OrderedDict
+from typing import Any, List, Tuple, Union
+
+from . import tfutil
+from .. import util
+
+from .tfutil import TfExpression, TfExpressionEx
+
+_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import.
+_import_module_src = dict() # Source code for temporary modules created during pickle import.
+
+
+def import_handler(handler_func):
+ """Function decorator for declaring custom import handlers.
+
+ Appends `handler_func` to the module-level `_import_handlers` list and
+ returns it unchanged, so the decorated function stays directly usable.
+ """
+ _import_handlers.append(handler_func)
+ return handler_func
+
+
+class Network:
+ """Generic network abstraction.
+
+ Acts as a convenience wrapper for a parameterized network construction
+ function, providing several utility methods and convenient access to
+ the inputs/outputs/weights.
+
+ Network objects can be safely pickled and unpickled for long-term
+ archival purposes. The pickling works reliably as long as the underlying
+ network construction function is defined in a standalone Python module
+ that has no side effects or application-specific imports.
+
+ Args:
+ name: Network name. Used to select TensorFlow name and variable scopes.
+ func_name: Fully qualified name of the underlying network construction function, or a top-level function object.
+ static_kwargs: Keyword arguments to be passed in to the network construction function.
+
+ Attributes:
+ name: User-specified name, defaults to build func name if None.
+ scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.
+ static_kwargs: Arguments passed to the user-supplied build func.
+ components: Container for sub-networks. Passed to the build func, and retained between calls.
+ num_inputs: Number of input tensors.
+ num_outputs: Number of output tensors.
+ input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension.
+ output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension.
+ input_shape: Short-hand for input_shapes[0].
+ output_shape: Short-hand for output_shapes[0].
+ input_templates: Input placeholders in the template graph.
+ output_templates: Output tensors in the template graph.
+ input_names: Name string for each input.
+ output_names: Name string for each output.
+ own_vars: Variables defined by this network (local_name => var), excluding sub-networks.
+ vars: All variables (local_name => var).
+ trainables: All trainable variables (local_name => var).
+ var_global_to_local: Mapping from variable global names to local names.
+ """
+
+ def __init__(self, name: str = None, func_name: Any = None, **static_kwargs):
+     """Resolve the build function, build the template graph and initialize the network's own variables.
+
+     Args:
+         name: Network name, or None.
+         func_name: Fully qualified name of the build function, or a top-level function object.
+         static_kwargs: Pickleable keyword arguments stored as `static_kwargs`.
+     """
+     tfutil.assert_tf_initialized()
+     assert name is None or isinstance(name, str)
+     assert func_name is not None
+     assert isinstance(func_name, str) or util.is_top_level_function(func_name)
+     assert util.is_pickleable(static_kwargs)
+
+     self._init_fields()
+     self.name = name
+     self.static_kwargs = util.EasyDict(static_kwargs)
+
+     # Locate the user-specified network build function.
+     if util.is_top_level_function(func_name):
+         func_name = util.get_top_level_function_name(func_name)
+     module, self._build_func_name = util.get_module_from_obj_name(func_name)
+     self._build_func = util.get_obj_from_module(module, self._build_func_name)
+     assert callable(self._build_func)
+
+     # Dig up source code for the module containing the build function:
+     # use the source recorded for the module if there is one, else ask inspect.
+     recorded_src = _import_module_src.get(module, None)
+     self._build_module_src = recorded_src
+     if recorded_src is None:
+         self._build_module_src = inspect.getsource(module)
+
+     # Init TensorFlow graph.
+     self._init_graph()
+     self.reset_own_vars()
+
+ def _init_fields(self) -> None:
+     """Set every public and private attribute of the instance to its empty default."""
+     # Identity and configuration.
+     self.name = None
+     self.scope = None
+     self.static_kwargs = util.EasyDict()
+     self.components = util.EasyDict()
+
+     # Inputs and outputs.
+     self.num_inputs = self.num_outputs = 0
+     self.input_shapes = [[]]
+     self.output_shapes = [[]]
+     self.input_shape = []
+     self.output_shape = []
+     self.input_templates = []
+     self.output_templates = []
+     self.input_names = []
+     self.output_names = []
+
+     # Variable tables.
+     self.own_vars = OrderedDict()
+     self.vars = OrderedDict()
+     self.trainables = OrderedDict()
+     self.var_global_to_local = OrderedDict()
+
+     # Private state.
+     self._build_func = None # User-supplied build function that constructs the network.
+     self._build_func_name = None # Name of the build function.
+     self._build_module_src = None # Full source code of the module containing the build function.
+     self._run_cache = {} # Cached graph data for Network.run().
+
+ def _init_graph(self) -> None:
+ """Build the template graph and record inputs, outputs and variables on the instance.
+
+ The build function is called once with float32 placeholders as inputs.
+
+ Raises:
+ ValueError: If an input or output has an undefined rank, if a component
+ is not a Network, or if component names are not unique.
+ """
+ # Collect inputs.
+ self.input_names = []
+
+ # Network inputs are the build function's positional-or-keyword parameters without a default value.
+ for param in inspect.signature(self._build_func).parameters.values():
+ if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty:
+ self.input_names.append(param.name)
+
+ self.num_inputs = len(self.input_names)
+ assert self.num_inputs >= 1
+
+ # Choose name and scope.
+ if self.name is None:
+ self.name = self._build_func_name
+ assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
+ with tf.name_scope(None):
+ # The scope is the name made unique within the default graph.
+ self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True)
+
+ # Finalize build func kwargs.
+ build_kwargs = dict(self.static_kwargs)
+ build_kwargs["is_template_graph"] = True
+ build_kwargs["components"] = self.components
+
+ # Build template graph.
+ with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes
+ assert tf.get_variable_scope().name == self.scope
+ assert tf.get_default_graph().get_name_scope() == self.scope
+ with tf.control_dependencies(None): # ignore surrounding control dependencies
+ self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
+ out_expr = self._build_func(*self.input_templates, **build_kwargs)
+
+ # Collect outputs.
+ # A single expression becomes a one-element list; a tuple is converted to a list.
+ assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
+ self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
+ self.num_outputs = len(self.output_templates)
+ assert self.num_outputs >= 1
+ assert all(tfutil.is_tf_expression(t) for t in self.output_templates)
+
+ # Perform sanity checks.
+ if any(t.shape.ndims is None for t in self.input_templates):
+ raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.")
+ if any(t.shape.ndims is None for t in self.output_templates):
+ raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.")
+ if any(not isinstance(comp, Network) for comp in self.components.values()):
+ raise ValueError("Components of a Network must be Networks themselves.")
+ if len(self.components) != len(set(comp.name for comp in self.components.values())):
+ raise ValueError("Components of a Network must have unique names.")
+
+ # List inputs and outputs.
+ self.input_shapes = [t.shape.as_list() for t in self.input_templates]
+ self.output_shapes = [t.shape.as_list() for t in self.output_templates]
+ self.input_shape = self.input_shapes[0]
+ self.output_shape = self.output_shapes[0]
+ # Output name = last "/"-separated component of the tensor name, without the ":<index>" suffix.
+ self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates]
+
+ # List variables.
+ # Local name = global variable name with the "<scope>/" prefix and the ":<index>" suffix removed.
+ self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/"))
+ self.vars = OrderedDict(self.own_vars)
+ # Variables of sub-networks are exposed as "<component name>/<local name>".
+ self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items())
+ self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable)
+ self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items())
+
+ def reset_own_vars(self) -> None:
+     """Run the initializers of the variables in `own_vars`, i.e. excluding sub-networks."""
+     initializers = [variable.initializer for variable in self.own_vars.values()]
+     tfutil.run(initializers)
+
+ def reset_vars(self) -> None:
+     """Run the initializers of all variables in `vars`, i.e. including sub-networks."""
+     initializers = [variable.initializer for variable in self.vars.values()]
+     tfutil.run(initializers)
+
+ def reset_trainables(self) -> None:
+     """Run the initializers of all variables in `trainables`, i.e. including sub-networks."""
+     initializers = [variable.initializer for variable in self.trainables.values()]
+     tfutil.run(initializers)
+
+ def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]:
+ """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s).
+
+ Args:
+ in_expr: One expression per network input. Individual entries may be None, but not all of them.
+ return_as_list: If True, always return a list of output expressions.
+ dynamic_kwargs: Extra keyword arguments for the build function; they override `static_kwargs` entries of the same name.
+
+ Returns:
+ Whatever the build function returned (a single expression or a tuple), or a list when `return_as_list` is set.
+ """
+ assert len(in_expr) == self.num_inputs
+ assert not all(expr is None for expr in in_expr)
+
+ # Finalize build func kwargs.
+ build_kwargs = dict(self.static_kwargs)
+ build_kwargs.update(dynamic_kwargs)
+ build_kwargs["is_template_graph"] = False
+ build_kwargs["components"] = self.components
+
+ # Build TensorFlow graph to evaluate the network.
+ # Variables are reused from the network's own scope.
+ with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name):
+ assert tf.get_variable_scope().name == self.scope
+ valid_inputs = [expr for expr in in_expr if expr is not None]
+ final_inputs = []
+ for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes):
+ if expr is not None:
+ expr = tf.identity(expr, name=name)
+ else:
+ # Missing input: zeros with the leading dimension of the first given input and the remaining dimensions of the template shape.
+ expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name)
+ final_inputs.append(expr)
+ out_expr = self._build_func(*final_inputs, **build_kwargs)
+
+ # Propagate input shapes back to the user-specified expressions.
+ for expr, final in zip(in_expr, final_inputs):
+ if isinstance(expr, tf.Tensor):
+ expr.set_shape(final.shape)
+
+ # Express outputs in the desired format.
+ assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
+ if return_as_list:
+ out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
+ return out_expr
+
+ def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str:
+     """Get the local name of a given variable, without any surrounding name scopes.
+
+     Args:
+         var_or_global_name: A TensorFlow variable/expression of this network, or its
+             global name as a string, with or without a trailing ":<index>" suffix.
+
+     Returns:
+         The local name registered for the variable in `var_global_to_local`.
+
+     Raises:
+         KeyError: If the name does not belong to a variable of this network.
+     """
+     assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str)
+     global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name
+     # The keys of var_global_to_local carry no ":<index>" suffix, whereas a name read
+     # from a TensorFlow object does (e.g. "scope/weight:0"), so strip it before the lookup.
+     return self.var_global_to_local[global_name.split(":")[0]]
+
+ def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression:
+     """Look up a variable in `vars` by its local name; a TensorFlow expression is returned as-is."""
+     assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str)
+     if isinstance(var_or_local_name, str):
+         return self.vars[var_or_local_name]
+     return var_or_local_name
+
+ def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray:
+     """Evaluate a single variable and return its current value.
+     Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible."""
+     variable = self.find_var(var_or_local_name)
+     return variable.eval()
+
+ def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None:
+     """Assign `new_value` to a single variable.
+     Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible."""
+     variable = self.find_var(var_or_local_name)
+     tfutil.set_vars({variable: new_value})
+
+ def __getstate__(self) -> dict:
+     """Pickle export: name, kwargs, components, build-function source and name, and the values of the network's own variables."""
+     own_names = self.own_vars.keys()
+     return {
+         "version": 4,
+         "name": self.name,
+         "static_kwargs": dict(self.static_kwargs),
+         "components": dict(self.components),
+         "build_module_src": self._build_module_src,
+         "build_func_name": self._build_func_name,
+         # (local name, current value) pairs for the variables in own_vars.
+         "variables": list(zip(own_names, tfutil.run(list(self.own_vars.values())))),
+     }
+
+ def __setstate__(self, state: dict) -> None:
+ """Pickle import.
+
+ Rebuilds the network from `state`: re-creates the build function from the
+ stored module source, builds the template graph and restores the variable values.
+
+ NOTE(review): the stored source code is executed with exec(), so unpickling a
+ Network runs arbitrary code from the pickle. Only load pickles from trusted sources.
+ """
+ # pylint: disable=attribute-defined-outside-init
+ tfutil.assert_tf_initialized()
+ self._init_fields()
+
+ # Execute custom import handlers.
+ # Each handler receives the state dict and returns the (possibly modified) state.
+ for handler in _import_handlers:
+ state = handler(state)
+
+ # Set basic fields.
+ assert state["version"] in [2, 3, 4]
+ self.name = state["name"]
+ self.static_kwargs = util.EasyDict(state["static_kwargs"])
+ self.components = util.EasyDict(state.get("components", {}))
+ self._build_module_src = state["build_module_src"]
+ self._build_func_name = state["build_func_name"]
+
+ # Create temporary module from the imported source code.
+ # The module gets a unique name and is registered in sys.modules before its source is executed.
+ module_name = "_tflib_network_import_" + uuid.uuid4().hex
+ module = types.ModuleType(module_name)
+ sys.modules[module_name] = module
+ # Remember the source for this module so that Network.__init__ can find it without inspect.
+ _import_module_src[module] = self._build_module_src
+ exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used
+
+ # Locate network build function in the temporary module.
+ self._build_func = util.get_obj_from_module(module, self._build_func_name)
+ assert callable(self._build_func)
+
+ # Init TensorFlow graph.
+ self._init_graph()
+ self.reset_own_vars()
+ # Restore the pickled values on top of the freshly initialized variables.
+ tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]})
+
+ def clone(self, name: str = None, **new_static_kwargs) -> "Network":
+     """Build a second network from the same build function and copy this network's variables into it.
+
+     Args:
+         name: Name for the clone; defaults to this network's name.
+         new_static_kwargs: Entries added to (or overriding) the copied `static_kwargs`.
+     """
+     # pylint: disable=protected-access
+     twin = object.__new__(Network) # bypass __init__; fields are filled in by hand below
+     twin._init_fields()
+     twin.name = self.name if name is None else name
+     twin.static_kwargs = util.EasyDict(self.static_kwargs)
+     twin.static_kwargs.update(new_static_kwargs)
+     twin._build_module_src = self._build_module_src
+     twin._build_func_name = self._build_func_name
+     twin._build_func = self._build_func
+     twin._init_graph()
+     twin.copy_vars_from(self)
+     return twin
+
+ def copy_own_vars_from(self, src_net: "Network") -> None:
+     """Copy values from `src_net` for every own variable (sub-networks excluded) whose local name exists in both networks."""
+     shared_names = [local_name for local_name in self.own_vars.keys() if local_name in src_net.own_vars]
+     dst_to_src = {self.vars[local_name]: src_net.vars[local_name] for local_name in shared_names}
+     tfutil.set_vars(tfutil.run(dst_to_src))
+
+ def copy_vars_from(self, src_net: "Network") -> None:
+     """Copy values from `src_net` for every variable (sub-networks included) whose local name exists in both networks."""
+     shared_names = [local_name for local_name in self.vars.keys() if local_name in src_net.vars]
+     dst_to_src = {self.vars[local_name]: src_net.vars[local_name] for local_name in shared_names}
+     tfutil.set_vars(tfutil.run(dst_to_src))
+
+ def copy_trainables_from(self, src_net: "Network") -> None:
+     """Copy values from `src_net` for every trainable variable (sub-networks included) whose local name is trainable in both networks."""
+     shared_names = [local_name for local_name in self.trainables.keys() if local_name in src_net.trainables]
+     dst_to_src = {self.vars[local_name]: src_net.vars[local_name] for local_name in shared_names}
+     tfutil.set_vars(tfutil.run(dst_to_src))
+
+ def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network":
+     """Construct a new Network from `new_func_name` and copy over all variables with matching local names.
+
+     Args:
+         new_func_name: Build function for the new network.
+         new_name: Name of the new network; defaults to this network's name.
+         new_static_kwargs: Entries added to (or overriding) this network's `static_kwargs`.
+     """
+     merged_kwargs = dict(self.static_kwargs)
+     merged_kwargs.update(new_static_kwargs)
+     target_name = self.name if new_name is None else new_name
+     converted = Network(name=target_name, func_name=new_func_name, **merged_kwargs)
+     converted.copy_vars_from(self)
+     return converted
+
+ def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
+     """Construct a TensorFlow op that moves the variables of this network towards those of `src_net`.
+
+     Every variable that also exists in `src_net` is assigned
+     `tfutil.lerp(src_value, own_value, b)`, where `b` is `beta` for trainable
+     variables and `beta_nontrainable` for the rest. The assignments are
+     grouped into a single op, which is returned.
+     """
+     with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
+         update_ops = []
+         for local_name, dst_var in self.vars.items():
+             if local_name not in src_net.vars:
+                 continue
+             blend = beta if local_name in self.trainables else beta_nontrainable
+             blended = tfutil.lerp(src_net.vars[local_name], dst_var, blend)
+             update_ops.append(dst_var.assign(blended))
+         return tf.group(*update_ops)
+
+ def run(self,
+ *in_arrays: Tuple[Union[np.ndarray, None], ...],
+ input_transform: dict = None,
+ output_transform: dict = None,
+ return_as_list: bool = False,
+ print_progress: bool = False,
+ minibatch_size: int = None,
+ num_gpus: int = 1,
+ assume_frozen: bool = False,
+ **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
+ """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).
+
+ Args:
+ input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
+ The dict must contain a 'func' field that points to a top-level function. The function is called with the input
+ TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
+ output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
+ The dict must contain a 'func' field that points to a top-level function. The function is called with the output
+ TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
+ return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
+ print_progress: Print progress to the console? Useful for very large input arrays.
+ minibatch_size: Maximum minibatch size to use, None = disable batching.
+ num_gpus: Number of GPUs to use.
+ assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls.
+ dynamic_kwargs: Additional keyword arguments to be passed into the network build function.
+ """
+ assert len(in_arrays) == self.num_inputs
+ assert not all(arr is None for arr in in_arrays)
+ assert input_transform is None or util.is_top_level_function(input_transform["func"])
+ assert output_transform is None or util.is_top_level_function(output_transform["func"])
+ output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
+ num_items = in_arrays[0].shape[0]
+ if minibatch_size is None:
+ minibatch_size = num_items
+
+ # Construct unique hash key from all arguments that affect the TensorFlow graph.
+ key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)
+ def unwind_key(obj):
+ if isinstance(obj, dict):
+ return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
+ if callable(obj):
+ return util.get_top_level_function_name(obj)
+ return obj
+ key = repr(unwind_key(key))
+
+ # Build graph.
+ if key not in self._run_cache:
+ with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
+ with tf.device("/cpu:0"):
+ in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
+ in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))
+
+ out_split = []
+ for gpu in range(num_gpus):
+ with tf.device("/gpu:%d" % gpu):
+ net_gpu = self.clone() if assume_frozen else self
+ in_gpu = in_split[gpu]
+
+ if input_transform is not None:
+ in_kwargs = dict(input_transform)
+ in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
+ in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)
+
+ assert len(in_gpu) == self.num_inputs
+ out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)
+
+ if output_transform is not None:
+ out_kwargs = dict(output_transform)
+ out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
+ out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)
+
+ assert len(out_gpu) == self.num_outputs
+ out_split.append(out_gpu)
+
+ with tf.device("/cpu:0"):
+ out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
+ self._run_cache[key] = in_expr, out_expr
+
+ # Run minibatches.
+ in_expr, out_expr = self._run_cache[key]
+ out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]
+
+ for mb_begin in range(0, num_items, minibatch_size):
+ if print_progress:
+ print("\r%d / %d" % (mb_begin, num_items), end="")
+
+ mb_end = min(mb_begin + minibatch_size, num_items)
+ mb_num = mb_end - mb_begin
+ mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
+ mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))
+
+ for dst, src in zip(out_arrays, mb_out):
+ dst[mb_begin: mb_end] = src
+
+ # Done.
+ if print_progress:
+ print("\r%d / %d" % (num_items, num_items))
+
+ if not return_as_list:
+ out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
+ return out_arrays
+
+ def list_ops(self) -> List[TfExpression]:
+ include_prefix = self.scope + "/"
+ exclude_prefix = include_prefix + "_"
+ ops = tf.get_default_graph().get_operations()
+ ops = [op for op in ops if op.name.startswith(include_prefix)]
+ ops = [op for op in ops if not op.name.startswith(exclude_prefix)]
+ return ops
+
+ def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]:
+ """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to
+ individual layers of the network. Mainly intended to be used for reporting."""
+ layers = []  # filled in by recurse() below, one tuple per reported layer
+
+ def recurse(scope, parent_ops, parent_vars, level):  # walks the name-scope hierarchy one "/"-separated token at a time
+ # Ignore specific patterns.
+ if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]):
+ return
+
+ # Filter ops and vars by scope.
+ global_prefix = scope + "/"  # prefix used to match op names
+ local_prefix = global_prefix[len(self.scope) + 1:]  # same prefix without the leading "<self.scope>/", used to match the names in parent_vars
+ cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]]
+ cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]]
+ if not cur_ops and not cur_vars:
+ return
+
+ # Filter out all ops related to variables.
+ for var in [op for op in cur_ops if op.type.startswith("Variable")]:
+ var_prefix = var.name + "/"
+ cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)]  # keeps the variable op itself, drops everything nested under its name
+
+ # Scope does not contain ops as immediate children => recurse deeper.
+ contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops)  # Identity/Cast/Transpose do not count as direct children
+ if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1:  # level 0 (the network's own scope) is always expanded
+ visited = set()
+ for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]:
+ token = rel_name.split("/")[0]  # first path component below the current scope
+ if token not in visited:
+ recurse(global_prefix + token, cur_ops, cur_vars, level + 1)
+ visited.add(token)
+ return
+
+ # Report layer.
+ layer_name = scope[len(self.scope) + 1:]  # scope relative to the network scope
+ layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1]  # first output of the last op in cur_ops; falls back to the last variable when the scope has no ops
+ layer_trainables = [var for _name, var in cur_vars if var.trainable]
+ layers.append((layer_name, layer_output, layer_trainables))
+
+ recurse(self.scope, self.list_ops(), list(self.vars.items()), 0)  # start from the network's own scope with all of its ops and variables
+ return layers
+
+ def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None:
+ """Print a summary table of the network structure."""
+ rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]]
+ rows += [["---"] * 4]
+ total_params = 0
+
+ for layer_name, layer_output, layer_trainables in self.list_layers():
+ num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables)
+ weights = [var for var in layer_trainables if var.name.endswith("/weight:0")]
+ weights.sort(key=lambda x: len(x.name))
+ if len(weights) == 0 and len(layer_trainables) == 1:
+ weights = layer_trainables
+ total_params += num_params
+
+ if not hide_layers_with_no_params or num_params != 0:
+ num_params_str = str(num_params) if num_params > 0 else "-"
+ output_shape_str = str(layer_output.shape)
+ weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-"
+ rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]]
+
+ rows += [["---"] * 4]
+ rows += [["Total", str(total_params), "", ""]]
+
+ widths = [max(len(cell) for cell in column) for column in zip(*rows)]
+ print()
+ for row in rows:
+ print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths)))
+ print()
+
+ def setup_weight_histograms(self, title: str = None) -> None:
+ """Construct summary ops to include histograms of all trainable parameters in TensorBoard."""
+ if title is None:
+ title = self.name
+
+ with tf.name_scope(None), tf.device(None), tf.control_dependencies(None):
+ for local_name, var in self.trainables.items():
+ if "/" in local_name:
+ p = local_name.split("/")
+ name = title + "_" + p[-1] + "/" + "_".join(p[:-1])
+ else:
+ name = title + "_toplevel/" + local_name
+
+ tf.summary.histogram(name, var)
+
+#----------------------------------------------------------------------------
+# Backwards-compatible emulation of legacy output transformation in Network.run().
+
+_print_legacy_warning = True
+
+def _handle_legacy_output_transforms(output_transform, dynamic_kwargs):
+ global _print_legacy_warning
+ legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"]
+ if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs):
+ return output_transform, dynamic_kwargs
+
+ if _print_legacy_warning:
+ _print_legacy_warning = False
+ print()
+ print("WARNING: Old-style output transformations in Network.run() are deprecated.")
+ print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'")
+ print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.")
+ print()
+ assert output_transform is None
+
+ new_kwargs = dict(dynamic_kwargs)
+ new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs}
+ new_transform["func"] = _legacy_output_transform_func
+ return new_transform, new_kwargs
+
+def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None):
+ if out_mul != 1.0:
+ expr = [x * out_mul for x in expr]
+
+ if out_add != 0.0:
+ expr = [x + out_add for x in expr]
+
+ if out_shrink > 1:
+ ksize = [1, 1, out_shrink, out_shrink]
+ expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr]
+
+ if out_dtype is not None:
+ if tf.as_dtype(out_dtype).is_integer:
+ expr = [tf.round(x) for x in expr]
+ expr = [tf.saturate_cast(x, out_dtype) for x in expr]
+ return expr
diff --git a/dnnlib/tflib/ops/__init__.py b/dnnlib/tflib/ops/__init__.py
new file mode 100755
index 0000000..9ab9908
--- /dev/null
+++ b/dnnlib/tflib/ops/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+# empty
diff --git a/dnnlib/tflib/ops/fused_bias_act.cu b/dnnlib/tflib/ops/fused_bias_act.cu
new file mode 100755
index 0000000..1102f62
--- /dev/null
+++ b/dnnlib/tflib/ops/fused_bias_act.cu
@@ -0,0 +1,188 @@
+// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+//
+// This work is made available under the Nvidia Source Code License-NC.
+// To view a copy of this license, visit
+// https://nvlabs.github.io/stylegan2/license.html
+
+#define EIGEN_USE_GPU
+#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include
+
+using namespace tensorflow;
+using namespace tensorflow::shape_inference;
+
+#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) // Evaluates CUDA_CALL once and reports any result other than cudaSuccess through OP_REQUIRES as an Internal error named after the CUDA error.
+
+//------------------------------------------------------------------------
+// CUDA kernel.
+
+template // NOTE(review): the template parameter list is missing in this copy; the fields below use a type named T -- confirm against the original file.
+struct FusedBiasActKernelParams // Argument block for FusedBiasActKernel: data pointers, op attributes and sizes derived from the input tensors.
+{
+ const T* x; // [sizeX] input values
+ const T* b; // [sizeB] or NULL; bias values, NULL when the bias tensor has no elements
+ const T* ref; // [sizeX] or NULL; reference values read by the gradient cases of the kernel
+ T* y; // [sizeX] output values
+
+ int grad; // 'grad' attribute; combined with act to pick the kernel case (0, 1 and 2 are handled explicitly)
+ int axis; // 'axis' attribute; dimension of x that b runs along
+ int act; // 'act' attribute; activation selector (1..9 have explicit kernel cases)
+ float alpha; // 'alpha' attribute; slope used by the lrelu cases
+ float gain; // 'gain' attribute; multiplied into the result before it is stored
+
+ int sizeX; // number of elements in x
+ int sizeB; // number of elements in b
+ int stepB; // product of the dimensions of x that follow axis
+ int loopX; // number of elements each thread processes
+};
+
+template // NOTE(review): the template parameter list is missing in this copy; the body casts to a type named T -- confirm against the original file.
+static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams p) // Per element: add bias, evaluate the case selected by act and grad, scale by gain, store to y.
+{
+ const float expRange = 80.0f; // inputs beyond +/-expRange take the saturated value instead of the expf()-based formula
+ const float halfExpRange = 40.0f; // same idea, used by the swish gradient cases
+ const float seluScale = 1.0507009873554804934193349852946f;
+ const float seluAlpha = 1.6732632423543772848170429916717f;
+
+ // Loop over elements.
+ int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; // each block covers loopX * blockDim.x consecutive elements
+ for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
+ {
+ // Load and apply bias.
+ float x = (float)p.x[xi];
+ if (p.b)
+ x += (float)p.b[(xi / p.stepB) % p.sizeB]; // maps the flat index xi to its position along the bias axis
+ float ref = (p.ref) ? (float)p.ref[xi] : 0.0f;
+ if (p.gain != 0.0f & p.act != 9) // '!=' binds tighter than '&', so this reads (gain != 0) & (act != 9)
+ ref /= p.gain; // undo the gain on ref for every activation except act 9 (swish) -- presumably because swish uses the input rather than the gained output as ref; confirm
+
+ // Evaluate activation func.
+ float y;
+ switch (p.act * 10 + p.grad) // key = act * 10 + grad: ?0 uses x only, ?1 and ?2 additionally read ref
+ {
+ // linear
+ default: // any act/grad combination without its own case falls through to y = x
+ case 10: y = x; break;
+ case 11: y = x; break;
+ case 12: y = 0.0f; break;
+
+ // relu
+ case 20: y = (x > 0.0f) ? x : 0.0f; break;
+ case 21: y = (ref > 0.0f) ? x : 0.0f; break;
+ case 22: y = 0.0f; break;
+
+ // lrelu
+ case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
+ case 31: y = (ref > 0.0f) ? x : x * p.alpha; break;
+ case 32: y = 0.0f; break;
+
+ // tanh
+ case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
+ case 41: y = x * (1.0f - ref * ref); break;
+ case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break;
+
+ // sigmoid
+ case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
+ case 51: y = x * ref * (1.0f - ref); break;
+ case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break;
+
+ // elu
+ case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
+ case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break;
+ case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break;
+
+ // selu
+ case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
+ case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break;
+ case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break;
+
+ // softplus
+ case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
+ case 81: y = x * (1.0f - expf(-ref)); break;
+ case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break;
+
+ // swish
+ case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
+ case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break;
+ case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break;
+ }
+
+ // Apply gain and store.
+ p.y[xi] = (T)(y * p.gain); // the arithmetic above is done in float; the result is cast back to T here
+ }
+}
+
+//------------------------------------------------------------------------
+// TensorFlow op.
+
+template // NOTE(review): the template parameter list is missing in this copy (as are the template arguments on the params type and the flat() calls) -- confirm against the original file.
+struct FusedBiasActOp : public OpKernel // TensorFlow op kernel that validates its inputs and launches FusedBiasActKernel.
+{
+ FusedBiasActKernelParams m_attribs; // attribute values read once in the constructor; copied into a fresh params struct on every Compute()
+
+ FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx) // reads the op attributes and rejects negative grad/axis/act
+ {
+ memset(&m_attribs, 0, sizeof(m_attribs)); // pointers and sizes start out zeroed; they are filled in by Compute()
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
+ OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
+ OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
+ OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
+ OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
+ }
+
+ void Compute(OpKernelContext* ctx) // validates the three inputs, allocates y with the shape of x and launches the kernel on the op's stream
+ {
+ FusedBiasActKernelParams p = m_attribs;
+ cudaStream_t stream = ctx->eigen_device().stream();
+
+ const Tensor& x = ctx->input(0); // [...]
+ const Tensor& b = ctx->input(1); // [sizeB] or [0]
+ const Tensor& ref = ctx->input(2); // x.shape or [0]
+ p.x = x.flat().data();
+ p.b = (b.NumElements()) ? b.flat().data() : NULL; // an empty b means "no bias"
+ p.ref = (ref.NumElements()) ? ref.flat().data() : NULL; // an empty ref means "no reference tensor"
+ OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
+ OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
+ OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
+ OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements")); // ref must be empty for grad == 0 and match x otherwise
+ OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large")); // the kernel indexes elements with int
+
+ p.sizeX = (int)x.NumElements();
+ p.sizeB = (int)b.NumElements();
+ p.stepB = 1;
+ for (int i = m_attribs.axis + 1; i < x.dims(); i++) // stepB = product of the dimensions after axis
+ p.stepB *= (int)x.dim_size(i);
+
+ Tensor* y = NULL; // x.shape
+ OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
+ p.y = y->flat().data();
+
+ p.loopX = 4; // elements per thread
+ int blockSize = 4 * 32; // threads per block
+ int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; // number of blocks, each covering loopX * blockSize elements
+ void* args[] = {&p};
+ OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel, gridSize, blockSize, args, 0, stream));
+ }
+};
+
+REGISTER_OP("FusedBiasAct") // Op interface: three inputs and one output of the same type T, plus the attributes read by FusedBiasActOp.
+ .Input ("x: T")
+ .Input ("b: T")
+ .Input ("ref: T")
+ .Output ("y: T")
+ .Attr ("T: {float, half}")
+ .Attr ("grad: int = 0")
+ .Attr ("axis: int = 1")
+ .Attr ("act: int = 0")
+ .Attr ("alpha: float = 0.0")
+ .Attr ("gain: float = 1.0");
+REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp); // NOTE(review): this line and the next are identical in this copy; the template arguments that would distinguish the two registrations (presumably one per type allowed for T) appear to be missing -- confirm.
+REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp);
+
+//------------------------------------------------------------------------
diff --git a/dnnlib/tflib/ops/fused_bias_act.py b/dnnlib/tflib/ops/fused_bias_act.py
new file mode 100755
index 0000000..52f6bfd
--- /dev/null
+++ b/dnnlib/tflib/ops/fused_bias_act.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Custom TensorFlow ops for efficient bias and activation."""
+
+import os
+import numpy as np
+import tensorflow as tf
+from .. import custom_ops
+from ...util import EasyDict
+
+def _get_plugin():
+ return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
+
+#----------------------------------------------------------------------------
+
+activation_funcs = {  # Per-activation spec. func: reference implementation; def_alpha / def_gain: values used when the caller passes None; cuda_idx: value passed as `act` to the CUDA op; ref: whether the gradient ops receive the forward input ('x') or output ('y') as `ref`; zero_2nd_grad: selects the variant without a second-order term for x.
+ 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True),
+ 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True),
+ 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True),  # the only entry whose func reads alpha
+ 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False),
+ 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False),
+ 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False),
+ 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False),
+ 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False),
+ 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False),  # the only entry that uses the forward input as ref
+}
+
+#----------------------------------------------------------------------------
+
+def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'):
+ r"""Fused bias and activation function.
+
+ Adds bias `b` to activation tensor `x`, evaluates activation function `act`,
+ and scales the result by `gain`. Each of the steps is optional. In most cases,
+ the fused op is considerably more efficient than performing the same calculation
+ using standard TensorFlow ops. It supports first and second order gradients,
+ but not third order gradients.
+
+ Args:
+ x: Input activation tensor. Can have any shape, but if `b` is defined, the
+ dimension corresponding to `axis`, as well as the rank, must be known.
+ b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type
+ as `x`. The shape must be known, and it must match the dimension of `x`
+ corresponding to `axis`.
+ axis: The dimension in `x` corresponding to the elements of `b`.
+ The value of `axis` is ignored if `b` is not specified.
+ act: Name of the activation function to evaluate, or `"linear"` to disable.
+ Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc.
+ See `activation_funcs` for a full list. `None` is not allowed.
+ alpha: Shape parameter for the activation function, or `None` to use the default.
+ gain: Scaling factor for the output tensor, or `None` to use default.
+ See `activation_funcs` for the default scaling of each activation function.
+ If unsure, consider specifying `1.0`.
+ impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
+
+ Returns:
+ Tensor of the same shape and datatype as `x`.
+ """
+
+ impl_dict = {
+ 'ref': _fused_bias_act_ref,
+ 'cuda': _fused_bias_act_cuda,
+ }
+ return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
+
+#----------------------------------------------------------------------------
+
+def _fused_bias_act_ref(x, b, axis, act, alpha, gain):
+ """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops."""
+
+ # Validate arguments.
+ x = tf.convert_to_tensor(x)
+ b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype)
+ act_spec = activation_funcs[act]
+ assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
+ assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
+ if alpha is None:
+ alpha = act_spec.def_alpha
+ if gain is None:
+ gain = act_spec.def_gain
+
+ # Add bias.
+ if b.shape[0] != 0:
+ x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)])
+
+ # Evaluate activation function.
+ x = act_spec.func(x, alpha=alpha)
+
+ # Scale by gain.
+ if gain != 1:
+ x *= gain
+ return x
+
+#----------------------------------------------------------------------------
+
+def _fused_bias_act_cuda(x, b, axis, act, alpha, gain):
+ """Fast CUDA implementation of `fused_bias_act()` using custom ops."""
+
+ # Validate arguments.
+ x = tf.convert_to_tensor(x)
+ empty_tensor = tf.constant([], dtype=x.dtype)
+ b = tf.convert_to_tensor(b) if b is not None else empty_tensor
+ act_spec = activation_funcs[act]
+ assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
+ assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
+ if alpha is None:
+ alpha = act_spec.def_alpha
+ if gain is None:
+ gain = act_spec.def_gain
+
+ # Special cases.
+ if act == 'linear' and b is None and gain == 1.0:
+ return x
+ if act_spec.cuda_idx is None:
+ return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
+
+ # CUDA kernel.
+ cuda_kernel = _get_plugin().fused_bias_act
+ cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain)
+
+ # Forward pass: y = func(x, b).
+ def func_y(x, b):
+ y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs)
+ y.set_shape(x.shape)
+ return y
+
+ # Backward pass: dx, db = grad(dy, x, y)
+ def grad_dx(dy, x, y):
+ ref = {'x': x, 'y': y}[act_spec.ref]
+ dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs)
+ dx.set_shape(x.shape)
+ return dx
+ def grad_db(dx):
+ if b.shape[0] == 0:
+ return empty_tensor
+ db = dx
+ if axis < x.shape.rank - 1:
+ db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank)))
+ if axis > 0:
+ db = tf.reduce_sum(db, list(range(axis)))
+ db.set_shape(b.shape)
+ return db
+
+ # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y)
+ def grad2_d_dy(d_dx, d_db, x, y):
+ ref = {'x': x, 'y': y}[act_spec.ref]
+ d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs)
+ d_dy.set_shape(x.shape)
+ return d_dy
+ def grad2_d_x(d_dx, d_db, x, y):
+ ref = {'x': x, 'y': y}[act_spec.ref]
+ d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs)
+ d_x.set_shape(x.shape)
+ return d_x
+
+ # Fast version for piecewise-linear activation funcs.
+ @tf.custom_gradient
+ def func_zero_2nd_grad(x, b):
+ y = func_y(x, b)
+ @tf.custom_gradient
+ def grad(dy):
+ dx = grad_dx(dy, x, y)
+ db = grad_db(dx)
+ def grad2(d_dx, d_db):
+ d_dy = grad2_d_dy(d_dx, d_db, x, y)
+ return d_dy
+ return (dx, db), grad2
+ return y, grad
+
+ # Slow version for general activation funcs.
+ @tf.custom_gradient
+ def func_nonzero_2nd_grad(x, b):
+ y = func_y(x, b)
+ def grad_wrap(dy):
+ @tf.custom_gradient
+ def grad_impl(dy, x):
+ dx = grad_dx(dy, x, y)
+ db = grad_db(dx)
+ def grad2(d_dx, d_db):
+ d_dy = grad2_d_dy(d_dx, d_db, x, y)
+ d_x = grad2_d_x(d_dx, d_db, x, y)
+ return d_dy, d_x
+ return (dx, db), grad2
+ return grad_impl(dy, x)
+ return y, grad_wrap
+
+ # Which version to use?
+ if act_spec.zero_2nd_grad:
+ return func_zero_2nd_grad(x, b)
+ return func_nonzero_2nd_grad(x, b)
+
+#----------------------------------------------------------------------------
diff --git a/dnnlib/tflib/ops/upfirdn_2d.cu b/dnnlib/tflib/ops/upfirdn_2d.cu
new file mode 100755
index 0000000..b97ef36
--- /dev/null
+++ b/dnnlib/tflib/ops/upfirdn_2d.cu
@@ -0,0 +1,326 @@
+// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+//
+// This work is made available under the Nvidia Source Code License-NC.
+// To view a copy of this license, visit
+// https://nvlabs.github.io/stylegan2/license.html
+
+#define EIGEN_USE_GPU
+#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include
+
+using namespace tensorflow;
+using namespace tensorflow::shape_inference;
+
+//------------------------------------------------------------------------
+// Helpers.
+
+#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) // Evaluates CUDA_CALL once and reports any result other than cudaSuccess through OP_REQUIRES as an Internal error named after the CUDA error.
+
+static __host__ __device__ __forceinline__ int floorDiv(int a, int b)
+{
+    // Integer division rounds toward zero; step the quotient down by one whenever
+    // multiplying it back overshoots a, i.e. when a negative result was rounded up.
+    const int quotient = a / b;
+    return (quotient * b > a) ? quotient - 1 : quotient;
+}
+
+//------------------------------------------------------------------------
+// CUDA kernel params.
+
+template // NOTE(review): the template parameter list is missing in this copy; the fields below use a type named T -- confirm against the original file.
+struct UpFirDn2DKernelParams // Argument block shared by the UpFirDn2D kernels: data pointers, op attributes and tensor dimensions.
+{
+ const T* x; // [majorDim, inH, inW, minorDim]
+ const T* k; // [kernelH, kernelW]
+ T* y; // [majorDim, outH, outW, minorDim]
+
+ int upx; // 'upx' attribute, validated to be >= 1
+ int upy; // 'upy' attribute, validated to be >= 1
+ int downx; // 'downx' attribute, validated to be >= 1
+ int downy; // 'downy' attribute, validated to be >= 1
+ int padx0; // 'padx0' attribute; enters the kernels' X receptive-field offset
+ int padx1; // 'padx1' attribute; not read by the kernels visible here
+ int pady0; // 'pady0' attribute; enters the kernels' Y receptive-field offset
+ int pady1; // 'pady1' attribute; not read by the kernels visible here
+
+ int majorDim; // dimension 0 of x
+ int inH; // dimension 1 of x
+ int inW; // dimension 2 of x
+ int minorDim; // dimension 3 of x
+ int kernelH; // dimension 0 of k
+ int kernelW; // dimension 1 of k
+ int outH; // dimension 1 of y
+ int outW; // dimension 2 of y
+ int loopMajor; // number of consecutive major indices handled per blockIdx.z
+ int loopX; // number of X steps (pixels or tiles) handled per thread/block along blockIdx.y
+};
+
+//------------------------------------------------------------------------
+// General CUDA implementation for large filter kernels.
+
+template // NOTE(review): the template parameter list is missing in this copy; the body uses a type named T -- confirm against the original file.
+static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams p) // Each thread accumulates whole output pixels by reading x and k directly through the pointers in p.
+{
+ // Calculate thread index.
+ int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; // the x grid dimension enumerates (outY, minorIdx) pairs
+ int outY = minorIdx / p.minorDim;
+ minorIdx -= outY * p.minorDim;
+ int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; // first output column of this thread; later ones are blockDim.y apart
+ int majorIdxBase = blockIdx.z * p.loopMajor;
+ if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim)
+ return;
+
+ // Setup Y receptive field.
+ int midY = outY * p.downy + p.upy - 1 - p.pady0;
+ int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); // first input row, clamped to [0, inH]
+ int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; // number of input rows that contribute
+ int kernelY = midY + p.kernelH - (inY + 1) * p.upy; // kernel row paired with the first input row
+
+ // Loop over majorDim and outX.
+ for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++)
+ for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y)
+ {
+ // Setup X receptive field.
+ int midX = outX * p.downx + p.upx - 1 - p.padx0;
+ int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); // first input column, clamped to [0, inW]
+ int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; // number of input columns that contribute
+ int kernelX = midX + p.kernelW - (inX + 1) * p.upx; // kernel column paired with the first input column
+
+ // Initialize pointers.
+ const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
+ const T* kp = &p.k[kernelY * p.kernelW + kernelX];
+ int xpx = p.minorDim; // input step per column
+ int kpx = -p.upx; // kernel step per input column (walks backwards through the kernel)
+ int xpy = p.inW * p.minorDim; // input step per row
+ int kpy = -p.upy * p.kernelW; // kernel step per input row (walks backwards through the kernel)
+
+ // Inner loop.
+ float v = 0.0f; // accumulate in float regardless of T
+ for (int y = 0; y < h; y++)
+ {
+ for (int x = 0; x < w; x++)
+ {
+ v += (float)(*xp) * (float)(*kp);
+ xp += xpx;
+ kp += kpx;
+ }
+ xp += xpy - w * xpx; // rewind the w column steps and move to the next row
+ kp += kpy - w * kpx;
+ }
+
+ // Store result.
+ p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
+ }
+}
+
+//------------------------------------------------------------------------
+// Specialized CUDA implementation for small filter kernels.
+
+template // NOTE(review): the template parameter list is missing in this copy. The body uses T, upx, upy, downx, downy, kernelW, kernelH, tileOutW and tileOutH without the `p.` prefix, so they are presumably compile-time template parameters -- confirm against the original file.
+static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams p) // Each block stages the filter and one input tile in __shared__ arrays, then computes a tileOutH x tileOutW tile of outputs from them.
+{
+ //assert(kernelW % upx == 0);
+ //assert(kernelH % upy == 0);
+ const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1; // input columns needed for one output tile
+ const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1; // input rows needed for one output tile
+ __shared__ volatile float sk[kernelH][kernelW]; // filter taps, stored flipped and zero-padded (see the load loop below)
+ __shared__ volatile float sx[tileInH][tileInW]; // input tile, zero outside the input bounds
+
+ // Calculate tile index.
+ int minorIdx = blockIdx.x; // the x grid dimension enumerates (tile row, minorIdx) pairs
+ int tileOutY = minorIdx / p.minorDim;
+ minorIdx -= tileOutY * p.minorDim;
+ tileOutY *= tileOutH;
+ int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
+ int majorIdxBase = blockIdx.z * p.loopMajor;
+ if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim) // comparisons bind tighter than '|', so this is a non-short-circuit OR of the three tests
+ return;
+
+ // Load filter kernel (flipped).
+ for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
+ {
+ int ky = tapIdx / kernelW;
+ int kx = tapIdx - ky * kernelW;
+ float v = 0.0f;
+ if (kx < p.kernelW & ky < p.kernelH) // taps beyond the runtime kernel size stay zero
+ v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)];
+ sk[ky][kx] = v;
+ }
+
+ // Loop over majorDim and outX.
+ for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
+ for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
+ {
+ // Load input pixels.
+ int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
+ int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
+ int tileInX = floorDiv(tileMidX, upx); // input column of the tile's first element (may be negative)
+ int tileInY = floorDiv(tileMidY, upy); // input row of the tile's first element (may be negative)
+ __syncthreads(); // barrier before sx is overwritten
+ for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
+ {
+ int relInY = inIdx / tileInW;
+ int relInX = inIdx - relInY * tileInW;
+ int inX = relInX + tileInX;
+ int inY = relInY + tileInY;
+ float v = 0.0f;
+ if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH) // positions outside the input contribute zero
+ v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
+ sx[relInY][relInX] = v;
+ }
+
+ // Loop over output pixels.
+ __syncthreads(); // barrier before sx (and, on the first pass, sk) is read
+ for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
+ {
+ int relOutY = outIdx / tileOutW;
+ int relOutX = outIdx - relOutY * tileOutW;
+ int outX = relOutX + tileOutX;
+ int outY = relOutY + tileOutY;
+
+ // Setup receptive field.
+ int midX = tileMidX + relOutX * downx;
+ int midY = tileMidY + relOutY * downy;
+ int inX = floorDiv(midX, upx);
+ int inY = floorDiv(midY, upy);
+ int relInX = inX - tileInX; // position inside sx
+ int relInY = inY - tileInY; // position inside sx
+ int kernelX = (inX + 1) * upx - midX - 1; // flipped
+ int kernelY = (inY + 1) * upy - midY - 1; // flipped
+
+ // Inner loop.
+ float v = 0.0f;
+ #pragma unroll
+ for (int y = 0; y < kernelH / upy; y++)
+ #pragma unroll
+ for (int x = 0; x < kernelW / upx; x++)
+ v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx];
+
+ // Store result.
+ if (outX < p.outW & outY < p.outH) // tiles may extend past the output edge
+ p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TensorFlow op.
+
// TensorFlow op kernel that validates the inputs/attributes of the UpFirDn2D
// op, computes the output shape, picks a CUDA kernel variant and launches it
// on the op's GPU stream.
//
// NOTE(review): template parameter/argument lists (after `template`, on
// `UpFirDn2DKernelParams`, `eigen_device`, `flat`, and the kernel names) look
// like they were stripped from this view of the file -- confirm against the
// original source before compiling.
template
struct UpFirDn2DOp : public OpKernel
{
    // Attribute values read once at construction; copied into the per-call
    // parameter struct in Compute().
    UpFirDn2DKernelParams m_attribs;

    // Reads the up/down factors and paddings from the op attributes and
    // rejects up/down factors smaller than 1.
    UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx)
    {
        memset(&m_attribs, 0, sizeof(m_attribs));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1));
        OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1"));
        OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1"));
    }

    // Validates the input tensors, allocates the output and launches the
    // selected CUDA kernel. Fails the op with InvalidArgument when ranks are
    // wrong, a tensor has more than kint32max elements, the kernel is empty,
    // or the computed output would be smaller than 1x1.
    void Compute(OpKernelContext* ctx)
    {
        // Start from the construction-time attributes; per-call fields are filled in below.
        UpFirDn2DKernelParams p = m_attribs;
        cudaStream_t stream = ctx->eigen_device().stream();

        const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim]
        const Tensor& k = ctx->input(1); // [kernelH, kernelW]
        p.x = x.flat().data();
        p.k = k.flat().data();
        OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4"));
        OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2"));
        // Element counts must fit in int because the dimensions are cast to int below.
        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large"));
        OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large"));

        p.majorDim = (int)x.dim_size(0);
        p.inH = (int)x.dim_size(1);
        p.inW = (int)x.dim_size(2);
        p.minorDim = (int)x.dim_size(3);
        p.kernelH = (int)k.dim_size(0);
        p.kernelW = (int)k.dim_size(1);
        OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1"));

        // Output size: upsampled-and-padded extent minus the kernel footprint,
        // divided by the downsampling factor (adding `down` before dividing
        // folds in the "+ 1" of the usual valid-convolution size formula).
        p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx;
        p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy;
        OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1"));

        Tensor* y = NULL; // [majorDim, outH, outW, minorDim]
        TensorShape ys;
        ys.AddDim(p.majorDim);
        ys.AddDim(p.outH);
        ys.AddDim(p.outW);
        ys.AddDim(p.minorDim);
        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y));
        p.y = y->flat().data();
        OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large"));

        // Choose CUDA kernel to use.
        // The "large" kernel is the fallback; tileOutW/tileOutH stay at -1 for it.
        // Each matching condition below overwrites the previous choice, so the
        // last matching line (the tightest kernel-size bound) wins.
        // NOTE(review): in this view every branch assigns the same symbol, which
        // suggests per-branch template arguments were lost -- confirm.
        void* cudaKernel = (void*)UpFirDn2DKernel_large;
        int tileOutW = -1;
        int tileOutH = -1;
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; }
        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; }

        // Choose launch params.
        // In both cases the grid's z extent is majorDim split into chunks of
        // loopMajor, with loopMajor chosen so that z never exceeds 16384.
        dim3 blockSize;
        dim3 gridSize;
        if (tileOutW > 0 && tileOutH > 0) // small
        {
            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
            p.loopX = 1;
            blockSize = dim3(32 * 8, 1, 1);
            // grid.x covers (row tiles x minorDim), grid.y covers column tiles.
            gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1);
        }
        else // large
        {
            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
            p.loopX = 4;
            blockSize = dim3(4, 32, 1);
            // grid.x covers outH * minorDim, grid.y covers outW in steps of loopX * blockSize.y.
            gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1);
        }

        // Launch CUDA kernel.
        // The parameter struct is the kernel's single argument; no dynamic shared memory is requested.
        void* args[] = {&p};
        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream));
    }
};
+
// Op interface: input image batch `x`, FIR filter `k`, output `y`, all of the
// same element type T (float or half). Up/down factors default to 1 and all
// paddings default to 0.
REGISTER_OP("UpFirDn2D")
    .Input ("x: T")
    .Input ("k: T")
    .Output ("y: T")
    .Attr ("T: {float, half}")
    .Attr ("upx: int = 1")
    .Attr ("upy: int = 1")
    .Attr ("downx: int = 1")
    .Attr ("downy: int = 1")
    .Attr ("padx0: int = 0")
    .Attr ("padx1: int = 0")
    .Attr ("pady0: int = 0")
    .Attr ("pady1: int = 0");
// GPU kernel registrations.
// NOTE(review): the two lines below are identical in this view; presumably each
// originally carried a different template argument (one per type allowed by
// attr T) that was stripped -- confirm against the original source.
REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp);
REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint("T"), UpFirDn2DOp);
+
+//------------------------------------------------------------------------
diff --git a/dnnlib/tflib/ops/upfirdn_2d.py b/dnnlib/tflib/ops/upfirdn_2d.py
new file mode 100755
index 0000000..fd23777
--- /dev/null
+++ b/dnnlib/tflib/ops/upfirdn_2d.py
@@ -0,0 +1,364 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Custom TensorFlow ops for efficient resampling of 2D images."""
+
+import os
+import numpy as np
+import tensorflow as tf
+from .. import custom_ops
+
def _get_plugin():
    """Return the custom-op plugin for the `.cu` file that shares this module's base name."""
    module_base, _extension = os.path.splitext(__file__)
    cuda_source = module_base + '.cu'
    return custom_ops.get_plugin(cuda_source)
+
+#----------------------------------------------------------------------------
+
def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
    r"""Upsample, pad, FIR filter, and downsample a batch of 2D images.

    Takes a batch of 2D images shaped `[majorDim, inH, inW, minorDim]` and
    hands it to the implementation selected by `impl`. Every image (batched
    over `majorDim` and `minorDim`) goes through these steps, in the order
    used by the reference implementation:

    1. Upsample by inserting zeros after each pixel (`upx`, `upy`).

    2. Pad with zeros by the given number of pixels on each side
       (`padx0`, `padx1`, `pady0`, `pady1`). A negative value crops instead.

    3. Convolve with the 2D FIR filter `k`, keeping only output pixels whose
       footprint lies entirely inside the padded image.

    4. Downsample by keeping every `downx`-th / `downy`-th pixel.

    The operation is closely related to scipy.signal.upfirdn().

    Args:
        x:      Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
        k:      2D FIR filter of the shape `[firH, firW]`.
        upx:    Integer upsampling factor along the X-axis (default: 1).
        upy:    Integer upsampling factor along the Y-axis (default: 1).
        downx:  Integer downsampling factor along the X-axis (default: 1).
        downy:  Integer downsampling factor along the Y-axis (default: 1).
        padx0:  Number of pixels to pad on the left side (default: 0).
        padx1:  Number of pixels to pad on the right side (default: 0).
        pady0:  Number of pixels to pad on the top side (default: 0).
        pady1:  Number of pixels to pad on the bottom side (default: 0).
        impl:   Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`.

    Raises:
        KeyError: If `impl` is not one of the known implementation names.
    """

    implementations = {
        'ref':  _upfirdn_2d_ref,
        'cuda': _upfirdn_2d_cuda,
    }
    selected = implementations[impl]
    return selected(
        x=x, k=k,
        upx=upx, upy=upy,
        downx=downx, downy=downy,
        padx0=padx0, padx1=padx1,
        pady0=pady0, pady1=pady1)
+
+#----------------------------------------------------------------------------
+
def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
    """Reference version of `upfirdn_2d()` built only from stock TensorFlow ops (slow)."""

    x = tf.convert_to_tensor(x)
    k = np.asarray(k, dtype=np.float32)
    assert x.shape.rank == 4
    inH = x.shape[1].value
    inW = x.shape[2].value
    minorDim = _shape(x, 3)
    kernelH, kernelW = k.shape
    assert inW >= 1 and inH >= 1
    assert kernelW >= 1 and kernelH >= 1
    for int_arg in (upx, upy, downx, downy, padx0, padx1, pady0, pady1):
        assert isinstance(int_arg, int)

    # Static sizes after each stage.
    upH = inH * upy
    upW = inW * upx
    paddedH = upH + pady0 + pady1
    paddedW = upW + padx0 + padx1

    # Zero-insertion upsampling: give every pixel its own unit axis, then
    # zero-pad that axis to the upsampling factor and fold it back.
    x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim])
    zero_insert = [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]
    x = tf.pad(x, zero_insert)
    x = tf.reshape(x, [-1, upH, upW, minorDim])

    # Positive paddings grow the image; negative paddings are applied as a crop.
    grow = [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]
    x = tf.pad(x, grow)
    top = max(-pady0, 0)
    bottom = x.shape[1].value - max(-pady1, 0)
    left = max(-padx0, 0)
    right = x.shape[2].value - max(-padx1, 0)
    x = x[:, top:bottom, left:right, :]

    # Filter each channel separately by moving channels into the batch axis.
    # The filter is flipped in both axes before being passed to conv2d.
    x = tf.transpose(x, [0, 3, 1, 2])
    x = tf.reshape(x, [-1, 1, paddedH, paddedW])
    flipped = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype)
    x = tf.nn.conv2d(x, flipped, strides=[1,1,1,1], padding='VALID', data_format='NCHW')
    x = tf.reshape(x, [-1, minorDim, paddedH - kernelH + 1, paddedW - kernelW + 1])
    x = tf.transpose(x, [0, 2, 3, 1])

    # Downsample by striding.
    return x[:, ::downy, ::downx, :]
+
+#----------------------------------------------------------------------------
+
def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
    """Fast CUDA implementation of `upfirdn_2d()` using custom ops.

    Calls the plugin's `up_fir_dn2d` op for the forward pass and defines its
    gradient as another `up_fir_dn2d` call with the flipped filter, swapped
    up/down factors and adjusted paddings. The gradient of that gradient is
    the forward function again, so differentiation can be repeated.

    Args mirror `upfirdn_2d()`; all factors and paddings must be Python ints.
    The shape of `x` is read via `as_list()`; `inH`/`inW` must be statically
    known because they are compared and multiplied as Python values below.
    """

    x = tf.convert_to_tensor(x)
    k = np.asarray(k, dtype=np.float32)
    majorDim, inH, inW, minorDim = x.shape.as_list()
    kernelH, kernelW = k.shape
    assert inW >= 1 and inH >= 1
    assert kernelW >= 1 and kernelH >= 1
    assert isinstance(upx, int) and isinstance(upy, int)
    assert isinstance(downx, int) and isinstance(downy, int)
    assert isinstance(padx0, int) and isinstance(padx1, int)
    assert isinstance(pady0, int) and isinstance(pady1, int)

    # Static output size: padded/upsampled extent minus kernel footprint, strided by the down factor.
    outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1
    outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1
    assert outW >= 1 and outH >= 1

    # Forward filter, flipped filter for the backward pass, and the paddings
    # passed to the backward call.
    kc = tf.constant(k, dtype=x.dtype)
    gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype)
    gpadx0 = kernelW - padx0 - 1
    gpady0 = kernelH - pady0 - 1
    gpadx1 = inW * upx - outW * downx + padx0 - upx + 1
    gpady1 = inH * upy - outH * downy + pady0 - upy + 1

    @tf.custom_gradient
    def func(x):
        y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)
        y.set_shape([majorDim, outH, outW, minorDim])
        @tf.custom_gradient
        def grad(dy):
            # Backward pass: same op with up/down factors swapped and the flipped filter.
            dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1)
            dx.set_shape([majorDim, inH, inW, minorDim])
            # The gradient of the backward pass is the forward function itself.
            return dx, func
        return y, grad
    return func(x)
+
+#----------------------------------------------------------------------------
+
def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'):
    r"""Apply an FIR filter to a batch of 2D images without changing their size.

    The input is a batch of 2D images shaped `[N, C, H, W]` or `[N, H, W, C]`.
    The filter is normalized to unit sum and then multiplied by `gain`, so a
    constant input is scaled by `gain`. The image is zero-padded by a total of
    `filter_size - 1` pixels per axis, split between the two sides.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the same shape and datatype as `x`.
    """

    kernel = _setup_kernel(k) * gain
    total_pad = kernel.shape[0] - 1
    pad_before = (total_pad + 1) // 2
    pad_after = total_pad // 2
    return _simple_upfirdn_2d(x, kernel, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
+
+#----------------------------------------------------------------------------
+
def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Upsample a batch of 2D images by an integer factor using an FIR filter.

    The input is a batch of 2D images shaped `[N, C, H, W]` or `[N, H, W, C]`.
    The filter is normalized to unit sum and then scaled by
    `gain * factor ** 2`, which makes up for the zeros inserted by upsampling
    so that a constant input is scaled by `gain`. Pixels outside the image are
    taken to be zero.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to
                      nearest-neighbor upsampling.
        factor:       Integer upsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H * factor, W * factor]` or
        `[N, H * factor, W * factor, C]`, and same datatype as `x`.
    """

    assert isinstance(factor, int) and factor >= 1
    taps = [1] * factor if k is None else k
    kernel = _setup_kernel(taps) * (gain * (factor ** 2))
    excess = kernel.shape[0] - factor
    pad_before = (excess + 1) // 2 + factor - 1
    pad_after = excess // 2
    return _simple_upfirdn_2d(x, kernel, up=factor, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
+
+#----------------------------------------------------------------------------
+
def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Downsample a batch of 2D images by an integer factor using an FIR filter.

    The input is a batch of 2D images shaped `[N, C, H, W]` or `[N, H, W, C]`.
    The filter is normalized to unit sum and then multiplied by `gain`, so a
    constant input is scaled by `gain`. Pixels outside the image are taken to
    be zero.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to average pooling.
        factor:       Integer downsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H // factor, W // factor]` or
        `[N, H // factor, W // factor, C]`, and same datatype as `x`.
    """

    assert isinstance(factor, int) and factor >= 1
    taps = [1] * factor if k is None else k
    kernel = _setup_kernel(taps) * gain
    excess = kernel.shape[0] - factor
    pad_before = (excess + 1) // 2
    pad_after = excess // 2
    return _simple_upfirdn_2d(x, kernel, down=factor, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
+
+#----------------------------------------------------------------------------
+
def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`.

    Implemented as a strided `tf.nn.conv2d_transpose()` with the spatially
    flipped weights, followed by FIR filtering with `k`.
    Padding is performed only once at the beginning, not between the operations.
    The fused op is considerably more efficient than performing the same calculation
    using standard TensorFlow ops. It supports gradients of arbitrary order.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        w:            Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
                      The filter must be square (`filterH == filterW`).
                      Grouped convolution can be performed by `inChannels = C // numGroups`,
                      where `C` is the channel dimension of `x`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to nearest-neighbor
                      upsampling.
        factor:       Integer upsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H * factor, W * factor]` or
        `[N, H * factor, W * factor, C]`, and same datatype as `x`.
    """

    assert isinstance(factor, int) and factor >= 1

    # Check weight shape.
    w = tf.convert_to_tensor(w)
    assert w.shape.rank == 4
    convH = w.shape[0].value
    convW = w.shape[1].value
    inC = _shape(w, 2)
    outC = _shape(w, 3)
    assert convW == convH

    # Setup filter kernel.
    if k is None:
        k = [1] * factor
    k = _setup_kernel(k) * (gain * (factor ** 2))
    # Total padding for the FIR stage, reduced by the extent the convolution adds.
    p = (k.shape[0] - factor) - (convW - 1)

    # Determine data dimensions.
    if data_format == 'NCHW':
        stride = [1, 1, factor, factor]
        output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW]
        num_groups = _shape(x, 1) // inC
    else:
        stride = [1, factor, factor, 1]
        output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC]
        num_groups = _shape(x, 3) // inC

    # Transpose weights.
    # Flip the filter spatially and swap the in/out channel axes within each
    # group, as conv2d_transpose expects [H, W, outChannels, inChannels].
    w = tf.reshape(w, [convH, convW, inC, num_groups, -1])
    w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2])
    w = tf.reshape(w, [convH, convW, -1, num_groups * inC])

    # Execute.
    x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format)
    return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl)
+
+#----------------------------------------------------------------------------
+
def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`.

    The input is first FIR-filtered with `k` (with all padding applied in that
    step) and then convolved with `w` using a stride of `factor`, so no padding
    happens between the two operations.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        w:            Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
                      The filter must be square (`filterH == filterW`).
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to average pooling.
        factor:       Integer downsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H // factor, W // factor]` or
        `[N, H // factor, W // factor, C]`, and same datatype as `x`.
    """

    assert isinstance(factor, int) and factor >= 1
    w = tf.convert_to_tensor(w)
    convH, convW, _inC, _outC = w.shape.as_list()
    assert convW == convH

    taps = [1] * factor if k is None else k
    kernel = _setup_kernel(taps) * gain

    # Total padding covers both the FIR filter and the convolution footprint.
    total_pad = (kernel.shape[0] - factor) + (convW - 1)
    pad_before = (total_pad + 1) // 2
    pad_after = total_pad // 2

    strides = [1, 1, factor, factor] if data_format == 'NCHW' else [1, factor, factor, 1]

    filtered = _simple_upfirdn_2d(x, kernel, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
    return tf.nn.conv2d(filtered, w, strides=strides, padding='VALID', data_format=data_format)
+
+#----------------------------------------------------------------------------
+# Internal helper funcs.
+
+def _shape(tf_expr, dim_idx):
+ if tf_expr.shape.rank is not None:
+ dim = tf_expr.shape[dim_idx].value
+ if dim is not None:
+ return dim
+ return tf.shape(tf_expr)[dim_idx]
+
+def _setup_kernel(k):
+ k = np.asarray(k, dtype=np.float32)
+ if k.ndim == 1:
+ k = np.outer(k, k)
+ k /= np.sum(k)
+ assert k.ndim == 2
+ assert k.shape[0] == k.shape[1]
+ return k
+
def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'):
    """Call `upfirdn_2d()` with the same factors and paddings on both axes, for either data format.

    For `'NCHW'` input, the batch and channel axes are merged into the leading
    axis before the call and the result is reshaped back afterwards. `'NHWC'`
    input is passed through unchanged.
    """
    assert data_format in ['NCHW', 'NHWC']
    assert x.shape.rank == 4
    channels_first = (data_format == 'NCHW')

    y = x
    if channels_first:
        # [N, C, H, W] -> [N*C, H, W, 1]
        y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1])
    y = upfirdn_2d(
        y, k,
        upx=up, upy=up,
        downx=down, downy=down,
        padx0=pad0, padx1=pad1,
        pady0=pad0, pady1=pad1,
        impl=impl)
    if channels_first:
        # [N*C, outH, outW, 1] -> [N, C, outH, outW]
        y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)])
    return y
+
+#----------------------------------------------------------------------------
diff --git a/dnnlib/tflib/optimizer.py b/dnnlib/tflib/optimizer.py
new file mode 100755
index 0000000..9a1b1b8
--- /dev/null
+++ b/dnnlib/tflib/optimizer.py
@@ -0,0 +1,336 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Helper wrapper for a Tensorflow optimizer."""
+
+import numpy as np
+import tensorflow as tf
+
+from collections import OrderedDict
+from typing import List, Union
+
+from . import autosummary
+from . import tfutil
+from .. import util
+
+from .tfutil import TfExpression, TfExpressionEx
+
+try:
+ # TensorFlow 1.13
+ from tensorflow.python.ops import nccl_ops
+except:
+ # Older TensorFlow versions
+ import tensorflow.contrib.nccl as nccl_ops
+
class Optimizer:
    """A Wrapper for tf.train.Optimizer.

    Automatically takes care of:
    - Gradient averaging for multi-GPU training.
    - Gradient accumulation for arbitrarily large minibatches.
    - Dynamic loss scaling and typecasts for FP16 training.
    - Ignoring corrupted gradients that contain NaNs/Infs.
    - Reporting statistics.
    - Well-chosen default settings.

    Usage as visible in this class: call `register_gradients()` once per
    device, then call `apply_updates()` exactly once to obtain the training op.
    """

    def __init__(self,
        name:                   str             = "Train",                  # Name string that will appear in TensorFlow graph.
        tf_optimizer:           str             = "tf.train.AdamOptimizer", # Underlying optimizer class.
        learning_rate:          TfExpressionEx  = 0.001,                    # Learning rate. Can vary over time.
        minibatch_multiplier:   TfExpressionEx  = None,                     # Treat N consecutive minibatches as one by accumulating gradients.
        share:                  "Optimizer"     = None,                     # Share internal state with a previously created optimizer?
        use_loss_scaling:       bool            = False,                    # Enable dynamic loss scaling for robust mixed-precision training?
        loss_scaling_init:      float           = 64.0,                     # Log2 of initial loss scaling factor.
        loss_scaling_inc:       float           = 0.0005,                   # Log2 of per-minibatch loss scaling increment when there is no overflow.
        loss_scaling_dec:       float           = 1.0,                      # Log2 of per-minibatch loss scaling decrement when there is an overflow.
        report_mem_usage:       bool            = False,                    # Report fine-grained memory usage statistics in TensorBoard?
        **kwargs):
        """Store the configuration; per-device TensorFlow objects are created lazily.

        `tf_optimizer` is resolved to a callable via `util.get_obj_by_name()`.
        Any extra keyword arguments are forwarded to that optimizer class when
        it is instantiated for a device.
        """

        # Public fields.
        self.name                   = name
        self.learning_rate          = learning_rate
        self.minibatch_multiplier   = minibatch_multiplier
        self.id                     = self.name.replace("/", ".")
        self.scope                  = tf.get_default_graph().unique_name(self.id)
        self.optimizer_class        = util.get_obj_by_name(tf_optimizer)
        self.optimizer_kwargs       = dict(kwargs)
        self.use_loss_scaling       = use_loss_scaling
        self.loss_scaling_init      = loss_scaling_init
        self.loss_scaling_inc       = loss_scaling_inc
        self.loss_scaling_dec       = loss_scaling_dec

        # Private fields.
        self._updates_applied       = False
        self._devices               = OrderedDict() # device_name => EasyDict()
        self._shared_optimizers     = OrderedDict() # device_name => optimizer_class
        self._gradient_shapes       = None          # [shape, ...]
        self._report_mem_usage      = report_mem_usage

        # Validate arguments.
        assert callable(self.optimizer_class)

        # Share internal state if requested.
        # Sharing means both wrappers use the same per-device optimizer instances,
        # which is why class, learning rate object and kwargs must match.
        if share is not None:
            assert isinstance(share, Optimizer)
            assert self.optimizer_class is share.optimizer_class
            assert self.learning_rate is share.learning_rate
            assert self.optimizer_kwargs == share.optimizer_kwargs
            self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access

    def _get_device(self, device_name: str):
        """Get internal state for the given TensorFlow device.

        The state is created on first use and cached in `self._devices`;
        later calls with the same `device_name` return the cached object.
        """
        tfutil.assert_tf_initialized()
        if device_name in self._devices:
            return self._devices[device_name]

        # Initialize fields.
        device = util.EasyDict()
        device.name             = device_name
        device.optimizer        = None          # Underlying optimizer:     optimizer_class
        device.loss_scaling_var = None          # Log2 of loss scaling:     tf.Variable
        device.grad_raw         = OrderedDict() # Raw gradients:            var => [grad, ...]
        device.grad_clean       = OrderedDict() # Clean gradients:          var => grad
        device.grad_acc_vars    = OrderedDict() # Accumulation sums:        var => tf.Variable
        device.grad_acc_count   = None          # Accumulation counter:     tf.Variable
        device.grad_acc         = OrderedDict() # Accumulated gradients:    var => grad

        # Setup TensorFlow objects.
        with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None):
            # Reuse an optimizer instance for this device if one exists (possibly from a shared Optimizer).
            if device_name not in self._shared_optimizers:
                optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers)
                self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs)
            device.optimizer = self._shared_optimizers[device_name]
            if self.use_loss_scaling:
                device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var")

        # Register device.
        self._devices[device_name] = device
        return device

    def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None:
        """Register the gradients of the given loss function with respect to the given variables.
        Intended to be called once per GPU.

        The device is taken from `loss.device`; all `trainable_vars` must live
        on that same device, and every call must pass variables with the same
        shapes as the first call. Must be called before `apply_updates()`.
        """
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        device = self._get_device(loss.device)

        # Validate trainables.
        if isinstance(trainable_vars, dict):
            trainable_vars = list(trainable_vars.values())  # allow passing in Network.trainables as vars
        assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1
        assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss])
        assert all(var.device == device.name for var in trainable_vars)

        # Validate shapes.
        if self._gradient_shapes is None:
            self._gradient_shapes = [var.shape.as_list() for var in trainable_vars]
        assert len(trainable_vars) == len(self._gradient_shapes)
        assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes))

        # Report memory usage if requested.
        deps = []
        if self._report_mem_usage:
            # Only report once: the flag is cleared on the first call that reaches this point.
            self._report_mem_usage = False
            try:
                with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]):
                    deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30))
            except tf.errors.NotFoundError:
                # Best effort: skip the memory report if this error is raised.
                pass

        # Compute gradients.
        with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps):
            loss = self.apply_loss_scaling(tf.cast(loss, tf.float32))
            gate = tf.train.Optimizer.GATE_NONE  # disable gating to reduce memory usage
            grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate)

        # Register gradients.
        for grad, var in grad_list:
            if var not in device.grad_raw:
                device.grad_raw[var] = []
            device.grad_raw[var].append(grad)

    def apply_updates(self, allow_no_op: bool = False) -> tf.Operation:
        """Construct training op to update the registered variables based on their gradients.

        May be called only once per Optimizer. With `allow_no_op=True` and no
        registered devices, a plain no-op is returned. Otherwise the returned
        op groups gradient accumulation, the conditional optimizer step,
        loss-scaling adjustment and statistics reporting for all devices.
        This method also runs the initializers of the optimizer state,
        loss-scaling and accumulation variables in the default session.
        """
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        self._updates_applied = True
        all_ops = []

        # Check for no-op.
        if allow_no_op and len(self._devices) == 0:
            with tfutil.absolute_name_scope(self.scope):
                return tf.no_op(name='TrainingOp')

        # Clean up gradients.
        for device_idx, device in enumerate(self._devices.values()):
            with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name):
                for var, grad in device.grad_raw.items():

                    # Filter out disconnected gradients and convert to float32.
                    grad = [g for g in grad if g is not None]
                    grad = [tf.cast(g, tf.float32) for g in grad]

                    # Sum within the device.
                    if len(grad) == 0:
                        grad = tf.zeros(var.shape)  # No gradients => zero.
                    elif len(grad) == 1:
                        grad = grad[0]              # Single gradient => use as is.
                    else:
                        grad = tf.add_n(grad)       # Multiple gradients => sum.

                    # Scale as needed.
                    # Averages over the number of registered gradients (including
                    # disconnected ones) and over the number of devices.
                    scale = 1.0 / len(device.grad_raw[var]) / len(self._devices)
                    scale = tf.constant(scale, dtype=tf.float32, name="scale")
                    if self.minibatch_multiplier is not None:
                        scale /= tf.cast(self.minibatch_multiplier, tf.float32)
                    scale = self.undo_loss_scaling(scale)
                    device.grad_clean[var] = grad * scale

        # Sum gradients across devices.
        if len(self._devices) > 1:
            with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None):
                # Variables are matched across devices by their registration order.
                for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
                    if len(all_vars) > 0 and all(dim > 0 for dim in all_vars[0].shape.as_list()): # NCCL does not support zero-sized tensors.
                        all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
                        all_grads = nccl_ops.all_sum(all_grads)
                        for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
                            device.grad_clean[var] = grad

        # Apply updates separately on each device.
        for device_idx, device in enumerate(self._devices.values()):
            with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name):
                # pylint: disable=cell-var-from-loop
                # The lambdas below capture loop variables; each is passed to
                # tf.cond within the same iteration that defines it.

                # Accumulate gradients over time.
                if self.minibatch_multiplier is None:
                    acc_ok = tf.constant(True, name='acc_ok')
                    device.grad_acc = OrderedDict(device.grad_clean)
                else:
                    # Create variables.
                    with tf.control_dependencies(None):
                        for var in device.grad_clean.keys():
                            device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var")
                        device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count")

                    # Track counter.
                    # acc_ok becomes true once the counter reaches minibatch_multiplier;
                    # the counter is then reset, otherwise incremented.
                    count_cur = device.grad_acc_count + 1.0
                    count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur)
                    count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([]))
                    acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32))
                    all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op))

                    # Track gradients.
                    for var, grad in device.grad_clean.items():
                        acc_var = device.grad_acc_vars[var]
                        acc_cur = acc_var + grad
                        device.grad_acc[var] = acc_cur
                        # Make sure the running sum is evaluated before the accumulator is overwritten.
                        with tf.control_dependencies([acc_cur]):
                            acc_inc_op = lambda: tf.assign(acc_var, acc_cur)
                            acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape))
                            all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op))

                # No overflow => apply gradients.
                # The step is taken only when accumulation is complete and every
                # accumulated gradient is finite.
                all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()]))
                apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()])
                all_ops.append(tf.cond(all_ok, apply_op, tf.no_op))

                # Adjust loss scaling.
                # Only adjusted when accumulation is complete: increased after a
                # successful step, decreased otherwise.
                if self.use_loss_scaling:
                    ls_inc_op    = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc)
                    ls_dec_op    = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec)
                    ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op))
                    all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op))

                # Last device => report statistics.
                if device_idx == len(self._devices) - 1:
                    all_ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate))
                    all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok))
                    if self.use_loss_scaling:
                        all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var))

        # Initialize variables.
        self.reset_optimizer_state()
        if self.use_loss_scaling:
            tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()])
        if self.minibatch_multiplier is not None:
            tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]])

        # Group everything into a single op.
        with tfutil.absolute_name_scope(self.scope):
            return tf.group(*all_ops, name="TrainingOp")

    def reset_optimizer_state(self) -> None:
        """Reset internal state of the underlying optimizer.

        Runs the initializer of every variable reported by each device's
        optimizer via `variables()`.
        """
        tfutil.assert_tf_initialized()
        tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()])

    def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
        """Get or create variable representing log2 of the current dynamic loss scaling factor.

        Returns None when loss scaling is disabled (the variable is only
        created when `use_loss_scaling` is set).
        """
        return self._get_device(device).loss_scaling_var

    def apply_loss_scaling(self, value: TfExpression) -> TfExpression:
        """Apply dynamic loss scaling for the given expression.

        Multiplies `value` by 2**loss_scaling_var of the device `value` lives
        on; returns `value` unchanged when loss scaling is disabled.
        """
        assert tfutil.is_tf_expression(value)
        if not self.use_loss_scaling:
            return value
        return value * tfutil.exp2(self.get_loss_scaling_var(value.device))

    def undo_loss_scaling(self, value: TfExpression) -> TfExpression:
        """Undo the effect of dynamic loss scaling for the given expression.

        Multiplies `value` by 2**(-loss_scaling_var) of the device `value`
        lives on; returns `value` unchanged when loss scaling is disabled.
        """
        assert tfutil.is_tf_expression(value)
        if not self.use_loss_scaling:
            return value
        return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type
+
+
class SimpleAdam:
    """Minimal Adam optimizer exposing the subset of the tf.train.AdamOptimizer interface used by dnnlib.tflib.Optimizer.

    Provides `variables()`, `compute_gradients()` and `apply_gradients()`.
    State variables are created each time `apply_gradients()` is called and
    are collected in `all_state_vars`.
    """

    def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        # Hyperparameters.
        self.name = name
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        # Every state variable created so far, across all apply_gradients() calls.
        self.all_state_vars = []

    def variables(self):
        """Return the list of all optimizer state variables created so far."""
        return self.all_state_vars

    def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE):
        """Return `[(gradient, variable), ...]` for `loss`; only ungated gradients are supported."""
        assert gate_gradients == tf.train.Optimizer.GATE_NONE
        gradients = tf.gradients(loss, var_list)
        return list(zip(gradients, var_list))

    def apply_gradients(self, grads_and_vars):
        """Build and return a grouped op that performs one Adam step for the given `(gradient, variable)` pairs."""
        with tf.name_scope(self.name):
            created_vars = []
            step_ops = []

            # Running powers of beta1/beta2, used for the bias-corrected learning rate.
            with tf.control_dependencies(None):
                beta1_power = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
                beta2_power = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
            created_vars.extend([beta1_power, beta2_power])
            beta1_power_next = beta1_power * self.beta1
            beta2_power_next = beta2_power * self.beta2
            step_ops.append(tf.assign(beta1_power, beta1_power_next))
            step_ops.append(tf.assign(beta2_power, beta2_power_next))
            corrected_lr = self.learning_rate * tf.sqrt(1 - beta2_power_next) / (1 - beta1_power_next)

            # Per-variable moment estimates and parameter update.
            for gradient, variable in grads_and_vars:
                with tf.control_dependencies(None):
                    first_moment = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(variable), trainable=False)
                    second_moment = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(variable), trainable=False)
                created_vars.extend([first_moment, second_moment])
                first_moment_next = self.beta1 * first_moment + (1 - self.beta1) * gradient
                second_moment_next = self.beta2 * second_moment + (1 - self.beta2) * tf.square(gradient)
                delta = corrected_lr * first_moment_next / (tf.sqrt(second_moment_next) + self.epsilon)
                step_ops.append(tf.assign(first_moment, first_moment_next))
                step_ops.append(tf.assign(second_moment, second_moment_next))
                step_ops.append(tf.assign_sub(variable, delta))

            # Publish the new state variables and bundle all updates into one op.
            self.all_state_vars.extend(created_vars)
            return tf.group(*step_ops)
diff --git a/dnnlib/tflib/tfutil.py b/dnnlib/tflib/tfutil.py
new file mode 100755
index 0000000..1127c7b
--- /dev/null
+++ b/dnnlib/tflib/tfutil.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Miscellaneous helper utils for Tensorflow."""
+
+import os
+import numpy as np
+import tensorflow as tf
+
+# Silence deprecation warnings from TensorFlow 1.13 onwards
+import logging
+logging.getLogger('tensorflow').setLevel(logging.ERROR)
+import tensorflow.contrib # requires TensorFlow 1.x!
+tf.contrib = tensorflow.contrib
+
+from typing import Any, Iterable, List, Union
+
+TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation]
+"""A type that represents a valid Tensorflow expression."""
+
+TfExpressionEx = Union[TfExpression, int, float, np.ndarray]
+"""A type that can be converted to a valid Tensorflow expression."""
+
+
+def run(*args, **kwargs) -> Any:
+    """Evaluate the given ops through the default session, forwarding all arguments to its run()."""
+    assert_tf_initialized()
+    session = tf.get_default_session()
+    return session.run(*args, **kwargs)
+
+
+def is_tf_expression(x: Any) -> bool:
+    """Tell whether `x` is a Tensorflow Tensor, Variable, or Operation."""
+    expression_types = (tf.Tensor, tf.Variable, tf.Operation)
+    return isinstance(x, expression_types)
+
+
+def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]:
+    """Collect the `.value` of every dimension in `shape` into a list.
+
+    Kept for backwards compatibility -- new code should call TensorShape.as_list().
+    """
+    sizes = []
+    for dim in shape:
+        sizes.append(dim.value)
+    return sizes
+
+
+def flatten(x: TfExpressionEx) -> TfExpression:
+    """Reshape `x` into a rank-1 tensor (shape [-1]) under the "Flatten" name scope."""
+    with tf.name_scope("Flatten"):
+        flat = tf.reshape(x, [-1])
+    return flat
+
+
+def log2(x: TfExpressionEx) -> TfExpression:
+    """Base-2 logarithm, computed as the natural log scaled by 1/ln(2)."""
+    with tf.name_scope("Log2"):
+        inv_ln2 = np.float32(1.0 / np.log(2.0))
+        return tf.log(x) * inv_ln2
+
+
+def exp2(x: TfExpressionEx) -> TfExpression:
+    """Base-2 exponential, computed as exp(x * ln(2))."""
+    with tf.name_scope("Exp2"):
+        ln2 = np.float32(np.log(2.0))
+        return tf.exp(x * ln2)
+
+
+def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx:
+    """Linearly interpolate from `a` (t = 0) to `b` (t = 1); `t` is not clamped."""
+    with tf.name_scope("Lerp"):
+        span = b - a
+        return a + span * t
+
+
+def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression:
+    """Linearly interpolate from `a` to `b` with `t` clamped to the range [0, 1]."""
+    with tf.name_scope("LerpClip"):
+        span = b - a
+        weight = tf.clip_by_value(t, 0.0, 1.0)
+        return a + span * weight
+
+
+def absolute_name_scope(scope: str) -> tf.name_scope:
+    """Return a name scope for exactly `scope`, disregarding whichever scopes are currently open."""
+    # The trailing slash is what marks the scope name as absolute.
+    absolute = scope + "/"
+    return tf.name_scope(absolute)
+
+
+def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
+    """Return a variable scope for exactly `scope`, disregarding whichever scopes are currently open.
+
+    Extra keyword arguments are forwarded to the tf.VariableScope constructor.
+    """
+    target = tf.VariableScope(name=scope, **kwargs)
+    return tf.variable_scope(target, auxiliary_name_scope=False)
+
+
+def _sanitize_tf_config(config_dict: dict = None) -> dict:
+    """Merge the caller's overrides on top of the built-in defaults and return the resulting dict.
+
+    Defaults of the form "env.NAME" are dropped when NAME is already present in
+    os.environ, so a pre-set environment variable is not overwritten by a default.
+    """
+    defaults = {
+        "rnd.np_random_seed": None,  # Random seed for NumPy. None = keep as is.
+        "rnd.tf_random_seed": "auto",  # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
+        "env.TF_CPP_MIN_LOG_LEVEL": "1",  # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
+        "graph_options.place_pruned_graph": True,  # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
+        "gpu_options.allow_growth": True,  # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
+    }
+
+    # Keep every default except env.* entries whose variable is already set.
+    cfg = dict()
+    for key, value in defaults.items():
+        fields = key.split(".")
+        if fields[0] == "env":
+            assert len(fields) == 2
+            if fields[1] in os.environ:
+                continue
+        cfg[key] = value
+
+    # User overrides win over whatever defaults remain.
+    if config_dict is not None:
+        cfg.update(config_dict)
+    return cfg
+
+
+def init_tf(config_dict: dict = None) -> None:
+    """Set up random seeds, environment variables, and a default TensorFlow session.
+
+    Does nothing when a default session already exists.
+    """
+    if tf.get_default_session() is not None:
+        return  # already initialized
+
+    cfg = _sanitize_tf_config(config_dict)
+
+    # Seed NumPy first: the 'auto' TensorFlow seed below is drawn from NumPy's generator.
+    numpy_seed = cfg["rnd.np_random_seed"]
+    if numpy_seed is not None:
+        np.random.seed(numpy_seed)
+
+    graph_seed = cfg["rnd.tf_random_seed"]
+    if graph_seed == "auto":
+        graph_seed = np.random.randint(1 << 31)
+    if graph_seed is not None:
+        tf.set_random_seed(graph_seed)
+
+    # Export every "env.NAME" entry as environment variable NAME.
+    for key in cfg:
+        parts = key.split(".")
+        if parts[0] != "env":
+            continue
+        assert len(parts) == 2
+        os.environ[parts[1]] = str(cfg[key])
+
+    # Create the session and install it as the default.
+    create_session(cfg, force_as_default=True)
+
+
+def assert_tf_initialized():
+    """Raise RuntimeError unless a default TensorFlow session is currently set."""
+    session = tf.get_default_session()
+    if session is not None:
+        return
+    raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().")
+
+
+def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
+ """Create tf.Session based on config dict.
+
+ Config keys are dotted attribute paths into tf.ConfigProto (for example
+ "gpu_options.allow_growth"); keys whose first component is "rnd" or "env"
+ are not written to the proto. With force_as_default=True the new session
+ is also entered as the default session before being returned.
+ """
+ # Setup TensorFlow config proto.
+ cfg = _sanitize_tf_config(config_dict)
+ config_proto = tf.ConfigProto()
+ for key, value in cfg.items():
+ fields = key.split(".")
+ if fields[0] not in ["rnd", "env"]:
+ # Walk down to the sub-message named by all but the last field, then set the last field on it.
+ obj = config_proto
+ for field in fields[:-1]:
+ obj = getattr(obj, field)
+ setattr(obj, fields[-1], value)
+
+ # Create session.
+ session = tf.Session(config=config_proto)
+ if force_as_default:
+ # The as_default() context is entered by hand and is not exited in this function;
+ # it is kept on the session object and nesting enforcement is turned off on it.
+ # pylint: disable=protected-access
+ session._default_session = session.as_default()
+ session._default_session.enforce_nesting = False
+ session._default_session.__enter__()
+ return session
+
+
+def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
+ """Initialize all tf.Variables that have not already been initialized.
+
+ Equivalent to the following, but more efficient and does not bloat the tf graph:
+ tf.variables_initializer(tf.report_uninitialized_variables()).run()
+
+ When target_vars is None, tf.global_variables() is used.
+ """
+ assert_tf_initialized()
+ if target_vars is None:
+ target_vars = tf.global_variables()
+
+ # Variables that still need checking, and the matching "is initialized?" ops (same order).
+ test_vars = []
+ test_ops = []
+
+ with tf.control_dependencies(None): # ignore surrounding control_dependencies
+ for var in target_vars:
+ assert is_tf_expression(var)
+
+ # Probe the graph for a "<var name>/IsVariableInitialized:0" tensor.
+ try:
+ tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
+ except KeyError:
+ # Op does not exist => variable may be uninitialized.
+ test_vars.append(var)
+
+ with absolute_name_scope(var.name.split(":")[0]):
+ test_ops.append(tf.is_variable_initialized(var))
+
+ # Evaluate all checks in one run() call, then run the initializers of the variables reported as not initialized.
+ init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
+ run([var.initializer for var in init_vars])
+
+
+def set_vars(var_to_value_dict: dict) -> None:
+ """Set the values of given tf.Variables.
+
+ Equivalent to the following, but more efficient and does not bloat the tf graph:
+ tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()]
+
+ All assignments are executed in a single run() call.
+ """
+ assert_tf_initialized()
+ ops = []
+ feed_dict = {}
+
+ for var, value in var_to_value_dict.items():
+ assert is_tf_expression(var)
+
+ # Reuse the "<var name>/setter:0" assign op if the graph already has one; otherwise build it.
+ try:
+ setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op
+ except KeyError:
+ with absolute_name_scope(var.name.split(":")[0]):
+ with tf.control_dependencies(None): # ignore surrounding control_dependencies
+ setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter
+
+ ops.append(setter)
+ # Input 1 of the assign op is fed with the new value (the "new_value" placeholder when the setter was built above).
+ feed_dict[setter.op.inputs[1]] = value
+
+ run(ops, feed_dict)
+
+
+def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs):
+    """Create a tf.Variable holding `initial_value` without embedding the array in the graph.
+
+    The variable is built from a zeros tensor of matching shape and dtype, and the
+    real contents are then written through set_vars(). Remaining positional and
+    keyword arguments are forwarded to tf.Variable.
+    """
+    assert_tf_initialized()
+    assert isinstance(initial_value, np.ndarray)
+    zero_init = tf.zeros(initial_value.shape, initial_value.dtype)
+    variable = tf.Variable(zero_init, *args, **kwargs)
+    set_vars({variable: initial_value})
+    return variable
+
+
+def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False):
+    """Cast a minibatch of images to float32 and map the 0..255 range onto `drange`.
+
+    With nhwc_to_nchw=True the axes are permuted by [0, 3, 1, 2] before scaling.
+    Can be used as an input transformation for Network.run().
+    """
+    as_float = tf.cast(images, tf.float32)
+    if nhwc_to_nchw:
+        as_float = tf.transpose(as_float, [0, 3, 1, 2])
+    scale = (drange[1] - drange[0]) / 255
+    return as_float * scale + drange[0]
+
+
+def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1):
+    """Map a minibatch of images from `drange` onto 0..255 and saturate-cast to uint8.
+
+    With shrink > 1 the images are first average-pooled by that factor (NCHW layout);
+    with nchw_to_nhwc=True the axes are then permuted by [0, 2, 3, 1].
+    Can be used as an output transformation for Network.run().
+    """
+    pixels = tf.cast(images, tf.float32)
+    if shrink > 1:
+        window = [1, 1, shrink, shrink]
+        pixels = tf.nn.avg_pool(pixels, ksize=window, strides=window, padding="VALID", data_format="NCHW")
+    if nchw_to_nhwc:
+        pixels = tf.transpose(pixels, [0, 2, 3, 1])
+    scale = 255 / (drange[1] - drange[0])
+    # The extra 0.5 is part of the offset applied before the saturating cast.
+    offset = 0.5 - drange[0] * scale
+    pixels = pixels * scale + offset
+    return tf.saturate_cast(pixels, tf.uint8)
diff --git a/dnnlib/util.py b/dnnlib/util.py
new file mode 100755
index 0000000..73c98d7
--- /dev/null
+++ b/dnnlib/util.py
@@ -0,0 +1,410 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Miscellaneous utility classes and functions."""
+
+import ctypes
+import fnmatch
+import importlib
+import inspect
+import numpy as np
+import os
+import shutil
+import sys
+import types
+import io
+import pickle
+import re
+import requests
+import html
+import hashlib
+import glob
+import uuid
+
+from distutils.util import strtobool
+from typing import Any, List, Tuple, Union
+
+
+# Util classes
+# ------------------------------------------------------------------------------------------
+
+
+class EasyDict(dict):
+    """A dict whose items can also be read, written, and deleted as attributes."""
+
+    def __getattr__(self, name: str) -> Any:
+        # Only reached when normal attribute lookup fails; fall back to the item of that name.
+        try:
+            value = self[name]
+        except KeyError:
+            raise AttributeError(name)
+        return value
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        # Attribute assignment is stored as a dict item.
+        self[name] = value
+
+    def __delattr__(self, name: str) -> None:
+        # Attribute deletion removes the dict item.
+        del self[name]
+
+
+class Logger(object):
+    """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.
+
+    While active, the instance replaces both sys.stdout and sys.stderr. Text written
+    to it is forwarded to the stream that was sys.stdout at construction time and,
+    when a file name was given, mirrored into that file. Usable as a context
+    manager, in which case close() runs on exit.
+    """
+
+    def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True):
+        """Open the optional log file and install this object as sys.stdout and sys.stderr."""
+        self.file = None
+
+        if file_name is not None:
+            self.file = open(file_name, file_mode)
+
+        self.should_flush = should_flush
+        # Remember the previous streams so close() can restore them.
+        self.stdout = sys.stdout
+        self.stderr = sys.stderr
+
+        sys.stdout = self
+        sys.stderr = self
+
+    def __enter__(self) -> "Logger":
+        return self
+
+    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.close()
+
+    def write(self, text: str) -> None:
+        """Write text to stdout (and a file) and optionally flush."""
+        if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash
+            return
+
+        if self.file is not None:
+            self.file.write(text)
+
+        self.stdout.write(text)
+
+        if self.should_flush:
+            self.flush()
+
+    def flush(self) -> None:
+        """Flush written text to both stdout and a file, if open."""
+        if self.file is not None:
+            self.file.flush()
+
+        self.stdout.flush()
+
+    def close(self) -> None:
+        """Flush, close possible files, and remove stdout/stderr mirroring."""
+        self.flush()
+
+        # if using multiple loggers, prevent closing in wrong order
+        if sys.stdout is self:
+            sys.stdout = self.stdout
+        if sys.stderr is self:
+            sys.stderr = self.stderr
+
+        if self.file is not None:
+            self.file.close()
+            # Forget the closed handle. A logger created later may still forward its
+            # output through this one; write()/flush() then skip the file instead of
+            # raising ValueError on a closed file, and a repeated close() is harmless.
+            self.file = None
+
+
+# Small util functions
+# ------------------------------------------------------------------------------------------
+
+
+def format_time(seconds: Union[int, float]) -> str:
+    """Render a duration in seconds as a short string such as "42s", "3m 07s", "2h 05m 09s" or "1d 03h 20m".
+
+    The input is rounded to whole seconds with np.rint first; from one day upwards
+    the seconds component is omitted.
+    """
+    total = int(np.rint(seconds))
+    if total < 60:
+        return "{0}s".format(total)
+
+    whole_minutes, secs = divmod(total, 60)
+    if total < 60 * 60:
+        return "{0}m {1:02}s".format(whole_minutes, secs)
+
+    whole_hours, mins = divmod(whole_minutes, 60)
+    if total < 24 * 60 * 60:
+        return "{0}h {1:02}m {2:02}s".format(whole_hours, mins, secs)
+
+    days, hours = divmod(whole_hours, 24)
+    return "{0}d {1:02}h {2:02}m".format(days, hours, mins)
+
+
+def ask_yes_no(question: str) -> bool:
+    """Keep prompting with `question` until the reply is something strtobool() accepts, then return its result."""
+    while True:
+        try:
+            print("{0} [y/n]".format(question))
+            reply = input().lower()
+            return strtobool(reply)
+        except ValueError:
+            # Unrecognised answer: ask again.
+            continue
+
+
+def tuple_product(t: Tuple) -> Any:
+    """Multiply all elements of `t` together, starting from 1 (so an empty tuple yields 1)."""
+    product = 1
+    for factor in t:
+        product *= factor
+    return product
+
+
+# Maps a type name to the ctypes type that get_dtype_and_ctype() pairs it with.
+_str_to_ctype = {
+    # Unsigned integers.
+    "uint8": ctypes.c_ubyte,
+    "uint16": ctypes.c_uint16,
+    "uint32": ctypes.c_uint32,
+    "uint64": ctypes.c_uint64,
+    # Signed integers.
+    "int8": ctypes.c_byte,
+    "int16": ctypes.c_int16,
+    "int32": ctypes.c_int32,
+    "int64": ctypes.c_int64,
+    # Floating point.
+    "float32": ctypes.c_float,
+    "float64": ctypes.c_double,
+}
+
+
+def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]:
+    """Return the (Numpy dtype, ctypes type) pair for a type name, checking both have the same byte size.
+
+    `type_obj` may be the name itself or any object exposing it through `__name__`
+    or `name` (tried in that order). Raises RuntimeError when no name can be found;
+    unknown names and size mismatches fail an assertion.
+    """
+    if isinstance(type_obj, str):
+        name = type_obj
+    elif hasattr(type_obj, "__name__"):
+        name = type_obj.__name__
+    elif hasattr(type_obj, "name"):
+        name = type_obj.name
+    else:
+        raise RuntimeError("Cannot infer type name from input")
+
+    assert name in _str_to_ctype.keys()
+
+    numpy_type = np.dtype(name)
+    c_type = _str_to_ctype[name]
+
+    assert numpy_type.itemsize == ctypes.sizeof(c_type)
+
+    return numpy_type, c_type
+
+
+def is_pickleable(obj: Any) -> bool:
+    """Return True if `obj` can be serialized with pickle.dump, False if the attempt raises an error.
+
+    The object is pickled into a throwaway in-memory buffer. Only ordinary errors
+    (subclasses of Exception) count as "not pickleable"; KeyboardInterrupt and
+    SystemExit are allowed to propagate instead of being reported as False.
+    """
+    try:
+        with io.BytesIO() as stream:
+            pickle.dump(obj, stream)
+        return True
+    except Exception:
+        return False
+
+
+# Functionality to import modules/objects by name, and call functions by name
+# ------------------------------------------------------------------------------------------
+
+def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
+    """Searches for the underlying module behind the name to some python object.
+    Returns the module and the object name (original name with module part removed).
+
+    The dotted name is split at every possible position, longest module part first,
+    and the first split whose module imports and whose remainder resolves inside it
+    wins. If none works, the import and attribute lookups are repeated so that the
+    underlying ImportError/AttributeError reaches the caller; as a last resort
+    ImportError(obj_name) is raised.
+    """
+
+    # allow convenience shorthands, substitute them by full names
+    # (the dot is escaped so that only the exact prefixes "np." and "tf." are rewritten)
+    obj_name = re.sub(r"^np\.", "numpy.", obj_name)
+    obj_name = re.sub(r"^tf\.", "tensorflow.", obj_name)
+
+    # list alternatives for (module_name, local_obj_name)
+    parts = obj_name.split(".")
+    name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]
+
+    # try each alternative in turn
+    for module_name, local_obj_name in name_pairs:
+        try:
+            module = importlib.import_module(module_name) # may raise ImportError
+            get_obj_from_module(module, local_obj_name) # may raise AttributeError
+            return module, local_obj_name
+        except Exception:
+            # best-effort probing: any ordinary failure means "try the next split";
+            # KeyboardInterrupt/SystemExit are not swallowed
+            pass
+
+    # maybe some of the modules themselves contain errors?
+    for module_name, _local_obj_name in name_pairs:
+        try:
+            importlib.import_module(module_name) # may raise ImportError
+        except ImportError:
+            # a plain "module does not exist" is expected here; anything else is a real error
+            if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"):
+                raise
+
+    # maybe the requested attribute is missing?
+    for module_name, local_obj_name in name_pairs:
+        try:
+            module = importlib.import_module(module_name) # may raise ImportError
+            get_obj_from_module(module, local_obj_name) # may raise AttributeError
+        except ImportError:
+            pass
+
+    # we are out of luck, but we have no idea why
+    raise ImportError(obj_name)
+
+
+def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
+    """Traverses the object name and returns the last (rightmost) python object.
+
+    An empty `obj_name` returns `module` itself; a missing attribute along the way
+    raises AttributeError.
+    """
+    if obj_name == '':
+        return module
+    obj = module
+    for part in obj_name.split("."):
+        obj = getattr(obj, part)
+    return obj
+
+
+def get_obj_by_name(name: str) -> Any:
+    """Resolve a dotted name to the python object it refers to."""
+    owner, remainder = get_module_from_obj_name(name)
+    resolved = get_obj_from_module(owner, remainder)
+    return resolved
+
+
+def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any:
+    """Look up the callable named `func_name` and invoke it with the remaining arguments.
+
+    `func_name` is keyword-only and required; the resolved object must be callable.
+    """
+    assert func_name is not None
+    target = get_obj_by_name(func_name)
+    assert callable(target)
+    result = target(*args, **kwargs)
+    return result
+
+
+def get_module_dir_by_obj_name(obj_name: str) -> str:
+    """Return the directory of the file that defines the module behind `obj_name`."""
+    owner = get_module_from_obj_name(obj_name)[0]
+    module_file = inspect.getfile(owner)
+    return os.path.dirname(module_file)
+
+
+def is_top_level_function(obj: Any) -> bool:
+    """Tell whether `obj` is callable and its `__name__` is a key of its defining module's namespace."""
+    if not callable(obj):
+        return False
+    own_name = obj.__name__
+    module_namespace = sys.modules[obj.__module__].__dict__
+    return own_name in module_namespace
+
+
+def get_top_level_function_name(obj: Any) -> str:
+    """Build "<module>.<name>" for a top-level function; anything else fails the assertion."""
+    assert is_top_level_function(obj)
+    return ".".join([obj.__module__, obj.__name__])
+
+
+# File system helpers
+# ------------------------------------------------------------------------------------------
+
+def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
+    """Walk `dir_path` and return (path under dir_path, path relative to dir_path) for every file found.
+
+    Each entry of `ignores` is an fnmatch pattern: matching directories are not
+    descended into and matching files are left out. With add_base_to_relative=True
+    the last component of `dir_path` is prepended to every relative path.
+    """
+    assert os.path.isdir(dir_path)
+    base_name = os.path.basename(os.path.normpath(dir_path))
+    patterns = [] if ignores is None else ignores
+
+    collected = []
+    for root, dirs, files in os.walk(dir_path, topdown=True):
+        for pattern in patterns:
+            # Slice-assign so the very list os.walk iterates over is pruned and the subtree is skipped.
+            dirs[:] = [d for d in dirs if not fnmatch.fnmatch(d, pattern)]
+            files = [f for f in files if not fnmatch.fnmatch(f, pattern)]
+
+        for file_name in files:
+            full_path = os.path.join(root, file_name)
+            rel_path = os.path.relpath(full_path, dir_path)
+            if add_base_to_relative:
+                rel_path = os.path.join(base_name, rel_path)
+            collected.append((full_path, rel_path))
+
+    return collected
+
+
+def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None:
+    """Takes in a list of tuples of (src, dst) paths and copies files.
+    Will create all necessary directories.
+
+    A destination without a directory component (a bare file name, i.e. a file in
+    the current working directory) is copied as-is; no directory is created for it.
+    """
+    for entry in files:
+        src_path, dst_path = entry[0], entry[1]
+        target_dir_name = os.path.dirname(dst_path)
+
+        # dirname is "" for a bare file name, and os.makedirs("") would raise;
+        # otherwise create all intermediate-level directories (no-op if they exist)
+        if target_dir_name:
+            os.makedirs(target_dir_name, exist_ok=True)
+
+        shutil.copyfile(src_path, dst_path)
+
+
+# URL helpers
+# ------------------------------------------------------------------------------------------
+
+def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
+    """Decide whether `obj` is a string that looks like a URL.
+
+    A candidate must contain "://". With allow_file_urls=True anything starting with
+    'file:///' is accepted outright. Otherwise both the URL and its root (the URL
+    joined with "/") must parse to a non-empty scheme and a host part containing a
+    dot; any error raised while parsing counts as "not a URL".
+    """
+    if not isinstance(obj, str) or "://" not in obj:
+        return False
+    if allow_file_urls and obj.startswith('file:///'):
+        return True
+    try:
+        parsed = requests.compat.urlparse(obj)
+        if not (parsed.scheme and parsed.netloc and "." in parsed.netloc):
+            return False
+        root = requests.compat.urlparse(requests.compat.urljoin(obj, "/"))
+        if not (root.scheme and root.netloc and "." in root.netloc):
+            return False
+    except:
+        return False
+    return True
+
+
+def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any:
+    """Download the given URL and return a binary-mode file object to access the data.
+
+    'file:///' URLs are opened directly from disk. When `cache_dir` is given, a
+    previously downloaded copy (keyed by the MD5 of the URL) is returned if exactly
+    one exists, and fresh downloads are stored there. The download is attempted up
+    to `num_attempts` times; the error from the last attempt is re-raised. A
+    KeyboardInterrupt aborts immediately instead of being retried.
+    """
+    assert is_url(url, allow_file_urls=True)
+    assert num_attempts >= 1
+
+    # Handle file URLs.
+    if url.startswith('file:///'):
+        return open(url[len('file:///'):], "rb")
+
+    # Lookup from cache.
+    url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
+    if cache_dir is not None:
+        cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
+        if len(cache_files) == 1:
+            return open(cache_files[0], "rb")
+
+    # Download.
+    url_name = None
+    url_data = None
+    with requests.Session() as session:
+        if verbose:
+            print("Downloading %s ..." % url, end="", flush=True)
+        for attempts_left in reversed(range(num_attempts)):
+            try:
+                with session.get(url) as res:
+                    res.raise_for_status()
+                    if len(res.content) == 0:
+                        raise IOError("No data received")
+
+                    # Small responses may be a Google Drive interstitial page rather than the file.
+                    if len(res.content) < 8192:
+                        content_str = res.content.decode("utf-8")
+                        if "download_warning" in res.headers.get("Set-Cookie", ""):
+                            links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
+                            if len(links) == 1:
+                                # Follow the confirmation link on the next attempt.
+                                url = requests.compat.urljoin(url, links[0])
+                                raise IOError("Google Drive virus checker nag")
+                        if "Google Drive - Quota exceeded" in content_str:
+                            raise IOError("Google Drive download quota exceeded -- please try again later")
+
+                    match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
+                    url_name = match[1] if match else url
+                    url_data = res.content
+                    if verbose:
+                        print(" done")
+                    break
+            except KeyboardInterrupt:
+                # The user asked to stop: do not burn the remaining attempts.
+                raise
+            except:
+                if not attempts_left:
+                    if verbose:
+                        print(" failed")
+                    raise
+                if verbose:
+                    print(".", end="", flush=True)
+
+    # Save to cache.
+    if cache_dir is not None:
+        safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
+        cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
+        # Write under a unique temporary name first so a partially written file never carries the cache name.
+        temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
+        os.makedirs(cache_dir, exist_ok=True)
+        with open(temp_file, "wb") as f:
+            f.write(url_data)
+        os.replace(temp_file, cache_file) # atomic
+
+    # Return data as file object.
+    return io.BytesIO(url_data)
diff --git a/docs/stylegan2-teaser-1024x256.png b/docs/stylegan2-teaser-1024x256.png
new file mode 100755
index 0000000..bb16c5f
Binary files /dev/null and b/docs/stylegan2-teaser-1024x256.png differ
diff --git a/docs/stylegan2-training-curves.png b/docs/stylegan2-training-curves.png
new file mode 100755
index 0000000..04fdf4d
Binary files /dev/null and b/docs/stylegan2-training-curves.png differ
diff --git a/docs/versions.html b/docs/versions.html
new file mode 100755
index 0000000..da107ff
--- /dev/null
+++ b/docs/versions.html
@@ -0,0 +1,64 @@
+
+
+
+
+
+ StyleGAN versions
+
+
+
+
+
+Original StyleGAN
+
+
+StyleGAN2
+
+
+
+
diff --git a/metrics/__init__.py b/metrics/__init__.py
new file mode 100755
index 0000000..9ab9908
--- /dev/null
+++ b/metrics/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+# empty
diff --git a/metrics/frechet_inception_distance.py b/metrics/frechet_inception_distance.py
new file mode 100755
index 0000000..ace0d6e
--- /dev/null
+++ b/metrics/frechet_inception_distance.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Frechet Inception Distance (FID)."""
+
+import os
+import numpy as np
+import scipy
+import tensorflow as tf
+import dnnlib.tflib as tflib
+
+from metrics import metric_base
+from training import misc
+
+#----------------------------------------------------------------------------
+
+class FID(metric_base.MetricBase):
+ """Frechet Inception Distance metric evaluated over `num_images` images."""
+
+ def __init__(self, num_images, minibatch_per_gpu, **kwargs):
+ # Remaining keyword arguments are passed on to metric_base.MetricBase.
+ super().__init__(**kwargs)
+ self.num_images = num_images
+ self.minibatch_per_gpu = minibatch_per_gpu
+
+ def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+ """Compute feature statistics for real and generated images and report the distance between them.
+
+ Statistics for the reals are loaded from a cache file when it exists and
+ computed and saved otherwise; the result is handed to self._report_result().
+ """
+ minibatch_size = num_gpus * self.minibatch_per_gpu
+ inception = misc.load_pkl('https://drive.google.com/uc?id=1MzTY44rLToO5APn8TZmfR7_ENSe5aZUn') # inception_v3_features.pkl
+ # One row of features per image; the buffer is reused for reals and then for fakes.
+ activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32)
+
+ # Calculate statistics for reals.
+ cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
+ os.makedirs(os.path.dirname(cache_file), exist_ok=True)
+ if os.path.isfile(cache_file):
+ mu_real, sigma_real = misc.load_pkl(cache_file)
+ else:
+ for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
+ begin = idx * minibatch_size
+ end = min(begin + minibatch_size, self.num_images)
+ # The last minibatch may overshoot num_images; only the first end-begin images are used.
+ activations[begin:end] = inception.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
+ if end == self.num_images:
+ break
+ mu_real = np.mean(activations, axis=0)
+ sigma_real = np.cov(activations, rowvar=False)
+ misc.save_pkl((mu_real, sigma_real), cache_file)
+
+ # Construct TensorFlow graph.
+ # One generator + feature-network clone per GPU, fed with random normal latents and random labels.
+ result_expr = []
+ for gpu_idx in range(num_gpus):
+ with tf.device('/gpu:%d' % gpu_idx):
+ Gs_clone = Gs.clone()
+ inception_clone = inception.clone()
+ latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
+ labels = self._get_random_labels_tf(self.minibatch_per_gpu)
+ images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
+ images = tflib.convert_images_to_uint8(images)
+ result_expr.append(inception_clone.get_output_for(images))
+
+ # Calculate statistics for fakes.
+ for begin in range(0, self.num_images, minibatch_size):
+ self._report_progress(begin, self.num_images)
+ end = min(begin + minibatch_size, self.num_images)
+ # Per-GPU outputs are concatenated and truncated to the rows still needed.
+ activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]
+ mu_fake = np.mean(activations, axis=0)
+ sigma_fake = np.cov(activations, rowvar=False)
+
+ # Calculate FID.
+ # dist = |mu_fake - mu_real|^2 + trace(sigma_fake + sigma_real - 2 * sqrtm(sigma_fake . sigma_real))
+ m = np.square(mu_fake - mu_real).sum()
+ s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member
+ dist = m + np.trace(sigma_fake + sigma_real - 2*s)
+ # Only the real part of the result is reported.
+ self._report_result(np.real(dist))
+
+#----------------------------------------------------------------------------
diff --git a/metrics/inception_score.py b/metrics/inception_score.py
new file mode 100755
index 0000000..ff0543d
--- /dev/null
+++ b/metrics/inception_score.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Inception Score (IS)."""
+
+import numpy as np
+import tensorflow as tf
+import dnnlib.tflib as tflib
+
+from metrics import metric_base
+from training import misc
+
+#----------------------------------------------------------------------------
+
+class IS(metric_base.MetricBase):
+ """Inception Score metric evaluated over `num_images` generated images, reported as mean and std across `num_splits` splits."""
+
+ def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs):
+ # Remaining keyword arguments are passed on to metric_base.MetricBase.
+ super().__init__(**kwargs)
+ self.num_images = num_images
+ self.num_splits = num_splits
+ self.minibatch_per_gpu = minibatch_per_gpu
+
+ def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+ """Run the generator through the classifier network, then report the score's mean and std over the splits."""
+ minibatch_size = num_gpus * self.minibatch_per_gpu
+ inception = misc.load_pkl('https://drive.google.com/uc?id=1Mz9zQnIrusm3duZB91ng_aUIePFNI6Jx') # inception_v3_softmax.pkl
+ # One row of network outputs per generated image.
+ activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32)
+
+ # Construct TensorFlow graph.
+ # One generator + classifier clone per GPU, fed with random normal latents and random labels.
+ result_expr = []
+ for gpu_idx in range(num_gpus):
+ with tf.device('/gpu:%d' % gpu_idx):
+ Gs_clone = Gs.clone()
+ inception_clone = inception.clone()
+ latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
+ labels = self._get_random_labels_tf(self.minibatch_per_gpu)
+ images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
+ images = tflib.convert_images_to_uint8(images)
+ result_expr.append(inception_clone.get_output_for(images))
+
+ # Calculate activations for fakes.
+ for begin in range(0, self.num_images, minibatch_size):
+ self._report_progress(begin, self.num_images)
+ end = min(begin + minibatch_size, self.num_images)
+ # Per-GPU outputs are concatenated and truncated to the rows still needed.
+ activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]
+
+ # Calculate IS.
+ scores = []
+ for i in range(self.num_splits):
+ # Consecutive slice number i of the activations, with integer-division boundaries.
+ part = activations[i * self.num_images // self.num_splits : (i + 1) * self.num_images // self.num_splits]
+ # Per row: sum of p * (log p - log mean(p)); rows are presumably softmax probabilities (per the pickle name) -- TODO confirm.
+ kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))
+ kl = np.mean(np.sum(kl, 1))
+ scores.append(np.exp(kl))
+ self._report_result(np.mean(scores), suffix='_mean')
+ self._report_result(np.std(scores), suffix='_std')
+
+#----------------------------------------------------------------------------
diff --git a/metrics/linear_separability.py b/metrics/linear_separability.py
new file mode 100755
index 0000000..14bfb99
--- /dev/null
+++ b/metrics/linear_separability.py
@@ -0,0 +1,178 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Linear Separability (LS)."""
+
+from collections import defaultdict
+import numpy as np
+import sklearn.svm
+import tensorflow as tf
+import dnnlib.tflib as tflib
+
+from metrics import metric_base
+from training import misc
+
+#----------------------------------------------------------------------------
+
+# Download locations of the attribute classifier pickles. The list position is the
+# attribute index that LS looks up via `classifier_urls[attrib_idx]`; the trailing
+# comment on each entry gives the file name of the pickle behind the URL.
+classifier_urls = [
+    'https://drive.google.com/uc?id=1Q5-AI6TwWhCVM7Muu4tBM7rp5nG_gmCX', # celebahq-classifier-00-male.pkl
+    'https://drive.google.com/uc?id=1Q5c6HE__ReW2W8qYAXpao68V1ryuisGo', # celebahq-classifier-01-smiling.pkl
+    'https://drive.google.com/uc?id=1Q7738mgWTljPOJQrZtSMLxzShEhrvVsU', # celebahq-classifier-02-attractive.pkl
+    'https://drive.google.com/uc?id=1QBv2Mxe7ZLvOv1YBTLq-T4DS3HjmXV0o', # celebahq-classifier-03-wavy-hair.pkl
+    'https://drive.google.com/uc?id=1QIvKTrkYpUrdA45nf7pspwAqXDwWOLhV', # celebahq-classifier-04-young.pkl
+    'https://drive.google.com/uc?id=1QJPH5rW7MbIjFUdZT7vRYfyUjNYDl4_L', # celebahq-classifier-05-5-o-clock-shadow.pkl
+    'https://drive.google.com/uc?id=1QPZXSYf6cptQnApWS_T83sqFMun3rULY', # celebahq-classifier-06-arched-eyebrows.pkl
+    'https://drive.google.com/uc?id=1QPgoAZRqINXk_PFoQ6NwMmiJfxc5d2Pg', # celebahq-classifier-07-bags-under-eyes.pkl
+    'https://drive.google.com/uc?id=1QQPQgxgI6wrMWNyxFyTLSgMVZmRr1oO7', # celebahq-classifier-08-bald.pkl
+    'https://drive.google.com/uc?id=1QcSphAmV62UrCIqhMGgcIlZfoe8hfWaF', # celebahq-classifier-09-bangs.pkl
+    'https://drive.google.com/uc?id=1QdWTVwljClTFrrrcZnPuPOR4mEuz7jGh', # celebahq-classifier-10-big-lips.pkl
+    'https://drive.google.com/uc?id=1QgvEWEtr2mS4yj1b_Y3WKe6cLWL3LYmK', # celebahq-classifier-11-big-nose.pkl
+    'https://drive.google.com/uc?id=1QidfMk9FOKgmUUIziTCeo8t-kTGwcT18', # celebahq-classifier-12-black-hair.pkl
+    'https://drive.google.com/uc?id=1QthrJt-wY31GPtV8SbnZQZ0_UEdhasHO', # celebahq-classifier-13-blond-hair.pkl
+    'https://drive.google.com/uc?id=1QvCAkXxdYT4sIwCzYDnCL9Nb5TDYUxGW', # celebahq-classifier-14-blurry.pkl
+    'https://drive.google.com/uc?id=1QvLWuwSuWI9Ln8cpxSGHIciUsnmaw8L0', # celebahq-classifier-15-brown-hair.pkl
+    'https://drive.google.com/uc?id=1QxW6THPI2fqDoiFEMaV6pWWHhKI_OoA7', # celebahq-classifier-16-bushy-eyebrows.pkl
+    'https://drive.google.com/uc?id=1R71xKw8oTW2IHyqmRDChhTBkW9wq4N9v', # celebahq-classifier-17-chubby.pkl
+    'https://drive.google.com/uc?id=1RDn_fiLfEGbTc7JjazRXuAxJpr-4Pl67', # celebahq-classifier-18-double-chin.pkl
+    'https://drive.google.com/uc?id=1RGBuwXbaz5052bM4VFvaSJaqNvVM4_cI', # celebahq-classifier-19-eyeglasses.pkl
+    'https://drive.google.com/uc?id=1RIxOiWxDpUwhB-9HzDkbkLegkd7euRU9', # celebahq-classifier-20-goatee.pkl
+    'https://drive.google.com/uc?id=1RPaNiEnJODdr-fwXhUFdoSQLFFZC7rC-', # celebahq-classifier-21-gray-hair.pkl
+    'https://drive.google.com/uc?id=1RQH8lPSwOI2K_9XQCZ2Ktz7xm46o80ep', # celebahq-classifier-22-heavy-makeup.pkl
+    'https://drive.google.com/uc?id=1RXZM61xCzlwUZKq-X7QhxOg0D2telPow', # celebahq-classifier-23-high-cheekbones.pkl
+    'https://drive.google.com/uc?id=1RgASVHW8EWMyOCiRb5fsUijFu-HfxONM', # celebahq-classifier-24-mouth-slightly-open.pkl
+    'https://drive.google.com/uc?id=1RkC8JLqLosWMaRne3DARRgolhbtg_wnr', # celebahq-classifier-25-mustache.pkl
+    'https://drive.google.com/uc?id=1RqtbtFT2EuwpGTqsTYJDyXdnDsFCPtLO', # celebahq-classifier-26-narrow-eyes.pkl
+    'https://drive.google.com/uc?id=1Rs7hU-re8bBMeRHR-fKgMbjPh-RIbrsh', # celebahq-classifier-27-no-beard.pkl
+    'https://drive.google.com/uc?id=1RynDJQWdGOAGffmkPVCrLJqy_fciPF9E', # celebahq-classifier-28-oval-face.pkl
+    'https://drive.google.com/uc?id=1S0TZ_Hdv5cb06NDaCD8NqVfKy7MuXZsN', # celebahq-classifier-29-pale-skin.pkl
+    'https://drive.google.com/uc?id=1S3JPhZH2B4gVZZYCWkxoRP11q09PjCkA', # celebahq-classifier-30-pointy-nose.pkl
+    'https://drive.google.com/uc?id=1S3pQuUz-Jiywq_euhsfezWfGkfzLZ87W', # celebahq-classifier-31-receding-hairline.pkl
+    'https://drive.google.com/uc?id=1S6nyIl_SEI3M4l748xEdTV2vymB_-lrY', # celebahq-classifier-32-rosy-cheeks.pkl
+    'https://drive.google.com/uc?id=1S9P5WCi3GYIBPVYiPTWygrYIUSIKGxbU', # celebahq-classifier-33-sideburns.pkl
+    'https://drive.google.com/uc?id=1SANviG-pp08n7AFpE9wrARzozPIlbfCH', # celebahq-classifier-34-straight-hair.pkl
+    'https://drive.google.com/uc?id=1SArgyMl6_z7P7coAuArqUC2zbmckecEY', # celebahq-classifier-35-wearing-earrings.pkl
+    'https://drive.google.com/uc?id=1SC5JjS5J-J4zXFO9Vk2ZU2DT82TZUza_', # celebahq-classifier-36-wearing-hat.pkl
+    'https://drive.google.com/uc?id=1SDAQWz03HGiu0MSOKyn7gvrp3wdIGoj-', # celebahq-classifier-37-wearing-lipstick.pkl
+    'https://drive.google.com/uc?id=1SEtrVK-TQUC0XeGkBE9y7L8VXfbchyKX', # celebahq-classifier-38-wearing-necklace.pkl
+    'https://drive.google.com/uc?id=1SF_mJIdyGINXoV-I6IAxHB_k5dxiF6M-', # celebahq-classifier-39-wearing-necktie.pkl
+]
+
+#----------------------------------------------------------------------------
+
+def prob_normalize(p):
+ p = np.asarray(p).astype(np.float32)
+ assert len(p.shape) == 2
+ return p / np.sum(p)
+
+def mutual_information(p):
+ p = prob_normalize(p)
+ px = np.sum(p, axis=1)
+ py = np.sum(p, axis=0)
+ result = 0.0
+ for x in range(p.shape[0]):
+ p_x = px[x]
+ for y in range(p.shape[1]):
+ p_xy = p[x][y]
+ p_y = py[y]
+ if p_xy > 0.0:
+ result += p_xy * np.log2(p_xy / (p_x * p_y)) # get bits as output
+ return result
+
+def entropy(p):
+ p = prob_normalize(p)
+ result = 0.0
+ for x in range(p.shape[0]):
+ for y in range(p.shape[1]):
+ p_xy = p[x][y]
+ if p_xy > 0.0:
+ result -= p_xy * np.log2(p_xy)
+ return result
+
+def conditional_entropy(p):
+ # H(Y|X) where X corresponds to axis 0, Y to axis 1
+ # i.e., How many bits of additional information are needed to where we are on axis 1 if we know where we are on axis 0?
+ p = prob_normalize(p)
+ y = np.sum(p, axis=0, keepdims=True) # marginalize to calculate H(Y)
+ return max(0.0, entropy(y) - mutual_information(p)) # can slip just below 0 due to FP inaccuracies, clean those up.
+
+#----------------------------------------------------------------------------
+
+class LS(metric_base.MetricBase):
+ def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs):
+ assert num_keep <= num_samples
+ super().__init__(**kwargs)
+ self.num_samples = num_samples
+ self.num_keep = num_keep
+ self.attrib_indices = attrib_indices
+ self.minibatch_per_gpu = minibatch_per_gpu
+
+ def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+ minibatch_size = num_gpus * self.minibatch_per_gpu
+
+ # Construct TensorFlow graph for each GPU.
+ result_expr = []
+ for gpu_idx in range(num_gpus):
+ with tf.device('/gpu:%d' % gpu_idx):
+ Gs_clone = Gs.clone()
+
+ # Generate images.
+ latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
+ labels = self._get_random_labels_tf(self.minibatch_per_gpu)
+ dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs)
+ images = Gs_clone.get_output_for(latents, None, **Gs_kwargs)
+
+ # Downsample to 256x256. The attribute classifiers were built for 256x256.
+ if images.shape[2] > 256:
+ factor = images.shape[2] // 256
+ images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
+ images = tf.reduce_mean(images, axis=[3, 5])
+
+ # Run classifier for each attribute.
+ result_dict = dict(latents=latents, dlatents=dlatents[:,-1])
+ for attrib_idx in self.attrib_indices:
+ classifier = misc.load_pkl(classifier_urls[attrib_idx])
+ logits = classifier.get_output_for(images, None)
+ predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
+ result_dict[attrib_idx] = predictions
+ result_expr.append(result_dict)
+
+ # Sampling loop.
+ results = []
+ for begin in range(0, self.num_samples, minibatch_size):
+ self._report_progress(begin, self.num_samples)
+ results += tflib.run(result_expr)
+ results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}
+
+ # Calculate conditional entropy for each attribute.
+ conditional_entropies = defaultdict(list)
+ for attrib_idx in self.attrib_indices:
+ # Prune the least confident samples.
+ pruned_indices = list(range(self.num_samples))
+ pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i]))
+ pruned_indices = pruned_indices[:self.num_keep]
+
+ # Fit SVM to the remaining samples.
+ svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
+ for space in ['latents', 'dlatents']:
+ svm_inputs = results[space][pruned_indices]
+ try:
+ svm = sklearn.svm.LinearSVC()
+ svm.fit(svm_inputs, svm_targets)
+ svm.score(svm_inputs, svm_targets)
+ svm_outputs = svm.predict(svm_inputs)
+ except:
+ svm_outputs = svm_targets # assume perfect prediction
+
+ # Calculate conditional entropy.
+ p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)]
+ conditional_entropies[space].append(conditional_entropy(p))
+
+ # Calculate separability scores.
+ scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()}
+ self._report_result(scores['latents'], suffix='_z')
+ self._report_result(scores['dlatents'], suffix='_w')
+
+#----------------------------------------------------------------------------
diff --git a/metrics/metric_base.py b/metrics/metric_base.py
new file mode 100755
index 0000000..cbd0276
--- /dev/null
+++ b/metrics/metric_base.py
@@ -0,0 +1,168 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Common definitions for GAN metrics."""
+
+import os
+import time
+import hashlib
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+
+from training import misc
+from training import dataset
+
+#----------------------------------------------------------------------------
+# Base class for metrics.
+
+class MetricBase:
+ def __init__(self, name):
+ self.name = name
+ self._dataset_obj = None
+ self._progress_lo = None
+ self._progress_hi = None
+ self._progress_max = None
+ self._progress_sec = None
+ self._progress_time = None
+ self._reset()
+
+ def close(self):
+ self._reset()
+
+ def _reset(self, network_pkl=None, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None):
+ if self._dataset_obj is not None:
+ self._dataset_obj.close()
+
+ self._network_pkl = network_pkl
+ self._data_dir = data_dir
+ self._dataset_args = dataset_args
+ self._dataset_obj = None
+ self._mirror_augment = mirror_augment
+ self._eval_time = 0
+ self._results = []
+
+ if (dataset_args is None or mirror_augment is None) and run_dir is not None:
+ run_config = misc.parse_config_for_previous_run(run_dir)
+ self._dataset_args = dict(run_config['dataset'])
+ self._dataset_args['shuffle_mb'] = 0
+ self._mirror_augment = run_config['train'].get('mirror_augment', False)
+
+ def configure_progress_reports(self, plo, phi, pmax, psec=15):
+ self._progress_lo = plo
+ self._progress_hi = phi
+ self._progress_max = pmax
+ self._progress_sec = psec
+
+ def run(self, network_pkl, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None, num_gpus=1, tf_config=None, log_results=True, Gs_kwargs=dict(is_validation=True)):
+ self._reset(network_pkl=network_pkl, run_dir=run_dir, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment)
+ time_begin = time.time()
+ with tf.Graph().as_default(), tflib.create_session(tf_config).as_default(): # pylint: disable=not-context-manager
+ self._report_progress(0, 1)
+ _G, _D, Gs = misc.load_pkl(self._network_pkl)
+ self._evaluate(Gs, Gs_kwargs=Gs_kwargs, num_gpus=num_gpus)
+ self._report_progress(1, 1)
+ self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init
+
+ if log_results:
+ if run_dir is not None:
+ log_file = os.path.join(run_dir, 'metric-%s.txt' % self.name)
+ with dnnlib.util.Logger(log_file, 'a'):
+ print(self.get_result_str().strip())
+ else:
+ print(self.get_result_str().strip())
+
+ def get_result_str(self):
+ network_name = os.path.splitext(os.path.basename(self._network_pkl))[0]
+ if len(network_name) > 29:
+ network_name = '...' + network_name[-26:]
+ result_str = '%-30s' % network_name
+ result_str += ' time %-12s' % dnnlib.util.format_time(self._eval_time)
+ for res in self._results:
+ result_str += ' ' + self.name + res.suffix + ' '
+ result_str += res.fmt % res.value
+ return result_str
+
+ def update_autosummaries(self):
+ for res in self._results:
+ tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value)
+
+ def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+ raise NotImplementedError # to be overridden by subclasses
+
+ def _report_result(self, value, suffix='', fmt='%-10.4f'):
+ self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)]
+
+ def _report_progress(self, pcur, pmax, status_str=''):
+ if self._progress_lo is None or self._progress_hi is None or self._progress_max is None:
+ return
+ t = time.time()
+ if self._progress_sec is not None and self._progress_time is not None and t < self._progress_time + self._progress_sec:
+ return
+ self._progress_time = t
+ val = self._progress_lo + (pcur / pmax) * (self._progress_hi - self._progress_lo)
+ dnnlib.RunContext.get().update(status_str, int(val), self._progress_max)
+
+ def _get_cache_file_for_reals(self, extension='pkl', **kwargs):
+ all_args = dnnlib.EasyDict(metric_name=self.name, mirror_augment=self._mirror_augment)
+ all_args.update(self._dataset_args)
+ all_args.update(kwargs)
+ md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8'))
+ dataset_name = self._dataset_args.get('tfrecord_dir', None) or self._dataset_args.get('h5_file', None)
+ dataset_name = os.path.splitext(os.path.basename(dataset_name))[0]
+ return os.path.join('.stylegan2-cache', '%s-%s-%s.%s' % (md5.hexdigest(), self.name, dataset_name, extension))
+
+ def _get_dataset_obj(self):
+ if self._dataset_obj is None:
+ self._dataset_obj = dataset.load_dataset(data_dir=self._data_dir, **self._dataset_args)
+ return self._dataset_obj
+
+ def _iterate_reals(self, minibatch_size):
+ dataset_obj = self._get_dataset_obj()
+ while True:
+ images, _labels = dataset_obj.get_minibatch_np(minibatch_size)
+ if self._mirror_augment:
+ images = misc.apply_mirror_augment(images)
+ yield images
+
+ def _iterate_fakes(self, Gs, minibatch_size, num_gpus):
+ while True:
+ latents = np.random.randn(minibatch_size, *Gs.input_shape[1:])
+ fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
+ images = Gs.run(latents, None, output_transform=fmt, is_validation=True, num_gpus=num_gpus, assume_frozen=True)
+ yield images
+
+ def _get_random_labels_tf(self, minibatch_size):
+ return self._get_dataset_obj().get_random_labels_tf(minibatch_size)
+
+#----------------------------------------------------------------------------
+# Group of multiple metrics.
+
+class MetricGroup:
+ def __init__(self, metric_kwarg_list):
+ self.metrics = [dnnlib.util.call_func_by_name(**kwargs) for kwargs in metric_kwarg_list]
+
+ def run(self, *args, **kwargs):
+ for metric in self.metrics:
+ metric.run(*args, **kwargs)
+
+ def get_result_str(self):
+ return ' '.join(metric.get_result_str() for metric in self.metrics)
+
+ def update_autosummaries(self):
+ for metric in self.metrics:
+ metric.update_autosummaries()
+
+#----------------------------------------------------------------------------
+# Dummy metric for debugging purposes.
+
+class DummyMetric(MetricBase):
+ def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+ _ = Gs, Gs_kwargs, num_gpus
+ self._report_result(0.0)
+
+#----------------------------------------------------------------------------
diff --git a/metrics/metric_defaults.py b/metrics/metric_defaults.py
new file mode 100755
index 0000000..4371db8
--- /dev/null
+++ b/metrics/metric_defaults.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Default metric definitions."""
+
+from dnnlib import EasyDict
+
+#----------------------------------------------------------------------------
+
+# Table of predefined metric configurations, keyed by each entry's `name`.
+# `func_name` is the dotted path of the metric class; the remaining fields are
+# the settings stored alongside it for that metric.
+metric_defaults = EasyDict([(args.name, args) for args in [
+    EasyDict(name='fid50k', func_name='metrics.frechet_inception_distance.FID', num_images=50000, minibatch_per_gpu=8),
+    EasyDict(name='is50k', func_name='metrics.inception_score.IS', num_images=50000, num_splits=10, minibatch_per_gpu=8),
+    EasyDict(name='ppl_zfull', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='full', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')),
+    EasyDict(name='ppl_wfull', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='full', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')),
+    EasyDict(name='ppl_zend', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='z', sampling='end', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')),
+    EasyDict(name='ppl_wend', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=True, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')),
+    EasyDict(name='ppl2_wend', func_name='metrics.perceptual_path_length.PPL', num_samples=50000, epsilon=1e-4, space='w', sampling='end', crop=False, minibatch_per_gpu=4, Gs_overrides=dict(dtype='float32', mapping_dtype='float32')),
+    EasyDict(name='ls', func_name='metrics.linear_separability.LS', num_samples=200000, num_keep=100000, attrib_indices=range(40), minibatch_per_gpu=4),
+    EasyDict(name='pr50k3', func_name='metrics.precision_recall.PR', num_images=50000, nhood_size=3, minibatch_per_gpu=8, row_batch_size=10000, col_batch_size=10000),
+]])
+
+#----------------------------------------------------------------------------
diff --git a/metrics/perceptual_path_length.py b/metrics/perceptual_path_length.py
new file mode 100755
index 0000000..2e5e4d9
--- /dev/null
+++ b/metrics/perceptual_path_length.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Perceptual Path Length (PPL)."""
+
+import numpy as np
+import tensorflow as tf
+import dnnlib.tflib as tflib
+
+from metrics import metric_base
+from training import misc
+
+#----------------------------------------------------------------------------
+
+# Normalize batch of vectors.
+def normalize(v):
+ return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True))
+
+# Spherical interpolation of a batch of vectors.
+def slerp(a, b, t):
+ a = normalize(a)
+ b = normalize(b)
+ d = tf.reduce_sum(a * b, axis=-1, keepdims=True)
+ p = t * tf.math.acos(d)
+ c = normalize(b - d * a)
+ d = a * tf.math.cos(p) + c * tf.math.sin(p)
+ return normalize(d)
+
+#----------------------------------------------------------------------------
+
+class PPL(metric_base.MetricBase):
+    """Perceptual Path Length metric.
+
+    Measures the perceptual distance between images generated from two latent
+    points that lie `epsilon` apart on an interpolation path, scaled by
+    1 / epsilon**2, and reports the mean after rejecting outliers.
+
+    Args:
+        num_samples:       Number of distance samples to collect (at least).
+        epsilon:           Step between the two interpolation positions.
+        space:             'z' (slerp on input latents) or 'w' (lerp on mapped dlatents).
+        sampling:          'full' draws the path position from [0, 1); 'end' fixes it to 0.
+        crop:              Whether to crop the images before measuring distance.
+        minibatch_per_gpu: Number of image pairs evaluated per GPU per step.
+        Gs_overrides:      Entries merged over `Gs_kwargs` for this metric.
+    """
+    def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, Gs_overrides, **kwargs):
+        assert space in ['z', 'w']
+        assert sampling in ['full', 'end']
+        super().__init__(**kwargs)
+        self.num_samples = num_samples
+        self.epsilon = epsilon
+        self.space = space
+        self.sampling = sampling
+        self.crop = crop
+        self.minibatch_per_gpu = minibatch_per_gpu
+        self.Gs_overrides = Gs_overrides
+
+    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+        # Work on a copy so that the caller's dict is not modified by the overrides.
+        Gs_kwargs = dict(Gs_kwargs)
+        Gs_kwargs.update(self.Gs_overrides)
+        minibatch_size = num_gpus * self.minibatch_per_gpu
+
+        # Construct TensorFlow graph.
+        distance_expr = []
+        for gpu_idx in range(num_gpus):
+            with tf.device('/gpu:%d' % gpu_idx):
+                Gs_clone = Gs.clone()
+                noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')]
+
+                # Generate random latents and interpolation t-values.
+                # Latents are laid out as interleaved pairs: even rows are path starts, odd rows are path ends.
+                lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:])
+                # With sampling == 'end' both bounds are 0.0.
+                lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0)
+                # Repeat every label twice so that both members of a pair share the same label.
+                labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1])
+
+                # Interpolate in W or Z.
+                if self.space == 'w':
+                    dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **Gs_kwargs)
+                    dlat_t01 = tf.cast(dlat_t01, tf.float32)
+                    dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
+                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
+                    dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
+                    # Re-interleave the two positions: (e0, e1) of each pair end up on adjacent rows.
+                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape)
+                else: # space == 'z'
+                    lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
+                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
+                    lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
+                    # Re-interleave the two positions: (e0, e1) of each pair end up on adjacent rows.
+                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape)
+                    dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **Gs_kwargs)
+
+                # Synthesize images.
+                with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch
+                    images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **Gs_kwargs)
+                    images = tf.cast(images, tf.float32)
+
+                # Crop only the face region.
+                if self.crop:
+                    c = int(images.shape[2] // 8)
+                    images = images[:, :, c*3 : c*7, c*2 : c*6]
+
+                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
+                factor = images.shape[2] // 256
+                if factor > 1:
+                    # Average over factor x factor blocks.
+                    images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
+                    images = tf.reduce_mean(images, axis=[3,5])
+
+                # Scale dynamic range from [-1,1] to [0,255] for VGG.
+                images = (images + 1) * (255 / 2)
+
+                # Evaluate perceptual distance.
+                img_e0, img_e1 = images[0::2], images[1::2]
+                distance_measure = misc.load_pkl('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2') # vgg16_zhang_perceptual.pkl
+                distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))
+
+        # Sampling loop.
+        # Every iteration contributes a full minibatch, so the total may exceed num_samples.
+        all_distances = []
+        for begin in range(0, self.num_samples, minibatch_size):
+            self._report_progress(begin, self.num_samples)
+            all_distances += tflib.run(distance_expr)
+        all_distances = np.concatenate(all_distances, axis=0)
+
+        # Reject outliers.
+        # Only distances between the 1st and 99th percentile (inclusive) are kept.
+        lo = np.percentile(all_distances, 1, interpolation='lower')
+        hi = np.percentile(all_distances, 99, interpolation='higher')
+        filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances)
+        self._report_result(np.mean(filtered_distances))
+
+#----------------------------------------------------------------------------
diff --git a/metrics/precision_recall.py b/metrics/precision_recall.py
new file mode 100755
index 0000000..addf9bd
--- /dev/null
+++ b/metrics/precision_recall.py
@@ -0,0 +1,224 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Precision/Recall (PR)."""
+
+import os
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+
+from metrics import metric_base
+from training import misc
+
+#----------------------------------------------------------------------------
+
+def batch_pairwise_distances(U, V):
+ """ Compute pairwise distances between two batches of feature vectors."""
+ with tf.variable_scope('pairwise_dist_block'):
+ # Squared norms of each row in U and V.
+ norm_u = tf.reduce_sum(tf.square(U), 1)
+ norm_v = tf.reduce_sum(tf.square(V), 1)
+
+ # norm_u as a row and norm_v as a column vectors.
+ norm_u = tf.reshape(norm_u, [-1, 1])
+ norm_v = tf.reshape(norm_v, [1, -1])
+
+ # Pairwise squared Euclidean distances.
+ D = tf.maximum(norm_u - 2*tf.matmul(U, V, False, True) + norm_v, 0.0)
+
+ return D
+
+#----------------------------------------------------------------------------
+
+class DistanceBlock():
+ """Distance block."""
+ def __init__(self, num_features, num_gpus):
+ self.num_features = num_features
+ self.num_gpus = num_gpus
+
+ # Initialize TF graph to calculate pairwise distances.
+ with tf.device('/cpu:0'):
+ self._features_batch1 = tf.placeholder(tf.float16, shape=[None, self.num_features])
+ self._features_batch2 = tf.placeholder(tf.float16, shape=[None, self.num_features])
+ features_split2 = tf.split(self._features_batch2, self.num_gpus, axis=0)
+ distances_split = []
+ for gpu_idx in range(self.num_gpus):
+ with tf.device('/gpu:%d' % gpu_idx):
+ distances_split.append(batch_pairwise_distances(self._features_batch1, features_split2[gpu_idx]))
+ self._distance_block = tf.concat(distances_split, axis=1)
+
+ def pairwise_distances(self, U, V):
+ """Evaluate pairwise distances between two batches of feature vectors."""
+ return self._distance_block.eval(feed_dict={self._features_batch1: U, self._features_batch2: V})
+
+#----------------------------------------------------------------------------
+
+class ManifoldEstimator():
+ """Finds an estimate for the manifold of given feature vectors."""
+ def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None):
+ """Find an estimate of the manifold of given feature vectors."""
+ num_images = features.shape[0]
+ self.nhood_sizes = nhood_sizes
+ self.num_nhoods = len(nhood_sizes)
+ self.row_batch_size = row_batch_size
+ self.col_batch_size = col_batch_size
+ self._ref_features = features
+ self._distance_block = distance_block
+
+ # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample.
+ self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16)
+ distance_batch = np.zeros([row_batch_size, num_images], dtype=np.float16)
+ seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)
+
+ for begin1 in range(0, num_images, row_batch_size):
+ end1 = min(begin1 + row_batch_size, num_images)
+ row_batch = features[begin1:end1]
+
+ for begin2 in range(0, num_images, col_batch_size):
+ end2 = min(begin2 + col_batch_size, num_images)
+ col_batch = features[begin2:end2]
+
+ # Compute distances between batches.
+ distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(row_batch, col_batch)
+
+ # Find the kth nearest neighbor from the current batch.
+ self.D[begin1:end1, :] = np.partition(distance_batch[0:end1-begin1, :], seq, axis=1)[:, self.nhood_sizes]
+
+ if clamp_to_percentile is not None:
+ max_distances = np.percentile(self.D, clamp_to_percentile, axis=0)
+ self.D[self.D > max_distances] = 0 #max_distances # 0
+
+ def evaluate(self, eval_features, return_realism=False, return_neighbors=False):
+ """Evaluate if new feature vectors are in the estimated manifold."""
+ num_eval_images = eval_features.shape[0]
+ num_ref_images = self.D.shape[0]
+ distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16)
+ batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
+ #max_realism_score = np.zeros([num_eval_images,], dtype=np.float32)
+ realism_score = np.zeros([num_eval_images,], dtype=np.float32)
+ nearest_indices = np.zeros([num_eval_images,], dtype=np.int32)
+
+ for begin1 in range(0, num_eval_images, self.row_batch_size):
+ end1 = min(begin1 + self.row_batch_size, num_eval_images)
+ feature_batch = eval_features[begin1:end1]
+
+ for begin2 in range(0, num_ref_images, self.col_batch_size):
+ end2 = min(begin2 + self.col_batch_size, num_ref_images)
+ ref_batch = self._ref_features[begin2:end2]
+
+ distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(feature_batch, ref_batch)
+
+ # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
+ # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold.
+ # The radii of the hyperspheres are determined from distances of neighborhood size k.
+ samples_in_manifold = distance_batch[0:end1-begin1, :, None] <= self.D
+ batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)
+
+ #max_realism_score[begin1:end1] = np.max(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1)
+ #nearest_indices[begin1:end1] = np.argmax(self.D[:, 0] / (distance_batch[0:end1-begin1, :] + 1e-18), axis=1)
+ nearest_indices[begin1:end1] = np.argmin(distance_batch[0:end1-begin1, :], axis=1)
+ realism_score[begin1:end1] = self.D[nearest_indices[begin1:end1], 0] / np.min(distance_batch[0:end1-begin1, :], axis=1)
+
+ if return_realism and return_neighbors:
+ return batch_predictions, realism_score, nearest_indices
+ elif return_realism:
+ return batch_predictions, realism_score
+ elif return_neighbors:
+ return batch_predictions, nearest_indices
+
+ return batch_predictions
+
+#----------------------------------------------------------------------------
+
+def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes,
+ row_batch_size, col_batch_size, num_gpus):
+ """Calculates k-NN precision and recall for two sets of feature vectors."""
+ state = dnnlib.EasyDict()
+ #num_images = ref_features.shape[0]
+ num_features = feature_net.output_shape[1]
+ state.ref_features = ref_features
+ state.eval_features = eval_features
+
+ # Initialize DistanceBlock and ManifoldEstimators.
+ distance_block = DistanceBlock(num_features, num_gpus)
+ state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, row_batch_size, col_batch_size, nhood_sizes)
+ state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, row_batch_size, col_batch_size, nhood_sizes)
+
+ # Evaluate precision and recall using k-nearest neighbors.
+ #print('Evaluating k-NN precision and recall with %i samples...' % num_images)
+ #start = time.time()
+
+ # Precision: How many points from eval_features are in ref_features manifold.
+ state.precision, state.realism_scores, state.nearest_neighbors = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True)
+ state.knn_precision = state.precision.mean(axis=0)
+
+ # Recall: How many points from ref_features are in eval_features manifold.
+ state.recall = state.eval_manifold.evaluate(state.ref_features)
+ state.knn_recall = state.recall.mean(axis=0)
+
+ #elapsed_time = time.time() - start
+ #print('Done evaluation in: %gs' % elapsed_time)
+
+ return state
+
+#----------------------------------------------------------------------------
+
+class PR(metric_base.MetricBase):
+ def __init__(self, num_images, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs):
+ super().__init__(**kwargs)
+ self.num_images = num_images
+ self.nhood_size = nhood_size
+ self.minibatch_per_gpu = minibatch_per_gpu
+ self.row_batch_size = row_batch_size
+ self.col_batch_size = col_batch_size
+
+ def _evaluate(self, Gs, Gs_kwargs, num_gpus):
+ minibatch_size = num_gpus * self.minibatch_per_gpu
+ feature_net = misc.load_pkl('https://drive.google.com/uc?id=1MzY4MFpZzE-mNS26pzhYlWN-4vMm2ytu') # vgg16.pkl
+
+ # Calculate features for reals.
+ cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
+ os.makedirs(os.path.dirname(cache_file), exist_ok=True)
+ if os.path.isfile(cache_file):
+ ref_features = misc.load_pkl(cache_file)
+ else:
+ ref_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
+ for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
+ begin = idx * minibatch_size
+ end = min(begin + minibatch_size, self.num_images)
+ ref_features[begin:end] = feature_net.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
+ if end == self.num_images:
+ break
+ misc.save_pkl(ref_features, cache_file)
+
+ # Construct TensorFlow graph.
+ result_expr = []
+ for gpu_idx in range(num_gpus):
+ with tf.device('/gpu:%d' % gpu_idx):
+ Gs_clone = Gs.clone()
+ feature_net_clone = feature_net.clone()
+ latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
+ labels = self._get_random_labels_tf(self.minibatch_per_gpu)
+ images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
+ images = tflib.convert_images_to_uint8(images)
+ result_expr.append(feature_net_clone.get_output_for(images))
+
+ # Calculate features for fakes.
+ eval_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
+ for begin in range(0, self.num_images, minibatch_size):
+ self._report_progress(begin, self.num_images)
+ end = min(begin + minibatch_size, self.num_images)
+ eval_features[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]
+
+ # Calculate precision and recall.
+ state = knn_precision_recall_features(ref_features=ref_features, eval_features=eval_features, feature_net=feature_net,
+ nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.row_batch_size, num_gpus=num_gpus)
+ self._report_result(state.knn_precision[0], suffix='_precision')
+ self._report_result(state.knn_recall[0], suffix='_recall')
+
+#----------------------------------------------------------------------------
diff --git a/pretrained_networks.py b/pretrained_networks.py
new file mode 100755
index 0000000..fec8d58
--- /dev/null
+++ b/pretrained_networks.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""List of pre-trained StyleGAN2 networks located on Google Drive."""
+
+import pickle
+import dnnlib
+import dnnlib.tflib as tflib
+
+#----------------------------------------------------------------------------
+# StyleGAN2 Google Drive root: https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7
+
+# Lookup table from symbolic 'gdrive:networks/...' names to direct Google Drive
+# download URLs. get_path_or_url() consults this table and passes any name that
+# is not listed here through unchanged.
+gdrive_urls = {
+ 'gdrive:networks/stylegan2-car-config-a.pkl': 'https://drive.google.com/uc?id=1MhZpQAqgxKTz22u_urk0HSXA-BOLMCLV',
+ 'gdrive:networks/stylegan2-car-config-b.pkl': 'https://drive.google.com/uc?id=1MirO1UBmfF4c-aZDDrfyknOj8iO8Qvb2',
+ 'gdrive:networks/stylegan2-car-config-c.pkl': 'https://drive.google.com/uc?id=1MlFg5VVajuPyPkFt3f1HGiJ6OBWAPdaJ',
+ 'gdrive:networks/stylegan2-car-config-d.pkl': 'https://drive.google.com/uc?id=1MpM83SpDgitOab_icAWU12D5P2ZpCHFl',
+ 'gdrive:networks/stylegan2-car-config-e.pkl': 'https://drive.google.com/uc?id=1MpsFaO0BFo3qhor0MN0rnPFQCr_JpqLm',
+ 'gdrive:networks/stylegan2-car-config-f.pkl': 'https://drive.google.com/uc?id=1MutzVf8XjNo6TUg03a6CUU_2Vlc0ltbV',
+ 'gdrive:networks/stylegan2-cat-config-a.pkl': 'https://drive.google.com/uc?id=1MvGHMNicQjhOdGs94Zs7fw6D9F7ikJeO',
+ 'gdrive:networks/stylegan2-cat-config-f.pkl': 'https://drive.google.com/uc?id=1MyowTZGvMDJCWuT7Yg2e_GnTLIzcSPCy',
+ 'gdrive:networks/stylegan2-church-config-a.pkl': 'https://drive.google.com/uc?id=1N2g_buEUxCkbb7Bfpjbj0TDeKf1Vrzdx',
+ 'gdrive:networks/stylegan2-church-config-f.pkl': 'https://drive.google.com/uc?id=1N3iaujGpwa6vmKCqRSHcD6GZ2HVV8h1f',
+ 'gdrive:networks/stylegan2-ffhq-config-a.pkl': 'https://drive.google.com/uc?id=1MR3Ogs9XQlupSF_al-nGIAh797Cp5nKA',
+ 'gdrive:networks/stylegan2-ffhq-config-b.pkl': 'https://drive.google.com/uc?id=1MW5O1rxT8CsPfJ9i7HF6Xr0qD8EKw5Op',
+ 'gdrive:networks/stylegan2-ffhq-config-c.pkl': 'https://drive.google.com/uc?id=1MWfZdKNqWHv8h2K708im70lx0MDcP6ow',
+ 'gdrive:networks/stylegan2-ffhq-config-d.pkl': 'https://drive.google.com/uc?id=1MbdyjloQxe4pdAUnad-M08EZBxeYAIOr',
+ 'gdrive:networks/stylegan2-ffhq-config-e.pkl': 'https://drive.google.com/uc?id=1Md448HIgwM5eCdz39vk-m5pRbJ3YqQow',
+ 'gdrive:networks/stylegan2-ffhq-config-f.pkl': 'https://drive.google.com/uc?id=1Mgh-jglZjgksupF0XLl0KzuOqd1LXcoE',
+ 'gdrive:networks/stylegan2-horse-config-a.pkl': 'https://drive.google.com/uc?id=1N4lnXL3ezv1aeQVoGY6KBen185MTvWOu',
+ 'gdrive:networks/stylegan2-horse-config-f.pkl': 'https://drive.google.com/uc?id=1N55ZtBhEyEbDn6uKBjCNAew1phD5ZAh-',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gorig-Dorig.pkl': 'https://drive.google.com/uc?id=1NuS7MSsVcP17dgPX_pLMPtIf5ElcE3jJ',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gorig-Dresnet.pkl': 'https://drive.google.com/uc?id=1O7BD5yqSk87cjVQcOlLEGUeztOaC-Cyw',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gorig-Dskip.pkl': 'https://drive.google.com/uc?id=1O2NjtullNlymC3ZOUpULCeMtvkCottnn',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gresnet-Dorig.pkl': 'https://drive.google.com/uc?id=1OMe7OaicfJn8KUT2ZjwKNxioJJZz5QrI',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gresnet-Dresnet.pkl': 'https://drive.google.com/uc?id=1OpogMnDdehK5b2pqBbvypYvm3arrhCtv',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gresnet-Dskip.pkl': 'https://drive.google.com/uc?id=1OZjZD4-6B7W-WUlsLqXUHoM0XnPPtYQb',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gskip-Dorig.pkl': 'https://drive.google.com/uc?id=1O7CVde1j-zh7lMX-gXGusRRSpY-0NY8L',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gskip-Dresnet.pkl': 'https://drive.google.com/uc?id=1OCJ-OZZ_N-_Qay6ZKopQFe4M_dAy54eS',
+ 'gdrive:networks/table2/stylegan2-car-config-e-Gskip-Dskip.pkl': 'https://drive.google.com/uc?id=1OAPFAJYcJTjYHLP5Z29KlkWIOqB8goOk',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gorig-Dorig.pkl': 'https://drive.google.com/uc?id=1N8wMCQ5j8iQKwLFrQl4T4gJtY_9wzigu',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gorig-Dresnet.pkl': 'https://drive.google.com/uc?id=1NRhA2W87lx4DQg3KpBT8QuH5a3RzqSXd',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gorig-Dskip.pkl': 'https://drive.google.com/uc?id=1NBvTUYqzx6NZfXgmdOSyg-2PdrksEj8U',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gresnet-Dorig.pkl': 'https://drive.google.com/uc?id=1NhyfG5h9mbA400nUqejpOVyEouxbKeMx',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gresnet-Dresnet.pkl': 'https://drive.google.com/uc?id=1Ntq-RrbSjZ-gxbRL46BoNrEygbsDkNrB',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gresnet-Dskip.pkl': 'https://drive.google.com/uc?id=1NkJi8o9pDRNCOlv-nYmlM4rvhB27UVc5',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gskip-Dorig.pkl': 'https://drive.google.com/uc?id=1NdlwIO2nvQCfwyY-a-111B3aZQlZGrk8',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gskip-Dresnet.pkl': 'https://drive.google.com/uc?id=1Nheaxsq08HsTn2gTDlBydv90M818NeJk',
+ 'gdrive:networks/table2/stylegan2-ffhq-config-e-Gskip-Dskip.pkl': 'https://drive.google.com/uc?id=1Nfe0O5M-4654w0_5xvnSf-ng07vXIFBR',
+}
+
+#----------------------------------------------------------------------------
+
+def get_path_or_url(path_or_gdrive_path):
+    """Resolve a symbolic 'gdrive:...' name to its download URL.
+
+    Names found in `gdrive_urls` are replaced by the mapped URL; anything
+    else (a local path or an ordinary URL) is returned untouched.
+    """
+    if path_or_gdrive_path in gdrive_urls:
+        return gdrive_urls[path_or_gdrive_path]
+    return path_or_gdrive_path
+
+#----------------------------------------------------------------------------
+
+# Process-wide cache: resolved path/URL -> (G, D, Gs) tuple already unpickled.
+_cached_networks = dict()
+
+def load_networks(path_or_gdrive_path):
+    """Load the (G, D, Gs) network triple from a pickle, with caching.
+
+    Args:
+        path_or_gdrive_path: Local file path, URL, or a 'gdrive:...' key of
+            `gdrive_urls` (resolved through get_path_or_url()).
+
+    Returns:
+        The tuple (G, D, Gs) stored in the pickle. Repeated calls with the
+        same resolved path return the cached tuple without re-reading it.
+    """
+    path_or_url = get_path_or_url(path_or_gdrive_path)
+    if path_or_url in _cached_networks:
+        return _cached_networks[path_or_url]
+
+    # Initialize TensorFlow *before* acquiring the stream, so a failure here
+    # cannot leave an opened file/download handle behind.
+    tflib.init_tf()
+
+    if dnnlib.util.is_url(path_or_url):
+        stream = dnnlib.util.open_url(path_or_url, cache_dir='.stylegan2-cache')
+    else:
+        stream = open(path_or_url, 'rb')
+
+    # NOTE(security): pickle.load can execute arbitrary code contained in the
+    # file. Only load network pickles that come from a trusted source.
+    with stream:
+        G, D, Gs = pickle.load(stream, encoding='latin1')
+    _cached_networks[path_or_url] = G, D, Gs
+    return G, D, Gs
+
+#----------------------------------------------------------------------------
diff --git a/projector.py b/projector.py
new file mode 100755
index 0000000..7a2f989
--- /dev/null
+++ b/projector.py
@@ -0,0 +1,206 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+
+from training import misc
+
+#----------------------------------------------------------------------------
+
+class Projector:
+    def __init__(self):
+        """Set the default hyperparameters and clear all internal state."""
+        # Public, user-tunable settings.
+        self.num_steps = 1000
+        self.dlatent_avg_samples = 10000
+        self.initial_learning_rate = 0.1
+        self.initial_noise_factor = 0.05
+        self.lr_rampdown_length = 0.25
+        self.lr_rampup_length = 0.05
+        self.noise_ramp_length = 0.75
+        self.regularize_noise_weight = 1e5
+        self.verbose = False
+        self.clone_net = True
+
+        # Private state: everything starts out as None and is filled in later
+        # by set_network() and start().
+        for private_name in (
+            '_Gs', '_minibatch_size', '_dlatent_avg', '_dlatent_std',
+            '_noise_vars', '_noise_init_op', '_noise_normalize_op',
+            '_dlatents_var', '_noise_in', '_dlatents_expr', '_images_expr',
+            '_target_images_var', '_lpips', '_dist', '_loss', '_reg_sizes',
+            '_lrate_in', '_opt', '_opt_step', '_cur_step',
+        ):
+            setattr(self, private_name, None)
+
+    def _info(self, *args):
+        """Print a 'Projector:'-prefixed status message when `verbose` is set."""
+        if not self.verbose:
+            return
+        print('Projector:', *args)
+
+    def set_network(self, Gs, minibatch_size=1):
+        """Attach a generator and build the TensorFlow graph used for projection.
+
+        Args:
+            Gs: Generator network. Passing None only records None and returns
+                without building anything.
+            minibatch_size: Number of images projected at once. Only 1 is
+                supported (asserted).
+
+        Populates the private graph handles (`_dlatents_var`, `_noise_vars`,
+        `_images_expr`, `_target_images_var`, `_dist`, `_loss`, `_opt`,
+        `_opt_step`, ...) that start(), step() and the getters rely on.
+        """
+        assert minibatch_size == 1
+        self._Gs = Gs
+        self._minibatch_size = minibatch_size
+        if self._Gs is None:
+            return
+        if self.clone_net:
+            # Work on a private copy so that assigning noise variables does
+            # not modify the caller's network.
+            self._Gs = self._Gs.clone()
+
+        # Find dlatent stats.
+        self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
+        latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
+        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512]
+        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512]
+        self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
+        self._info('std = %g' % self._dlatent_std)
+
+        # Find noise inputs.
+        self._info('Setting up noise inputs...')
+        self._noise_vars = []
+        noise_init_ops = []
+        noise_normalize_ops = []
+        while True:
+            # Noise variables are named noise0, noise1, ...; stop at the first gap.
+            n = 'G_synthesis/noise%d' % len(self._noise_vars)
+            if n not in self._Gs.vars:
+                break
+            v = self._Gs.vars[n]
+            self._noise_vars.append(v)
+            noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32)))
+            # Normalization op: rescale the variable to zero mean, unit std.
+            noise_mean = tf.reduce_mean(v)
+            noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5
+            noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std))
+            self._info(n, v)
+        self._noise_init_op = tf.group(*noise_init_ops)
+        self._noise_normalize_op = tf.group(*noise_normalize_ops)
+
+        # Image output graph.
+        self._info('Building image output graph...')
+        self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
+        self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
+        dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
+        # The single optimized dlatent is tiled across the synthesis network's layer axis.
+        self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
+        self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)
+
+        # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
+        proc_images_expr = (self._images_expr + 1) * (255 / 2)
+        sh = proc_images_expr.shape.as_list()
+        if sh[2] > 256:
+            factor = sh[2] // 256
+            # Split axis 2 and axis 3 each into (size // factor, factor) and
+            # average over the two factor axes. Axis 3 uses its own size,
+            # matching the target downsampling done in start().
+            proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]), axis=[3,5])
+
+        # Loss graph.
+        self._info('Building loss graph...')
+        self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var')
+        if self._lpips is None:
+            # Loaded once and kept across repeated set_network() calls.
+            self._lpips = misc.load_pkl('https://drive.google.com/uc?id=1N2-m9qszOeVC9Tq77WxsLnuWwOedQiD2') # vgg16_zhang_perceptual.pkl
+        self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var)
+        self._loss = tf.reduce_sum(self._dist)
+
+        # Noise regularization graph.
+        self._info('Building noise regularization graph...')
+        reg_loss = 0.0
+        for v in self._noise_vars:
+            sz = v.shape[2]
+            while True:
+                # Penalize correlation between the noise map and itself
+                # shifted by one element along axis 3 and along axis 2.
+                reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2
+                if sz <= 8:
+                    break # Small enough already
+                v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale
+                v = tf.reduce_mean(v, axis=[3, 5])
+                sz = sz // 2
+        self._loss += reg_loss * self.regularize_noise_weight
+
+        # Optimizer.
+        self._info('Setting up optimizer...')
+        self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in')
+        self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in)
+        # Both the dlatent and the per-layer noise maps are optimized.
+        self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars)
+        self._opt_step = self._opt.apply_updates()
+
+    def run(self, target_images):
+        """Project `target_images` in one call.
+
+        Calls start() and then step() until `num_steps` steps have run.
+
+        Returns:
+            A dnnlib.EasyDict with fields `dlatents`, `noises` and `images`
+            taken from the corresponding get_*() methods.
+        """
+        # Optimize until the step budget is used up.
+        self.start(target_images)
+        while self._cur_step < self.num_steps:
+            self.step()
+
+        # Gather the final state, in the same order as the result fields.
+        final_dlatents = self.get_dlatents()
+        final_noises = self.get_noises()
+        final_images = self.get_images()
+        return dnnlib.EasyDict(dlatents=final_dlatents, noises=final_noises, images=final_images)
+
+    def start(self, target_images):
+        """Load new target images and reset the optimization state.
+
+        `target_images` is converted to float32 and mapped through
+        (x + 1) * (255 / 2); its first dimension must equal the minibatch
+        size given to set_network(). Requires set_network() to have been
+        called with a network (asserted).
+        """
+        assert self._Gs is not None
+
+        # Prepare target images.
+        self._info('Preparing target images...')
+        targets = (np.asarray(target_images, dtype='float32') + 1) * (255 / 2)
+        shape = targets.shape
+        assert shape[0] == self._minibatch_size
+        target_res = self._target_images_var.shape[2]
+        if shape[2] > target_res:
+            # Average over factor x factor blocks to shrink to the target size.
+            factor = shape[2] // target_res
+            blocked = np.reshape(targets, [-1, shape[1], shape[2] // factor, factor, shape[3] // factor, factor])
+            targets = blocked.mean((3, 5))
+
+        # Initialize optimization state.
+        self._info('Initializing optimization state...')
+        initial_values = {
+            self._target_images_var: targets,
+            self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1]),
+        }
+        tflib.set_vars(initial_values)
+        tflib.run(self._noise_init_op)
+        self._opt.reset_optimizer_state()
+        self._cur_step = 0
+
+    def step(self):
+        """Run one optimization step; a no-op once `num_steps` is reached.
+
+        Requires start() to have been called (asserted via `_cur_step`).
+        """
+        assert self._cur_step is not None
+        if self._cur_step >= self.num_steps:
+            return
+        if self._cur_step == 0:
+            self._info('Running...')
+
+        # Schedules; t is the fraction of the run completed so far.
+        t = self._cur_step / self.num_steps
+        noise_decay = max(0.0, 1.0 - t / self.noise_ramp_length) ** 2
+        noise_strength = self._dlatent_std * self.initial_noise_factor * noise_decay
+        rampdown = min(1.0, (1.0 - t) / self.lr_rampdown_length)
+        cosine = 0.5 - 0.5 * np.cos(rampdown * np.pi)
+        rampup = min(1.0, t / self.lr_rampup_length)
+        lr_ramp = cosine * rampup
+        learning_rate = self.initial_learning_rate * lr_ramp
+
+        # One optimizer update, then re-normalize the noise variables.
+        feeds = {self._noise_in: noise_strength, self._lrate_in: learning_rate}
+        fetches = [self._opt_step, self._dist, self._loss]
+        _, dist_value, loss_value = tflib.run(fetches, feeds)
+        tflib.run(self._noise_normalize_op)
+
+        # Report every 10th step and the final one.
+        self._cur_step += 1
+        finished = self._cur_step == self.num_steps
+        if finished or self._cur_step % 10 == 0:
+            self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value))
+        if finished:
+            self._info('Done.')
+
+    def get_cur_step(self):
+        """Return the number of steps taken since start(), or None before start()."""
+        steps_done = self._cur_step
+        return steps_done
+
+    def get_dlatents(self):
+        """Evaluate the current dlatents expression with the noise input fed as 0."""
+        no_noise = {self._noise_in: 0}
+        return tflib.run(self._dlatents_expr, no_noise)
+
+    def get_noises(self):
+        """Evaluate and return the current values of all noise variables."""
+        noise_values = tflib.run(self._noise_vars)
+        return noise_values
+
+    def get_images(self):
+        """Evaluate the current image expression with the noise input fed as 0."""
+        no_noise = {self._noise_in: 0}
+        return tflib.run(self._images_expr, no_noise)
+
+#----------------------------------------------------------------------------
diff --git a/run_generator.py b/run_generator.py
new file mode 100755
index 0000000..2f15414
--- /dev/null
+++ b/run_generator.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+import argparse
+import numpy as np
+import PIL.Image
+import dnnlib
+import dnnlib.tflib as tflib
+import re
+import sys
+
+import pretrained_networks
+
+#----------------------------------------------------------------------------
+
+def generate_images(network_pkl, seeds, truncation_psi):
+    """Generate one image per seed and save it as 'seedNNNN.png' in the run dir.
+
+    Args:
+        network_pkl: Network pickle path/URL/'gdrive:' name.
+        seeds: Sequence of integer seeds; each seed drives both the latent
+            vector and the values written into the noise variables.
+        truncation_psi: Truncation psi passed to the generator, or None to
+            leave the generator's default untouched.
+    """
+    print('Loading networks from "%s"...' % network_pkl)
+    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
+    synthesis_vars = Gs.components.synthesis.vars
+    noise_vars = [tf_var for var_name, tf_var in synthesis_vars.items() if var_name.startswith('noise')]
+
+    Gs_kwargs = dnnlib.EasyDict(
+        output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
+        randomize_noise=False,
+    )
+    if truncation_psi is not None:
+        Gs_kwargs.truncation_psi = truncation_psi
+
+    for seed_idx, seed in enumerate(seeds):
+        print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
+        rnd = np.random.RandomState(seed)
+        # Draw order matters for reproducibility: latent first, then noise.
+        z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component]
+        noise_values = {tf_var: rnd.randn(*tf_var.shape.as_list()) for tf_var in noise_vars} # [height, width]
+        tflib.set_vars(noise_values)
+        images = Gs.run(z, None, **Gs_kwargs) # [minibatch, height, width, channel]
+        picture = PIL.Image.fromarray(images[0], 'RGB')
+        picture.save(dnnlib.make_run_dir_path('seed%04d.png' % seed))
+
+#----------------------------------------------------------------------------
+
+def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, minibatch_size=4):
+    """Render a style-mixing grid and save every cell plus the full grid.
+
+    Args:
+        network_pkl: Network pickle path/URL/'gdrive:' name.
+        row_seeds: Seeds whose images provide the base styles (grid rows).
+        col_seeds: Seeds whose styles are mixed in (grid columns).
+        truncation_psi: Scale applied to (w - w_avg) for every W vector.
+        col_styles: Layer indices copied from the column seed's W into the
+            row seed's W.
+        minibatch_size: Minibatch size passed to the synthesis network.
+
+    Writes '<row>-<col>.png' for each image and 'grid.png' into the run dir.
+    """
+    # The seed arguments may arrive as `range` objects (that is what the
+    # 'a-b' command-line form yields). They are concatenated with lists
+    # below, which a range does not support, so normalize them up front.
+    row_seeds = list(row_seeds)
+    col_seeds = list(col_seeds)
+
+    print('Loading networks from "%s"...' % network_pkl)
+    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
+    w_avg = Gs.get_var('dlatent_avg') # [component]
+
+    Gs_syn_kwargs = dnnlib.EasyDict()
+    Gs_syn_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
+    Gs_syn_kwargs.randomize_noise = False
+    Gs_syn_kwargs.minibatch_size = minibatch_size
+
+    print('Generating W vectors...')
+    all_seeds = list(set(row_seeds + col_seeds))
+    all_z = np.stack([np.random.RandomState(seed).randn(*Gs.input_shape[1:]) for seed in all_seeds]) # [minibatch, component]
+    all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component]
+    all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component]
+    w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} # [layer, component]
+
+    print('Generating images...')
+    all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel]
+    # Unmixed images are stored under (seed, seed) and used as grid headers.
+    image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))}
+
+    print('Generating style-mixed images...')
+    for row_seed in row_seeds:
+        for col_seed in col_seeds:
+            w = w_dict[row_seed].copy()
+            w[col_styles] = w_dict[col_seed][col_styles]
+            image = Gs.components.synthesis.run(w[np.newaxis], **Gs_syn_kwargs)[0]
+            image_dict[(row_seed, col_seed)] = image
+
+    print('Saving images...')
+    for (row_seed, col_seed), image in image_dict.items():
+        PIL.Image.fromarray(image, 'RGB').save(dnnlib.make_run_dir_path('%d-%d.png' % (row_seed, col_seed)))
+
+    print('Saving image grid...')
+    _N, _C, H, W = Gs.output_shape
+    canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black')
+    # A None seed marks the header row/column; the top-left cell stays black.
+    for row_idx, row_seed in enumerate([None] + row_seeds):
+        for col_idx, col_seed in enumerate([None] + col_seeds):
+            if row_seed is None and col_seed is None:
+                continue
+            key = (row_seed, col_seed)
+            if row_seed is None:
+                key = (col_seed, col_seed)
+            if col_seed is None:
+                key = (row_seed, row_seed)
+            canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx))
+    canvas.save(dnnlib.make_run_dir_path('grid.png'))
+
+#----------------------------------------------------------------------------
+
+def _parse_num_range(s):
+    '''Parse a comma separated list of numbers 'a,b,c' or an inclusive range 'a-c'.
+
+    Returns a list of ints in both cases. The range form is materialized
+    into a real list (not a `range` object) because callers concatenate the
+    result with other lists.
+    Raises ValueError if an element of the comma separated form is not an int.
+    '''
+
+    range_re = re.compile(r'^(\d+)-(\d+)$')
+    m = range_re.match(s)
+    if m:
+        return list(range(int(m.group(1)), int(m.group(2))+1))
+    vals = s.split(',')
+    return [int(x) for x in vals]
+
+#----------------------------------------------------------------------------
+
+# Usage examples shown at the end of the top-level --help output; main() passes
+# this string to argparse as the epilog. '%(prog)s' is argparse's placeholder
+# for the program name.
+_examples = '''examples:
+
+ # Generate ffhq uncurated images (matches paper Figure 12)
+ python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=6600-6625 --truncation-psi=0.5
+
+ # Generate ffhq curated images (matches paper Figure 11)
+ python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=66,230,389,1518 --truncation-psi=1.0
+
+ # Generate uncurated car images (matches paper Figure 12)
+ python %(prog)s generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=6000-6025 --truncation-psi=0.5
+
+ # Generate style mixing example (matches style mixing video clip)
+ python %(prog)s style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0
+'''
+
+#----------------------------------------------------------------------------
+
+def main():
+    """Command-line entry point: parse the sub-command and submit it as a local run."""
+    parser = argparse.ArgumentParser(
+        description="StyleGAN2 generator.\n\nRun 'python %(prog)s --help' for subcommand help.",
+        epilog=_examples,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    subparsers = parser.add_subparsers(help='Sub-commands', dest='command')
+
+    # generate-images
+    gen = subparsers.add_parser('generate-images', help='Generate images')
+    gen.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
+    gen.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', required=True)
+    gen.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
+    gen.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
+
+    # style-mixing-example
+    mix = subparsers.add_parser('style-mixing-example', help='Generate style mixing video')
+    mix.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
+    mix.add_argument('--row-seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True)
+    mix.add_argument('--col-seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True)
+    mix.add_argument('--col-styles', type=_parse_num_range, help='Style layer range (default: %(default)s)', default='0-6')
+    mix.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
+    mix.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
+
+    # Whatever remains in kwargs after the pops below is forwarded to the
+    # selected run function as keyword arguments.
+    kwargs = vars(parser.parse_args())
+    subcmd = kwargs.pop('command')
+    if subcmd is None:
+        print ('Error: missing subcommand. Re-run with --help for usage.')
+        sys.exit(1)
+
+    sc = dnnlib.SubmitConfig()
+    sc.num_gpus = 1
+    sc.submit_target = dnnlib.SubmitTarget.LOCAL
+    sc.local.do_not_copy_source_files = True
+    sc.run_dir_root = kwargs.pop('result_dir')
+    sc.run_desc = subcmd
+
+    run_funcs = {
+        'generate-images': 'run_generator.generate_images',
+        'style-mixing-example': 'run_generator.style_mixing_example'
+    }
+    dnnlib.submit_run(sc, run_funcs[subcmd], **kwargs)
+
+#----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+ main()
+
+#----------------------------------------------------------------------------
diff --git a/run_metrics.py b/run_metrics.py
new file mode 100755
index 0000000..5043b10
--- /dev/null
+++ b/run_metrics.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+import argparse
+import os
+import sys
+
+import dnnlib
+import dnnlib.tflib as tflib
+
+import pretrained_networks
+from metrics import metric_base
+from metrics.metric_defaults import metric_defaults
+
+#----------------------------------------------------------------------------
+
+def run(network_pkl, metrics, dataset, data_dir, mirror_augment):
+    """Evaluate the named metrics for one network pickle.
+
+    Args:
+        network_pkl: Network pickle path/URL/'gdrive:' name.
+        metrics: Iterable of keys into `metric_defaults`.
+        dataset: Passed to the metrics as `tfrecord_dir` of the dataset args.
+        data_dir: Dataset root directory.
+        mirror_augment: Forwarded unchanged to the metric group.
+    """
+    print('Evaluating metrics "%s" for "%s"...' % (','.join(metrics), network_pkl))
+    tflib.init_tf()
+    resolved_pkl = pretrained_networks.get_path_or_url(network_pkl)
+    dataset_args = dnnlib.EasyDict(tfrecord_dir=dataset, shuffle_mb=0)
+    num_gpus = dnnlib.submit_config.num_gpus
+    selected = [metric_defaults[name] for name in metrics]
+    group = metric_base.MetricGroup(selected)
+    group.run(resolved_pkl, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment, num_gpus=num_gpus)
+
+#----------------------------------------------------------------------------
+
+def _str_to_bool(v):
+    """argparse `type=` helper: convert a yes/no style string to a bool.
+
+    Booleans are returned as-is. Matching is case-insensitive. Raises
+    argparse.ArgumentTypeError for any other text.
+    """
+    if isinstance(v, bool):
+        return v
+    lowered = v.lower()
+    if lowered in ('yes', 'true', 't', 'y', '1'):
+        return True
+    if lowered in ('no', 'false', 'f', 'n', '0'):
+        return False
+    raise argparse.ArgumentTypeError('Boolean value expected.')
+
+#----------------------------------------------------------------------------
+
+# Epilog for --help: one usage example followed by the sorted, comma separated
+# list of keys available in metric_defaults.
+_examples = '''examples:
+
+ python %(prog)s --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true
+
+valid metrics:
+
+ ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + '''
+'''
+
+def main():
+    """Command-line entry point: parse options and submit run() as a local run."""
+    parser = argparse.ArgumentParser(
+        description='Run StyleGAN2 metrics.',
+        epilog=_examples,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    add = parser.add_argument
+    add('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
+    add('--network', help='Network pickle filename', dest='network_pkl', required=True)
+    add('--metrics', help='Metrics to compute (default: %(default)s)', default='fid50k', type=lambda x: x.split(','))
+    add('--dataset', help='Training dataset', required=True)
+    add('--data-dir', help='Dataset root directory', required=True)
+    add('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, type=_str_to_bool, metavar='BOOL')
+    add('--num-gpus', help='Number of GPUs to use', type=int, default=1, metavar='N')
+
+    kwargs = vars(parser.parse_args())
+
+    # Fail early if the dataset root is missing.
+    if not os.path.exists(kwargs['data_dir']):
+        print ('Error: dataset root directory does not exist.')
+        sys.exit(1)
+
+    # Options consumed here are popped; the rest become run() keyword arguments.
+    sc = dnnlib.SubmitConfig()
+    sc.num_gpus = kwargs.pop('num_gpus')
+    sc.submit_target = dnnlib.SubmitTarget.LOCAL
+    sc.local.do_not_copy_source_files = True
+    sc.run_dir_root = kwargs.pop('result_dir')
+    sc.run_desc = 'run-metrics'
+    dnnlib.submit_run(sc, 'run_metrics.run', **kwargs)
+
+#----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+ main()
+
+#----------------------------------------------------------------------------
diff --git a/run_projector.py b/run_projector.py
new file mode 100755
index 0000000..bf18bd7
--- /dev/null
+++ b/run_projector.py
@@ -0,0 +1,148 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+import argparse
+import numpy as np
+import dnnlib
+import dnnlib.tflib as tflib
+import re
+import sys
+
+import projector
+import pretrained_networks
+from training import dataset
+from training import misc
+
+#----------------------------------------------------------------------------
+
+def project_image(proj, targets, png_prefix, num_snapshots):
+    """Project `targets` with `proj`, saving the target and periodic snapshots.
+
+    Writes '<png_prefix>target.png' and, at `num_snapshots` evenly spaced
+    step counts ending with the final step, '<png_prefix>stepNNNN.png'.
+    A single-line progress counter is printed while running.
+    """
+    step_offsets = np.linspace(0, proj.num_steps, num_snapshots, endpoint=False, dtype=int)
+    snapshot_steps = set(proj.num_steps - step_offsets)
+    misc.save_image_grid(targets, png_prefix + 'target.png', drange=[-1,1])
+    proj.start(targets)
+    while proj.get_cur_step() < proj.num_steps:
+        print('\r%d / %d ... ' % (proj.get_cur_step(), proj.num_steps), end='', flush=True)
+        proj.step()
+        if proj.get_cur_step() in snapshot_steps:
+            snapshot_path = png_prefix + 'step%04d.png' % proj.get_cur_step()
+            misc.save_image_grid(proj.get_images(), snapshot_path, drange=[-1,1])
+    # Blank out the progress line.
+    print('\r%-30s\r' % '', end='', flush=True)
+
+#----------------------------------------------------------------------------
+
+def project_generated_images(network_pkl, seeds, num_snapshots, truncation_psi):
+    """Generate one image per seed and project each back into latent space.
+
+    Args:
+        network_pkl: Network pickle path/URL/'gdrive:' name.
+        seeds: Sequence of integer seeds for the generated targets.
+        num_snapshots: Number of snapshots saved per projection.
+        truncation_psi: Truncation psi used when generating the targets.
+    """
+    print('Loading networks from "%s"...' % network_pkl)
+    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
+    proj = projector.Projector()
+    proj.set_network(Gs)
+    synthesis_vars = Gs.components.synthesis.vars
+    noise_vars = [tf_var for var_name, tf_var in synthesis_vars.items() if var_name.startswith('noise')]
+
+    Gs_kwargs = dnnlib.EasyDict(randomize_noise=False, truncation_psi=truncation_psi)
+
+    for seed_idx, seed in enumerate(seeds):
+        print('Projecting seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
+        rnd = np.random.RandomState(seed)
+        # Draw order matters for reproducibility: latent first, then noise.
+        z = rnd.randn(1, *Gs.input_shape[1:])
+        noise_values = {tf_var: rnd.randn(*tf_var.shape.as_list()) for tf_var in noise_vars}
+        tflib.set_vars(noise_values)
+        targets = Gs.run(z, None, **Gs_kwargs)
+        prefix = dnnlib.make_run_dir_path('seed%04d-' % seed)
+        project_image(proj, targets=targets, png_prefix=prefix, num_snapshots=num_snapshots)
+
+#----------------------------------------------------------------------------
+
+def project_real_images(network_pkl, dataset_name, data_dir, num_images, num_snapshots):
+    """Project the first `num_images` images of a dataset into latent space.
+
+    Args:
+        network_pkl: Network pickle path/URL/'gdrive:' name.
+        dataset_name: Passed to the dataset loader as `tfrecord_dir`.
+        data_dir: Dataset root directory.
+        num_images: How many images to read and project, one at a time.
+        num_snapshots: Number of snapshots saved per projection.
+
+    The dataset's image shape must equal the generator's output shape
+    (asserted).
+    """
+    print('Loading networks from "%s"...' % network_pkl)
+    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
+    proj = projector.Projector()
+    proj.set_network(Gs)
+
+    print('Loading images from "%s"...' % dataset_name)
+    real_data = dataset.load_dataset(data_dir=data_dir, tfrecord_dir=dataset_name, max_label_size=0, repeat=False, shuffle_mb=0)
+    assert real_data.shape == Gs.output_shape[1:]
+
+    for image_idx in range(num_images):
+        print('Projecting image %d/%d ...' % (image_idx, num_images))
+        batch, _labels = real_data.get_minibatch_np(1)
+        # Rescale pixel values from [0, 255] to the [-1, 1] range.
+        targets = misc.adjust_dynamic_range(batch, [0, 255], [-1, 1])
+        prefix = dnnlib.make_run_dir_path('image%04d-' % image_idx)
+        project_image(proj, targets=targets, png_prefix=prefix, num_snapshots=num_snapshots)
+
+#----------------------------------------------------------------------------
+
+def _parse_num_range(s):
+    '''Parse a comma separated list of numbers 'a,b,c' or an inclusive range 'a-c'.
+
+    Returns a list of ints in both cases; the range form is materialized
+    into a real list so the result always has the documented type.
+    Raises ValueError if an element of the comma separated form is not an int.
+    '''
+
+    range_re = re.compile(r'^(\d+)-(\d+)$')
+    m = range_re.match(s)
+    if m:
+        return list(range(int(m.group(1)), int(m.group(2))+1))
+    vals = s.split(',')
+    return [int(x) for x in vals]
+
+#----------------------------------------------------------------------------
+
+# Usage examples shown at the end of the top-level --help output; main() passes
+# this string to argparse as the epilog.
+_examples = '''examples:
+
+ # Project generated images
+ python %(prog)s project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=0,1,5
+
+ # Project real images
+ python %(prog)s project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl --dataset=car --data-dir=~/datasets
+
+'''
+
+#----------------------------------------------------------------------------
+
+def main():
+    """Command-line entry point: parse the sub-command and submit it as a local run."""
+    parser = argparse.ArgumentParser(
+        description="StyleGAN2 projector.\n\nRun 'python %(prog)s --help' for subcommand help.",
+        epilog=_examples,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    subparsers = parser.add_subparsers(help='Sub-commands', dest='command')
+
+    # project-generated-images
+    gen = subparsers.add_parser('project-generated-images', help='Project generated images')
+    gen.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
+    gen.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', default=range(3))
+    gen.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5)
+    gen.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=1.0)
+    gen.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
+
+    # project-real-images
+    real = subparsers.add_parser('project-real-images', help='Project real images')
+    real.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
+    real.add_argument('--data-dir', help='Dataset root directory', required=True)
+    real.add_argument('--dataset', help='Training dataset', dest='dataset_name', required=True)
+    real.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5)
+    real.add_argument('--num-images', type=int, help='Number of images to project (default: %(default)s)', default=3)
+    real.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
+
+    kwargs = vars(parser.parse_args())
+    subcmd = kwargs['command']
+    if subcmd is None:
+        print ('Error: missing subcommand. Re-run with --help for usage.')
+        sys.exit(1)
+
+    # Options consumed here are popped; the rest are forwarded to the
+    # selected run function as keyword arguments.
+    sc = dnnlib.SubmitConfig()
+    sc.num_gpus = 1
+    sc.submit_target = dnnlib.SubmitTarget.LOCAL
+    sc.local.do_not_copy_source_files = True
+    sc.run_dir_root = kwargs.pop('result_dir')
+    sc.run_desc = kwargs.pop('command')
+
+    run_funcs = {
+        'project-generated-images': 'run_projector.project_generated_images',
+        'project-real-images': 'run_projector.project_real_images'
+    }
+    dnnlib.submit_run(sc, run_funcs[subcmd], **kwargs)
+
+#----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+ main()
+
+#----------------------------------------------------------------------------
diff --git a/run_training.py b/run_training.py
new file mode 100755
index 0000000..bc4c0a2
--- /dev/null
+++ b/run_training.py
@@ -0,0 +1,195 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+import argparse
+import copy
+import os
+import sys
+
+import dnnlib
+from dnnlib import EasyDict
+
+from metrics.metric_defaults import metric_defaults
+
+#----------------------------------------------------------------------------
+
+# Names accepted for the --config option. run() asserts membership in this list,
+# main() rejects anything else, and the list is also printed in the --help epilog.
+_valid_configs = [
+ # Table 1
+ 'config-a', # Baseline StyleGAN
+ 'config-b', # + Weight demodulation
+ 'config-c', # + Lazy regularization
+ 'config-d', # + Path length regularization
+ 'config-e', # + No growing, new G & D arch.
+ 'config-f', # + Large networks (default)
+
+ # Table 2
+ 'config-e-Gorig-Dorig', 'config-e-Gorig-Dresnet', 'config-e-Gorig-Dskip',
+ 'config-e-Gresnet-Dorig', 'config-e-Gresnet-Dresnet', 'config-e-Gresnet-Dskip',
+ 'config-e-Gskip-Dorig', 'config-e-Gskip-Dresnet', 'config-e-Gskip-Dskip',
+]
+
+#----------------------------------------------------------------------------
+
+# Assemble the option dictionaries for one training run and launch it through dnnlib.submit_run().
+#
+# dataset: Dataset name; used as tfrecord_dir in dataset_args and appended to the run description.
+# data_dir: Dataset root directory; stored as train.data_dir.
+# result_dir: Becomes submit_config.run_dir_root.
+# config_id: One of _valid_configs; selects the overrides applied below.
+# num_gpus: Must be 1, 2, 4 or 8.
+# total_kimg: Stored as train.total_kimg.
+# gamma: If not None, overrides the config-dependent D_loss.gamma.
+# mirror_augment: Stored as train.mirror_augment.
+# metrics: List of keys into metric_defaults.
+#
+# The overrides are applied in sequence and later ones deliberately replace earlier
+# values, so the order of the statements below matters.
+def run(dataset, data_dir, result_dir, config_id, num_gpus, total_kimg, gamma, mirror_augment, metrics):
+ train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop.
+ G = EasyDict(func_name='training.networks_stylegan2.G_main') # Options for generator network.
+ D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2') # Options for discriminator network.
+ G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer.
+ D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer.
+ G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg') # Options for generator loss.
+ D_loss = EasyDict(func_name='training.loss.D_logistic_r1') # Options for discriminator loss.
+ sched = EasyDict() # Options for TrainingSchedule.
+ grid = EasyDict(size='8k', layout='random') # Options for setup_snapshot_image_grid().
+ sc = dnnlib.SubmitConfig() # Options for dnnlib.submit_run().
+ tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf().
+
+ # Baseline settings shared by all configs; several are overridden further down.
+ train.data_dir = data_dir
+ train.total_kimg = total_kimg
+ train.mirror_augment = mirror_augment
+ train.image_snapshot_ticks = train.network_snapshot_ticks = 10
+ sched.G_lrate_base = sched.D_lrate_base = 0.002
+ sched.minibatch_size_base = 32
+ sched.minibatch_gpu_base = 4
+ D_loss.gamma = 10
+ # Replace each metric name by its entry in metric_defaults.
+ metrics = [metric_defaults[x] for x in metrics]
+ desc = 'stylegan2'
+
+ desc += '-' + dataset
+ dataset_args = EasyDict(tfrecord_dir=dataset)
+
+ assert num_gpus in [1, 2, 4, 8]
+ sc.num_gpus = num_gpus
+ desc += '-%dgpu' % num_gpus
+
+ assert config_id in _valid_configs
+ desc += '-' + config_id
+
+ # Configs A-E: Shrink networks to match original StyleGAN.
+ if config_id != 'config-f':
+ # 8 << 10 == 8192.
+ G.fmap_base = D.fmap_base = 8 << 10
+
+ # Config E: Set gamma to 100 and override G & D architecture.
+ if config_id.startswith('config-e'):
+ D_loss.gamma = 100
+ if 'Gorig' in config_id: G.architecture = 'orig'
+ if 'Gskip' in config_id: G.architecture = 'skip' # (default)
+ if 'Gresnet' in config_id: G.architecture = 'resnet'
+ if 'Dorig' in config_id: D.architecture = 'orig'
+ if 'Dskip' in config_id: D.architecture = 'skip'
+ if 'Dresnet' in config_id: D.architecture = 'resnet' # (default)
+
+ # Configs A-D: Enable progressive growing and switch to networks that support it.
+ if config_id in ['config-a', 'config-b', 'config-c', 'config-d']:
+ sched.lod_initial_resolution = 8
+ sched.G_lrate_base = sched.D_lrate_base = 0.001
+ sched.G_lrate_dict = sched.D_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}
+ sched.minibatch_size_base = 32 # (default)
+ sched.minibatch_size_dict = {8: 256, 16: 128, 32: 64, 64: 32}
+ sched.minibatch_gpu_base = 4 # (default)
+ sched.minibatch_gpu_dict = {8: 32, 16: 16, 32: 8, 64: 4}
+ G.synthesis_func = 'G_synthesis_stylegan_revised'
+ D.func_name = 'training.networks_stylegan2.D_stylegan'
+
+ # Configs A-C: Disable path length regularization.
+ if config_id in ['config-a', 'config-b', 'config-c']:
+ G_loss = EasyDict(func_name='training.loss.G_logistic_ns')
+
+ # Configs A-B: Disable lazy regularization.
+ if config_id in ['config-a', 'config-b']:
+ train.lazy_regularization = False
+
+ # Config A: Switch to original StyleGAN networks.
+ # G and D are rebuilt from scratch here, so options set on them above
+ # (fmap_base, synthesis_func, func_name) are discarded for config-a.
+ if config_id == 'config-a':
+ G = EasyDict(func_name='training.networks_stylegan.G_style')
+ D = EasyDict(func_name='training.networks_stylegan.D_basic')
+
+ # An explicitly given gamma wins over the config-dependent value chosen above.
+ if gamma is not None:
+ D_loss.gamma = gamma
+
+ sc.submit_target = dnnlib.SubmitTarget.LOCAL
+ sc.local.do_not_copy_source_files = True
+ # Flatten all option dictionaries into the keyword arguments of dnnlib.submit_run().
+ kwargs = EasyDict(train)
+ kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss)
+ kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config)
+ # The submit config is deep-copied before the run-specific fields are filled in.
+ kwargs.submit_config = copy.deepcopy(sc)
+ kwargs.submit_config.run_dir_root = result_dir
+ kwargs.submit_config.run_desc = desc
+ dnnlib.submit_run(**kwargs)
+
+#----------------------------------------------------------------------------
+
+def _str_to_bool(v):
+ if isinstance(v, bool):
+ return v
+ if v.lower() in ('yes', 'true', 't', 'y', '1'):
+ return True
+ elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+ return False
+ else:
+ raise argparse.ArgumentTypeError('Boolean value expected.')
+
+def _parse_comma_sep(s):
+ if s is None or s.lower() == 'none' or s == '':
+ return []
+ return s.split(',')
+
+#----------------------------------------------------------------------------
+
+# Epilog text for --help: a usage example followed by the valid config names
+# (from _valid_configs) and the valid metric names (sorted keys of metric_defaults).
+_examples = '''examples:
+
+ # Train StyleGAN2 using the FFHQ dataset
+ python %(prog)s --num-gpus=8 --data-dir=~/datasets --config=config-f --dataset=ffhq --mirror-augment=true
+
+valid configs:
+
+ ''' + ', '.join(_valid_configs) + '''
+
+valid metrics:
+
+ ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + '''
+
+'''
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Train StyleGAN2.',
+ epilog=_examples,
+ formatter_class=argparse.RawDescriptionHelpFormatter
+ )
+ parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
+ parser.add_argument('--data-dir', help='Dataset root directory', required=True)
+ parser.add_argument('--dataset', help='Training dataset', required=True)
+ parser.add_argument('--config', help='Training config (default: %(default)s)', default='config-f', required=True, dest='config_id', metavar='CONFIG')
+ parser.add_argument('--num-gpus', help='Number of GPUs (default: %(default)s)', default=1, type=int, metavar='N')
+ parser.add_argument('--total-kimg', help='Training length in thousands of images (default: %(default)s)', metavar='KIMG', default=25000, type=int)
+ parser.add_argument('--gamma', help='R1 regularization weight (default is config dependent)', default=None, type=float)
+ parser.add_argument('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, metavar='BOOL', type=_str_to_bool)
+ parser.add_argument('--metrics', help='Comma-separated list of metrics or "none" (default: %(default)s)', default='fid50k', type=_parse_comma_sep)
+
+ args = parser.parse_args()
+
+ if not os.path.exists(args.data_dir):
+ print ('Error: dataset root directory does not exist.')
+ sys.exit(1)
+
+ if args.config_id not in _valid_configs:
+ print ('Error: --config value must be one of: ', ', '.join(_valid_configs))
+ sys.exit(1)
+
+ for metric in args.metrics:
+ if metric not in metric_defaults:
+ print ('Error: unknown metric \'%s\'' % metric)
+ sys.exit(1)
+
+ run(**vars(args))
+
+#----------------------------------------------------------------------------
+
+# Start training only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+ main()
+
+#----------------------------------------------------------------------------
+
diff --git a/test_nvcc.cu b/test_nvcc.cu
new file mode 100755
index 0000000..9d3ce2d
--- /dev/null
+++ b/test_nvcc.cu
@@ -0,0 +1,24 @@
+// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+//
+// This work is made available under the Nvidia Source Code License-NC.
+// To view a copy of this license, visit
+// https://nvlabs.github.io/stylegan2/license.html
+
+#include <cstdio>
+
+// Device kernel that prints a greeting; main() launches it with grid and block dimensions of 1.
+__global__ void cudaKernel(void)
+{
+ printf("GPU says hello!\n");
+}
+
+// Smoke test for the CUDA toolchain: prints a greeting from the host, launches
+// cudaKernel once and waits for it to finish.
+// Returns 0 on success; on any CUDA error prints its name and description and returns 1.
+int main(void)
+{
+    printf("CPU says hello!\n");
+    cudaError_t err = cudaLaunchKernel(cudaKernel, 1, 1, NULL, 0, NULL);
+
+    // Kernel launches are asynchronous and device-side printf output is only
+    // flushed at synchronization points, so wait for the kernel before exiting.
+    // This also surfaces errors that occur while the kernel is running.
+    if (err == cudaSuccess)
+        err = cudaDeviceSynchronize();
+
+    if (err != cudaSuccess)
+    {
+        printf("%s: %s\n", cudaGetErrorName(err), cudaGetErrorString(err));
+        return 1;
+    }
+    return 0;
+}
diff --git a/training/__init__.py b/training/__init__.py
new file mode 100755
index 0000000..9ab9908
--- /dev/null
+++ b/training/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+# empty
diff --git a/training/dataset.py b/training/dataset.py
new file mode 100755
index 0000000..2d10598
--- /dev/null
+++ b/training/dataset.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Multi-resolution input data pipeline."""
+
+import os
+import glob
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+
+#----------------------------------------------------------------------------
+# Dataset class that loads data from tfrecords files.
+
+# Dataset backed by a directory of *.tfrecords files, one file per resolution.
+# The constructor inspects the files, loads the optional labels and builds one
+# tf.data pipeline per level of detail (lod); configure() selects which pipeline
+# feeds the shared iterator.
+class TFRecordDataset:
+ def __init__(self,
+ tfrecord_dir, # Directory containing a collection of tfrecords files.
+ resolution = None, # Dataset resolution, None = autodetect.
+ label_file = None, # Relative path of the labels file, None = autodetect.
+ max_label_size = 0, # 0 = no labels, 'full' = full labels, integer N = N first label components.
+ max_images = None, # Maximum number of images to use, None = use all images.
+ repeat = True, # Repeat dataset indefinitely?
+ shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling.
+ prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching.
+ buffer_mb = 256, # Read buffer size (megabytes).
+ num_threads = 2): # Number of concurrent threads.
+
+ self.tfrecord_dir = tfrecord_dir
+ self.resolution = None
+ self.resolution_log2 = None
+ self.shape = [] # [channels, height, width]
+ self.dtype = 'uint8'
+ self.dynamic_range = [0, 255]
+ self.label_file = label_file
+ self.label_size = None # components
+ self.label_dtype = None
+ self._np_labels = None
+ self._tf_minibatch_in = None
+ self._tf_labels_var = None
+ self._tf_labels_dataset = None
+ self._tf_datasets = dict()
+ self._tf_iterator = None
+ self._tf_init_ops = dict()
+ self._tf_minibatch_np = None
+ self._cur_minibatch = -1
+ self._cur_lod = -1
+
+ # List tfrecords files and inspect their shapes.
+ assert os.path.isdir(self.tfrecord_dir)
+ tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords')))
+ assert len(tfr_files) >= 1
+ tfr_shapes = []
+ for tfr_file in tfr_files:
+ tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE)
+ # Only the first record of each file is parsed; its shape stands for the whole file.
+ for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt):
+ tfr_shapes.append(self.parse_tfrecord_np(record).shape)
+ break
+
+ # Autodetect label filename.
+ if self.label_file is None:
+ guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels')))
+ if len(guess):
+ self.label_file = guess[0]
+ elif not os.path.isfile(self.label_file):
+ # An explicit label_file that is not found as given is retried relative to tfrecord_dir.
+ guess = os.path.join(self.tfrecord_dir, self.label_file)
+ if os.path.isfile(guess):
+ self.label_file = guess
+
+ # Determine shape and resolution.
+ max_shape = max(tfr_shapes, key=np.prod)
+ self.resolution = resolution if resolution is not None else max_shape[1]
+ self.resolution_log2 = int(np.log2(self.resolution))
+ self.shape = [max_shape[0], self.resolution, self.resolution]
+ # lod 0 is the full resolution; each further lod halves it. Negative lods mark files above the requested resolution.
+ tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes]
+ assert all(shape[0] == max_shape[0] for shape in tfr_shapes)
+ assert all(shape[1] == shape[2] for shape in tfr_shapes)
+ assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods))
+ assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1))
+
+ # Load labels.
+ assert max_label_size == 'full' or max_label_size >= 0
+ # Default: an array with 2**30 rows and zero label components, used when labels are disabled or absent.
+ self._np_labels = np.zeros([1<<30, 0], dtype=np.float32)
+ if self.label_file is not None and max_label_size != 0:
+ self._np_labels = np.load(self.label_file)
+ assert self._np_labels.ndim == 2
+ if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size:
+ self._np_labels = self._np_labels[:, :max_label_size]
+ if max_images is not None and self._np_labels.shape[0] > max_images:
+ self._np_labels = self._np_labels[:max_images]
+ self.label_size = self._np_labels.shape[1]
+ self.label_dtype = self._np_labels.dtype.name
+
+ # Build TF expressions.
+ with tf.name_scope('Dataset'), tf.device('/cpu:0'):
+ self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[])
+ self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var')
+ self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
+ for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods):
+ # Skip files whose resolution is higher than the requested one.
+ if tfr_lod < 0:
+ continue
+ dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20)
+ if max_images is not None:
+ dset = dset.take(max_images)
+ dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads)
+ # Pair each image with the label at the same position.
+ dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
+ bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
+ # The buffer sizes below are the megabyte budgets converted to item counts, rounded up.
+ if shuffle_mb > 0:
+ dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
+ if repeat:
+ dset = dset.repeat()
+ if prefetch_mb > 0:
+ dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
+ # The batch size is fed at iterator initialization time through the placeholder.
+ dset = dset.batch(self._tf_minibatch_in)
+ self._tf_datasets[tfr_lod] = dset
+ # One iterator is shared by all lods; its structure is taken from the lod-0 dataset.
+ self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
+ self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()}
+
+ # Nothing to release explicitly; this method is a no-op.
+ def close(self):
+ pass
+
+ # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf().
+ # The iterator is re-initialized only when the minibatch size or the (floored) lod actually changes.
+ def configure(self, minibatch_size, lod=0):
+ lod = int(np.floor(lod))
+ assert minibatch_size >= 1 and lod in self._tf_datasets
+ if self._cur_minibatch != minibatch_size or self._cur_lod != lod:
+ self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size})
+ self._cur_minibatch = minibatch_size
+ self._cur_lod = lod
+
+ # Get next minibatch as TensorFlow expressions.
+ def get_minibatch_tf(self): # => images, labels
+ return self._tf_iterator.get_next()
+
+ # Get next minibatch as NumPy arrays.
+ # The get_next() expression is created once on first use and cached in _tf_minibatch_np.
+ def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels
+ self.configure(minibatch_size, lod)
+ with tf.name_scope('Dataset'):
+ if self._tf_minibatch_np is None:
+ self._tf_minibatch_np = self.get_minibatch_tf()
+ return tflib.run(self._tf_minibatch_np)
+
+ # Get random labels as TensorFlow expression.
+ # Rows are gathered at uniformly random indices; without labels the result has shape [minibatch_size, 0].
+ def get_random_labels_tf(self, minibatch_size): # => labels
+ with tf.name_scope('Dataset'):
+ if self.label_size > 0:
+ with tf.device('/cpu:0'):
+ return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32))
+ return tf.zeros([minibatch_size, 0], self.label_dtype)
+
+ # Get random labels as NumPy array.
+ def get_random_labels_np(self, minibatch_size): # => labels
+ if self.label_size > 0:
+ return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])]
+ return np.zeros([minibatch_size, 0], self.label_dtype)
+
+ # Parse individual image from a tfrecords file into TensorFlow expression.
+ # Each record carries a 3-element int64 'shape' and the raw uint8 bytes in 'data'.
+ @staticmethod
+ def parse_tfrecord_tf(record):
+ features = tf.parse_single_example(record, features={
+ 'shape': tf.FixedLenFeature([3], tf.int64),
+ 'data': tf.FixedLenFeature([], tf.string)})
+ data = tf.decode_raw(features['data'], tf.uint8)
+ return tf.reshape(data, features['shape'])
+
+ # Parse individual image from a tfrecords file into NumPy array.
+ # NOTE(review): np.fromstring on binary data is deprecated in newer NumPy releases in favour
+ # of np.frombuffer; frombuffer returns a read-only view, so confirm no caller writes to the
+ # result before switching.
+ @staticmethod
+ def parse_tfrecord_np(record):
+ ex = tf.train.Example()
+ ex.ParseFromString(record)
+ shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member
+ data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member
+ return np.fromstring(data, np.uint8).reshape(shape)
+
+#----------------------------------------------------------------------------
+# Helper func for constructing a dataset object using the given options.
+
+def load_dataset(class_name=None, data_dir=None, verbose=False, **kwargs):
+ kwargs = dict(kwargs)
+ if 'tfrecord_dir' in kwargs:
+ if class_name is None:
+ class_name = __name__ + '.TFRecordDataset'
+ if data_dir is not None:
+ kwargs['tfrecord_dir'] = os.path.join(data_dir, kwargs['tfrecord_dir'])
+
+ assert class_name is not None
+ if verbose:
+ print('Streaming data using %s...' % class_name)
+ dataset = dnnlib.util.get_obj_by_name(class_name)(**kwargs)
+ if verbose:
+ print('Dataset shape =', np.int32(dataset.shape).tolist())
+ print('Dynamic range =', dataset.dynamic_range)
+ print('Label size =', dataset.label_size)
+ return dataset
+
+#----------------------------------------------------------------------------
diff --git a/training/loss.py b/training/loss.py
new file mode 100755
index 0000000..7ad2fe1
--- /dev/null
+++ b/training/loss.py
@@ -0,0 +1,197 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Loss functions."""
+
+import numpy as np
+import tensorflow as tf
+import dnnlib.tflib as tflib
+from dnnlib.tflib.autosummary import autosummary
+
+#----------------------------------------------------------------------------
+# Logistic loss from the paper
+# "Generative Adversarial Nets", Goodfellow et al. 2014
+
+# Saturating generator loss: log(1 - sigmoid(D(G(z)))), evaluated as -softplus of the fake scores.
+# Returns (loss, reg); reg is None because this loss defines no regularization term.
+# The loss is returned without any reduction applied here.
+def G_logistic(G, D, opt, training_set, minibatch_size):
+ _ = opt
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ labels = training_set.get_random_labels_tf(minibatch_size)
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ loss = -tf.nn.softplus(fake_scores_out) # log(1-sigmoid(fake_scores_out)) # pylint: disable=invalid-unary-operand-type
+ return loss, None
+
+# Non-saturating generator loss: -log(sigmoid(D(G(z)))), evaluated as softplus of the negated fake scores.
+# Returns (loss, reg); reg is None because this loss defines no regularization term.
+def G_logistic_ns(G, D, opt, training_set, minibatch_size):
+ _ = opt
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ labels = training_set.get_random_labels_tf(minibatch_size)
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ loss = tf.nn.softplus(-fake_scores_out) # -log(sigmoid(fake_scores_out))
+ return loss, None
+
+# Logistic discriminator loss: -log(1 - sigmoid(D(fake))) - log(sigmoid(D(real))).
+# Fakes are generated from fresh random latents using the labels of the real minibatch.
+# Both score tensors are passed through autosummary under 'Loss/scores/...'.
+# Returns (loss, reg) with reg = None.
+def D_logistic(G, D, opt, training_set, minibatch_size, reals, labels):
+ _ = opt, training_set
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ real_scores_out = D.get_output_for(reals, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ real_scores_out = autosummary('Loss/scores/real', real_scores_out)
+ fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
+ loss = tf.nn.softplus(fake_scores_out) # -log(1-sigmoid(fake_scores_out))
+ loss += tf.nn.softplus(-real_scores_out) # -log(sigmoid(real_scores_out)) # pylint: disable=invalid-unary-operand-type
+ return loss, None
+
+#----------------------------------------------------------------------------
+# R1 and R2 regularizers from the paper
+# "Which Training Methods for GANs do actually Converge?", Mescheder et al. 2018
+
+# Logistic discriminator loss plus the R1 gradient penalty, which is evaluated on the real images.
+# gamma: penalty weight; the regularizer is gradient_penalty * gamma / 2.
+# Returns (loss, reg); the two terms are returned separately and are not combined here.
+def D_logistic_r1(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0):
+ _ = opt, training_set
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ real_scores_out = D.get_output_for(reals, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ real_scores_out = autosummary('Loss/scores/real', real_scores_out)
+ fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
+ loss = tf.nn.softplus(fake_scores_out) # -log(1-sigmoid(fake_scores_out))
+ loss += tf.nn.softplus(-real_scores_out) # -log(sigmoid(real_scores_out)) # pylint: disable=invalid-unary-operand-type
+
+ with tf.name_scope('GradientPenalty'):
+ # Gradient of the summed real scores with respect to the real images.
+ real_grads = tf.gradients(tf.reduce_sum(real_scores_out), [reals])[0]
+ # Squared gradient magnitude summed over axes 1..3, leaving one value per sample.
+ gradient_penalty = tf.reduce_sum(tf.square(real_grads), axis=[1,2,3])
+ gradient_penalty = autosummary('Loss/gradient_penalty', gradient_penalty)
+ reg = gradient_penalty * (gamma * 0.5)
+ return loss, reg
+
+# Logistic discriminator loss plus the R2 gradient penalty, which is evaluated on the generated images.
+# gamma: penalty weight; the regularizer is gradient_penalty * gamma / 2.
+# Returns (loss, reg); the two terms are returned separately and are not combined here.
+def D_logistic_r2(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0):
+ _ = opt, training_set
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ real_scores_out = D.get_output_for(reals, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ real_scores_out = autosummary('Loss/scores/real', real_scores_out)
+ fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
+ loss = tf.nn.softplus(fake_scores_out) # -log(1-sigmoid(fake_scores_out))
+ loss += tf.nn.softplus(-real_scores_out) # -log(sigmoid(real_scores_out)) # pylint: disable=invalid-unary-operand-type
+
+ with tf.name_scope('GradientPenalty'):
+ # Gradient of the summed fake scores with respect to the generated images.
+ fake_grads = tf.gradients(tf.reduce_sum(fake_scores_out), [fake_images_out])[0]
+ # Squared gradient magnitude summed over axes 1..3, leaving one value per sample.
+ gradient_penalty = tf.reduce_sum(tf.square(fake_grads), axis=[1,2,3])
+ gradient_penalty = autosummary('Loss/gradient_penalty', gradient_penalty)
+ reg = gradient_penalty * (gamma * 0.5)
+ return loss, reg
+
+#----------------------------------------------------------------------------
+# WGAN loss from the paper
+# "Wasserstein Generative Adversarial Networks", Arjovsky et al. 2017
+
+# WGAN generator loss: the negated discriminator score of the generated images.
+# Returns (loss, reg) with reg = None.
+def G_wgan(G, D, opt, training_set, minibatch_size):
+ _ = opt
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ labels = training_set.get_random_labels_tf(minibatch_size)
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ loss = -fake_scores_out
+ return loss, None
+
+# WGAN discriminator loss: D(fake) - D(real), plus wgan_epsilon times the squared real scores.
+# wgan_epsilon: weight of the squared-real-score term.
+# Returns (loss, reg) with reg = None; the epsilon term is folded into the loss itself.
+def D_wgan(G, D, opt, training_set, minibatch_size, reals, labels, wgan_epsilon=0.001):
+ _ = opt, training_set
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ real_scores_out = D.get_output_for(reals, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ real_scores_out = autosummary('Loss/scores/real', real_scores_out)
+ fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
+ loss = fake_scores_out - real_scores_out
+ with tf.name_scope('EpsilonPenalty'):
+ epsilon_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores_out))
+ loss += epsilon_penalty * wgan_epsilon
+ return loss, None
+
+#----------------------------------------------------------------------------
+# WGAN-GP loss from the paper
+# "Improved Training of Wasserstein GANs", Gulrajani et al. 2017
+
+# WGAN-GP discriminator loss: the WGAN loss (including the epsilon term) plus a gradient
+# penalty evaluated on random mixtures of real and generated images.
+# wgan_lambda: weight of the gradient penalty.
+# wgan_epsilon: weight of the squared-real-score term added to the loss.
+# wgan_target: target gradient norm; the penalty is (norm - target)^2 * lambda / target^2.
+# Returns (loss, reg); the gradient penalty is returned separately as reg.
+def D_wgan_gp(G, D, opt, training_set, minibatch_size, reals, labels, wgan_lambda=10.0, wgan_epsilon=0.001, wgan_target=1.0):
+ _ = opt, training_set
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ fake_images_out = G.get_output_for(latents, labels, is_training=True)
+ real_scores_out = D.get_output_for(reals, labels, is_training=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ real_scores_out = autosummary('Loss/scores/real', real_scores_out)
+ fake_scores_out = autosummary('Loss/scores/fake', fake_scores_out)
+ loss = fake_scores_out - real_scores_out
+ with tf.name_scope('EpsilonPenalty'):
+ epsilon_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores_out))
+ loss += epsilon_penalty * wgan_epsilon
+
+ with tf.name_scope('GradientPenalty'):
+ # One uniform mixing factor per sample, broadcast over the remaining three axes.
+ mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype)
+ # NOTE(review): tflib.lerp is presumably linear interpolation between reals and fakes; confirm in tflib.
+ mixed_images_out = tflib.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors)
+ mixed_scores_out = D.get_output_for(mixed_images_out, labels, is_training=True)
+ mixed_scores_out = autosummary('Loss/scores/mixed', mixed_scores_out)
+ mixed_grads = tf.gradients(tf.reduce_sum(mixed_scores_out), [mixed_images_out])[0]
+ # Per-sample L2 norm of the gradient with respect to the mixed images.
+ mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3]))
+ mixed_norms = autosummary('Loss/mixed_norms', mixed_norms)
+ gradient_penalty = tf.square(mixed_norms - wgan_target)
+ reg = gradient_penalty * (wgan_lambda / (wgan_target**2))
+ return loss, reg
+
+#----------------------------------------------------------------------------
+# Non-saturating logistic loss with path length regularizer from the paper
+# "Analyzing and Improving the Image Quality of StyleGAN", Karras et al. 2019
+
+# Non-saturating logistic generator loss plus the path length regularizer.
+# pl_minibatch_shrink: if > 1, the regularizer is evaluated on minibatch_size // pl_minibatch_shrink fresh samples.
+# pl_decay: step size of the moving average of the path lengths.
+# pl_weight: multiplier applied to the path length penalty.
+# Returns (loss, reg); the two terms are returned separately and are not combined here.
+def G_logistic_ns_pathreg(G, D, opt, training_set, minibatch_size, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2.0):
+ _ = opt
+ latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
+ labels = training_set.get_random_labels_tf(minibatch_size)
+ fake_images_out, fake_dlatents_out = G.get_output_for(latents, labels, is_training=True, return_dlatents=True)
+ fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True)
+ loss = tf.nn.softplus(-fake_scores_out) # -log(sigmoid(fake_scores_out))
+
+ # Path length regularization.
+ with tf.name_scope('PathReg'):
+
+ # Evaluate the regularization term using a smaller minibatch to conserve memory.
+ # This overwrites fake_images_out / fake_dlatents_out; the loss above was already built from the full minibatch.
+ if pl_minibatch_shrink > 1:
+ pl_minibatch = minibatch_size // pl_minibatch_shrink
+ pl_latents = tf.random_normal([pl_minibatch] + G.input_shapes[0][1:])
+ pl_labels = training_set.get_random_labels_tf(pl_minibatch)
+ fake_images_out, fake_dlatents_out = G.get_output_for(pl_latents, pl_labels, is_training=True, return_dlatents=True)
+
+ # Compute |J*y|.
+ pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(np.prod(G.output_shape[2:]))
+ pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise), [fake_dlatents_out])[0]
+ pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1))
+ pl_lengths = autosummary('Loss/pl_lengths', pl_lengths)
+
+ # Track exponential moving average of |J*y|.
+ # control_dependencies(None) clears any enclosing control dependencies while the variable is created.
+ with tf.control_dependencies(None):
+ pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0.0, dtype=tf.float32)
+ pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var)
+ pl_update = tf.assign(pl_mean_var, pl_mean)
+
+ # Calculate (|J*y|-a)^2.
+ # The control dependency makes the moving-average update run whenever the penalty is evaluated.
+ with tf.control_dependencies([pl_update]):
+ pl_penalty = tf.square(pl_lengths - pl_mean)
+ pl_penalty = autosummary('Loss/pl_penalty', pl_penalty)
+
+ # Apply weight.
+ #
+ # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean
+ # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes:
+ #
+ # gamma_pl = pl_weight / num_pixels / num_affine_layers
+ # = 2 / (r^2) / (log2(r) * 2 - 2)
+ # = 1 / (r^2 * (log2(r) - 1))
+ # = ln(2) / (r^2 * (ln(r) - ln(2)))
+ #
+ reg = pl_penalty * pl_weight
+
+ return loss, reg
+
+#----------------------------------------------------------------------------
diff --git a/training/misc.py b/training/misc.py
new file mode 100755
index 0000000..9b3444e
--- /dev/null
+++ b/training/misc.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Miscellaneous utility functions."""
+
+import os
+import pickle
+import numpy as np
+import PIL.Image
+import PIL.ImageFont
+import dnnlib
+
+#----------------------------------------------------------------------------
+# Convenience wrappers for pickle that are able to load data produced by
+# older versions of the code, and from external URLs.
+
+def open_file_or_url(file_or_url):
+ if dnnlib.util.is_url(file_or_url):
+ return dnnlib.util.open_url(file_or_url, cache_dir='.stylegan2-cache')
+ return open(file_or_url, 'rb')
+
+# Unpickle and return the object stored at a local path or URL.
+# encoding='latin1' is passed to pickle.load; presumably this is what lets pickles written
+# by older (Python 2) code load - TODO confirm.
+# NOTE(security): unpickling can execute arbitrary code. The input may come from a URL,
+# so only load pickles from sources you trust.
+def load_pkl(file_or_url):
+ with open_file_or_url(file_or_url) as file:
+ return pickle.load(file, encoding='latin1')
+
+def save_pkl(obj, filename):
+ with open(filename, 'wb') as file:
+ pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL)
+
+#----------------------------------------------------------------------------
+# Image utils.
+
+def adjust_dynamic_range(data, drange_in, drange_out):
+ if drange_in != drange_out:
+ scale = (np.float32(drange_out[1]) - np.float32(drange_out[0])) / (np.float32(drange_in[1]) - np.float32(drange_in[0]))
+ bias = (np.float32(drange_out[0]) - np.float32(drange_in[0]) * scale)
+ data = data * scale + bias
+ return data
+
+def create_image_grid(images, grid_size=None):
+ assert images.ndim == 3 or images.ndim == 4
+ num, img_w, img_h = images.shape[0], images.shape[-1], images.shape[-2]
+
+ if grid_size is not None:
+ grid_w, grid_h = tuple(grid_size)
+ else:
+ grid_w = max(int(np.ceil(np.sqrt(num))), 1)
+ grid_h = max((num - 1) // grid_w + 1, 1)
+
+ grid = np.zeros(list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w], dtype=images.dtype)
+ for idx in range(num):
+ x = (idx % grid_w) * img_w
+ y = (idx // grid_w) * img_h
+ grid[..., y : y + img_h, x : x + img_w] = images[idx]
+ return grid
+
+def convert_to_pil_image(image, drange=[0,1]):
+ assert image.ndim == 2 or image.ndim == 3
+ if image.ndim == 3:
+ if image.shape[0] == 1:
+ image = image[0] # grayscale CHW => HW
+ else:
+ image = image.transpose(1, 2, 0) # CHW -> HWC
+
+ image = adjust_dynamic_range(image, drange, [0,255])
+ image = np.rint(image).clip(0, 255).astype(np.uint8)
+ fmt = 'RGB' if image.ndim == 3 else 'L'
+ return PIL.Image.fromarray(image, fmt)
+
+def save_image_grid(images, filename, drange=[0,1], grid_size=None):
+ convert_to_pil_image(create_image_grid(images, grid_size), drange).save(filename)
+
+def apply_mirror_augment(minibatch):
+ mask = np.random.rand(minibatch.shape[0]) < 0.5
+ minibatch = np.array(minibatch)
+ minibatch[mask] = minibatch[mask, :, :, ::-1]
+ return minibatch
+
+#----------------------------------------------------------------------------
+# Loading data from previous training runs.
+
+def parse_config_for_previous_run(run_dir):
+ with open(os.path.join(run_dir, 'submit_config.pkl'), 'rb') as f:
+ data = pickle.load(f)
+ data = data.get('run_func_kwargs', {})
+ return dict(train=data, dataset=data.get('dataset_args', {}))
+
+#----------------------------------------------------------------------------
+# Size and contents of the image snapshot grids that are exported
+# periodically during training.
+
+# Choose the grid dimensions for snapshot images and fetch the real images and labels that fill it.
+# Returns ((gw, gh), reals, labels), where reals and labels each hold gw * gh entries.
+# A size value other than the ones handled below leaves the grid at 1x1.
+def setup_snapshot_image_grid(training_set,
+ size = '1080p', # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display, '8k' = to be viewed on 8k display.
+ layout = 'random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label.
+
+ # Select size.
+ # Number of images that fit across the display, clipped to a minimum count and to at most 32 per axis.
+ gw = 1; gh = 1
+ if size == '1080p':
+ gw = np.clip(1920 // training_set.shape[2], 3, 32)
+ gh = np.clip(1080 // training_set.shape[1], 2, 32)
+ if size == '4k':
+ gw = np.clip(3840 // training_set.shape[2], 7, 32)
+ gh = np.clip(2160 // training_set.shape[1], 4, 32)
+ if size == '8k':
+ gw = np.clip(7680 // training_set.shape[2], 7, 32)
+ gh = np.clip(4320 // training_set.shape[1], 4, 32)
+
+ # Initialize data arrays.
+ reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype)
+ labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype)
+
+ # Random layout.
+ if layout == 'random':
+ reals[:], labels[:] = training_set.get_minibatch_np(gw * gh)
+
+ # Class-conditional layouts.
+ # Each layout maps to the [width, height] of one block of cells; the grid is split into nw x nh such blocks.
+ class_layouts = dict(row_per_class=[gw,1], col_per_class=[1,gh], class4x4=[4,4])
+ if layout in class_layouts:
+ bw, bh = class_layouts[layout]
+ nw = (gw - 1) // bw + 1
+ nh = (gh - 1) // bh + 1
+ blocks = [[] for _i in range(nw * nh)]
+ # Draw single images until every block is full, giving up after 1000000 draws.
+ for _iter in range(1000000):
+ real, label = training_set.get_minibatch_np(1)
+ # Index of the largest label component; presumably the class index of a one-hot label - TODO confirm.
+ idx = np.argmax(label[0])
+ # If the block for this index is already full, try further blocks in steps of label_size.
+ while idx < len(blocks) and len(blocks[idx]) >= bw * bh:
+ idx += training_set.label_size
+ if idx < len(blocks):
+ blocks[idx].append((real, label))
+ if all(len(block) >= bw * bh for block in blocks):
+ break
+ # Copy the collected samples into their grid cells, skipping cells that fall outside the grid.
+ for i, block in enumerate(blocks):
+ for j, (real, label) in enumerate(block):
+ x = (i % nw) * bw + j % bw
+ y = (i // nw) * bh + j // bw
+ if x < gw and y < gh:
+ reals[x + y * gw] = real[0]
+ labels[x + y * gw] = label[0]
+
+ return (gw, gh), reals, labels
+
+#----------------------------------------------------------------------------
diff --git a/training/networks_stylegan.py b/training/networks_stylegan.py
new file mode 100755
index 0000000..76ce31c
--- /dev/null
+++ b/training/networks_stylegan.py
@@ -0,0 +1,660 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Network architectures used in the StyleGAN paper."""
+
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+
+# NOTE: Do not import any application-specific modules here!
+# Specify all network parameters as kwargs.
+
+#----------------------------------------------------------------------------
+# Primitive ops for manipulating 4D activation tensors.
+# The gradients of these are not necessarily efficient or even meaningful.
+
+def _blur2d(x, f=[1,2,1], normalize=True, flip=False, stride=1):
+ assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:])
+ assert isinstance(stride, int) and stride >= 1
+
+ # Finalize filter kernel.
+ f = np.array(f, dtype=np.float32)
+ if f.ndim == 1:
+ f = f[:, np.newaxis] * f[np.newaxis, :]
+ assert f.ndim == 2
+ if normalize:
+ f /= np.sum(f)
+ if flip:
+ f = f[::-1, ::-1]
+ f = f[:, :, np.newaxis, np.newaxis]
+ f = np.tile(f, [1, 1, int(x.shape[1]), 1])
+
+ # No-op => early exit.
+ if f.shape == (1, 1) and f[0,0] == 1:
+ return x
+
+ # Convolve using depthwise_conv2d.
+ orig_dtype = x.dtype
+ x = tf.cast(x, tf.float32) # tf.nn.depthwise_conv2d() doesn't support fp16
+ f = tf.constant(f, dtype=x.dtype, name='filter')
+ strides = [1, 1, stride, stride]
+ x = tf.nn.depthwise_conv2d(x, f, strides=strides, padding='SAME', data_format='NCHW')
+ x = tf.cast(x, orig_dtype)
+ return x
+
+def _upscale2d(x, factor=2, gain=1):
+ assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:])
+ assert isinstance(factor, int) and factor >= 1
+
+ # Apply gain.
+ if gain != 1:
+ x *= gain
+
+ # No-op => early exit.
+ if factor == 1:
+ return x
+
+ # Upscale using tf.tile().
+ s = x.shape
+ x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
+ x = tf.tile(x, [1, 1, 1, factor, 1, factor])
+ x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
+ return x
+
+def _downscale2d(x, factor=2, gain=1):
+ assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:])
+ assert isinstance(factor, int) and factor >= 1
+
+ # 2x2, float32 => downscale using _blur2d().
+ if factor == 2 and x.dtype == tf.float32:
+ f = [np.sqrt(gain) / factor] * factor
+ return _blur2d(x, f=f, normalize=False, stride=factor)
+
+ # Apply gain.
+ if gain != 1:
+ x *= gain
+
+ # No-op => early exit.
+ if factor == 1:
+ return x
+
+ # Large factor => downscale using tf.nn.avg_pool().
+ # NOTE: Requires tf_config['graph_options.place_pruned_graph']=True to work.
+ ksize = [1, 1, factor, factor]
+ return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW')
+
+#----------------------------------------------------------------------------
+# High-level ops for manipulating 4D activation tensors.
+# The gradients of these are meant to be as efficient as possible.
+
+def blur2d(x, f=[1,2,1], normalize=True):
+ with tf.variable_scope('Blur2D'):
+ @tf.custom_gradient
+ def func(x):
+ y = _blur2d(x, f, normalize)
+ @tf.custom_gradient
+ def grad(dy):
+ dx = _blur2d(dy, f, normalize, flip=True)
+ return dx, lambda ddx: _blur2d(ddx, f, normalize)
+ return y, grad
+ return func(x)
+
+def upscale2d(x, factor=2):
+ with tf.variable_scope('Upscale2D'):
+ @tf.custom_gradient
+ def func(x):
+ y = _upscale2d(x, factor)
+ @tf.custom_gradient
+ def grad(dy):
+ dx = _downscale2d(dy, factor, gain=factor**2)
+ return dx, lambda ddx: _upscale2d(ddx, factor)
+ return y, grad
+ return func(x)
+
+def downscale2d(x, factor=2):
+ with tf.variable_scope('Downscale2D'):
+ @tf.custom_gradient
+ def func(x):
+ y = _downscale2d(x, factor)
+ @tf.custom_gradient
+ def grad(dy):
+ dx = _upscale2d(dy, factor, gain=1/factor**2)
+ return dx, lambda ddx: _downscale2d(ddx, factor)
+ return y, grad
+ return func(x)
+
+#----------------------------------------------------------------------------
+# Get/create weight tensor for a convolutional or fully-connected layer.
+
+def get_weight(shape, gain=np.sqrt(2), use_wscale=False, lrmul=1):
+ fan_in = np.prod(shape[:-1]) # [kernel, kernel, fmaps_in, fmaps_out] or [in, out]
+ he_std = gain / np.sqrt(fan_in) # He init
+
+ # Equalized learning rate and custom learning rate multiplier.
+ if use_wscale:
+ init_std = 1.0 / lrmul
+ runtime_coef = he_std * lrmul
+ else:
+ init_std = he_std / lrmul
+ runtime_coef = lrmul
+
+ # Create variable.
+ init = tf.initializers.random_normal(0, init_std)
+ return tf.get_variable('weight', shape=shape, initializer=init) * runtime_coef
+
+#----------------------------------------------------------------------------
+# Fully-connected layer.
+
+def dense(x, fmaps, **kwargs):
+ if len(x.shape) > 2:
+ x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])])
+ w = get_weight([x.shape[1].value, fmaps], **kwargs)
+ w = tf.cast(w, x.dtype)
+ return tf.matmul(x, w)
+
+#----------------------------------------------------------------------------
+# Convolutional layer.
+
+def conv2d(x, fmaps, kernel, **kwargs):
+ assert kernel >= 1 and kernel % 2 == 1
+ w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
+ w = tf.cast(w, x.dtype)
+ return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME', data_format='NCHW')
+
+#----------------------------------------------------------------------------
+# Fused convolution + scaling.
+# Faster and uses less memory than performing the operations separately.
+
+def upscale2d_conv2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
+ assert kernel >= 1 and kernel % 2 == 1
+ assert fused_scale in [True, False, 'auto']
+ if fused_scale == 'auto':
+ fused_scale = min(x.shape[2:]) * 2 >= 128
+
+ # Not fused => call the individual ops directly.
+ if not fused_scale:
+ return conv2d(upscale2d(x), fmaps, kernel, **kwargs)
+
+ # Fused => perform both ops simultaneously using tf.nn.conv2d_transpose().
+ w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
+ w = tf.transpose(w, [0, 1, 3, 2]) # [kernel, kernel, fmaps_out, fmaps_in]
+ w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT')
+ w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]])
+ w = tf.cast(w, x.dtype)
+ os = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2]
+ return tf.nn.conv2d_transpose(x, w, os, strides=[1,1,2,2], padding='SAME', data_format='NCHW')
+
+def conv2d_downscale2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
+ assert kernel >= 1 and kernel % 2 == 1
+ assert fused_scale in [True, False, 'auto']
+ if fused_scale == 'auto':
+ fused_scale = min(x.shape[2:]) >= 128
+
+ # Not fused => call the individual ops directly.
+ if not fused_scale:
+ return downscale2d(conv2d(x, fmaps, kernel, **kwargs))
+
+ # Fused => perform both ops simultaneously using tf.nn.conv2d().
+ w = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
+ w = tf.pad(w, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT')
+ w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) * 0.25
+ w = tf.cast(w, x.dtype)
+ return tf.nn.conv2d(x, w, strides=[1,1,2,2], padding='SAME', data_format='NCHW')
+
+#----------------------------------------------------------------------------
+# Apply bias to the given activation tensor.
+
+def apply_bias(x, lrmul=1):
+ b = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul
+ b = tf.cast(b, x.dtype)
+ if len(x.shape) == 2:
+ return x + b
+ return x + tf.reshape(b, [1, -1, 1, 1])
+
+#----------------------------------------------------------------------------
+# Leaky ReLU activation. More efficient than tf.nn.leaky_relu() and supports FP16.
+
+def leaky_relu(x, alpha=0.2):
+ with tf.variable_scope('LeakyReLU'):
+ alpha = tf.constant(alpha, dtype=x.dtype, name='alpha')
+ @tf.custom_gradient
+ def func(x):
+ y = tf.maximum(x, x * alpha)
+ @tf.custom_gradient
+ def grad(dy):
+ dx = tf.where(y >= 0, dy, dy * alpha)
+ return dx, lambda ddx: tf.where(y >= 0, ddx, ddx * alpha)
+ return y, grad
+ return func(x)
+
+#----------------------------------------------------------------------------
+# Pixelwise feature vector normalization.
+
+def pixel_norm(x, epsilon=1e-8):
+ with tf.variable_scope('PixelNorm'):
+ epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon')
+ return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon)
+
+#----------------------------------------------------------------------------
+# Instance normalization.
+
+def instance_norm(x, epsilon=1e-8):
+ assert len(x.shape) == 4 # NCHW
+ with tf.variable_scope('InstanceNorm'):
+ orig_dtype = x.dtype
+ x = tf.cast(x, tf.float32)
+ x -= tf.reduce_mean(x, axis=[2,3], keepdims=True)
+ epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon')
+ x *= tf.rsqrt(tf.reduce_mean(tf.square(x), axis=[2,3], keepdims=True) + epsilon)
+ x = tf.cast(x, orig_dtype)
+ return x
+
+#----------------------------------------------------------------------------
+# Style modulation.
+
+def style_mod(x, dlatent, **kwargs):
+ with tf.variable_scope('StyleMod'):
+ style = apply_bias(dense(dlatent, fmaps=x.shape[1]*2, gain=1, **kwargs))
+ style = tf.reshape(style, [-1, 2, x.shape[1]] + [1] * (len(x.shape) - 2))
+ return x * (style[:,0] + 1) + style[:,1]
+
+#----------------------------------------------------------------------------
+# Noise input.
+
+def apply_noise(x, noise_var=None, randomize_noise=True):
+ assert len(x.shape) == 4 # NCHW
+ with tf.variable_scope('Noise'):
+ if noise_var is None or randomize_noise:
+ noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
+ else:
+ noise = tf.cast(noise_var, x.dtype)
+ weight = tf.get_variable('weight', shape=[x.shape[1].value], initializer=tf.initializers.zeros())
+ return x + noise * tf.reshape(tf.cast(weight, x.dtype), [1, -1, 1, 1])
+
+#----------------------------------------------------------------------------
+# Minibatch standard deviation.
+
+def minibatch_stddev_layer(x, group_size=4, num_new_features=1):
+ with tf.variable_scope('MinibatchStddev'):
+ group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size.
+ s = x.shape # [NCHW] Input shape.
+ y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c.
+ y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32.
+ y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group.
+ y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group.
+ y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group.
+ y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels.
+ y = tf.reduce_mean(y, axis=[2]) # [Mn11] Split channels into c channel groups
+ y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type.
+ y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels.
+ return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap.
+
+#----------------------------------------------------------------------------
+# Style-based generator used in the StyleGAN paper.
+# Composed of two sub-networks (G_mapping and G_synthesis) that are defined below.
+
+def G_style(
+ latents_in, # First input: Latent vectors (Z) [minibatch, latent_size].
+ labels_in, # Second input: Conditioning labels [minibatch, label_size].
+ truncation_psi = 0.7, # Style strength multiplier for the truncation trick. None = disable.
+ truncation_cutoff = 8, # Number of layers for which to apply the truncation trick. None = disable.
+ truncation_psi_val = None, # Value for truncation_psi to use during validation.
+ truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation.
+ dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable.
+ style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable.
+ is_training = False, # Network is under training? Enables and disables specific features.
+ is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi.
+ is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
+ components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls.
+ **kwargs): # Arguments for sub-networks (G_mapping and G_synthesis).
+ # Builds the complete generator: runs the mapping network, optionally
+ # updates the moving average of W, mixes styles and applies truncation,
+ # then runs the synthesis network. Returns the synthesis output wrapped
+ # in tf.identity under the name 'images_out'.
+
+ # Validate arguments.
+ assert not is_training or not is_validation
+ assert isinstance(components, dnnlib.EasyDict)
+ if is_validation:
+ truncation_psi = truncation_psi_val
+ truncation_cutoff = truncation_cutoff_val
+ # Truncation is switched off during training, and also when its setting is a
+ # plain Python value that makes it a no-op (psi == 1 or cutoff <= 0).
+ if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1):
+ truncation_psi = None
+ if is_training or (truncation_cutoff is not None and not tflib.is_tf_expression(truncation_cutoff) and truncation_cutoff <= 0):
+ truncation_cutoff = None
+ # The W moving average and style mixing are switched off outside training,
+ # and also when their setting is a plain Python value that makes them a
+ # no-op (beta == 1 or prob <= 0).
+ if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1):
+ dlatent_avg_beta = None
+ if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0):
+ style_mixing_prob = None
+
+ # Setup components.
+ # Sub-networks are created only if `components` does not hold them yet.
+ if 'synthesis' not in components:
+ components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs)
+ # Layer count and W size are read back from the synthesis network's input shape.
+ num_layers = components.synthesis.input_shape[1]
+ dlatent_size = components.synthesis.input_shape[2]
+ if 'mapping' not in components:
+ components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs)
+
+ # Setup variables.
+ lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
+ dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)
+
+ # Evaluate mapping network.
+ dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)
+
+ # Update moving average of W.
+ if dlatent_avg_beta is not None:
+ with tf.variable_scope('DlatentAvg'):
+ # Only layer 0 of the dlatents is averaged over the batch.
+ batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
+ update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
+ # The identity forces the update to run whenever dlatents are consumed.
+ with tf.control_dependencies([update_op]):
+ dlatents = tf.identity(dlatents)
+
+ # Perform style mixing regularization.
+ if style_mixing_prob is not None:
+ with tf.name_scope('StyleMix'):
+ # A second set of dlatents is produced from fresh random latents with the same labels.
+ latents2 = tf.random_normal(tf.shape(latents_in))
+ dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs)
+ layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
+ cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
+ # With probability style_mixing_prob pick a random cutoff layer;
+ # otherwise the cutoff is cur_layers.
+ mixing_cutoff = tf.cond(
+ tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
+ lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
+ lambda: cur_layers)
+ # Layers below the cutoff keep the original dlatents, the rest take dlatents2.
+ dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)
+
+ # Apply truncation trick.
+ if truncation_psi is not None and truncation_cutoff is not None:
+ with tf.variable_scope('Truncation'):
+ layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
+ ones = np.ones(layer_idx.shape, dtype=np.float32)
+ # Coefficient is truncation_psi for layers below the cutoff and 1 for the others.
+ coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones)
+ # NOTE(review): presumably lerp(a, b, t) = a + (b - a) * t, i.e. dlatents are pulled towards dlatent_avg -- confirm in tflib.
+ dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)
+
+ # Evaluate synthesis network.
+ # The synthesis network's own 'lod' variable is assigned from lod_in first.
+ with tf.control_dependencies([tf.assign(components.synthesis.find_var('lod'), lod_in)]):
+ images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs)
+ return tf.identity(images_out, name='images_out')
+
+#----------------------------------------------------------------------------
+# Mapping network used in the StyleGAN paper.
+
+def G_mapping(
+ latents_in, # First input: Latent vectors (Z) [minibatch, latent_size].
+ labels_in, # Second input: Conditioning labels [minibatch, label_size].
+ latent_size = 512, # Latent vector (Z) dimensionality.
+ label_size = 0, # Label dimensionality, 0 if no labels.
+ dlatent_size = 512, # Disentangled latent (W) dimensionality.
+ dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size].
+ mapping_layers = 8, # Number of mapping layers.
+ mapping_fmaps = 512, # Number of activations in the mapping layers.
+ mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers.
+ mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu'.
+ use_wscale = True, # Enable equalized learning rate?
+ normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers?
+ dtype = 'float32', # Data type to use for activations and outputs.
+ **_kwargs): # Ignore unrecognized keyword args.
+ # Maps latents (and optional labels) through a stack of dense layers and
+ # returns the result wrapped in tf.identity under the name 'dlatents_out'.
+
+ # Both supported activations are paired with the same gain, sqrt(2).
+ act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[mapping_nonlinearity]
+
+ # Inputs.
+ latents_in.set_shape([None, latent_size])
+ labels_in.set_shape([None, label_size])
+ latents_in = tf.cast(latents_in, dtype)
+ labels_in = tf.cast(labels_in, dtype)
+ x = latents_in
+
+ # Embed labels and concatenate them with latents.
+ if label_size:
+ with tf.variable_scope('LabelConcat'):
+ # Labels are projected to latent_size features by a learned matrix.
+ w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal())
+ y = tf.matmul(labels_in, tf.cast(w, dtype))
+ x = tf.concat([x, y], axis=1)
+
+ # Normalize latents.
+ if normalize_latents:
+ x = pixel_norm(x)
+
+ # Mapping layers.
+ for layer_idx in range(mapping_layers):
+ with tf.variable_scope('Dense%d' % layer_idx):
+ # The last layer outputs dlatent_size features; all others output mapping_fmaps.
+ fmaps = dlatent_size if layer_idx == mapping_layers - 1 else mapping_fmaps
+ x = dense(x, fmaps=fmaps, gain=gain, use_wscale=use_wscale, lrmul=mapping_lrmul)
+ x = apply_bias(x, lrmul=mapping_lrmul)
+ x = act(x)
+
+ # Broadcast.
+ if dlatent_broadcast is not None:
+ with tf.variable_scope('Broadcast'):
+ # Repeat the same vector dlatent_broadcast times along a new axis 1.
+ x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1])
+
+ # Output.
+ assert x.dtype == tf.as_dtype(dtype)
+ return tf.identity(x, name='dlatents_out')
+
+#----------------------------------------------------------------------------
+# Synthesis network used in the StyleGAN paper.
+
+def G_synthesis(
+ dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
+ dlatent_size = 512, # Disentangled latent (W) dimensionality.
+ num_channels = 3, # Number of output color channels.
+ resolution = 1024, # Output resolution.
+ fmap_base = 8192, # Overall multiplier for the number of feature maps.
+ fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
+ fmap_max = 512, # Maximum number of feature maps in any layer.
+ use_styles = True, # Enable style inputs?
+ const_input_layer = True, # First layer is a learned constant?
+ use_noise = True, # Enable noise inputs?
+ randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
+ nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu'
+ use_wscale = True, # Enable equalized learning rate?
+ use_pixel_norm = False, # Enable pixelwise feature vector normalization?
+ use_instance_norm = True, # Enable instance normalization?
+ dtype = 'float32', # Data type to use for activations and outputs.
+ fused_scale = 'auto', # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
+ blur_filter = [1,2,1], # Low-pass filter to apply when resampling activations. None = no filtering.
+ structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
+ is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
+ force_clean_graph = False, # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
+ **_kwargs): # Ignore unrecognized keyword args.
+ # Builds the synthesis graph from per-layer dlatents and returns the image
+ # tensor wrapped in tf.identity under the name 'images_out'.
+
+ resolution_log2 = int(np.log2(resolution))
+ # Resolution must be a power of two, at least 4.
+ assert resolution == 2**resolution_log2 and resolution >= 4
+ # Feature map count for a stage: fmap_base / 2**(stage * fmap_decay), capped at fmap_max.
+ def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
+ def blur(x): return blur2d(x, blur_filter) if blur_filter else x
+ # A template graph implies a clean graph, which in turn disables random noise.
+ if is_template_graph: force_clean_graph = True
+ if force_clean_graph: randomize_noise = False
+ if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'
+ act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]
+ # Two layers per resolution step from 4x4 (log2 = 2) up to the output resolution.
+ num_layers = resolution_log2 * 2 - 2
+ num_styles = num_layers if use_styles else 1
+ images_out = None
+
+ # Primary inputs.
+ dlatents_in.set_shape([None, num_styles, dlatent_size])
+ dlatents_in = tf.cast(dlatents_in, dtype)
+ lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)
+
+ # Noise inputs.
+ noise_inputs = []
+ if use_noise:
+ for layer_idx in range(num_layers):
+ res = layer_idx // 2 + 2
+ # NOTE(review): `use_noise` is also used as the size of axis 1; this equals 1 when use_noise is True.
+ shape = [1, use_noise, 2**res, 2**res]
+ noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))
+
+ # Things to do at the end of each layer.
+ # Order: noise, bias, activation, pixel norm, instance norm, style modulation.
+ def layer_epilogue(x, layer_idx):
+ if use_noise:
+ x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise)
+ x = apply_bias(x)
+ x = act(x)
+ if use_pixel_norm:
+ x = pixel_norm(x)
+ if use_instance_norm:
+ x = instance_norm(x)
+ if use_styles:
+ x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)
+ return x
+
+ # Early layers.
+ with tf.variable_scope('4x4'):
+ if const_input_layer:
+ with tf.variable_scope('Const'):
+ # Learned constant, initialised to ones and tiled to the batch size of dlatents_in.
+ x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones())
+ x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)
+ else:
+ with tf.variable_scope('Dense'):
+ x = dense(dlatents_in[:, 0], fmaps=nf(1)*16, gain=gain/4, use_wscale=use_wscale) # tweak gain to match the official implementation of Progressive GAN
+ x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
+ with tf.variable_scope('Conv'):
+ x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1)
+
+ # Building blocks for remaining layers.
+ def block(res, x): # res = 3..resolution_log2
+ with tf.variable_scope('%dx%d' % (2**res, 2**res)):
+ with tf.variable_scope('Conv0_up'):
+ x = layer_epilogue(blur(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)), res*2-4)
+ with tf.variable_scope('Conv1'):
+ x = layer_epilogue(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale), res*2-3)
+ return x
+ def torgb(res, x): # res = 2..resolution_log2
+ lod = resolution_log2 - res
+ with tf.variable_scope('ToRGB_lod%d' % lod):
+ return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale))
+
+ # Fixed structure: simple and efficient, but does not support progressive growing.
+ if structure == 'fixed':
+ for res in range(3, resolution_log2 + 1):
+ x = block(res, x)
+ images_out = torgb(resolution_log2, x)
+
+ # Linear structure: simple but inefficient.
+ if structure == 'linear':
+ images_out = torgb(2, x)
+ for res in range(3, resolution_log2 + 1):
+ lod = resolution_log2 - res
+ x = block(res, x)
+ img = torgb(res, x)
+ # Upscale the previous image and blend it with this resolution's image according to lod_in - lod.
+ images_out = upscale2d(images_out)
+ with tf.variable_scope('Grow_lod%d' % lod):
+ images_out = tflib.lerp_clip(img, images_out, lod_in - lod)
+
+ # Recursive structure: complex but efficient.
+ if structure == 'recursive':
+ # cset wraps a thunk so that new_lambda is chosen when new_cond holds and cur_lambda otherwise.
+ def cset(cur_lambda, new_cond, new_lambda):
+ return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
+ def grow(x, res, lod):
+ y = block(res, x)
+ # Default: output this resolution's image, upscaled by 2**lod.
+ img = lambda: upscale2d(torgb(res, y), 2**lod)
+ # lod_in > lod: blend with the upscaled image of the previous resolution.
+ img = cset(img, (lod_in > lod), lambda: upscale2d(tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod), 2**lod))
+ # lod_in < lod: recurse into the next higher resolution.
+ if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
+ return img()
+ images_out = grow(x, 3, resolution_log2 - 3)
+
+ assert images_out.dtype == tf.as_dtype(dtype)
+ return tf.identity(images_out, name='images_out')
+
+#----------------------------------------------------------------------------
+# Discriminator used in the StyleGAN paper.
+
+def D_basic(
+ images_in, # First input: Images [minibatch, channel, height, width].
+ labels_in, # Second input: Labels [minibatch, label_size].
+ num_channels = 1, # Number of input color channels. Overridden based on dataset.
+ resolution = 32, # Input resolution. Overridden based on dataset.
+ label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
+ fmap_base = 8192, # Overall multiplier for the number of feature maps.
+ fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
+ fmap_max = 512, # Maximum number of feature maps in any layer.
+ nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu'.
+ use_wscale = True, # Enable equalized learning rate?
+ mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable.
+ mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer.
+ dtype = 'float32', # Data type to use for activations and outputs.
+ fused_scale = 'auto', # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
+ blur_filter = [1,2,1], # Low-pass filter to apply when resampling activations. None = no filtering.
+ structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
+ is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
+ **_kwargs): # Ignore unrecognized keyword args.
+ # Builds the discriminator graph for images (and optional labels) and
+ # returns the scores wrapped in tf.identity under the name 'scores_out'.
+
+ resolution_log2 = int(np.log2(resolution))
+ # Resolution must be a power of two, at least 4.
+ assert resolution == 2**resolution_log2 and resolution >= 4
+ # Feature map count for a stage: fmap_base / 2**(stage * fmap_decay), capped at fmap_max.
+ def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
+ def blur(x): return blur2d(x, blur_filter) if blur_filter else x
+ if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'
+ act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]
+
+ images_in.set_shape([None, num_channels, resolution, resolution])
+ labels_in.set_shape([None, label_size])
+ images_in = tf.cast(images_in, dtype)
+ labels_in = tf.cast(labels_in, dtype)
+ lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)
+ scores_out = None
+
+ # Building blocks.
+ def fromrgb(x, res): # res = 2..resolution_log2
+ with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
+ return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, gain=gain, use_wscale=use_wscale)))
+ def block(x, res): # res = 2..resolution_log2
+ with tf.variable_scope('%dx%d' % (2**res, 2**res)):
+ if res >= 3: # 8x8 and up
+ with tf.variable_scope('Conv0'):
+ x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)))
+ with tf.variable_scope('Conv1_down'):
+ x = act(apply_bias(conv2d_downscale2d(blur(x), fmaps=nf(res-2), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)))
+ else: # 4x4
+ # The minibatch stddev layer is only added for group sizes above 1.
+ if mbstd_group_size > 1:
+ x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
+ with tf.variable_scope('Conv'):
+ x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)))
+ with tf.variable_scope('Dense0'):
+ x = act(apply_bias(dense(x, fmaps=nf(res-2), gain=gain, use_wscale=use_wscale)))
+ with tf.variable_scope('Dense1'):
+ # One output per label, or a single output when there are no labels.
+ x = apply_bias(dense(x, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale))
+ return x
+
+ # Fixed structure: simple and efficient, but does not support progressive growing.
+ if structure == 'fixed':
+ x = fromrgb(images_in, resolution_log2)
+ for res in range(resolution_log2, 2, -1):
+ x = block(x, res)
+ scores_out = block(x, 2)
+
+ # Linear structure: simple but inefficient.
+ if structure == 'linear':
+ img = images_in
+ x = fromrgb(img, resolution_log2)
+ for res in range(resolution_log2, 2, -1):
+ lod = resolution_log2 - res
+ x = block(x, res)
+ # Blend the block output with features taken directly from the downscaled image, according to lod_in - lod.
+ img = downscale2d(img)
+ y = fromrgb(img, res - 1)
+ with tf.variable_scope('Grow_lod%d' % lod):
+ x = tflib.lerp_clip(x, y, lod_in - lod)
+ scores_out = block(x, 2)
+
+ # Recursive structure: complex but efficient.
+ if structure == 'recursive':
+ # cset wraps a thunk so that new_lambda is chosen when new_cond holds and cur_lambda otherwise.
+ def cset(cur_lambda, new_cond, new_lambda):
+ return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
+ def grow(res, lod):
+ # Default input: features from the image downscaled by 2**lod.
+ x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
+ # lod_in < lod: take the input from the next higher resolution instead.
+ if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
+ x = block(x(), res); y = lambda: x
+ # lod_in > lod: blend with features from the image downscaled one step further.
+ if res > 2: y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod))
+ return y()
+ scores_out = grow(2, resolution_log2 - 2)
+
+ # Label conditioning from "Which Training Methods for GANs do actually Converge?"
+ if label_size:
+ with tf.variable_scope('LabelSwitch'):
+ # Weighted sum of the per-label scores by labels_in (presumably one-hot, selecting a single score -- TODO confirm).
+ scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True)
+
+ assert scores_out.dtype == tf.as_dtype(dtype)
+ scores_out = tf.identity(scores_out, name='scores_out')
+ return scores_out
+
+#----------------------------------------------------------------------------
diff --git a/training/networks_stylegan2.py b/training/networks_stylegan2.py
new file mode 100755
index 0000000..6c96fc1
--- /dev/null
+++ b/training/networks_stylegan2.py
@@ -0,0 +1,697 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Network architectures used in the StyleGAN2 paper."""
+
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+from dnnlib.tflib.ops.upfirdn_2d import upsample_2d, downsample_2d, upsample_conv_2d, conv_downsample_2d
+from dnnlib.tflib.ops.fused_bias_act import fused_bias_act
+
+# NOTE: Do not import any application-specific modules here!
+# Specify all network parameters as kwargs.
+
+#----------------------------------------------------------------------------
+# Get/create weight tensor for a convolution or fully-connected layer.
+
+def get_weight(shape, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
+    """Get/create a weight variable and return it multiplied by its runtime coefficient.
+
+    Args:
+        shape: Variable shape. All dimensions except the last are multiplied together
+            to form the fan-in, e.g. [kernel, kernel, fmaps_in, fmaps_out] or [in, out].
+        gain: Numerator of the He standard deviation, gain / sqrt(fan_in).
+        use_wscale: True = initialize with std 1/lrmul and apply the He std at runtime
+            (equalized learning rate); False = fold the He std into the initializer.
+        lrmul: Learning rate multiplier. Divides the initializer std and multiplies
+            the runtime coefficient.
+        weight_var: Name handed to tf.get_variable().
+
+    Returns:
+        The variable times the runtime coefficient.
+    """
+    # He init: std = gain / sqrt(fan_in).
+    he_std = gain / np.sqrt(np.prod(shape[:-1]))
+
+    # Decide how the scale is split between the initializer and the runtime multiplier.
+    init_std, runtime_coef = (1.0 / lrmul, he_std * lrmul) if use_wscale else (he_std / lrmul, lrmul)
+
+    variable = tf.get_variable(weight_var, shape=shape, initializer=tf.initializers.random_normal(0, init_std))
+    return variable * runtime_coef
+
+#----------------------------------------------------------------------------
+# Fully-connected layer.
+
+def dense_layer(x, fmaps, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
+    """Fully-connected layer without bias: matmul of `x` with a get_weight() variable.
+
+    Inputs with more than two dimensions are first flattened to [-1, prod(x.shape[1:])].
+    The weight has shape [x.shape[1], fmaps] and is cast to x.dtype before the matmul.
+    `gain`, `use_wscale`, `lrmul` and `weight_var` are forwarded to get_weight().
+    """
+    if len(x.shape) > 2:
+        flat_size = np.prod([dim.value for dim in x.shape[1:]])
+        x = tf.reshape(x, [-1, flat_size])
+    in_features = x.shape[1].value
+    weight = get_weight([in_features, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
+    return tf.matmul(x, tf.cast(weight, x.dtype))
+
+#----------------------------------------------------------------------------
+# Convolution layer with optional upsampling or downsampling.
+
+def conv2d_layer(x, fmaps, kernel, up=False, down=False, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
+    """Convolve NCHW activations with a get_weight() kernel, optionally resampling.
+
+    Args:
+        x: Input activations; x.shape[1] is taken as the number of input feature maps.
+        fmaps: Number of output feature maps.
+        kernel: Kernel size; must be odd and >= 1.
+        up: Use upsample_conv_2d() instead of a plain convolution.
+        down: Use conv_downsample_2d() instead of a plain convolution.
+            `up` and `down` are mutually exclusive.
+        resample_kernel: Forwarded as `k` to the up/downsampling convolution.
+        gain, use_wscale, lrmul, weight_var: Forwarded to get_weight().
+
+    Returns:
+        The convolved activations.
+    """
+    assert not (up and down)
+    assert kernel >= 1 and kernel % 2 == 1
+    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
+    weight = tf.cast(weight, x.dtype)
+    if up:
+        return upsample_conv_2d(x, weight, data_format='NCHW', k=resample_kernel)
+    if down:
+        return conv_downsample_2d(x, weight, data_format='NCHW', k=resample_kernel)
+    # No resampling: stride-1 convolution with SAME padding.
+    return tf.nn.conv2d(x, weight, data_format='NCHW', strides=[1,1,1,1], padding='SAME')
+
+#----------------------------------------------------------------------------
+# Apply bias and activation func.
+
+def apply_bias_act(x, act='linear', alpha=None, gain=None, lrmul=1, bias_var='bias'):
+    """Add a learned bias to `x` and apply an activation through fused_bias_act().
+
+    The bias variable is named `bias_var`, has one entry per x.shape[1], starts at zero,
+    and is multiplied by `lrmul` and cast to x.dtype before use. `act`, `alpha` and
+    `gain` are passed to fused_bias_act() unchanged.
+    """
+    bias = tf.get_variable(bias_var, shape=[x.shape[1]], initializer=tf.initializers.zeros())
+    scaled_bias = bias * lrmul
+    return fused_bias_act(x, b=tf.cast(scaled_bias, x.dtype), act=act, alpha=alpha, gain=gain)
+
+#----------------------------------------------------------------------------
+# Naive upsampling (nearest neighbor) and downsampling (average pooling).
+
+def naive_upsample_2d(x, factor=2):
+    """Nearest-neighbor upsampling of a 4-D [N, C, H, W] tensor by `factor`.
+
+    A singleton axis is inserted after both H and W, tiled `factor` times, and the
+    result is collapsed back to [-1, C, H * factor, W * factor].
+    """
+    with tf.variable_scope('NaiveUpsample'):
+        channels, height, width = x.shape.as_list()[1:]
+        expanded = tf.reshape(x, [-1, channels, height, 1, width, 1])
+        repeated = tf.tile(expanded, [1, 1, 1, factor, 1, factor])
+        return tf.reshape(repeated, [-1, channels, height * factor, width * factor])
+
+def naive_downsample_2d(x, factor=2):
+    """Average-pool a 4-D [N, C, H, W] tensor over non-overlapping factor x factor tiles.
+
+    H and W are each split into (size // factor, factor) and the two `factor` axes
+    are averaged away, giving [-1, C, H // factor, W // factor].
+    """
+    with tf.variable_scope('NaiveDownsample'):
+        channels, height, width = x.shape.as_list()[1:]
+        tiles = tf.reshape(x, [-1, channels, height // factor, factor, width // factor, factor])
+        return tf.reduce_mean(tiles, axis=[3,5])
+
+#----------------------------------------------------------------------------
+# Modulated convolution layer.
+
+def modulated_conv2d_layer(x, y, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, fused_modconv=True, weight_var='weight', mod_weight_var='mod_weight', mod_bias_var='mod_bias'):
+    # Convolution whose weight is scaled per sample by a style computed from `y`,
+    # optionally followed by demodulation of each output feature map.
+    #
+    #   x:               Input activations in NCHW layout; x.shape[1] is the input channel count (I).
+    #   y:               Per-sample input to the style dense layer.
+    #   fmaps:           Number of output feature maps (O).
+    #   kernel:          Kernel size (k); must be odd and >= 1.
+    #   up / down:       Use upsample_conv_2d() / conv_downsample_2d() instead of tf.nn.conv2d(). Mutually exclusive.
+    #   demodulate:      Scale by rsqrt(sum of squared modulated weights + 1e-8) per sample and output channel.
+    #   resample_kernel: Forwarded as `k` to the up/downsampling convolutions.
+    #   gain, use_wscale, lrmul, weight_var: Forwarded to get_weight() for the convolution weight.
+    #   fused_modconv:   True = one convolution with per-sample weights (minibatch folded into the channel axis).
+    #                    False = shared weight, with the activations scaled before and after the convolution.
+    #   mod_weight_var, mod_bias_var: Variable names used by the style dense layer and its bias.
+    #
+    # Returns the output activations.
+    assert not (up and down)
+    assert kernel >= 1 and kernel % 2 == 1
+
+    # Get weight.
+    w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
+    ww = w[np.newaxis] # [BkkIO] Introduce minibatch dimension.
+
+    # Modulate.
+    s = dense_layer(y, fmaps=x.shape[1].value, weight_var=mod_weight_var) # [BI] Transform incoming W to style.
+    s = apply_bias_act(s, bias_var=mod_bias_var) + 1 # [BI] Add bias (initially 1).
+    ww *= tf.cast(s[:, np.newaxis, np.newaxis, :, np.newaxis], w.dtype) # [BkkIO] Scale input feature maps.
+
+    # Demodulate.
+    # `d` is only defined when demodulate is True; it is reused in the non-fused output scaling below.
+    if demodulate:
+        d = tf.rsqrt(tf.reduce_sum(tf.square(ww), axis=[1,2,3]) + 1e-8) # [BO] Scaling factor.
+        ww *= d[:, np.newaxis, np.newaxis, np.newaxis, :] # [BkkIO] Scale output feature maps.
+
+    # Reshape/scale input.
+    # Fused: `w` is rebound to the per-sample weights `ww`, rearranged to [k, k, I, B*O].
+    # Not fused: `w` stays the shared weight from get_weight() and the style is applied to `x` instead.
+    if fused_modconv:
+        x = tf.reshape(x, [1, -1, x.shape[2], x.shape[3]]) # Fused => reshape minibatch to convolution groups.
+        w = tf.reshape(tf.transpose(ww, [1, 2, 3, 0, 4]), [ww.shape[1], ww.shape[2], ww.shape[3], -1])
+    else:
+        x *= tf.cast(s[:, :, np.newaxis, np.newaxis], x.dtype) # [BIhw] Not fused => scale input activations.
+
+    # Convolution with optional up/downsampling.
+    if up:
+        x = upsample_conv_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel)
+    elif down:
+        x = conv_downsample_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel)
+    else:
+        x = tf.nn.conv2d(x, tf.cast(w, x.dtype), data_format='NCHW', strides=[1,1,1,1], padding='SAME')
+
+    # Reshape/scale output.
+    if fused_modconv:
+        x = tf.reshape(x, [-1, fmaps, x.shape[2], x.shape[3]]) # Fused => reshape convolution groups back to minibatch.
+    elif demodulate:
+        x *= tf.cast(d[:, :, np.newaxis, np.newaxis], x.dtype) # [BOhw] Not fused => scale output activations.
+    return x
+
+#----------------------------------------------------------------------------
+# Minibatch standard deviation layer.
+
+def minibatch_stddev_layer(x, group_size=4, num_new_features=1):
+    # Append feature maps holding the standard deviation of `x` measured across groups of samples.
+    #   x:                Input activations [NCHW].
+    #   group_size:       Requested number of samples per group; clamped to the dynamic batch size below.
+    #   num_new_features: Number of channel groups (n); one feature map is appended per group.
+    #                     The channel count s[1] is integer-divided by this value.
+    # Returns `x` concatenated along axis 1 with the tiled statistics.
+    group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size.
+    s = x.shape # [NCHW] Input shape.
+    y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c.
+    y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32.
+    y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group.
+    y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group.
+    y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group.
+    y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels.
+    y = tf.reduce_mean(y, axis=[2]) # [Mn11] Remove the singleton channel axis left by keepdims above.
+    y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type.
+    y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels.
+    return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap.
+
+#----------------------------------------------------------------------------
+# Main generator network.
+# Composed of two sub-networks (mapping and synthesis) that are defined below.
+# Used in configs B-F (Table 1).
+
+def G_main(
+    latents_in, # First input: Latent vectors (Z) [minibatch, latent_size].
+    labels_in, # Second input: Conditioning labels [minibatch, label_size].
+    truncation_psi = 0.5, # Style strength multiplier for the truncation trick. None = disable.
+    truncation_cutoff = None, # Number of layers for which to apply the truncation trick. None = disable.
+    truncation_psi_val = None, # Value for truncation_psi to use during validation.
+    truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation.
+    dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable.
+    style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable.
+    is_training = False, # Network is under training? Enables and disables specific features.
+    is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi.
+    return_dlatents = False, # Return dlatents in addition to the images?
+    is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
+    components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls.
+    mapping_func = 'G_mapping', # Build func name for the mapping network.
+    synthesis_func = 'G_synthesis_stylegan2', # Build func name for the synthesis network.
+    **kwargs): # Arguments for sub-networks (mapping and synthesis).
+
+    # Overview: creates (or reuses) the mapping and synthesis sub-networks held in `components`,
+    # maps latents to dlatents, optionally updates the moving average of W, applies style mixing
+    # and the truncation trick, and finally evaluates the synthesis network.
+    # Returns `images_out`, or the tuple (images_out, dlatents) when return_dlatents is True.
+    #
+    # NOTE(review): the default for `components` is a single EasyDict created when the function is
+    # defined, so direct calls that omit it share the same container. The parameter comment says it
+    # is "retained between calls"; confirm callers always pass their own container when isolation matters.
+
+    # Validate arguments.
+    assert not is_training or not is_validation
+    assert isinstance(components, dnnlib.EasyDict)
+    if is_validation:
+        truncation_psi = truncation_psi_val
+        truncation_cutoff = truncation_cutoff_val
+    # Truncation is skipped during training, and when psi is the Python constant 1.
+    if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1):
+        truncation_psi = None
+    if is_training:
+        truncation_cutoff = None
+    # Moving-average tracking and style mixing only run during training.
+    if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1):
+        dlatent_avg_beta = None
+    if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0):
+        style_mixing_prob = None
+
+    # Setup components.
+    # Build function names are resolved through this module's globals().
+    if 'synthesis' not in components:
+        components.synthesis = tflib.Network('G_synthesis', func_name=globals()[synthesis_func], **kwargs)
+    # The synthesis input is [minibatch, num_layers, dlatent_size]; read both sizes from it.
+    num_layers = components.synthesis.input_shape[1]
+    dlatent_size = components.synthesis.input_shape[2]
+    if 'mapping' not in components:
+        components.mapping = tflib.Network('G_mapping', func_name=globals()[mapping_func], dlatent_broadcast=num_layers, **kwargs)
+
+    # Setup variables.
+    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
+    dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)
+
+    # Evaluate mapping network.
+    dlatents = components.mapping.get_output_for(latents_in, labels_in, is_training=is_training, **kwargs)
+    dlatents = tf.cast(dlatents, tf.float32)
+
+    # Update moving average of W.
+    # The identity under control_dependencies makes every later use of `dlatents` trigger the update op.
+    if dlatent_avg_beta is not None:
+        with tf.variable_scope('DlatentAvg'):
+            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
+            update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
+            with tf.control_dependencies([update_op]):
+                dlatents = tf.identity(dlatents)
+
+    # Perform style mixing regularization.
+    # A second set of dlatents is produced from fresh random latents. Layers with index below
+    # `mixing_cutoff` keep the original dlatents and the remaining layers take the second set.
+    # With probability style_mixing_prob the cutoff is a random integer in [1, cur_layers);
+    # otherwise it equals cur_layers.
+    if style_mixing_prob is not None:
+        with tf.variable_scope('StyleMix'):
+            latents2 = tf.random_normal(tf.shape(latents_in))
+            dlatents2 = components.mapping.get_output_for(latents2, labels_in, is_training=is_training, **kwargs)
+            dlatents2 = tf.cast(dlatents2, tf.float32)
+            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
+            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
+            mixing_cutoff = tf.cond(
+                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
+                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
+                lambda: cur_layers)
+            dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)
+
+    # Apply truncation trick.
+    # layer_psi holds one coefficient per layer: truncation_psi for every layer when no cutoff is
+    # given, otherwise truncation_psi below the cutoff and 1 elsewhere.
+    # Presumably tflib.lerp(a, b, t) blends from a to b by t -- confirm against tflib.
+    if truncation_psi is not None:
+        with tf.variable_scope('Truncation'):
+            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
+            layer_psi = np.ones(layer_idx.shape, dtype=np.float32)
+            if truncation_cutoff is None:
+                layer_psi *= truncation_psi
+            else:
+                layer_psi = tf.where(layer_idx < truncation_cutoff, layer_psi * truncation_psi, layer_psi)
+            dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi)
+
+    # Evaluate synthesis network.
+    # If the synthesis network owns a 'lod' variable, copy this network's lod into it first.
+    deps = []
+    if 'lod' in components.synthesis.vars:
+        deps.append(tf.assign(components.synthesis.vars['lod'], lod_in))
+    with tf.control_dependencies(deps):
+        images_out = components.synthesis.get_output_for(dlatents, is_training=is_training, force_clean_graph=is_template_graph, **kwargs)
+
+    # Return requested outputs.
+    images_out = tf.identity(images_out, name='images_out')
+    if return_dlatents:
+        return images_out, dlatents
+    return images_out
+
+#----------------------------------------------------------------------------
+# Mapping network.
+# Transforms the input latent code (z) to the disentangled latent code (w).
+# Used in configs B-F (Table 1).
+
+def G_mapping(
+    latents_in, # First input: Latent vectors (Z) [minibatch, latent_size].
+    labels_in, # Second input: Conditioning labels [minibatch, label_size].
+    latent_size = 512, # Latent vector (Z) dimensionality.
+    label_size = 0, # Label dimensionality, 0 if no labels.
+    dlatent_size = 512, # Disentangled latent (W) dimensionality.
+    dlatent_broadcast = None, # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size].
+    mapping_layers = 8, # Number of mapping layers.
+    mapping_fmaps = 512, # Number of activations in the mapping layers.
+    mapping_lrmul = 0.01, # Learning rate multiplier for the mapping layers.
+    mapping_nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
+    normalize_latents = True, # Normalize latent vectors (Z) before feeding them to the mapping layers?
+    dtype = 'float32', # Data type to use for activations and outputs.
+    **_kwargs): # Ignore unrecognized keyword args.
+
+    # Overview: optionally concatenates a learned label embedding to the latents, optionally
+    # normalizes the result, runs it through `mapping_layers` dense layers, and optionally
+    # tiles the output along a new axis 1. Returns a tensor named 'dlatents_out' of type `dtype`.
+
+    act = mapping_nonlinearity
+
+    # Inputs.
+    latents_in.set_shape([None, latent_size])
+    labels_in.set_shape([None, label_size])
+    latents_in = tf.cast(latents_in, dtype)
+    labels_in = tf.cast(labels_in, dtype)
+    x = latents_in
+
+    # Embed labels and concatenate them with latents.
+    # The embedding is a [label_size, latent_size] matrix, so x grows to 2 * latent_size features.
+    if label_size:
+        with tf.variable_scope('LabelConcat'):
+            w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal())
+            y = tf.matmul(labels_in, tf.cast(w, dtype))
+            x = tf.concat([x, y], axis=1)
+
+    # Normalize latents.
+    # Divides each row by the root of its mean square (plus 1e-8 inside the root).
+    if normalize_latents:
+        with tf.variable_scope('Normalize'):
+            x *= tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + 1e-8)
+
+    # Mapping layers.
+    # Every layer has mapping_fmaps outputs except the last, which has dlatent_size.
+    for layer_idx in range(mapping_layers):
+        with tf.variable_scope('Dense%d' % layer_idx):
+            fmaps = dlatent_size if layer_idx == mapping_layers - 1 else mapping_fmaps
+            x = apply_bias_act(dense_layer(x, fmaps=fmaps, lrmul=mapping_lrmul), act=act, lrmul=mapping_lrmul)
+
+    # Broadcast.
+    # [minibatch, dlatent_size] -> [minibatch, dlatent_broadcast, dlatent_size] by tiling.
+    if dlatent_broadcast is not None:
+        with tf.variable_scope('Broadcast'):
+            x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1])
+
+    # Output.
+    assert x.dtype == tf.as_dtype(dtype)
+    return tf.identity(x, name='dlatents_out')
+
+#----------------------------------------------------------------------------
+# StyleGAN synthesis network with revised architecture (Figure 2d).
+# Implements progressive growing, but no skip connections or residual nets (Figure 7).
+# Used in configs B-D (Table 1).
+
+def G_synthesis_stylegan_revised(
+    dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
+    dlatent_size = 512, # Disentangled latent (W) dimensionality.
+    num_channels = 3, # Number of output color channels.
+    resolution = 1024, # Output resolution.
+    fmap_base = 16 << 10, # Overall multiplier for the number of feature maps.
+    fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
+    fmap_min = 1, # Minimum number of feature maps in any layer.
+    fmap_max = 512, # Maximum number of feature maps in any layer.
+    randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
+    nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
+    dtype = 'float32', # Data type to use for activations and outputs.
+    resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering.
+    fused_modconv = True, # Implement modulated_conv2d_layer() as a single fused op?
+    structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
+    is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
+    force_clean_graph = False, # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
+    **_kwargs): # Ignore unrecognized keyword args.
+
+    # Overview: starts from a learned 4x4 constant, applies modulated convolutions with per-layer
+    # noise up to `resolution`, and converts to RGB using one of three graph structures selected by
+    # `structure`. Returns a tensor named 'images_out' of type `dtype`.
+
+    resolution_log2 = int(np.log2(resolution))
+    assert resolution == 2**resolution_log2 and resolution >= 4
+    # Feature map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
+    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
+    # A template graph implies a clean graph, and a clean graph implies noise read from variables.
+    if is_template_graph: force_clean_graph = True
+    if force_clean_graph: randomize_noise = False
+    if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'
+    act = nonlinearity
+    num_layers = resolution_log2 * 2 - 2
+    images_out = None
+
+    # Primary inputs.
+    dlatents_in.set_shape([None, num_layers, dlatent_size])
+    dlatents_in = tf.cast(dlatents_in, dtype)
+    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)
+
+    # Noise inputs.
+    # One non-trainable [1, 1, 2**res, 2**res] variable per noisy layer, with res = (layer_idx + 5) // 2.
+    noise_inputs = []
+    for layer_idx in range(num_layers - 1):
+        res = (layer_idx + 5) // 2
+        shape = [1, 1, 2**res, 2**res]
+        noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))
+
+    # Single convolution layer with all the bells and whistles.
+    # Modulated conv styled by dlatents_in[:, layer_idx], then noise scaled by a learned scalar
+    # (initialized to zero), then bias and activation.
+    def layer(x, layer_idx, fmaps, kernel, up=False):
+        x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv)
+        if randomize_noise:
+            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
+        else:
+            noise = tf.cast(noise_inputs[layer_idx], x.dtype)
+        noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
+        x += noise * tf.cast(noise_strength, x.dtype)
+        return apply_bias_act(x, act=act)
+
+    # Early layers.
+    # The learned constant is tiled to the minibatch size taken from dlatents_in.
+    with tf.variable_scope('4x4'):
+        with tf.variable_scope('Const'):
+            x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
+            x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
+        with tf.variable_scope('Conv'):
+            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)
+
+    # Building blocks for remaining layers.
+    # block(): upsampling conv followed by a regular conv, using dlatent layers res*2-5 and res*2-4.
+    # torgb(): 1x1 modulated conv without demodulation, using dlatent layer res*2-3.
+    def block(res, x): # res = 3..resolution_log2
+        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
+            with tf.variable_scope('Conv0_up'):
+                x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
+            with tf.variable_scope('Conv1'):
+                x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
+            return x
+    def torgb(res, x): # res = 2..resolution_log2
+        with tf.variable_scope('ToRGB_lod%d' % (resolution_log2 - res)):
+            return apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))
+
+    # Fixed structure: simple and efficient, but does not support progressive growing.
+    if structure == 'fixed':
+        for res in range(3, resolution_log2 + 1):
+            x = block(res, x)
+        images_out = torgb(resolution_log2, x)
+
+    # Linear structure: simple but inefficient.
+    # Every resolution produces an RGB image that is combined with the upsampled image from the
+    # previous resolution via tflib.lerp_clip(), driven by lod_in - lod.
+    if structure == 'linear':
+        images_out = torgb(2, x)
+        for res in range(3, resolution_log2 + 1):
+            lod = resolution_log2 - res
+            x = block(res, x)
+            img = torgb(res, x)
+            with tf.variable_scope('Upsample_lod%d' % lod):
+                images_out = upsample_2d(images_out)
+            with tf.variable_scope('Grow_lod%d' % lod):
+                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)
+
+    # Recursive structure: complex but efficient.
+    # cset() wraps the current lambda in a tf.cond that selects `new_lambda` when `new_cond` holds.
+    # NOTE(review): grow() always calls block(res, x) starting at res = 3, so resolution == 4
+    # (allowed by the assert above) gives lod = -1 and still builds the 8x8 block here, unlike the
+    # 'fixed' and 'linear' paths -- confirm resolution 4 is never used with this structure.
+    if structure == 'recursive':
+        def cset(cur_lambda, new_cond, new_lambda):
+            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
+        def grow(x, res, lod):
+            y = block(res, x)
+            img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod)
+            img = cset(img, (lod_in > lod), lambda: naive_upsample_2d(tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)), lod_in - lod), factor=2**lod))
+            if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
+            return img()
+        images_out = grow(x, 3, resolution_log2 - 3)
+
+    assert images_out.dtype == tf.as_dtype(dtype)
+    return tf.identity(images_out, name='images_out')
+
+#----------------------------------------------------------------------------
+# StyleGAN2 synthesis network (Figure 7).
+# Implements skip connections and residual nets (Figure 7), but no progressive growing.
+# Used in configs E-F (Table 1).
+
+def G_synthesis_stylegan2(
+    dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
+    dlatent_size = 512, # Disentangled latent (W) dimensionality.
+    num_channels = 3, # Number of output color channels.
+    resolution = 1024, # Output resolution.
+    fmap_base = 16 << 10, # Overall multiplier for the number of feature maps.
+    fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
+    fmap_min = 1, # Minimum number of feature maps in any layer.
+    fmap_max = 512, # Maximum number of feature maps in any layer.
+    randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
+    architecture = 'skip', # Architecture: 'orig', 'skip', 'resnet'.
+    nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
+    dtype = 'float32', # Data type to use for activations and outputs.
+    resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering.
+    fused_modconv = True, # Implement modulated_conv2d_layer() as a single fused op?
+    **_kwargs): # Ignore unrecognized keyword args.
+
+    # Overview: starts from a learned 4x4 constant and applies one block of modulated convolutions
+    # per resolution up to `resolution`. How RGB output is formed depends on `architecture`:
+    #   'skip':            every resolution contributes an RGB image that is added to the upsampled running sum.
+    #   'orig' / 'resnet': only the final resolution is converted to RGB; 'resnet' additionally adds a
+    #                      1x1 upsampling conv of each block's input to its output, scaled by 1/sqrt(2).
+    # Returns a tensor named 'images_out' of type `dtype`.
+
+    resolution_log2 = int(np.log2(resolution))
+    assert resolution == 2**resolution_log2 and resolution >= 4
+    # Feature map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
+    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
+    assert architecture in ['orig', 'skip', 'resnet']
+    act = nonlinearity
+    num_layers = resolution_log2 * 2 - 2
+    images_out = None
+
+    # Primary inputs.
+    dlatents_in.set_shape([None, num_layers, dlatent_size])
+    dlatents_in = tf.cast(dlatents_in, dtype)
+
+    # Noise inputs.
+    # One non-trainable [1, 1, 2**res, 2**res] variable per noisy layer, with res = (layer_idx + 5) // 2.
+    noise_inputs = []
+    for layer_idx in range(num_layers - 1):
+        res = (layer_idx + 5) // 2
+        shape = [1, 1, 2**res, 2**res]
+        noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))
+
+    # Single convolution layer with all the bells and whistles.
+    # Modulated conv styled by dlatents_in[:, layer_idx], then noise scaled by a learned scalar
+    # (initialized to zero), then bias and activation.
+    def layer(x, layer_idx, fmaps, kernel, up=False):
+        x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv)
+        if randomize_noise:
+            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
+        else:
+            noise = tf.cast(noise_inputs[layer_idx], x.dtype)
+        noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
+        x += noise * tf.cast(noise_strength, x.dtype)
+        return apply_bias_act(x, act=act)
+
+    # Building blocks for main layers.
+    # block() keeps its input in `t` so the 'resnet' skip path can be computed from it.
+    def block(x, res): # res = 3..resolution_log2
+        t = x
+        with tf.variable_scope('Conv0_up'):
+            x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
+        with tf.variable_scope('Conv1'):
+            x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
+        if architecture == 'resnet':
+            with tf.variable_scope('Skip'):
+                t = conv2d_layer(t, fmaps=nf(res-1), kernel=1, up=True, resample_kernel=resample_kernel)
+                x = (x + t) * (1 / np.sqrt(2))
+        return x
+    def upsample(y):
+        with tf.variable_scope('Upsample'):
+            return upsample_2d(y, k=resample_kernel)
+    # torgb() converts features `x` to RGB and adds the result to the running image `y` when one exists.
+    def torgb(x, y, res): # res = 2..resolution_log2
+        with tf.variable_scope('ToRGB'):
+            t = apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))
+            return t if y is None else y + t
+
+    # Early layers.
+    # The learned constant is tiled to the minibatch size taken from dlatents_in.
+    y = None
+    with tf.variable_scope('4x4'):
+        with tf.variable_scope('Const'):
+            x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
+            x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
+        with tf.variable_scope('Conv'):
+            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)
+        if architecture == 'skip':
+            y = torgb(x, y, 2)
+
+    # Main layers.
+    # For 'orig' and 'resnet', `y` stays None until the final resolution calls torgb().
+    # NOTE(review): for those architectures with resolution == 4 this loop is empty and `y` stays
+    # None, so the dtype assert below would fail on None -- confirm resolution 4 is not expected.
+    for res in range(3, resolution_log2 + 1):
+        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
+            x = block(x, res)
+            if architecture == 'skip':
+                y = upsample(y)
+            if architecture == 'skip' or res == resolution_log2:
+                y = torgb(x, y, res)
+    images_out = y
+
+    assert images_out.dtype == tf.as_dtype(dtype)
+    return tf.identity(images_out, name='images_out')
+
+#----------------------------------------------------------------------------
+# Original StyleGAN discriminator.
+# Used in configs B-D (Table 1).
+
+def D_stylegan(
+    images_in, # First input: Images [minibatch, channel, height, width].
+    labels_in, # Second input: Labels [minibatch, label_size].
+    num_channels = 3, # Number of input color channels. Overridden based on dataset.
+    resolution = 1024, # Input resolution. Overridden based on dataset.
+    label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
+    fmap_base = 16 << 10, # Overall multiplier for the number of feature maps.
+    fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
+    fmap_min = 1, # Minimum number of feature maps in any layer.
+    fmap_max = 512, # Maximum number of feature maps in any layer.
+    nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
+    mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable.
+    mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer.
+    dtype = 'float32', # Data type to use for activations and outputs.
+    resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering.
+    structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
+    is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
+    **_kwargs): # Ignore unrecognized keyword args.
+
+    # Overview: converts images to features, reduces them to 4x4 through downsampling blocks using
+    # one of three graph structures, then applies the minibatch-stddev layer, a conv, a dense layer
+    # and an output layer. Returns a tensor named 'scores_out' of type `dtype`.
+
+    resolution_log2 = int(np.log2(resolution))
+    assert resolution == 2**resolution_log2 and resolution >= 4
+    # Feature map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
+    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
+    if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'
+    act = nonlinearity
+
+    images_in.set_shape([None, num_channels, resolution, resolution])
+    labels_in.set_shape([None, label_size])
+    images_in = tf.cast(images_in, dtype)
+    labels_in = tf.cast(labels_in, dtype)
+    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)
+
+    # Building blocks for spatial layers.
+    # fromrgb(): 1x1 conv to nf(res-1) feature maps, then bias and activation.
+    # block():   3x3 conv at nf(res-1) feature maps, then a 3x3 downsampling conv to nf(res-2).
+    def fromrgb(x, res): # res = 2..resolution_log2
+        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
+            return apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=1), act=act)
+    def block(x, res): # res = 2..resolution_log2
+        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
+            with tf.variable_scope('Conv0'):
+                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
+            with tf.variable_scope('Conv1_down'):
+                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
+            return x
+
+    # Fixed structure: simple and efficient, but does not support progressive growing.
+    if structure == 'fixed':
+        x = fromrgb(images_in, resolution_log2)
+        for res in range(resolution_log2, 2, -1):
+            x = block(x, res)
+
+    # Linear structure: simple but inefficient.
+    # After every block, the features are combined with fromrgb() of the progressively downsampled
+    # image via tflib.lerp_clip(), driven by lod_in - lod.
+    if structure == 'linear':
+        img = images_in
+        x = fromrgb(img, resolution_log2)
+        for res in range(resolution_log2, 2, -1):
+            lod = resolution_log2 - res
+            x = block(x, res)
+            with tf.variable_scope('Downsample_lod%d' % lod):
+                img = downsample_2d(img)
+            y = fromrgb(img, res - 1)
+            with tf.variable_scope('Grow_lod%d' % lod):
+                x = tflib.lerp_clip(x, y, lod_in - lod)
+
+    # Recursive structure: complex but efficient.
+    # cset() wraps the current lambda in a tf.cond that selects `new_lambda` when `new_cond` holds.
+    # NOTE(review): grow() starts at res = 3 with lod = resolution_log2 - 3. For resolution == 4
+    # (allowed by the assert above) that is lod = -1, so naive_downsample_2d() receives
+    # factor = 2**-1 -- confirm resolution 4 is never used with this structure.
+    if structure == 'recursive':
+        def cset(cur_lambda, new_cond, new_lambda):
+            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
+        def grow(res, lod):
+            x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2**lod), res)
+            if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
+            x = block(x(), res); y = lambda: x
+            y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(naive_downsample_2d(images_in, factor=2**(lod+1)), res - 1), lod_in - lod))
+            return y()
+        x = grow(3, resolution_log2 - 3)
+
+    # Final layers at 4x4 resolution.
+    # The minibatch-stddev layer is skipped when mbstd_group_size <= 1.
+    with tf.variable_scope('4x4'):
+        if mbstd_group_size > 1:
+            with tf.variable_scope('MinibatchStddev'):
+                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
+        with tf.variable_scope('Conv'):
+            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
+        with tf.variable_scope('Dense0'):
+            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)
+
+    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
+    # One output per label (at least one). With labels present, the outputs are multiplied by
+    # labels_in and summed over axis 1, leaving a single score per sample.
+    with tf.variable_scope('Output'):
+        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
+        if labels_in.shape[1] > 0:
+            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
+    scores_out = x
+
+    # Output.
+    assert scores_out.dtype == tf.as_dtype(dtype)
+    scores_out = tf.identity(scores_out, name='scores_out')
+    return scores_out
+
+#----------------------------------------------------------------------------
+# StyleGAN2 discriminator (Figure 7).
+# Implements skip connections and residual nets (Figure 7), but no progressive growing.
+# Used in configs E-F (Table 1).
+
+def D_stylegan2(
+    images_in, # First input: Images [minibatch, channel, height, width].
+    labels_in, # Second input: Labels [minibatch, label_size].
+    num_channels = 3, # Number of input color channels. Overridden based on dataset.
+    resolution = 1024, # Input resolution. Overridden based on dataset.
+    label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
+    fmap_base = 16 << 10, # Overall multiplier for the number of feature maps.
+    fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
+    fmap_min = 1, # Minimum number of feature maps in any layer.
+    fmap_max = 512, # Maximum number of feature maps in any layer.
+    architecture = 'resnet', # Architecture: 'orig', 'skip', 'resnet'.
+    nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
+    mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable.
+    mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer.
+    dtype = 'float32', # Data type to use for activations and outputs.
+    resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering.
+    **_kwargs): # Ignore unrecognized keyword args.
+
+    # Overview: reduces the input images to 4x4 features through one downsampling block per
+    # resolution, then applies the minibatch-stddev layer, a conv, a dense layer and an output layer.
+    # How the image enters the network depends on `architecture`:
+    #   'skip':            fromrgb() of the progressively downsampled image is added at every resolution.
+    #   'orig' / 'resnet': fromrgb() is applied only at full resolution; 'resnet' additionally adds a
+    #                      1x1 downsampling conv of each block's input to its output, scaled by 1/sqrt(2).
+    # Returns a tensor named 'scores_out' of type `dtype`.
+
+    resolution_log2 = int(np.log2(resolution))
+    assert resolution == 2**resolution_log2 and resolution >= 4
+    # Feature map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
+    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
+    assert architecture in ['orig', 'skip', 'resnet']
+    act = nonlinearity
+
+    images_in.set_shape([None, num_channels, resolution, resolution])
+    labels_in.set_shape([None, label_size])
+    images_in = tf.cast(images_in, dtype)
+    labels_in = tf.cast(labels_in, dtype)
+
+    # Building blocks for main layers.
+    # fromrgb() converts image `y` to features and adds them to `x` when `x` already exists.
+    def fromrgb(x, y, res): # res = 2..resolution_log2
+        with tf.variable_scope('FromRGB'):
+            t = apply_bias_act(conv2d_layer(y, fmaps=nf(res-1), kernel=1), act=act)
+            return t if x is None else x + t
+    # block() keeps its input in `t` so the 'resnet' skip path can be computed from it.
+    def block(x, res): # res = 2..resolution_log2
+        t = x
+        with tf.variable_scope('Conv0'):
+            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
+        with tf.variable_scope('Conv1_down'):
+            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
+        if architecture == 'resnet':
+            with tf.variable_scope('Skip'):
+                t = conv2d_layer(t, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel)
+                x = (x + t) * (1 / np.sqrt(2))
+        return x
+    def downsample(y):
+        with tf.variable_scope('Downsample'):
+            return downsample_2d(y, k=resample_kernel)
+
+    # Main layers.
+    # `x` holds the features (None until the first fromrgb) and `y` holds the image at the current resolution.
+    # NOTE(review): for 'orig'/'resnet' with resolution == 4 this loop is empty and `x` is still
+    # None when the final layers run -- confirm resolution 4 is not expected for those architectures.
+    x = None
+    y = images_in
+    for res in range(resolution_log2, 2, -1):
+        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
+            if architecture == 'skip' or res == resolution_log2:
+                x = fromrgb(x, y, res)
+            x = block(x, res)
+            if architecture == 'skip':
+                y = downsample(y)
+
+    # Final layers.
+    # The minibatch-stddev layer is skipped when mbstd_group_size <= 1.
+    with tf.variable_scope('4x4'):
+        if architecture == 'skip':
+            x = fromrgb(x, y, 2)
+        if mbstd_group_size > 1:
+            with tf.variable_scope('MinibatchStddev'):
+                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
+        with tf.variable_scope('Conv'):
+            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
+        with tf.variable_scope('Dense0'):
+            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)
+
+    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
+    # One output per label (at least one). With labels present, the outputs are multiplied by
+    # labels_in and summed over axis 1, leaving a single score per sample.
+    with tf.variable_scope('Output'):
+        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
+        if labels_in.shape[1] > 0:
+            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
+    scores_out = x
+
+    # Output.
+    assert scores_out.dtype == tf.as_dtype(dtype)
+    scores_out = tf.identity(scores_out, name='scores_out')
+    return scores_out
+
+#----------------------------------------------------------------------------
diff --git a/training/training_loop.py b/training/training_loop.py
new file mode 100755
index 0000000..c2d88cf
--- /dev/null
+++ b/training/training_loop.py
@@ -0,0 +1,356 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit
+# https://nvlabs.github.io/stylegan2/license.html
+
+"""Main training script."""
+
+import numpy as np
+import tensorflow as tf
+import dnnlib
+import dnnlib.tflib as tflib
+from dnnlib.tflib.autosummary import autosummary
+
+from training import dataset
+from training import misc
+from metrics import metric_base
+
+#----------------------------------------------------------------------------
+# Just-in-time processing of training images before feeding them to the networks.
+
+def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
+ with tf.name_scope('DynamicRange'):
+ x = tf.cast(x, tf.float32)
+ x = misc.adjust_dynamic_range(x, drange_data, drange_net)
+ if mirror_augment:
+ with tf.name_scope('MirrorAugment'):
+ x = tf.where(tf.random_uniform([tf.shape(x)[0]]) < 0.5, x, tf.reverse(x, [3]))
+ with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail.
+ s = tf.shape(x)
+ y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2])
+ y = tf.reduce_mean(y, axis=[3, 5], keepdims=True)
+ y = tf.tile(y, [1, 1, 1, 2, 1, 2])
+ y = tf.reshape(y, [-1, s[1], s[2], s[3]])
+ x = tflib.lerp(x, y, lod - tf.floor(lod))
+ with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks.
+ s = tf.shape(x)
+ factor = tf.cast(2 ** tf.floor(lod), tf.int32)
+ x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
+ x = tf.tile(x, [1, 1, 1, factor, 1, factor])
+ x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
+ return x, labels
+
+#----------------------------------------------------------------------------
+# Evaluate time-varying training parameters.
+
+def training_schedule(
+ cur_nimg,
+ training_set,
+ lod_initial_resolution = None, # Image resolution used at the beginning.
+ lod_training_kimg = 600, # Thousands of real images to show before doubling the resolution.
+ lod_transition_kimg = 600, # Thousands of real images to show when fading in new layers.
+ minibatch_size_base = 32, # Global minibatch size.
+ minibatch_size_dict = {}, # Resolution-specific overrides.
+ minibatch_gpu_base = 4, # Number of samples processed at a time by one GPU.
+ minibatch_gpu_dict = {}, # Resolution-specific overrides.
+ G_lrate_base = 0.002, # Learning rate for the generator.
+ G_lrate_dict = {}, # Resolution-specific overrides.
+ D_lrate_base = 0.002, # Learning rate for the discriminator.
+ D_lrate_dict = {}, # Resolution-specific overrides.
+ lrate_rampup_kimg = 0, # Duration of learning rate ramp-up.
+ tick_kimg_base = 4, # Default interval of progress snapshots.
+ tick_kimg_dict = {8:28, 16:24, 32:20, 64:16, 128:12, 256:8, 512:6, 1024:4}): # Resolution-specific overrides.
+
+ # Initialize result dict.
+ s = dnnlib.EasyDict()
+ s.kimg = cur_nimg / 1000.0
+
+ # Training phase.
+ phase_dur = lod_training_kimg + lod_transition_kimg
+ phase_idx = int(np.floor(s.kimg / phase_dur)) if phase_dur > 0 else 0
+ phase_kimg = s.kimg - phase_idx * phase_dur
+
+ # Level-of-detail and resolution.
+ if lod_initial_resolution is None:
+ s.lod = 0.0
+ else:
+ s.lod = training_set.resolution_log2
+ s.lod -= np.floor(np.log2(lod_initial_resolution))
+ s.lod -= phase_idx
+ if lod_transition_kimg > 0:
+ s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg
+ s.lod = max(s.lod, 0.0)
+ s.resolution = 2 ** (training_set.resolution_log2 - int(np.floor(s.lod)))
+
+ # Minibatch size.
+ s.minibatch_size = minibatch_size_dict.get(s.resolution, minibatch_size_base)
+ s.minibatch_gpu = minibatch_gpu_dict.get(s.resolution, minibatch_gpu_base)
+
+ # Learning rate.
+ s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base)
+ s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base)
+ if lrate_rampup_kimg > 0:
+ rampup = min(s.kimg / lrate_rampup_kimg, 1.0)
+ s.G_lrate *= rampup
+ s.D_lrate *= rampup
+
+ # Other parameters.
+ s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base)
+ return s
+
+#----------------------------------------------------------------------------
+# Main training script.
+
+def training_loop(
+ G_args = {}, # Options for generator network.
+ D_args = {}, # Options for discriminator network.
+ G_opt_args = {}, # Options for generator optimizer.
+ D_opt_args = {}, # Options for discriminator optimizer.
+ G_loss_args = {}, # Options for generator loss.
+ D_loss_args = {}, # Options for discriminator loss.
+ dataset_args = {}, # Options for dataset.load_dataset().
+ sched_args = {}, # Options for training_schedule().
+ grid_args = {}, # Options for misc.setup_snapshot_image_grid().
+ metric_arg_list = [], # Options for MetricGroup.
+ tf_config = {}, # Options for tflib.init_tf().
+ data_dir = None, # Directory to load datasets from.
+ G_smoothing_kimg = 10.0, # Half-life of the running average of generator weights.
+ minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters.
+ lazy_regularization = True, # Perform regularization as a separate training step?
+ G_reg_interval = 4, # How often to perform regularization for G? Ignored if lazy_regularization=False.
+ D_reg_interval = 16, # How often to perform regularization for D? Ignored if lazy_regularization=False.
+ reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
+ total_kimg = 25000, # Total length of the training, measured in thousands of real images.
+ mirror_augment = False, # Enable mirror augment?
+ drange_net = [-1,1], # Dynamic range used when feeding image data to the networks.
+ image_snapshot_ticks = 50, # How often to save image snapshots? None = only save 'reals.png' and 'fakes_init.png'.
+ network_snapshot_ticks = 50, # How often to save network snapshots? None = only save 'network-final.pkl'.
+ save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file?
+ save_weight_histograms = False, # Include weight histograms in the tfevents file?
+ resume_pkl = None, # Network pickle to resume training from, None = train from scratch.
+ resume_kimg = 0.0, # Assumed training progress at the beginning. Affects reporting and training schedule.
+ resume_time = 0.0, # Assumed wallclock time at the beginning. Affects reporting.
+ resume_with_new_nets = False): # Construct new networks according to G_args and D_args before resuming training?
+ # Runs a complete training session: initializes TensorFlow, loads the dataset, constructs or
+ # resumes G, D and Gs, builds the loss/optimizer graph on every GPU, and then alternates
+ # G and D updates until cur_nimg reaches total_kimg * 1000 or the RunContext asks to stop.
+ # Image grids, network pickles and tfevents summaries are written to the run directory.
+ # The function has no return value.
+
+ # Initialize dnnlib and TensorFlow.
+ tflib.init_tf(tf_config)
+ num_gpus = dnnlib.submit_config.num_gpus
+
+ # Load training set.
+ training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args)
+ grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args)
+ misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size)
+
+ # Construct or load networks.
+ # Fresh networks are built when training from scratch or when resume_with_new_nets is set;
+ # in the latter case the variables of the unpickled networks are copied into the fresh ones.
+ # Otherwise the unpickled networks are used directly.
+ with tf.device('/gpu:0'):
+ if resume_pkl is None or resume_with_new_nets:
+ print('Constructing networks...')
+ G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)
+ D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)
+ Gs = G.clone('Gs')
+ if resume_pkl is not None:
+ print('Loading networks from "%s"...' % resume_pkl)
+ rG, rD, rGs = misc.load_pkl(resume_pkl)
+ if resume_with_new_nets: G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs)
+ else: G = rG; D = rD; Gs = rGs
+
+ # Print layers and generate initial image snapshot.
+ G.print_layers(); D.print_layers()
+ # The schedule is evaluated at the end of training here; only sched.minibatch_gpu is used
+ # (as the batch size for running Gs on the snapshot grid).
+ sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args)
+ grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
+ grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
+ misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size)
+
+ # Setup training inputs.
+ print('Building TensorFlow graph...')
+ with tf.name_scope('Inputs'), tf.device('/cpu:0'):
+ lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[])
+ lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[])
+ minibatch_size_in = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[])
+ minibatch_gpu_in = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[])
+ # Number of per-GPU rounds needed to cover one global minibatch (matches len(rounds) below).
+ minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus)
+ # Decay per minibatch chosen so that 0.5 is reached after G_smoothing_kimg * 1000 images.
+ # Falls back to 0.0 when G_smoothing_kimg <= 0 (presumably Gs then simply tracks G; confirm
+ # against Network.setup_as_moving_average_of).
+ Gs_beta = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0
+
+ # Setup optimizers.
+ # The caller's dicts are shallow-copied so the edits below do not leak back to the caller.
+ G_opt_args = dict(G_opt_args)
+ D_opt_args = dict(D_opt_args)
+ for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]:
+ args['minibatch_multiplier'] = minibatch_multiplier
+ args['learning_rate'] = lrate_in
+ if lazy_regularization:
+ # Rescale learning rate and betas by reg_interval / (reg_interval + 1); presumably this
+ # compensates for the extra regularization steps sharing the optimizer state.
+ mb_ratio = reg_interval / (reg_interval + 1)
+ args['learning_rate'] *= mb_ratio
+ if 'beta1' in args: args['beta1'] **= mb_ratio
+ if 'beta2' in args: args['beta2'] **= mb_ratio
+ G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
+ D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
+ # The regularization optimizers are created with share=<main optimizer>; presumably this
+ # makes them reuse the main optimizer's internal state (confirm in tflib.Optimizer).
+ G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
+ D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)
+
+ # Build training graph for each GPU.
+ data_fetch_ops = []
+ for gpu in range(num_gpus):
+ with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):
+
+ # Create GPU-specific shadow copies of G and D.
+ # GPU 0 uses the original networks; every other GPU gets a clone named '<name>_shadow'.
+ G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
+ D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')
+
+ # Fetch training data via temporary variables.
+ # NOTE(review): the variables are sized with sched.minibatch_gpu evaluated at resume_kimg.
+ # The concat/assign below assumes get_minibatch_tf() yields minibatch_gpu_in samples and
+ # that minibatch_gpu never exceeds that initial size later in the schedule; confirm.
+ with tf.name_scope('DataFetch'):
+ sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args)
+ reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape))
+ labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size]))
+ reals_write, labels_write = training_set.get_minibatch_tf()
+ reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net)
+ # Pad the fresh batch with the tail of the old variable contents so the assigned value
+ # keeps the variable's size; only the first minibatch_gpu_in rows are read back.
+ reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0)
+ labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0)
+ data_fetch_ops += [tf.assign(reals_var, reals_write)]
+ data_fetch_ops += [tf.assign(labels_var, labels_write)]
+ reals_read = reals_var[:minibatch_gpu_in]
+ labels_read = labels_var[:minibatch_gpu_in]
+
+ # Evaluate loss functions.
+ # Networks that expose a 'lod' variable get it assigned from lod_in before the losses are
+ # evaluated (enforced through the control dependency). Each loss call returns a
+ # (loss, regularization) pair; the function to call is presumably named in the *_loss_args.
+ lod_assign_ops = []
+ if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
+ if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
+ with tf.control_dependencies(lod_assign_ops):
+ with tf.name_scope('G_loss'):
+ G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args)
+ with tf.name_scope('D_loss'):
+ D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args)
+
+ # Register gradients.
+ # Without lazy regularization the regularization term is added to the main loss. With it,
+ # the term goes to the separate *_reg_opt, multiplied by the regularization interval.
+ if not lazy_regularization:
+ if G_reg is not None: G_loss += G_reg
+ if D_reg is not None: D_loss += D_reg
+ else:
+ if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables)
+ if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables)
+ G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)
+ D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)
+
+ # Setup training ops.
+ # allow_no_op=True is passed for the regularization optimizers, which may have had no
+ # gradients registered above (no regularization term, or lazy_regularization disabled).
+ data_fetch_op = tf.group(*data_fetch_ops)
+ G_train_op = G_opt.apply_updates()
+ D_train_op = D_opt.apply_updates()
+ G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
+ D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
+ Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)
+
+ # Finalize graph.
+ # If the MaxBytesInUse op is not found, report 0 as the peak GPU memory instead.
+ with tf.device('/gpu:0'):
+ try:
+ peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
+ except tf.errors.NotFoundError:
+ peak_gpu_mem_op = tf.constant(0)
+ tflib.init_uninitialized_vars()
+
+ print('Initializing logs...')
+ summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
+ if save_tf_graph:
+ summary_log.add_graph(tf.get_default_graph())
+ if save_weight_histograms:
+ G.setup_weight_histograms(); D.setup_weight_histograms()
+ metrics = metric_base.MetricGroup(metric_arg_list)
+
+ print('Training for %d kimg...\n' % total_kimg)
+ dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg)
+ maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
+ cur_nimg = int(resume_kimg * 1000)
+ cur_tick = -1 # Negative so that the first loop iteration always performs the per-tick maintenance.
+ tick_start_nimg = cur_nimg
+ prev_lod = -1.0 # Sentinel below any scheduled lod (which is >= 0), so the first iteration counts as a lod change.
+ running_mb_counter = 0 # Counts minibatches; decides when the lazy regularization steps run.
+ while cur_nimg < total_kimg * 1000:
+ if dnnlib.RunContext.get().should_stop(): break
+
+ # Choose training parameters and configure training ops.
+ sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args)
+ # The global minibatch must split evenly into per-GPU rounds.
+ assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
+ training_set.configure(sched.minibatch_gpu, sched.lod)
+ if reset_opt_for_new_lod:
+ # Reset when floor(lod) or ceil(lod) changes, i.e. lod entered a different integer
+ # interval or started/stopped being exactly an integer.
+ if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod):
+ G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state()
+ prev_lod = sched.lod
+
+ # Run training ops.
+ # NOTE(review): lrate_in is the learning rate of both optimizers and is fed sched.G_lrate
+ # only; sched.D_lrate is never used in this function, so D_lrate_* settings have no effect here.
+ feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu}
+ for _repeat in range(minibatch_repeats):
+ rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus)
+ run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0)
+ run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0)
+ cur_nimg += sched.minibatch_size
+ running_mb_counter += 1
+
+ # Fast path without gradient accumulation.
+ # Taken when one round of all GPUs covers the whole global minibatch.
+ if len(rounds) == 1:
+ tflib.run([G_train_op, data_fetch_op], feed_dict)
+ if run_G_reg:
+ tflib.run(G_reg_op, feed_dict)
+ tflib.run([D_train_op, Gs_update_op], feed_dict)
+ if run_D_reg:
+ tflib.run(D_reg_op, feed_dict)
+
+ # Slow path with gradient accumulation.
+ # Every op is run once per round; new data is fetched before each D training round,
+ # and Gs is updated once per minibatch after the G steps.
+ else:
+ for _round in rounds:
+ tflib.run(G_train_op, feed_dict)
+ if run_G_reg:
+ for _round in rounds:
+ tflib.run(G_reg_op, feed_dict)
+ tflib.run(Gs_update_op, feed_dict)
+ for _round in rounds:
+ tflib.run(data_fetch_op, feed_dict)
+ tflib.run(D_train_op, feed_dict)
+ if run_D_reg:
+ for _round in rounds:
+ tflib.run(D_reg_op, feed_dict)
+
+ # Perform maintenance tasks once per tick.
+ # A tick ends on the first iteration, after sched.tick_kimg thousand images, or when training is done.
+ done = (cur_nimg >= total_kimg * 1000)
+ if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
+ cur_tick += 1
+ tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
+ tick_start_nimg = cur_nimg
+ tick_time = dnnlib.RunContext.get().get_time_since_last_update()
+ total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time
+
+ # Report progress.
+ # Each autosummary() result is used directly as the value to print; peak GPU memory is
+ # converted from bytes by dividing by 2**30.
+ print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % (
+ autosummary('Progress/tick', cur_tick),
+ autosummary('Progress/kimg', cur_nimg / 1000.0),
+ autosummary('Progress/lod', sched.lod),
+ autosummary('Progress/minibatch', sched.minibatch_size),
+ dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)),
+ autosummary('Timing/sec_per_tick', tick_time),
+ autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
+ autosummary('Timing/maintenance_sec', maintenance_time),
+ autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30)))
+ autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
+ autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))
+
+ # Save snapshots.
+ # File names carry the current progress in whole kimg; metrics are run on the network
+ # pickle that was just written.
+ if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done):
+ grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
+ misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size)
+ if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done):
+ pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000))
+ misc.save_pkl((G, D, Gs), pkl)
+ metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config)
+
+ # Update summaries and RunContext.
+ # maintenance_time is the interval reported by the RunContext for this update minus the
+ # tick's own duration; it is printed with the next tick.
+ metrics.update_autosummaries()
+ tflib.autosummary.save_summaries(summary_log, cur_nimg)
+ dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg)
+ maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time
+
+ # Save final snapshot.
+ misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))
+
+ # All done.
+ summary_log.close()
+ training_set.close()
+
+#----------------------------------------------------------------------------