From 4070fe708b67e3ab7305fd1b3b12204720f8f3e0 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 2 Jul 2020 02:01:09 +0900 Subject: [PATCH 01/36] Updates pretrained_models file --- pfrl/utils/pretrained_models.py | 167 +++++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 1 deletion(-) diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index ee470f70a..c29cd643c 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -1,3 +1,160 @@ +"""This file is a fork from ChainerCV, an MIT-licensed project, +https://github.com/chainer/chainercv/blob/master/chainercv/utils/download.py +""" + +import filelock +import hashlib +import os +import shutil +import tempfile +import time +import sys +import zipfile +from six.moves.urllib import request + + +_models_root = os.environ.get('PFRL_MODELS_ROOT', + os.path.join(os.path.expanduser('~'), + '.pfrl', 'models')) + + +MODELS = { + "DQN": ["best", "final"], + "IQN": ["best", "final"], + "Rainbow": ["best", "final"], + "A3C": ["final", "best"], + "DDPG": ["best", "final"], + "TRPO": ["best", "final"], + "PPO": ["final"], + "TD3": ["best", "final"], + "SAC": ["best", "final"] +} + +download_url = "https://chainer-assets.preferred.jp/pfrl/" + +''' +This function forked from Chainer, an MIT-licensed project, +https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L70 +''' +def _get_model_directory(model_name, create_directory=True): + """Gets the path to the directory of given model. + + The generated path is just a concatenation of the global root directory + and the model name. + Args: + model_name (str): Name of the model. + create_directory (bool): If True (default), this function also creates + the directory at the first time. If the directory already exists, + then this option is ignored. + Returns: + str: Path to the dataset directory. 
+ """ + path = os.path.join(_models_root, model_name) + if create_directory: + try: + os.makedirs(path) + except OSError: + if not os.path.isdir(path): + raise + return path + + +def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + print(' % Total Recv Speed Time left') + return + duration = time.time() - start_time + progress_size = count * block_size + try: + speed = progress_size / duration + except ZeroDivisionError: + speed = float('inf') + percent = progress_size / total_size * 100 + eta = int((total_size - progress_size) / speed) + sys.stdout.write( + '\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}' + .format( + percent, total_size / (1 << 20), progress_size / (1 << 20), + speed / (1 << 10), eta // 60 // 60, (eta // 60) % 60, eta % 60)) + sys.stdout.flush() + + +def cached_download(url): + """Downloads a file and caches it. + + It downloads a file from the URL if there is no corresponding cache. + If there is already a cache for the given URL, it just returns the + path to the cache without downloading the same file. + Args: + url (string): URL to download from. + Returns: + string: Path to the downloaded file. 
+ """ + cache_root = os.path.join(_models_root, '_dl_cache') + # cache_root = os.path.join(_models_root, '_dl_cache') + try: + os.makedirs(cache_root) + except OSError: + if not os.path.exists(cache_root): + raise + lock_path = os.path.join(cache_root, '_dl_lock') + urlhash = hashlib.md5(url.encode('utf-8')).hexdigest() + cache_path = os.path.join(cache_root, urlhash) + + with filelock.FileLock(lock_path): + if os.path.exists(cache_path): + return cache_path + temp_root = tempfile.mkdtemp(dir=cache_root) + try: + temp_path = os.path.join(temp_root, 'dl') + print('Downloading ...') + print('From: {:s}'.format(url)) + print('To: {:s}'.format(cache_path)) + request.urlretrieve(url, temp_path, _reporthook) + with filelock.FileLock(lock_path): + shutil.move(temp_path, cache_path) + finally: + shutil.rmtree(temp_root) + + return cache_path + + +def download_and_store_model(alg, url, env, model_type): + """Downloads a model file and puts it under model directory. + + It downloads a file from the URL and puts it under model directory. + If there is already a file at the destination path, + it just returns the path without downloading the same file. + Args: + alg (string): String representation of algorithm used in MODELS dict. + url (string): URL to download from. + env (string): Environment in which pretrained model was trained. + model_type (string): Either `best` or `final`. + Returns: + string: Path to the downloaded file. + bool: whether the model was already cached. 
+ """ + lock = os.path.join( + _get_model_directory(os.path.join('.lock')), + 'models.lock') + with filelock.FileLock(lock): + root = _get_model_directory(os.path.join('models', + alg, env)) + url_basepath = os.path.join(url, alg, env) + file = model_type + ".zip" + path = os.path.join(root, file) + is_cached = os.path.exists(path) + if not is_cached: + cache_path = cached_download(os.path.join(url_basepath, + file)) + os.rename(cache_path, path) + with zipfile.ZipFile(path, 'r') as zip_ref: + zip_ref.extractall(root) + return os.path.join(root, model_type), is_cached + + def download_model(alg, env, model_type="best"): """Downloads and returns pretrained model. @@ -9,4 +166,12 @@ def download_model(alg, env, model_type="best"): str: Path to the downloaded file. bool: whether the model was already cached. """ - raise NotImplementedError() + assert alg in MODELS, \ + "No pretrained models for " + alg + "." + assert model_type in MODELS[alg], \ + "Model type \"" + model_type + "\" is not supported." 
+ env = env.replace("NoFrameskip-v4", "") + model_path, is_cached = download_and_store_model(alg, + download_url, + env, model_type) + return model_path, is_cached From 24510e187dfa23936f2a458c8ece13800a0ab080 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 9 Jul 2020 13:58:06 +0900 Subject: [PATCH 02/36] Adds env variable --- .pfnci/config.pbtxt | 7 +++++++ pfrl/utils/pretrained_models.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.pfnci/config.pbtxt b/.pfnci/config.pbtxt index 9a6fa5842..ed7e4c891 100644 --- a/.pfnci/config.pbtxt +++ b/.pfnci/config.pbtxt @@ -10,6 +10,7 @@ configs { seconds: 1200 } environment_variables { key: "GPU" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -27,6 +28,7 @@ configs { } environment_variables { key: "GPU" value: "1" } environment_variables { key: "SLOW" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -44,6 +46,7 @@ configs { } environment_variables { key: "GPU" value: "1" } environment_variables { key: "TEST_EXAMPLES" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh gpu" } } @@ -59,6 +62,7 @@ configs { seconds: 2400 } environment_variables { key: "SLOW" value: "1" } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh cpu" } } @@ -74,6 +78,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/script.sh cpu" } } @@ -88,6 +93,7 @@ configs { time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } environment_variables { key: "TEST_EXAMPLES" value: "1" } command: "bash .pfnci/script.sh cpu" } @@ -104,6 +110,7 @@ configs { 
time_limit { seconds: 1200 } + environment_variables { key: "PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED" value: "1" } command: "bash .pfnci/lint.sh" } } diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index c29cd643c..da047c649 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -22,7 +22,7 @@ "DQN": ["best", "final"], "IQN": ["best", "final"], "Rainbow": ["best", "final"], - "A3C": ["final", "best"], + "A3C": ["best", "final"], "DDPG": ["best", "final"], "TRPO": ["best", "final"], "PPO": ["final"], From 61b279e10891ea4d752991eaa13825b8474c7935 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 9 Jul 2020 14:06:59 +0900 Subject: [PATCH 03/36] Removes the 'Currently unsupported' caveat --- examples/atari/reproduction/a3c/README.md | 2 +- examples/atari/reproduction/dqn/README.md | 2 +- examples/atari/reproduction/iqn/README.md | 2 +- examples/atari/reproduction/rainbow/README.md | 2 +- examples/mujoco/reproduction/ddpg/README.md | 2 +- examples/mujoco/reproduction/ppo/README.md | 2 +- examples/mujoco/reproduction/soft_actor_critic/README.md | 2 +- examples/mujoco/reproduction/td3/README.md | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md index 064a3a328..90e4ab878 100644 --- a/examples/atari/reproduction/a3c/README.md +++ b/examples/atari/reproduction/a3c/README.md @@ -19,7 +19,7 @@ python train_a3c.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. 
To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/atari/reproduction/dqn/README.md b/examples/atari/reproduction/dqn/README.md index fcd8e65b2..072e36100 100644 --- a/examples/atari/reproduction/dqn/README.md +++ b/examples/atari/reproduction/dqn/README.md @@ -20,7 +20,7 @@ python train_dqn.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/atari/reproduction/iqn/README.md b/examples/atari/reproduction/iqn/README.md index 128289a1b..5e058b8df 100644 --- a/examples/atari/reproduction/iqn/README.md +++ b/examples/atari/reproduction/iqn/README.md @@ -20,7 +20,7 @@ python train_iqn.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). 
To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/atari/reproduction/rainbow/README.md b/examples/atari/reproduction/rainbow/README.md index 8939771ce..28e553610 100644 --- a/examples/atari/reproduction/rainbow/README.md +++ b/examples/atari/reproduction/rainbow/README.md @@ -20,7 +20,7 @@ python train_rainbow.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/mujoco/reproduction/ddpg/README.md b/examples/mujoco/reproduction/ddpg/README.md index 4386dbf0a..c6fb8b297 100644 --- a/examples/mujoco/reproduction/ddpg/README.md +++ b/examples/mujoco/reproduction/ddpg/README.md @@ -23,7 +23,7 @@ python train_ddpg.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. - `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). 
diff --git a/examples/mujoco/reproduction/ppo/README.md b/examples/mujoco/reproduction/ppo/README.md index 89b59b0f1..4b49deef0 100644 --- a/examples/mujoco/reproduction/ppo/README.md +++ b/examples/mujoco/reproduction/ppo/README.md @@ -24,7 +24,7 @@ python train_ppo.py [options] - `--seed`. This option specifies the random seed used. - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. -- (Currently unsupported) `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. To view the full list of options, either view the code or run the example with the `--help` option. diff --git a/examples/mujoco/reproduction/soft_actor_critic/README.md b/examples/mujoco/reproduction/soft_actor_critic/README.md index 02659d528..9ee6cb012 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/README.md +++ b/examples/mujoco/reproduction/soft_actor_critic/README.md @@ -24,7 +24,7 @@ python train_soft_actor_critic.py [options] - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. -- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. 
diff --git a/examples/mujoco/reproduction/td3/README.md b/examples/mujoco/reproduction/td3/README.md index cc37ebc26..dd66af940 100644 --- a/examples/mujoco/reproduction/td3/README.md +++ b/examples/mujoco/reproduction/td3/README.md @@ -23,7 +23,7 @@ python train_td3.py [options] - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. -- (Currently unsupported) `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. From 0d347e34347e3e154efc71ac9062f469ab0373b0 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 9 Jul 2020 14:21:06 +0900 Subject: [PATCH 04/36] Updates readmes with --load-pretrained example command --- examples/atari/reproduction/a3c/README.md | 5 +++++ examples/atari/reproduction/dqn/README.md | 5 +++++ examples/atari/reproduction/iqn/README.md | 6 ++++++ examples/atari/reproduction/rainbow/README.md | 6 ++++++ examples/mujoco/reproduction/ddpg/README.md | 5 +++++ examples/mujoco/reproduction/ppo/README.md | 6 ++++++ examples/mujoco/reproduction/soft_actor_critic/README.md | 6 ++++++ examples/mujoco/reproduction/td3/README.md | 7 +++++++ examples/mujoco/reproduction/trpo/README.md | 6 ++++++ 9 files changed, 52 insertions(+) diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md index 90e4ab878..37cf51a1d 100644 --- a/examples/atari/reproduction/a3c/README.md +++ b/examples/atari/reproduction/a3c/README.md @@ -13,6 +13,11 @@ To run the training example: python train_a3c.py [options] ``` +We have 
already trained models from this script for all the domains listed in the [results](#Results). To load a pretrained model: +``` +python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 +``` + ### Useful Options - `--env`. Specifies the environment. - `--render`. Add this option to render the states in a GUI window. diff --git a/examples/atari/reproduction/dqn/README.md b/examples/atari/reproduction/dqn/README.md index 072e36100..5f96e7fdf 100644 --- a/examples/atari/reproduction/dqn/README.md +++ b/examples/atari/reproduction/dqn/README.md @@ -12,6 +12,11 @@ To run the training example: ``` python train_dqn.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). To load a pretrained model: + +``` +python train_dqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1 +``` ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`. diff --git a/examples/atari/reproduction/iqn/README.md b/examples/atari/reproduction/iqn/README.md index 5e058b8df..a8ae62fae 100644 --- a/examples/atari/reproduction/iqn/README.md +++ b/examples/atari/reproduction/iqn/README.md @@ -13,6 +13,12 @@ To run the training example: python train_iqn.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results). Note that while we may have run multiple seeds, our pretrained model represents a single run from this script, and may not achieve the performance of the [results](#Results). 
To load a pretrained model: + +``` +python train_iqn.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_dqn.py --gpu -1`. - `--env`. Specifies the environment. diff --git a/examples/atari/reproduction/rainbow/README.md b/examples/atari/reproduction/rainbow/README.md index 28e553610..d23d04022 100644 --- a/examples/atari/reproduction/rainbow/README.md +++ b/examples/atari/reproduction/rainbow/README.md @@ -13,6 +13,12 @@ To run the training example: python train_rainbow.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_rainbow.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_rainbow.py --gpu -1`. - `--env`. Specifies the environment. diff --git a/examples/mujoco/reproduction/ddpg/README.md b/examples/mujoco/reproduction/ddpg/README.md index c6fb8b297..bdc824806 100644 --- a/examples/mujoco/reproduction/ddpg/README.md +++ b/examples/mujoco/reproduction/ddpg/README.md @@ -14,6 +14,11 @@ To run the training example: ``` python train_ddpg.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. 
To load a pretrained model: + +``` +python train_ddpg.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` ### Useful Options diff --git a/examples/mujoco/reproduction/ppo/README.md b/examples/mujoco/reproduction/ppo/README.md index 4b49deef0..7170455c4 100644 --- a/examples/mujoco/reproduction/ppo/README.md +++ b/examples/mujoco/reproduction/ppo/README.md @@ -16,6 +16,12 @@ To run the training example: python train_ppo.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_ppo.py --demo --load-pretrained --env HalfCheetah-v2 --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_ppo.py --gpu -1`. diff --git a/examples/mujoco/reproduction/soft_actor_critic/README.md b/examples/mujoco/reproduction/soft_actor_critic/README.md index 9ee6cb012..319fdd0c0 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/README.md +++ b/examples/mujoco/reproduction/soft_actor_critic/README.md @@ -15,6 +15,12 @@ To run the training example: python train_soft_actor_critic.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_soft_actor_critic.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_soft_actor_critic.py --gpu -1`. 
diff --git a/examples/mujoco/reproduction/td3/README.md b/examples/mujoco/reproduction/td3/README.md index dd66af940..a9503b03c 100644 --- a/examples/mujoco/reproduction/td3/README.md +++ b/examples/mujoco/reproduction/td3/README.md @@ -14,6 +14,13 @@ To run the training example: python train_td3.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_td3.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_td3.py --gpu -1`. diff --git a/examples/mujoco/reproduction/trpo/README.md b/examples/mujoco/reproduction/trpo/README.md index 6ecde7dff..1841ee7e4 100644 --- a/examples/mujoco/reproduction/trpo/README.md +++ b/examples/mujoco/reproduction/trpo/README.md @@ -16,6 +16,12 @@ To run the training example: python train_trpo.py [options] ``` +We have already pretrained models from this script for all the domains listed in the [results](#Results) section. To load a pretrained model: + +``` +python train_trpo.py --demo --load-pretrained --env HalfCheetah-v2 --pretrained-type best --gpu -1 +``` + ### Useful Options - `--gpu`. Specifies the GPU. If you do not have a GPU on your machine, run the example with the option `--gpu -1`. E.g. `python train_trpo.py --gpu -1`. 
From cc2ee3aa0e2c9c054e973198c9b0bcd9d236c802 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 9 Jul 2020 15:16:25 +0900 Subject: [PATCH 05/36] Adds pretrained-type option for a3c --- examples/atari/reproduction/a3c/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md index 37cf51a1d..cbd330492 100644 --- a/examples/atari/reproduction/a3c/README.md +++ b/examples/atari/reproduction/a3c/README.md @@ -25,6 +25,7 @@ python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 - `--outdir` This option specifies the output directory to which the results are written. - `--demo`. Runs an evaluation, instead of training the agent. - `--load-pretrained` Loads the pretrained model. Both `--load` and `--load-pretrained` cannot be used together. +- `--pretrained-type`. Either `best` (the best intermediate network during training) or `final` (the final network after training). To view the full list of options, either view the code or run the example with the `--help` option. 
From 3fa1013e375add08300ae45c908baa8f0d3f1d27 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 9 Jul 2020 18:20:53 +0900 Subject: [PATCH 06/36] Removes exception throw --- examples/atari/reproduction/a3c/train_a3c.py | 19 ++++++++++++++----- examples/atari/reproduction/dqn/train_dqn.py | 2 -- examples/atari/reproduction/iqn/train_iqn.py | 2 -- .../reproduction/rainbow/train_rainbow.py | 2 -- .../mujoco/reproduction/ddpg/train_ddpg.py | 2 -- examples/mujoco/reproduction/ppo/train_ppo.py | 5 ++--- .../train_soft_actor_critic.py | 2 -- examples/mujoco/reproduction/td3/train_td3.py | 8 ++------ .../mujoco/reproduction/trpo/train_trpo.py | 2 -- 9 files changed, 18 insertions(+), 26 deletions(-) diff --git a/examples/atari/reproduction/a3c/train_a3c.py b/examples/atari/reproduction/a3c/train_a3c.py index 3bfb8622a..a1eb3d58c 100644 --- a/examples/atari/reproduction/a3c/train_a3c.py +++ b/examples/atari/reproduction/a3c/train_a3c.py @@ -47,6 +47,9 @@ def main(): parser.add_argument("--eval-n-steps", type=int, default=125000) parser.add_argument("--demo", action="store_true", default=False) parser.add_argument("--load-pretrained", action="store_true", default=False) + parser.add_argument( + "--pretrained-type", type=str, default="best", choices=["best", "final"] + ) parser.add_argument("--load", type=str, default="") parser.add_argument( "--log-level", @@ -146,11 +149,17 @@ def phi(x): max_grad_norm=40.0, ) - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") - - if args.load: - agent.load(args.load) + if args.load or args.load_pretrained: + # either load or load_pretrained must be false + assert not args.load or not args.load_pretrained + if args.load: + agent.load(args.load) + else: + agent.load( + utils.download_model("A3C", args.env, model_type=args.pretrained_type)[ + 0 + ] + ) if args.demo: env = make_env(0, True) diff --git a/examples/atari/reproduction/dqn/train_dqn.py b/examples/atari/reproduction/dqn/train_dqn.py index 
89572f09b..70c034090 100644 --- a/examples/atari/reproduction/dqn/train_dqn.py +++ b/examples/atari/reproduction/dqn/train_dqn.py @@ -167,8 +167,6 @@ def phi(x): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: diff --git a/examples/atari/reproduction/iqn/train_iqn.py b/examples/atari/reproduction/iqn/train_iqn.py index 205f4e362..bdffcbae4 100644 --- a/examples/atari/reproduction/iqn/train_iqn.py +++ b/examples/atari/reproduction/iqn/train_iqn.py @@ -165,8 +165,6 @@ def phi(x): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: diff --git a/examples/atari/reproduction/rainbow/train_rainbow.py b/examples/atari/reproduction/rainbow/train_rainbow.py index e46469570..3f077bb94 100644 --- a/examples/atari/reproduction/rainbow/train_rainbow.py +++ b/examples/atari/reproduction/rainbow/train_rainbow.py @@ -157,8 +157,6 @@ def phi(x): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load_ or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index 8fe180128..7484efa5a 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -179,8 +179,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not 
args.load_pretrained if len(args.load) > 0: diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index b2802f88e..9d8c64a19 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -207,14 +207,13 @@ def ortho_init(layer, gain): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: agent.load(args.load) else: - agent.load(utils.download_model("PPO", args.env, model_type="final")[0]) + agent.load(utils.download_model("PPO", args.env, + model_type="final")[0]) if args.demo: env = make_batch_env(True) diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 158de19f9..03e11478f 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -229,8 +229,6 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not args.load_pretrained if len(args.load) > 0: diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index e0ffb6077..7b119e29d 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -178,18 +178,14 @@ def burnin_action_func(): ) if len(args.load) > 0 or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not len(args.load) > 0 or not 
args.load_pretrained if len(args.load) > 0: agent.load(args.load) else: agent.load( - utils.download_model("TD3", args.env, model_type=args.pretrained_type)[ - 0 - ] - ) + utils.download_model("TD3", args.env, + model_type=args.pretrained_type)[0]) eval_env = make_env(test=True) if args.demo: diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 53e95c616..812e2e2c0 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -181,8 +181,6 @@ def ortho_init(layer, gain): ) if args.load or args.load_pretrained: - if args.load_pretrained: - raise Exception("Pretrained models are currently unsupported.") # either load or load_pretrained must be false assert not args.load or not args.load_pretrained if args.load: From 00650c1ff0295b51020eb51a3a7832c7b220361e Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 10 Jul 2020 22:20:59 +0900 Subject: [PATCH 07/36] modifies path --- pfrl/utils/pretrained_models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index da047c649..1fbfe4945 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -140,8 +140,7 @@ def download_and_store_model(alg, url, env, model_type): _get_model_directory(os.path.join('.lock')), 'models.lock') with filelock.FileLock(lock): - root = _get_model_directory(os.path.join('models', - alg, env)) + root = _get_model_directory(os.path.join(alg, env)) url_basepath = os.path.join(url, alg, env) file = model_type + ".zip" path = os.path.join(root, file) From c496c0724e912af576624711bbaefaa7c444a648 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 00:19:47 +0900 Subject: [PATCH 08/36] Adds Gcloud for model downloads --- .pfnci/script.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.pfnci/script.sh b/.pfnci/script.sh index 
f845e754b..e74f7f9ec 100644 --- a/.pfnci/script.sh +++ b/.pfnci/script.sh @@ -34,7 +34,7 @@ main() { wait # Prepare docker args. - docker_args=(docker run --rm --volume="$(pwd):/src:ro") + docker_args=(docker run --rm --volume="$(pwd):/src:ro" --volume="/root/.pfrl:/root/.pfrl/") if [ "${GPU:-0}" != '0' ]; then docker_args+=(--ipc=host --privileged --env="GPU=${GPU}" --runtime=nvidia) fi @@ -50,6 +50,14 @@ main() { docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime docker_args+=(--env="SLOW=${SLOW:-0}") + for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip + do + gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} . + mkdir -p ~/.pfrl/models/ + unzip ${ZIP} -d ~/.pfrl/models/ + rm ${ZIP} + done + run "${docker_args[@]}" "${docker_image}" bash /src/.pfnci/run.sh "${TARGET}" } From 27c884004a5fb0713e9084b138d7f9568aa5e241 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 01:05:41 +0900 Subject: [PATCH 09/36] Removes marker --- .pfnci/run.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/.pfnci/run.sh b/.pfnci/run.sh index 164d88736..544abcc6a 100644 --- a/.pfnci/run.sh +++ b/.pfnci/run.sh @@ -52,7 +52,6 @@ main() { marker+=' and gpu' bucket="${GPU}" fi - marker+=' and not download_model' UBUNTU_VERSION_ID=$(grep DISTRIB_RELEASE /etc/lsb-release | cut -d "=" -f2) From bbab734e447d1131f84898200c5da788678fc91e Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 18:43:46 +0900 Subject: [PATCH 10/36] Adds A3C tests --- tests/utils_tests/test_pretrained_models.py | 52 +++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 tests/utils_tests/test_pretrained_models.py diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py new file mode 100644 index 000000000..614644533 --- /dev/null +++ b/tests/utils_tests/test_pretrained_models.py @@ -0,0 +1,52 @@ +import functools 
+import os + +import numpy as np +import pytest + +import pfrl +from pfrl import agents +from pfrl.utils import download_model + +pytestmark = pytest.mark.skip() + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadA3C: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_a3c(self, gpu): + a3c_model = nn.Sequential( + nn.Conv2d(obs_size, 16, 8, stride=4), + nn.ReLU(), + nn.Conv2d(16, 32, 4, stride=2), + nn.ReLU(), + nn.Flatten(), + nn.Linear(2592, 256), + nn.ReLU(), + pfrl.nn.Branched( + nn.Sequential(nn.Linear(256, n_actions), SoftmaxCategoricalHead(),), + nn.Linear(256, 1), + ), + ) + from pfrl.optimizers import SharedRMSpropEpsInsideSqrt + opt = SharedRMSpropEpsInsideSqrt(model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99) + agent = agents.A3C(a3c_model, opt, t_max=5, gamma=0.99, beta=1e-2, phi=lambda x: x) + downloaded_model, exists = download_model( + "A3C", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_a3c(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_a3c(gpu=0) + + + From 504e194f41145aa8fc6bf11b1cee12fad0e4cc5a Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 18:48:20 +0900 Subject: [PATCH 11/36] Applies black --- examples/mujoco/reproduction/ppo/train_ppo.py | 3 +- examples/mujoco/reproduction/td3/train_td3.py | 6 +- pfrl/utils/pretrained_models.py | 67 ++++++++++--------- tests/utils_tests/test_pretrained_models.py | 12 ++-- 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index 9d8c64a19..d9ebfbe3d 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -212,8 +212,7 @@ def 
ortho_init(layer, gain): if args.load: agent.load(args.load) else: - agent.load(utils.download_model("PPO", args.env, - model_type="final")[0]) + agent.load(utils.download_model("PPO", args.env, model_type="final")[0]) if args.demo: env = make_batch_env(True) diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index 7b119e29d..7df9f5fa8 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -184,8 +184,10 @@ def burnin_action_func(): agent.load(args.load) else: agent.load( - utils.download_model("TD3", args.env, - model_type=args.pretrained_type)[0]) + utils.download_model("TD3", args.env, model_type=args.pretrained_type)[ + 0 + ] + ) eval_env = make_env(test=True) if args.demo: diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index 1fbfe4945..e6e750b0e 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -13,9 +13,9 @@ from six.moves.urllib import request -_models_root = os.environ.get('PFRL_MODELS_ROOT', - os.path.join(os.path.expanduser('~'), - '.pfrl', 'models')) +_models_root = os.environ.get( + "PFRL_MODELS_ROOT", os.path.join(os.path.expanduser("~"), ".pfrl", "models") +) MODELS = { @@ -27,15 +27,17 @@ "TRPO": ["best", "final"], "PPO": ["final"], "TD3": ["best", "final"], - "SAC": ["best", "final"] + "SAC": ["best", "final"], } download_url = "https://chainer-assets.preferred.jp/pfrl/" -''' +""" This function forked from Chainer, an MIT-licensed project, https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L70 -''' +""" + + def _get_model_directory(model_name, create_directory=True): """Gets the path to the directory of given model. 
@@ -63,21 +65,27 @@ def _reporthook(count, block_size, total_size): global start_time if count == 0: start_time = time.time() - print(' % Total Recv Speed Time left') + print(" % Total Recv Speed Time left") return duration = time.time() - start_time progress_size = count * block_size try: speed = progress_size / duration except ZeroDivisionError: - speed = float('inf') + speed = float("inf") percent = progress_size / total_size * 100 eta = int((total_size - progress_size) / speed) sys.stdout.write( - '\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}' - .format( - percent, total_size / (1 << 20), progress_size / (1 << 20), - speed / (1 << 10), eta // 60 // 60, (eta // 60) % 60, eta % 60)) + "\r{:3.0f} {:4.0f}MiB {:4.0f}MiB {:6.0f}KiB/s {:4d}:{:02d}:{:02d}".format( + percent, + total_size / (1 << 20), + progress_size / (1 << 20), + speed / (1 << 10), + eta // 60 // 60, + (eta // 60) % 60, + eta % 60, + ) + ) sys.stdout.flush() @@ -92,15 +100,15 @@ def cached_download(url): Returns: string: Path to the downloaded file. 
""" - cache_root = os.path.join(_models_root, '_dl_cache') + cache_root = os.path.join(_models_root, "_dl_cache") # cache_root = os.path.join(_models_root, '_dl_cache') try: os.makedirs(cache_root) except OSError: if not os.path.exists(cache_root): raise - lock_path = os.path.join(cache_root, '_dl_lock') - urlhash = hashlib.md5(url.encode('utf-8')).hexdigest() + lock_path = os.path.join(cache_root, "_dl_lock") + urlhash = hashlib.md5(url.encode("utf-8")).hexdigest() cache_path = os.path.join(cache_root, urlhash) with filelock.FileLock(lock_path): @@ -108,10 +116,10 @@ def cached_download(url): return cache_path temp_root = tempfile.mkdtemp(dir=cache_root) try: - temp_path = os.path.join(temp_root, 'dl') - print('Downloading ...') - print('From: {:s}'.format(url)) - print('To: {:s}'.format(cache_path)) + temp_path = os.path.join(temp_root, "dl") + print("Downloading ...") + print("From: {:s}".format(url)) + print("To: {:s}".format(cache_path)) request.urlretrieve(url, temp_path, _reporthook) with filelock.FileLock(lock_path): shutil.move(temp_path, cache_path) @@ -136,9 +144,7 @@ def download_and_store_model(alg, url, env, model_type): string: Path to the downloaded file. bool: whether the model was already cached. 
""" - lock = os.path.join( - _get_model_directory(os.path.join('.lock')), - 'models.lock') + lock = os.path.join(_get_model_directory(os.path.join(".lock")), "models.lock") with filelock.FileLock(lock): root = _get_model_directory(os.path.join(alg, env)) url_basepath = os.path.join(url, alg, env) @@ -146,10 +152,9 @@ def download_and_store_model(alg, url, env, model_type): path = os.path.join(root, file) is_cached = os.path.exists(path) if not is_cached: - cache_path = cached_download(os.path.join(url_basepath, - file)) + cache_path = cached_download(os.path.join(url_basepath, file)) os.rename(cache_path, path) - with zipfile.ZipFile(path, 'r') as zip_ref: + with zipfile.ZipFile(path, "r") as zip_ref: zip_ref.extractall(root) return os.path.join(root, model_type), is_cached @@ -165,12 +170,10 @@ def download_model(alg, env, model_type="best"): str: Path to the downloaded file. bool: whether the model was already cached. """ - assert alg in MODELS, \ - "No pretrained models for " + alg + "." - assert model_type in MODELS[alg], \ - "Model type \"" + model_type + "\" is not supported." + assert alg in MODELS, "No pretrained models for " + alg + "." + assert model_type in MODELS[alg], ( + 'Model type "' + model_type + '" is not supported.' 
+ ) env = env.replace("NoFrameskip-v4", "") - model_path, is_cached = download_and_store_model(alg, - download_url, - env, model_type) + model_path, is_cached = download_and_store_model(alg, download_url, env, model_type) return model_path, is_cached diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 614644533..f3d15cc9e 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -32,8 +32,13 @@ def _test_load_a3c(self, gpu): ), ) from pfrl.optimizers import SharedRMSpropEpsInsideSqrt - opt = SharedRMSpropEpsInsideSqrt(model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99) - agent = agents.A3C(a3c_model, opt, t_max=5, gamma=0.99, beta=1e-2, phi=lambda x: x) + + opt = SharedRMSpropEpsInsideSqrt( + model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99 + ) + agent = agents.A3C( + a3c_model, opt, t_max=5, gamma=0.99, beta=1e-2, phi=lambda x: x + ) downloaded_model, exists = download_model( "A3C", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type ) @@ -47,6 +52,3 @@ def test_cpu(self): @pytest.mark.gpu def test_gpu(self): self._test_load_a3c(gpu=0) - - - From 7b4ef54a3a3501c82d74be708d5bc856f7f87ec3 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 18:50:24 +0900 Subject: [PATCH 12/36] Removes some of the zips from the tests --- .pfnci/script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pfnci/script.sh b/.pfnci/script.sh index e74f7f9ec..e18486c75 100644 --- a/.pfnci/script.sh +++ b/.pfnci/script.sh @@ -50,7 +50,7 @@ main() { docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime docker_args+=(--env="SLOW=${SLOW:-0}") - for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip + for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip do gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} . 
mkdir -p ~/.pfrl/models/ From 1eeb028b34ab2a0b9468e230d712520674f66b9c Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 19:09:47 +0900 Subject: [PATCH 13/36] Adds obs and action size --- tests/utils_tests/test_pretrained_models.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index f3d15cc9e..8d747afc0 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -1,10 +1,9 @@ -import functools import os -import numpy as np import pytest import pfrl +from pfrl import nn from pfrl import agents from pfrl.utils import download_model @@ -18,6 +17,9 @@ def setup(self, pretrained_type): self.pretrained_type = pretrained_type def _test_load_a3c(self, gpu): + from pfrl.policies import SoftmaxCategoricalHead + obs_size = 4 + n_actions = 4 a3c_model = nn.Sequential( nn.Conv2d(obs_size, 16, 8, stride=4), nn.ReLU(), @@ -34,7 +36,7 @@ def _test_load_a3c(self, gpu): from pfrl.optimizers import SharedRMSpropEpsInsideSqrt opt = SharedRMSpropEpsInsideSqrt( - model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99 + a3c_model.parameters(), lr=7e-4, eps=1e-1, alpha=0.99 ) agent = agents.A3C( a3c_model, opt, t_max=5, gamma=0.99, beta=1e-2, phi=lambda x: x From ae7fe6b89dea31f333d69c12f29cfcaebf301537 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 19:15:40 +0900 Subject: [PATCH 14/36] Applies black --- tests/utils_tests/test_pretrained_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 8d747afc0..3eef485fb 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -18,8 +18,9 @@ def setup(self, pretrained_type): def _test_load_a3c(self, gpu): from pfrl.policies import SoftmaxCategoricalHead + obs_size = 4 - n_actions = 4 + 
n_actions = 4 a3c_model = nn.Sequential( nn.Conv2d(obs_size, 16, 8, stride=4), nn.ReLU(), From 5441f9829be9d6ee4ef7e811479cd3e49e7b48fb Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 20:07:26 +0900 Subject: [PATCH 15/36] Adds DQN and IQN tests --- tests/utils_tests/test_pretrained_models.py | 150 ++++++++++++++++++-- 1 file changed, 140 insertions(+), 10 deletions(-) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 3eef485fb..648f568ad 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -1,13 +1,150 @@ import os import pytest +import torch +from torch import nn import pfrl -from pfrl import nn +import pfrl.nn as pnn from pfrl import agents +from pfrl import explorers +from pfrl import replay_buffers from pfrl.utils import download_model +from pfrl.initializers import init_chainer_default -pytestmark = pytest.mark.skip() + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadDQN: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_dqn(self, gpu): + from pfrl.q_functions import DiscreteActionValueHead + + n_actions = 4 + q_func = nn.Sequential( + pnn.LargeAtariCNN(), + init_chainer_default(nn.Linear(512, n_actions)), + DiscreteActionValueHead(), + ) + + # Use the same hyperparameters as the Nature paper + + opt = pfrl.optimizers.RMSpropEpsInsideSqrt( + q_func.parameters(), + lr=2.5e-4, + alpha=0.95, + momentum=0.0, + eps=1e-2, + centered=True, + ) + + rbuf = replay_buffers.ReplayBuffer(100) + + explorer = explorers.LinearDecayEpsilonGreedy( + start_epsilon=1.0, + end_epsilon=0.1, + decay_steps=10 ** 6, + random_action_func=lambda: np.random.randint(4), + ) + + agent = agents.DQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + replay_start_size=50, + target_update_interval=10 ** 4, + clip_delta=True, + 
update_interval=4, + batch_accumulator="sum", + phi=lambda x: x, + ) + + downloaded_model, exists = download_model( + "DQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_dqn(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_dqn(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadIQN: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_iqn(self, gpu): + n_actions = 4 + q_func = pfrl.agents.iqn.ImplicitQuantileQFunction( + psi=nn.Sequential( + nn.Conv2d(4, 32, 8, stride=4), + nn.ReLU(), + nn.Conv2d(32, 64, 4, stride=2), + nn.ReLU(), + nn.Conv2d(64, 64, 3, stride=1), + nn.ReLU(), + nn.Flatten(), + ), + phi=nn.Sequential(pfrl.agents.iqn.CosineBasisLinear(64, 3136), nn.ReLU(),), + f=nn.Sequential( + nn.Linear(3136, 512), nn.ReLU(), nn.Linear(512, n_actions), + ), + ) + + # Use the same hyper parameters as https://arxiv.org/abs/1710.10044 + opt = torch.optim.Adam(q_func.parameters(), lr=5e-5, eps=1e-2 / 32) + + rbuf = replay_buffers.ReplayBuffer(100) + + explorer = explorers.LinearDecayEpsilonGreedy( + start_epsilon=1.0, + end_epsilon=0.1, + decay_steps=10 ** 6, + random_action_func=lambda: np.random.randint(4), + ) + + agent = agents.IQN( + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + replay_start_size=50, + target_update_interval=10 ** 4, + update_interval=4, + batch_accumulator="mean", + phi=lambda x: x, + quantile_thresholds_N=64, + quantile_thresholds_N_prime=64, + quantile_thresholds_K=32, + ) + + downloaded_model, exists = download_model( + "IQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def 
test_cpu(self): + self._test_load_iqn(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_iqn(gpu=0) @pytest.mark.parametrize("pretrained_type", ["final", "best"]) @@ -16,7 +153,7 @@ class TestLoadA3C: def setup(self, pretrained_type): self.pretrained_type = pretrained_type - def _test_load_a3c(self, gpu): + def test_load_a3c(self): from pfrl.policies import SoftmaxCategoricalHead obs_size = 4 @@ -48,10 +185,3 @@ def _test_load_a3c(self, gpu): agent.load(downloaded_model) if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): assert exists - - def test_cpu(self): - self._test_load_a3c(gpu=None) - - @pytest.mark.gpu - def test_gpu(self): - self._test_load_a3c(gpu=0) From 7588799b28d51f83d517b49b54489554aa2a2e95 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 15 Jul 2020 20:22:41 +0900 Subject: [PATCH 16/36] Adds numpy import --- tests/utils_tests/test_pretrained_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 648f568ad..a6c635978 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -1,5 +1,6 @@ import os +import numpy as np import pytest import torch from torch import nn From 28e5defaaeb8de0fd5d1195459a69a1eede07f0d Mon Sep 17 00:00:00 2001 From: Prabhat Date: Thu, 16 Jul 2020 16:49:32 +0900 Subject: [PATCH 17/36] Adds Rainbow test --- tests/utils_tests/test_pretrained_models.py | 41 ++++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index a6c635978..54015dea4 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -69,7 +69,7 @@ def _test_load_dqn(self, gpu): "DQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type ) agent.load(downloaded_model) - if 
os.environ.get("CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): assert exists def test_cpu(self): @@ -137,7 +137,7 @@ def _test_load_iqn(self, gpu): "IQN", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type ) agent.load(downloaded_model) - if os.environ.get("CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): assert exists def test_cpu(self): @@ -148,6 +148,43 @@ def test_gpu(self): self._test_load_iqn(gpu=0) +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadRainbow: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_rainbow(self, gpu): + from pfrl.q_functions import DistributionalDuelingDQN + q_func = DistributionalDuelingDQN(4, 51, -10, 10) + pnn.to_factorized_noisy(q_func, sigma_scale=0.5) + explorer = explorers.Greedy() + opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10 ** -4) + rbuf = replay_buffers.ReplayBuffer(100) + agent = agents.CategoricalDoubleDQN( + q_func, opt, rbuf, gpu=gpu, gamma=0.99, + explorer=explorer, minibatch_size=32, + replay_start_size=50, + target_update_interval=32000, + update_interval=4, + batch_accumulator='mean', + phi=lambda x: x, + ) + + downloaded_model, exists = download_model("Rainbow", "BreakoutNoFrameskip-v4", + model_type=self.pretrained_type) + agent.load(downloaded_model) + if os.environ.get('PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'): + assert exists + + def test_cpu(self): + self._test_load_rainbow(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_rainbow(gpu=0) + + @pytest.mark.parametrize("pretrained_type", ["final", "best"]) class TestLoadA3C: @pytest.fixture(autouse=True) From 24aa5689934e0827f33cd6b9321301a03cba11d7 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 12:12:31 +0900 Subject: [PATCH 18/36] Adds DDPG tests and applies black --- 
tests/utils_tests/test_pretrained_models.py | 95 +++++++++++++++++++-- 1 file changed, 89 insertions(+), 6 deletions(-) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 54015dea4..26ea54a92 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -156,25 +156,32 @@ def setup(self, pretrained_type): def _test_load_rainbow(self, gpu): from pfrl.q_functions import DistributionalDuelingDQN + q_func = DistributionalDuelingDQN(4, 51, -10, 10) pnn.to_factorized_noisy(q_func, sigma_scale=0.5) explorer = explorers.Greedy() opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10 ** -4) rbuf = replay_buffers.ReplayBuffer(100) agent = agents.CategoricalDoubleDQN( - q_func, opt, rbuf, gpu=gpu, gamma=0.99, - explorer=explorer, minibatch_size=32, + q_func, + opt, + rbuf, + gpu=gpu, + gamma=0.99, + explorer=explorer, + minibatch_size=32, replay_start_size=50, target_update_interval=32000, update_interval=4, - batch_accumulator='mean', + batch_accumulator="mean", phi=lambda x: x, ) - downloaded_model, exists = download_model("Rainbow", "BreakoutNoFrameskip-v4", - model_type=self.pretrained_type) + downloaded_model, exists = download_model( + "Rainbow", "BreakoutNoFrameskip-v4", model_type=self.pretrained_type + ) agent.load(downloaded_model) - if os.environ.get('PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'): + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): assert exists def test_cpu(self): @@ -223,3 +230,79 @@ def test_load_a3c(self): agent.load(downloaded_model) if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): assert exists + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadDDPG: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_ddpg(self, gpu): + def concat_obs_and_action(obs, action): + return F.concat((obs, action), axis=-1) + + 
obs_size = 11 + action_size = 3 + from pfrl.nn import ConcatObsAndAction + + q_func = nn.Sequential( + ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, 1), + ) + from pfrl.nn import BoundByTanh + from pfrl.policies import DeterministicHead + + policy = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, action_size), + BoundByTanh(low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0]), + DeterministicHead(), + ) + + opt_a = torch.optim.Adam(policy.parameters()) + opt_c = torch.optim.Adam(q_func.parameters()) + + explorer = explorers.AdditiveGaussian( + scale=0.1, low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0] + ) + + agent = agents.DDPG( + policy, + q_func, + opt_a, + opt_c, + replay_buffers.ReplayBuffer(100), + gamma=0.99, + explorer=explorer, + replay_start_size=1000, + target_update_method="soft", + target_update_interval=1, + update_interval=1, + soft_update_tau=5e-3, + n_times_update=1, + gpu=gpu, + minibatch_size=100, + burnin_action_func=None, + ) + + downloaded_model, exists = download_model( + "DDPG", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_ddpg(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_ddpg(gpu=0) From 10192cde691ea28570b0e815655d0fe4ac850e64 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 12:44:02 +0900 Subject: [PATCH 19/36] Adds other models to download script, adds TRPO test --- .pfnci/script.sh | 2 +- tests/utils_tests/test_pretrained_models.py | 63 +++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/.pfnci/script.sh b/.pfnci/script.sh index e18486c75..e74f7f9ec 100644 --- a/.pfnci/script.sh +++ b/.pfnci/script.sh @@ -50,7 +50,7 @@ main() { 
docker_image=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-runtime docker_args+=(--env="SLOW=${SLOW:-0}") - for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip + for ZIP in a3c_results.zip dqn_results.zip iqn_results.zip rainbow_results.zip ddpg_results.zip trpo_results.zip ppo_results.zip td3_results.zip sac_results.zip do gsutil cp gs://chainerrl-asia-pfn-public-ci/${ZIP} . mkdir -p ~/.pfrl/models/ diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 26ea54a92..ac61f40f6 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -306,3 +306,66 @@ def test_cpu(self): @pytest.mark.gpu def test_gpu(self): self._test_load_ddpg(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadTRPO: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_trpo(self, gpu): + obs_size = 11 + action_size = 3 + + policy = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, action_size), + pfrl.policies.GaussianHeadWithStateIndependentCovariance( + action_size=action_size, + var_type="diagonal", + var_func=lambda x: torch.exp(2 * x), # Parameterize log std + var_param_init=0, # log std = 0 => std = 1 + ), + ) + + vf = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, 1), + ) + vf_opt = torch.optim.Adam(vf.parameters()) + + agent = agents.TRPO( + policy=policy, + vf=vf, + vf_optimizer=vf_opt, + gpu=gpu, + update_interval=5000, + max_kl=0.01, + conjugate_gradient_max_iter=20, + conjugate_gradient_damping=1e-1, + gamma=0.995, + lambd=0.97, + vf_epochs=5, + entropy_coef=0, + ) + + downloaded_model, exists = download_model( + "TRPO", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if 
os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_trpo(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_trpo(gpu=0) From b188403bf935d9c96ea8c963271d7dc4f15c155a Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 13:15:44 +0900 Subject: [PATCH 20/36] Adds PPO pretrained test --- tests/utils_tests/test_pretrained_models.py | 61 +++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index ac61f40f6..7bcdf2a9f 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -369,3 +369,64 @@ def test_cpu(self): @pytest.mark.gpu def test_gpu(self): self._test_load_trpo(gpu=0) + + +class TestLoadPPO: + def _test_load_ppo(self, gpu): + obs_size = 11 + action_size = 3 + from pfrl.policies import GaussianHeadWithStateIndependentCovariance + + policy = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, action_size), + GaussianHeadWithStateIndependentCovariance( + action_size=action_size, + var_type="diagonal", + var_func=lambda x: torch.exp(2 * x), # Parameterize log std + var_param_init=0, # log std = 0 => std = 1 + ), + ) + + vf = torch.nn.Sequential( + nn.Linear(obs_size, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, 1), + ) + + model = pnn.Branched(policy, vf) + opt = torch.optim.Adam(model.parameters(), lr=3e-4, eps=1e-5) + + agent = agents.PPO( + model, + opt, + obs_normalizer=None, + gpu=gpu, + update_interval=2048, + minibatch_size=64, + epochs=10, + clip_eps_vf=None, + entropy_coef=0, + standardize_advantages=True, + gamma=0.995, + lambd=0.97, + ) + + downloaded_model, exists = download_model( + "PPO", "Hopper-v2", model_type="final" + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert 
exists + + def test_cpu(self): + self._test_load_ppo(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_ppo(gpu=0) From 3d815c9f3577888c252708f2dbc0c0ffd8b145f8 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 13:35:43 +0900 Subject: [PATCH 21/36] Adds TD3 tests --- tests/utils_tests/test_pretrained_models.py | 74 +++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 7bcdf2a9f..738f6c5ed 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -430,3 +430,77 @@ def test_cpu(self): @pytest.mark.gpu def test_gpu(self): self._test_load_ppo(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadTD3: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_td3(self, gpu): + + obs_size = 11 + action_size = 3 + + def make_q_func_with_optimizer(): + q_func = nn.Sequential( + pnn.ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, 1), + ) + q_func_optimizer = torch.optim.Adam(q_func.parameters()) + return q_func, q_func_optimizer + + q_func1, q_func1_optimizer = make_q_func_with_optimizer() + q_func2, q_func2_optimizer = make_q_func_with_optimizer() + + policy = nn.Sequential( + nn.Linear(obs_size, 400), + nn.ReLU(), + nn.Linear(400, 300), + nn.ReLU(), + nn.Linear(300, action_size), + nn.Tanh(), + pfrl.policies.DeterministicHead(), + ) + policy_optimizer = torch.optim.Adam(policy.parameters()) + + rbuf = replay_buffers.ReplayBuffer(100) + explorer = explorers.AdditiveGaussian( + scale=0.1, low=[-1.0, -1.0, -1.0], high=[1.0, 1.0, 1.0] + ) + + agent = agents.TD3( + policy, + q_func1, + q_func2, + policy_optimizer, + q_func1_optimizer, + q_func2_optimizer, + rbuf, + gamma=0.99, + soft_update_tau=5e-3, + 
explorer=explorer, + replay_start_size=1000, + gpu=gpu, + minibatch_size=100, + burnin_action_func=None, + ) + + downloaded_model, exists = download_model( + "TD3", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_td3(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_td3(gpu=0) From b9b2fe34109344c069f683b30e942604175781bf Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 13:47:58 +0900 Subject: [PATCH 22/36] Adds SAC tests --- tests/utils_tests/test_pretrained_models.py | 85 +++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 738f6c5ed..40f86ffb9 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -504,3 +504,88 @@ def test_cpu(self): @pytest.mark.gpu def test_gpu(self): self._test_load_td3(gpu=0) + + +@pytest.mark.parametrize("pretrained_type", ["final", "best"]) +class TestLoadSAC: + @pytest.fixture(autouse=True) + def setup(self, pretrained_type): + self.pretrained_type = pretrained_type + + def _test_load_sac(self, gpu): + obs_size = 11 + action_size = 3 + + def squashed_diagonal_gaussian_head(x): + assert x.shape[-1] == action_size * 2 + mean, log_scale = torch.chunk(x, 2, dim=1) + log_scale = torch.clamp(log_scale, -20.0, 2.0) + var = torch.exp(log_scale * 2) + base_distribution = distributions.Independent( + distributions.Normal(loc=mean, scale=torch.sqrt(var)), 1 + ) + # cache_size=1 is required for numerical stability + return distributions.transformed_distribution.TransformedDistribution( + base_distribution, + [distributions.transforms.TanhTransform(cache_size=1)], + ) + + from pfrl.nn.lmbda import Lambda + policy = nn.Sequential( + nn.Linear(obs_size, 256), + nn.ReLU(), + nn.Linear(256, 256), + 
nn.ReLU(), + nn.Linear(256, action_size * 2), + Lambda(squashed_diagonal_gaussian_head), + ) + policy_optimizer = torch.optim.Adam(policy.parameters(), lr=3e-4) + + def make_q_func_with_optimizer(): + q_func = nn.Sequential( + pfrl.nn.ConcatObsAndAction(), + nn.Linear(obs_size + action_size, 256), + nn.ReLU(), + nn.Linear(256, 256), + nn.ReLU(), + nn.Linear(256, 1), + ) + torch.nn.init.xavier_uniform_(q_func[1].weight) + torch.nn.init.xavier_uniform_(q_func[3].weight) + torch.nn.init.xavier_uniform_(q_func[5].weight) + q_func_optimizer = torch.optim.Adam(q_func.parameters(), lr=3e-4) + return q_func, q_func_optimizer + + q_func1, q_func1_optimizer = make_q_func_with_optimizer() + q_func2, q_func2_optimizer = make_q_func_with_optimizer() + + agent = agents.SoftActorCritic( + policy, + q_func1, + q_func2, + policy_optimizer, + q_func1_optimizer, + q_func2_optimizer, + replay_buffers.ReplayBuffer(100), + gamma=0.99, + replay_start_size=1000, + gpu=gpu, + minibatch_size=256, + burnin_action_func=None, + entropy_target=-3, + temperature_optimizer_lr=3e-4, + ) + + downloaded_model, exists = download_model( + "SAC", "Hopper-v2", model_type=self.pretrained_type + ) + agent.load(downloaded_model) + if os.environ.get("PFRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED"): + assert exists + + def test_cpu(self): + self._test_load_sac(gpu=None) + + @pytest.mark.gpu + def test_gpu(self): + self._test_load_sac(gpu=0) From 3ae2f88e4153fedfc53416cc64fd5cef5d40d894 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 14:52:11 +0900 Subject: [PATCH 23/36] Uses posixpath --- pfrl/utils/pretrained_models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index e6e750b0e..4dd4a1188 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -5,6 +5,7 @@ import filelock import hashlib import os +import posixpath import shutil import tempfile import time @@ -147,12 +148,12 @@ 
def download_and_store_model(alg, url, env, model_type): lock = os.path.join(_get_model_directory(os.path.join(".lock")), "models.lock") with filelock.FileLock(lock): root = _get_model_directory(os.path.join(alg, env)) - url_basepath = os.path.join(url, alg, env) + url_basepath = posixpath.join(url, alg, env) file = model_type + ".zip" path = os.path.join(root, file) is_cached = os.path.exists(path) if not is_cached: - cache_path = cached_download(os.path.join(url_basepath, file)) + cache_path = cached_download(posixpath.join(url_basepath, file)) os.rename(cache_path, path) with zipfile.ZipFile(path, "r") as zip_ref: zip_ref.extractall(root) From c3e6aa79caf4bbc0fa8fd684961863f154a7d341 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 17 Jul 2020 17:52:40 +0900 Subject: [PATCH 24/36] Applies black --- tests/utils_tests/test_pretrained_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 40f86ffb9..892bd3306 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -531,6 +531,7 @@ def squashed_diagonal_gaussian_head(x): ) from pfrl.nn.lmbda import Lambda + policy = nn.Sequential( nn.Linear(obs_size, 256), nn.ReLU(), From 2e6427acb32d682fe48829ec5747faca3567cbde Mon Sep 17 00:00:00 2001 From: Prabhat Date: Sat, 18 Jul 2020 01:32:57 +0900 Subject: [PATCH 25/36] Addresses flakes --- tests/utils_tests/test_pretrained_models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 892bd3306..922ec20d6 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -239,8 +239,6 @@ def setup(self, pretrained_type): self.pretrained_type = pretrained_type def _test_load_ddpg(self, gpu): - def concat_obs_and_action(obs, action): - return F.concat((obs, action), 
axis=-1) obs_size = 11 action_size = 3 @@ -521,6 +519,7 @@ def squashed_diagonal_gaussian_head(x): mean, log_scale = torch.chunk(x, 2, dim=1) log_scale = torch.clamp(log_scale, -20.0, 2.0) var = torch.exp(log_scale * 2) + from torch import distributions base_distribution = distributions.Independent( distributions.Normal(loc=mean, scale=torch.sqrt(var)), 1 ) From d396f65d93181e5be286d31332fcc236dc7e4851 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Sun, 19 Jul 2020 20:32:24 +0900 Subject: [PATCH 26/36] Applies black --- tests/utils_tests/test_pretrained_models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 922ec20d6..467b9a478 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -520,6 +520,7 @@ def squashed_diagonal_gaussian_head(x): log_scale = torch.clamp(log_scale, -20.0, 2.0) var = torch.exp(log_scale * 2) from torch import distributions + base_distribution = distributions.Independent( distributions.Normal(loc=mean, scale=torch.sqrt(var)), 1 ) From 7703a7f5bc9e9adc96396f3dabcec1bab4344c62 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 4 Sep 2020 14:10:24 +0900 Subject: [PATCH 27/36] Adds outputting of demo scores to a file --- examples/mujoco/reproduction/ddpg/train_ddpg.py | 4 ++++ examples/mujoco/reproduction/ppo/train_ppo.py | 4 ++++ .../reproduction/soft_actor_critic/train_soft_actor_critic.py | 4 ++++ examples/mujoco/reproduction/td3/train_td3.py | 4 ++++ examples/mujoco/reproduction/trpo/train_trpo.py | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index 7484efa5a..0cfab4c5f 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -207,6 +207,10 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import os + import json + with 
open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index d9ebfbe3d..76461b814 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -231,6 +231,10 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) + import os + import json + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_batch_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 03e11478f..900c0cdfe 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -256,6 +256,10 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import os + import json + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_batch_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index 7df9f5fa8..afccd4484 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -206,6 +206,10 @@ def burnin_action_func(): eval_stats["stdev"], ) ) + import os + import json + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: experiments.train_agent_with_evaluation( agent=agent, diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 812e2e2c0..79a00e141 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ 
b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -209,6 +209,10 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) + import os + import json + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: + json.dump(eval_stats, f) else: pfrl.experiments.train_agent_with_evaluation( From 1678fd83d030770c6ec3757815207de218ce04b3 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Tue, 8 Sep 2020 18:34:34 +0900 Subject: [PATCH 28/36] Adds filelock requirement to DQN --- examples/atari/reproduction/dqn/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/atari/reproduction/dqn/README.md b/examples/atari/reproduction/dqn/README.md index 5f96e7fdf..c87d5f423 100644 --- a/examples/atari/reproduction/dqn/README.md +++ b/examples/atari/reproduction/dqn/README.md @@ -5,6 +5,7 @@ This example trains a DQN agent, from the following paper: [Human-level control - atari_py>=0.1.1 - opencv-python +- filelock ## Running the Example From 806a545e232e15cc22c2a524f101efd146af36a5 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 23 Sep 2020 13:46:58 +0900 Subject: [PATCH 29/36] Adds filelock requirement for pfrl --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 43bb94eed..45b6e8b0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ torch>=1.3.0 gym>=0.9.7 numpy>=1.10.4 +filelock pillow From 561cbfb32b2538072cb8097714f6a4e6484a35d2 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 6 Nov 2020 01:13:06 +0900 Subject: [PATCH 30/36] Applies isort --- examples/mujoco/reproduction/ddpg/train_ddpg.py | 2 +- examples/mujoco/reproduction/ppo/train_ppo.py | 2 +- .../soft_actor_critic/train_soft_actor_critic.py | 2 +- examples/mujoco/reproduction/td3/train_td3.py | 2 +- examples/mujoco/reproduction/trpo/train_trpo.py | 2 +- pfrl/utils/pretrained_models.py | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py 
b/examples/mujoco/reproduction/ddpg/train_ddpg.py index b70591cc7..9cbede0f9 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -203,8 +203,8 @@ def burnin_action_func(): eval_stats["stdev"], ) ) - import os import json + import os with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index 3aafc34f6..2a91eb3df 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -230,8 +230,8 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) - import os import json + import os with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 3a890b2b5..27d1ca478 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -253,8 +253,8 @@ def burnin_action_func(): eval_stats["stdev"], ) ) - import os import json + import os with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index bb65a23a8..c831995e0 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -203,8 +203,8 @@ def burnin_action_func(): eval_stats["stdev"], ) ) - import os import json + import os with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 
79a00e141..1b705b4cb 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -209,8 +209,8 @@ def ortho_init(layer, gain): eval_stats["stdev"], ) ) - import os import json + import os with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index 4dd4a1188..c6ecf7b9c 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -2,17 +2,17 @@ https://github.com/chainer/chainercv/blob/master/chainercv/utils/download.py """ -import filelock import hashlib import os import posixpath import shutil +import sys import tempfile import time -import sys import zipfile -from six.moves.urllib import request +import filelock +from six.moves.urllib import request _models_root = os.environ.get( "PFRL_MODELS_ROOT", os.path.join(os.path.expanduser("~"), ".pfrl", "models") From 09c43e3e62bba0aba0bc49d716c7eda61dee8562 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 11 Dec 2020 01:07:31 +0900 Subject: [PATCH 31/36] Applies black --- examples/mujoco/reproduction/ddpg/train_ddpg.py | 1 + examples/mujoco/reproduction/ppo/train_ppo.py | 1 + .../reproduction/soft_actor_critic/train_soft_actor_critic.py | 1 + examples/mujoco/reproduction/td3/train_td3.py | 1 + examples/mujoco/reproduction/trpo/train_trpo.py | 1 + 5 files changed, 5 insertions(+) diff --git a/examples/mujoco/reproduction/ddpg/train_ddpg.py b/examples/mujoco/reproduction/ddpg/train_ddpg.py index 9cbede0f9..45614ead9 100644 --- a/examples/mujoco/reproduction/ddpg/train_ddpg.py +++ b/examples/mujoco/reproduction/ddpg/train_ddpg.py @@ -205,6 +205,7 @@ def burnin_action_func(): ) import json import os + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/ppo/train_ppo.py b/examples/mujoco/reproduction/ppo/train_ppo.py index 
2a91eb3df..8bf7fbe5f 100644 --- a/examples/mujoco/reproduction/ppo/train_ppo.py +++ b/examples/mujoco/reproduction/ppo/train_ppo.py @@ -232,6 +232,7 @@ def ortho_init(layer, gain): ) import json import os + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py index 27d1ca478..929cb2925 100644 --- a/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py +++ b/examples/mujoco/reproduction/soft_actor_critic/train_soft_actor_critic.py @@ -255,6 +255,7 @@ def burnin_action_func(): ) import json import os + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/td3/train_td3.py b/examples/mujoco/reproduction/td3/train_td3.py index c831995e0..2ca26a44a 100644 --- a/examples/mujoco/reproduction/td3/train_td3.py +++ b/examples/mujoco/reproduction/td3/train_td3.py @@ -205,6 +205,7 @@ def burnin_action_func(): ) import json import os + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: diff --git a/examples/mujoco/reproduction/trpo/train_trpo.py b/examples/mujoco/reproduction/trpo/train_trpo.py index 1b705b4cb..f9c88c79f 100644 --- a/examples/mujoco/reproduction/trpo/train_trpo.py +++ b/examples/mujoco/reproduction/trpo/train_trpo.py @@ -211,6 +211,7 @@ def ortho_init(layer, gain): ) import json import os + with open(os.path.join(args.outdir, "demo_scores.json"), "w") as f: json.dump(eval_stats, f) else: From 570e467783d161ca1d3435c9087e406db1944ffc Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 11 Dec 2020 14:30:47 +0900 Subject: [PATCH 32/36] Applies isort --- tests/utils_tests/test_pretrained_models.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/tests/utils_tests/test_pretrained_models.py b/tests/utils_tests/test_pretrained_models.py index 467b9a478..e1ed17553 100644 --- a/tests/utils_tests/test_pretrained_models.py +++ b/tests/utils_tests/test_pretrained_models.py @@ -7,11 +7,9 @@ import pfrl import pfrl.nn as pnn -from pfrl import agents -from pfrl import explorers -from pfrl import replay_buffers -from pfrl.utils import download_model +from pfrl import agents, explorers, replay_buffers from pfrl.initializers import init_chainer_default +from pfrl.utils import download_model @pytest.mark.parametrize("pretrained_type", ["final", "best"]) From 6555e3dcf160f8130bec91df8daa2d8358f53d02 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Fri, 11 Dec 2020 16:06:31 +0900 Subject: [PATCH 33/36] Amends command --- examples/atari/reproduction/a3c/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/atari/reproduction/a3c/README.md b/examples/atari/reproduction/a3c/README.md index cbd330492..cae67f45c 100644 --- a/examples/atari/reproduction/a3c/README.md +++ b/examples/atari/reproduction/a3c/README.md @@ -15,7 +15,7 @@ python train_a3c.py [options] We have already trained models from this script for all the domains listed in the [results](#Results). 
To load a pretrained model: ``` -python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 +python train_a3c.py --demo --load-pretrained --env BreakoutNoFrameskip-v4 --pretrained-type best ``` ### Useful Options From be0cf2969d905b48a2a2cc8b015c1367719c41df Mon Sep 17 00:00:00 2001 From: Prabhat Nagarajan Date: Wed, 16 Dec 2020 12:14:00 +0900 Subject: [PATCH 34/36] Update pfrl/utils/pretrained_models.py Co-authored-by: Yasuhiro Fujita --- pfrl/utils/pretrained_models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index c6ecf7b9c..f36744fbe 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -102,7 +102,6 @@ def cached_download(url): string: Path to the downloaded file. """ cache_root = os.path.join(_models_root, "_dl_cache") - # cache_root = os.path.join(_models_root, '_dl_cache') try: os.makedirs(cache_root) except OSError: From d273d1ee280595905e8026896ea68469c9c5fefb Mon Sep 17 00:00:00 2001 From: Prabhat Nagarajan Date: Wed, 16 Dec 2020 12:14:13 +0900 Subject: [PATCH 35/36] Update pfrl/utils/pretrained_models.py Co-authored-by: Yasuhiro Fujita --- pfrl/utils/pretrained_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index f36744fbe..f1d3ee059 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -144,7 +144,7 @@ def download_and_store_model(alg, url, env, model_type): string: Path to the downloaded file. bool: whether the model was already cached. 
""" - lock = os.path.join(_get_model_directory(os.path.join(".lock")), "models.lock") + lock = os.path.join(_get_model_directory(".lock"), "models.lock") with filelock.FileLock(lock): root = _get_model_directory(os.path.join(alg, env)) url_basepath = posixpath.join(url, alg, env) From 719dfcea3f92dcc80982af52e64f5186f15490b4 Mon Sep 17 00:00:00 2001 From: Prabhat Date: Wed, 16 Dec 2020 12:29:47 +0900 Subject: [PATCH 36/36] Moves fork reference to the docstrings --- pfrl/utils/pretrained_models.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pfrl/utils/pretrained_models.py b/pfrl/utils/pretrained_models.py index f1d3ee059..bbd266931 100644 --- a/pfrl/utils/pretrained_models.py +++ b/pfrl/utils/pretrained_models.py @@ -33,17 +33,13 @@ download_url = "https://chainer-assets.preferred.jp/pfrl/" -""" -This function forked from Chainer, an MIT-licensed project, -https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L70 -""" - def _get_model_directory(model_name, create_directory=True): """Gets the path to the directory of given model. The generated path is just a concatenation of the global root directory - and the model name. + and the model name. This function forked from Chainer, an MIT-licensed project, + https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L43 Args: model_name (str): Name of the model. create_directory (bool): If True (default), this function also creates @@ -96,6 +92,8 @@ def cached_download(url): It downloads a file from the URL if there is no corresponding cache. If there is already a cache for the given URL, it just returns the path to the cache without downloading the same file. + This function forked from Chainer, an MIT-licensed project, + https://github.com/chainer/chainer/blob/v7.4.0/chainer/dataset/download.py#L70 Args: url (string): URL to download from. Returns: