diff --git a/_config.yml b/_config.yml index 6958ad1f..525c9690 100644 --- a/_config.yml +++ b/_config.yml @@ -20,8 +20,14 @@ remote_theme: jekyll/minima minima: skin: auto social_links: - - { platform: github, user_url: "https://github.com/guigoruiz1/yugiquery" } - + - { platform: github, user_url: "https://github.com/guigoruiz1" } + - { platform: linkedin, user_url: "https://www.linkedin.com/in/guigoruiz1" } + - { platform: stackoverflow, user_url: "https://stackoverflow.com/users/6794856" } + +header_pages: + # - README.md + - LICENSE.md + plugins: - jekyll-feed - jekyll-seo-tag diff --git a/assets/gateway.html b/assets/gateway.html index be4d8e20..f9decb6f 100644 --- a/assets/gateway.html +++ b/assets/gateway.html @@ -5,7 +5,7 @@
Once you put in your token, and click connect, it will connect, and then let you know when you can close the browser. After that, you should be good to go. diff --git a/assets/colors.json b/assets/json/colors.json similarity index 100% rename from assets/colors.json rename to assets/json/colors.json diff --git a/assets/dates.json b/assets/json/dates.json similarity index 100% rename from assets/dates.json rename to assets/json/dates.json diff --git a/assets/rarities.json b/assets/json/rarities.json similarity index 100% rename from assets/rarities.json rename to assets/json/rarities.json diff --git a/assets/regions.json b/assets/json/regions.json similarity index 100% rename from assets/regions.json rename to assets/json/regions.json diff --git a/assets/README.md b/assets/markdown/README.md similarity index 54% rename from assets/README.md rename to assets/markdown/README.md index 4d5094ec..70feb0e4 100644 --- a/assets/README.md +++ b/assets/markdown/README.md @@ -44,54 +44,84 @@ The full YugiQuery workflow can be run with python yugiquery.py ``` -Any Jupyter notebook in the *source* directory will be assumed to be a report and will be executed and exported to HTML. The index.md and README.md files will be updated, using their template files in the *assets* directory, to include a table with all the reports available and their timestamps. The source notebooks will then be cleared of their outputs and all changes will be commited to Git. +Any Jupyter notebook in the ***source*** directory will be assumed to be a report and will be executed and exported to HTML. The index.md and README.md files will be updated, using their respective template files in the ***assets*** directory, to include a table with all the reports available and their timestamps. The source notebooks will then be cleared of their outputs and all changes will be committed to Git. + +Report templates are included in the `assets/notebook` folder. 
Moving them to the source folder will enable them for execution. + +To use the optional Discord bot, run + +``` +python bot.py discord +``` + +Alternatively, to use the optional Telegram bot, run + +``` +python bot.py telegram +``` + +Both `yugiquery.py` and `bot.py` accept command line arguments. Using `-h` or `--help` will print a useful help message listing the parameters that can be passed and their usage. It is also possible to call the script directly as an executable using `./`, although that may be OS-dependent. + +Further use cases can be found in the [documentation](#documentation). ## Installation -YugiQuery is meant to be user friendly to users without much coding experience. Provided you have Python and Git installed, upon first execution YugiQuery will try to install all its dependencies. If the operation is not succesfull, the user may try to install the dependencies manually relying on the install.sh script. A pip requirements.txt file is also provided, but it does not install every dependency installed by the install.sh script. +YugiQuery is meant to be user friendly to users without much coding experience. Provided you have Python and Git installed, upon first execution YugiQuery will try to install all its dependencies. If the operation is not successful, the user may try to install the dependencies manually, relying on the `install.sh` script and the pip `requirements.txt` file provided. The `install.sh` script also installs an nbconvert template which adds dynamic light and dark modes to the exported HTML report. This is the default template used by YugiQuery. In case it cannot be installed, the user should change the selected template on each report notebook. + +Further details can be found in the [documentation](#documentation). ## Repository hierarchy -The repository is structured such that its root contains the web page source files while the actual executable files are kept in the *source* directory. 
Any template files and files used for reference such as dictionaries are kept in the *assets* directory. The raw data is saved in the *data* directory and the ReadTheDocs source files are kept the *docs* directory. Below is an example of the basic structure of the directory. +The repository is structured such that its root contains the web page source files, while the actual executable files are kept in the ***source*** directory. Any template files (markdown, nbconvert, notebook, etc.) and files used for reference such as dictionaries are kept in the ***assets*** directory. The raw data used by the reports is saved in the ***data*** directory. The *Read The Docs* source files are kept in the ***docs*** directory. Below is a skeleton of the directory structure. ``` yugiquery/ ├─ assets/ -│ ├─ colors/ -│ ├─ dates.json -│ ├─ footer.md +│ ├─ json/ +│ │ ├─ colors.json +│ │ ├─ dates.json +│ │ ├─ rarities.json +│ │ └─ regions.json +│ ├─ markdown/ +│ │ ├─ footer.md +│ │ ├─ header.md +│ │ ├─ index.md +│ │ └─ README.md +│ ├─ nbconvert/ +│ │ ├─ conf.json +│ │ ├─ dynamic.css +│ │ └─ index.html.j2 +│ ├─ notebook/ +│ │ └─ Template.ipynb │ ├─ Gateway.html -│ ├─ header.md -│ ├─ index.md -│ ├─ rarities.json -│ ├─ README.md -│ ├─ regions.json -│ ├─ secrets.env -│ └─ Template.ipynb +│ └─ secrets.env ├─ data/ │ ├─ benchmark.json -│ ├─ data.csv -│ └─ data_changelog.csv +│ ├─ report_data.csv +│ └─ report_changelog.csv ├─ docs/ -│ ├─ bot.rst +│ ├─ Makefile +│ ├─ make.bat │ ├─ conf.py │ ├─ index.rst +│ ├─ bot.rst │ └─ yugiquery.rst ├─ source/ -│ ├─ bot.py │ ├─ install.sh -│ ├─ Report.ipynb │ ├─ requirements.txt +│ ├─ Report.ipynb +│ ├─ bot.py │ └─ yugiquery.py +├─ _config.yml +├─ .devcontainer.json ├─ .readthedocs.yaml ├─ index.md ├─ LICENSE.md ├─ README.md -├─ Report.html -└─ _config.yml +└─ Report.html ``` -Ideally, files in the *assets* directory should be read-only files exclusively for reference. 
Files in the *data* directory are read and write files for the generation of the reports. The root of the repository should only contain files intended for the web page generation by GitHub pages or files that cannot be in another location. +Ideally, files in the ***assets*** directory should be read-only files exclusively for reference. Files in the ***data*** directory are read and write files for the generation of the reports. The root of the repository should only contain files intended for the web page generation by GitHub pages or files that cannot be in another location. ## Documentation @@ -99,7 +129,9 @@ The documentation can be found at [ReadTheDocs](https://yugiquery.readthedocs.io ## Known limitations -At present, python<3.11 is needed to install all dependencies. This will be fixed once `TQDM` changes its Discord integration from the deprecated `disco-py` to `discord.py` +At present, `TQDM` relies on the deprecated `disco-py` package which won't build. To circumvent this problem until the official `TQDM` release drops the `disco-py` dependency, we install `TQDM` from [this fork](https://github.com/guigoruiz1/tqdm), which uses pure REST API and/or `discord.py`. + +Recent updates to `IPython` broke `HALO` in Jupyter notebooks. Until `HALO` conforms to the new IPython API, we install it from [this fork](https://github.com/guigoruiz1/halo). 
--- diff --git a/assets/footer.md b/assets/markdown/footer.md similarity index 100% rename from assets/footer.md rename to assets/markdown/footer.md diff --git a/assets/header.md b/assets/markdown/header.md similarity index 100% rename from assets/header.md rename to assets/markdown/header.md diff --git a/assets/index.md b/assets/markdown/index.md similarity index 100% rename from assets/index.md rename to assets/markdown/index.md diff --git a/assets/nbconvert/conf.json b/assets/nbconvert/conf.json new file mode 100644 index 00000000..e7e8b6dd --- /dev/null +++ b/assets/nbconvert/conf.json @@ -0,0 +1,13 @@ +{ + "base_template": "lab", + "mimetypes": { + "text/html": true + }, + "preprocessors": { + "100-TagRemovePreprocessor": { + "type": "nbconvert.preprocessors.TagRemovePreprocessor", + "enabled": true, + "remove_cell_tags": ["exclude"] + } + } +} diff --git a/assets/nbconvert/dynamic.css b/assets/nbconvert/dynamic.css new file mode 100644 index 00000000..e80cea7c --- /dev/null +++ b/assets/nbconvert/dynamic.css @@ -0,0 +1,297 @@ +/*----------------------------------------------------------------------------- +| Copyright (c) Jupyter Development Team. +| Distributed under the terms of the Modified BSD License. +|----------------------------------------------------------------------------*/ + +/* +The following CSS variables define the main, public API for styling JupyterLab. +These variables should be used by all plugins wherever possible. In other +words, plugins should not define custom colors, sizes, etc unless absolutely +necessary. This enables users to change the visual theme of JupyterLab +by changing these variables. + +Many variables appear in an ordered sequence (0,1,2,3). These sequences +are designed to work well together, so for example, `--jp-border-color1` should +be used with `--jp-layout-color1`. 
The numbers have the following meanings: + +* 0: super-primary, reserved for special emphasis +* 1: primary, most important under normal situations +* 2: secondary, next most important under normal situations +* 3: tertiary, next most important under normal situations + +Throughout JupyterLab, we are mostly following principles from Google's +Material Design when selecting colors. We are not, however, following +all of MD as it is not optimized for dense, information rich UIs. +*/ + +:root { + /* Light Theme */ + @media (prefers-color-scheme: light) { + --jp-shadow-base-lightness: 0; + --jp-border-color0: var(--md-grey-400); + --jp-border-color1: var(--md-grey-400); + --jp-border-color2: var(--md-grey-300); + --jp-border-color3: var(--md-grey-200); + --jp-ui-font-color0: rgba(0, 0, 0, 1); + --jp-ui-font-color1: rgba(0, 0, 0, 0.87); + --jp-ui-font-color2: rgba(0, 0, 0, 0.54); + --jp-ui-font-color3: rgba(0, 0, 0, 0.38); + + --jp-ui-inverse-font-color0: rgba(255, 255, 255, 1); + --jp-ui-inverse-font-color1: rgba(255, 255, 255, 1); + --jp-ui-inverse-font-color2: rgba(255, 255, 255, 0.7); + --jp-ui-inverse-font-color3: rgba(255, 255, 255, 0.5); + + --jp-content-font-color0: rgba(0, 0, 0, 1); + --jp-content-font-color1: rgba(0, 0, 0, 0.87); + --jp-content-font-color2: rgba(0, 0, 0, 0.54); + --jp-content-font-color3: rgba(0, 0, 0, 0.38); + --jp-content-link-color: var(--md-blue-900); + + --jp-layout-color0: white; + --jp-layout-color1: white; + --jp-layout-color2: var(--md-grey-200); + --jp-layout-color3: var(--md-grey-400); + + --jp-inverse-layout-color0: #111; + --jp-inverse-layout-color1: var(--md-grey-900); + --jp-inverse-layout-color2: var(--md-grey-800); + --jp-inverse-layout-color3: var(--md-grey-700); + + --jp-brand-color0: var(--md-blue-900); + --jp-brand-color1: var(--md-blue-700); + + + --jp-accent-color0: var(--md-green-900); + --jp-accent-color1: var(--md-green-700); + + --jp-warn-color0: var(--md-orange-900); + --jp-warn-color1: var(--md-orange-700); + + 
--jp-error-color0: var(--md-red-900); + --jp-error-color1: var(--md-red-700); + + + --jp-success-color0: var(--md-green-900); + --jp-success-color1: var(--md-green-700); + + --jp-info-color0: var(--md-cyan-900); + --jp-info-color1: var(--md-cyan-700); + + --jp-cell-editor-background: var(--md-grey-100); + --jp-cell-editor-border-color: var(--md-grey-300); + + --jp-cell-prompt-not-active-opacity: 0.5; + --jp-cell-prompt-not-active-font-color: var(--md-grey-700); + + --jp-notebook-multiselected-color: var(--md-blue-50); + + + --jp-rendermime-error-background: #fdd; + --jp-rendermime-table-row-background: var(--md-grey-100); + --jp-rendermime-table-row-hover-background: var(--md-light-blue-50); + + --jp-dialog-background: rgba(0, 0, 0, 0.25); + + --jp-toolbar-border-color: var(--jp-border-color1); + + --jp-toolbar-box-shadow: 0 0 2px 0 rgba(0, 0, 0, 0.24); + + --jp-toolbar-active-background: var(--md-grey-300); + + --jp-input-active-background: var(--jp-layout-color1); + --jp-input-hover-background: var(--jp-layout-color1); + --jp-input-background: var(--md-grey-100); + + --jp-editor-selected-background: #d9d9d9; + --jp-editor-selected-focused-background: #d7d4f0; + + --jp-mirror-editor-keyword-color: #008000; + --jp-mirror-editor-atom-color: #88f; + --jp-mirror-editor-number-color: #080; + --jp-mirror-editor-def-color: #00f; + --jp-mirror-editor-variable-color: var(--md-grey-900); + --jp-mirror-editor-variable-2-color: rgb(0, 54, 109); + --jp-mirror-editor-variable-3-color: #085; + --jp-mirror-editor-punctuation-color: #05a; + --jp-mirror-editor-property-color: #05a; + + --jp-mirror-editor-string-color: #ba2121; + --jp-mirror-editor-string-2-color: #708; + + --jp-mirror-editor-builtin-color: #008000; + + --jp-mirror-editor-tag-color: #170; + --jp-mirror-editor-attribute-color: #00c; + --jp-mirror-editor-header-color: blue; + --jp-mirror-editor-quote-color: #090; + --jp-mirror-editor-link-color: #00c; + + --jp-collaborator-color1: #ffad8e; + --jp-collaborator-color2: 
#dac83d; + --jp-collaborator-color3: #72dd76; + --jp-collaborator-color4: #00e4d0; + --jp-collaborator-color5: #45d4ff; + --jp-collaborator-color6: #e2b1ff; + --jp-collaborator-color7: #ff9de6; + + --jp-vega-background: white; + + --jp-search-toggle-off-opacity: 0.5; + + --jp-search-selected-match-background-color: rgb(245, 200, 0); + + --jp-json-icon-color: var(--md-orange-700); + --jp-console-icon-background-color: var(--md-blue-700); + --jp-terminal-icon-background-color: var(--md-grey-800); + --jp-terminal-icon-color: var(--md-grey-200); + --jp-text-editor-icon-color: var(--md-grey-700); + --jp-inspector-icon-color: var(--md-grey-700); + --jp-switch-true-position-color: var(--md-orange-900); + } + + /* Dark Theme */ + @media (prefers-color-scheme: dark) { + --jp-shadow-base-lightness: 32; + --jp-border-color0: var(--md-grey-700); + --jp-border-color1: var(--md-grey-700); + --jp-border-color2: var(--md-grey-800); + --jp-border-color3: var(--md-grey-900); + --jp-ui-font-color0: rgba(255, 255, 255, 1); + --jp-ui-font-color1: rgba(255, 255, 255, 0.87); + --jp-ui-font-color2: rgba(255, 255, 255, 0.54); + --jp-ui-font-color3: rgba(255, 255, 255, 0.38); + + --jp-ui-inverse-font-color0: rgba(0, 0, 0, 1); + --jp-ui-inverse-font-color1: rgba(0, 0, 0, 0.8); + --jp-ui-inverse-font-color2: rgba(0, 0, 0, 0.5); + --jp-ui-inverse-font-color3: rgba(0, 0, 0, 0.3); + + --jp-content-font-color0: rgba(255, 255, 255, 1); + --jp-content-font-color1: rgba(255, 255, 255, 1); + --jp-content-font-color2: rgba(255, 255, 255, 0.7); + --jp-content-font-color3: rgba(255, 255, 255, 0.5); + --jp-content-link-color: var(--md-blue-300); + + --jp-layout-color0: #111; + --jp-layout-color1: var(--md-grey-900); + --jp-layout-color2: var(--md-grey-800); + --jp-layout-color3: var(--md-grey-700); + + --jp-inverse-layout-color0: white; + --jp-inverse-layout-color1: white; + --jp-inverse-layout-color2: var(--md-grey-200); + --jp-inverse-layout-color3: var(--md-grey-400); + + --jp-brand-color0: 
var(--md-blue-700); + --jp-brand-color1: var(--md-blue-500); + + + --jp-accent-color0: var(--md-green-700); + --jp-accent-color1: var(--md-green-500); + + --jp-warn-color0: var(--md-orange-700); + --jp-warn-color1: var(--md-orange-500); + + --jp-error-color0: var(--md-red-700); + --jp-error-color1: var(--md-red-500); + + + --jp-success-color0: var(--md-green-700); + --jp-success-color1: var(--md-green-500); + + + --jp-info-color0: var(--md-cyan-700); + --jp-info-color1: var(--md-cyan-500); + + --jp-cell-editor-background: var(--jp-layout-color1); + --jp-cell-editor-border-color: var(--md-grey-700); + + + --jp-cell-prompt-not-active-opacity: 1; + --jp-cell-prompt-not-active-font-color: var(--md-grey-300); + + --jp-notebook-multiselected-color: rgba(33, 150, 243, 0.24); + + + --jp-rendermime-error-background: rgba(244, 67, 54, 0.28); + --jp-rendermime-table-row-background: var(--md-grey-900); + --jp-rendermime-table-row-hover-background: rgba(3, 169, 244, 0.2); + + --jp-dialog-background: rgba(0, 0, 0, 0.6); + + + --jp-toolbar-border-color: var(--jp-border-color2); + + --jp-toolbar-box-shadow: 0 0 2px 0 rgba(0, 0, 0, 0.8); + + --jp-toolbar-active-background: var(--jp-layout-color0); + + --jp-input-active-background: var(--jp-layout-color0); + --jp-input-hover-background: var(--jp-layout-color2); + --jp-input-background: var(--md-grey-800); + + --jp-editor-selected-background: var(--jp-layout-color2); + --jp-editor-selected-focused-background: rgba(33, 150, 243, 0.24); + + --jp-mirror-editor-keyword-color: var(--md-green-500); + --jp-mirror-editor-atom-color: var(--md-blue-300); + --jp-mirror-editor-number-color: var(--md-green-400); + --jp-mirror-editor-def-color: var(--md-blue-600); + --jp-mirror-editor-variable-color: var(--md-grey-300); + --jp-mirror-editor-variable-2-color: var(--md-blue-500); + --jp-mirror-editor-variable-3-color: var(--md-green-600); + --jp-mirror-editor-punctuation-color: var(--md-blue-400); + --jp-mirror-editor-property-color: 
var(--md-blue-400); + + --jp-mirror-editor-string-color: #ff7070; + --jp-mirror-editor-string-2-color: var(--md-purple-300); + + --jp-mirror-editor-builtin-color: var(--md-green-600); + + --jp-mirror-editor-tag-color: var(--md-green-700); + --jp-mirror-editor-attribute-color: var(--md-blue-700); + --jp-mirror-editor-header-color: var(--md-blue-500); + --jp-mirror-editor-quote-color: var(--md-green-300); + --jp-mirror-editor-link-color: var(--md-blue-700); + + --jp-collaborator-color1: #ad4a00; + --jp-collaborator-color2: #7b6a00; + --jp-collaborator-color3: #007e00; + --jp-collaborator-color4: #008772; + --jp-collaborator-color5: #0079b9; + --jp-collaborator-color6: #8b45c6; + --jp-collaborator-color7: #be208b; + + --jp-vega-background: var(--md-grey-400); + + --jp-search-toggle-off-opacity: 0.6; + + --jp-search-selected-match-background-color: rgb(255, 225, 0); + + /* scrollbar related styles. Supports every browser except Edge. */ + + /* colors based on JetBrain's Darcula theme */ + + --jp-scrollbar-background-color: #3f4244; + --jp-scrollbar-thumb-color: 88, 96, 97; /* need to specify thumb color as an RGB triplet */ + --jp-scrollbar-endpad: 3px; /* the minimum gap between the thumb and the ends of a scrollbar */ + + /* hacks for setting the thumb shape. 
These do nothing in Firefox */ + + --jp-scrollbar-thumb-margin: 3.5px; /* the space in between the sides of the thumb and the track */ + --jp-scrollbar-thumb-radius: 9px; /* set to a large-ish value for rounded endcaps on the thumb */ + + --jp-json-icon-color: var(--md-orange-500); + --jp-console-icon-background-color: var(--md-blue-500); + + --jp-terminal-icon-background-color: var(--md-grey-200); + --jp-terminal-icon-color: var(--md-grey-800); + --jp-text-editor-icon-color: var(--md-grey-200); + --jp-inspector-icon-color: var(--md-grey-200); + --jp-switch-true-position-color: var(--md-orange-700); + } + + +} + diff --git a/assets/nbconvert/index.html.j2 b/assets/nbconvert/index.html.j2 new file mode 100644 index 00000000..92b9c0b9 --- /dev/null +++ b/assets/nbconvert/index.html.j2 @@ -0,0 +1,6 @@ +{%- extends 'lab/index.html.j2' -%} + +{% block notebook_css %} +{{ super() }} +{{ resources.include_css("dynamic.css") }} +{% endblock notebook_css %} \ No newline at end of file diff --git a/assets/notebook/Collection.ipynb b/assets/notebook/Collection.ipynb new file mode 100644 index 00000000..9e02d83b --- /dev/null +++ b/assets/notebook/Collection.ipynb @@ -0,0 +1,3974 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "82c61f6c-3c7a-4460-b466-9c521973fa5d", + "metadata": { + "papermill": { + "duration": 2.114388, + "end_time": "2023-02-22T22:24:17.995164", + "exception": false, + "start_time": "2023-02-22T22:24:15.880776", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from yugiquery import *\n", + "init_notebook_mode(all_interactive=True)\n", + "\n", + "header('My Collection')" + ] + }, + { + "cell_type": "markdown", + "id": "48cd2c04-37eb-408f-86ee-17e204a2cea9", + "metadata": { + "papermill": { + "duration": 0.063612, + "end_time": "2023-02-22T22:24:18.102172", + "exception": false, + "start_time": "2023-02-22T22:24:18.038560", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "---" + ] 
+ }, + { + "cell_type": "markdown", + "id": "6e66c226", + "metadata": { + "papermill": { + "duration": 0.041131, + "end_time": "2023-02-22T22:24:18.189667", + "exception": false, + "start_time": "2023-02-22T22:24:18.148536", + "status": "completed" + }, + "tags": [], + "toc": true + }, + "source": [ + "Table of Contents\n", + "=================\n", + "\n", + "* [1 Data loading](#Data-loading)\n", + " * [1.1 Read collection](#Read-collection)\n", + "* [2 Check changes](#Check-changes)\n", + " * [2.1 Load previous data](#Load-previous-data)\n", + " * [2.2 Generate changelog](#Generate-changelog)\n", + " * [2.3 Save data](#Save-data)\n", + "* [3 Data visualization](#Data-visualization)\n", + " * [3.1 Full data](#Full-data)\n", + " * [3.2 Card types](#Card-types)\n", + " * [3.3 Monsters](#Monsters)\n", + " * [3.3.1 Attributes](#Attributes)\n", + " * [3.3.2 Primary types](#Primary-types)\n", + " * [3.3.2.1 Has effect discrimination](#Has-effect-discrimination)\n", + " * [3.3.2.2 Is pendulum discrimination](#Is-pendulum-discrimination)\n", + " * [3.3.2.3 By attribute](#By-attribute)\n", + " * [3.3.3 Secondary types](#Secondary-types)\n", + " * [3.3.3.1 By attribute](#By-attribute)\n", + " * [3.3.3.2 By secondary type](#By-secondary-type)\n", + " * [3.3.4 Monster types](#Monster-types)\n", + " * [3.3.4.1 By Attribute](#By-Attribute)\n", + " * [3.3.4.2 By primary type](#By-primary-type)\n", + " * [3.3.4.3 By secondary type](#By-secondary-type)\n", + " * [3.3.5 ATK](#ATK)\n", + " * [3.3.6 DEF](#DEF)\n", + " * [3.3.7 Level/Rank](#Level/Rank)\n", + " * [3.3.7.1 ATK statistics](#ATK-statistics)\n", + " * [3.3.7.2 DEF statistics](#DEF-statistics)\n", + " * [3.3.8 Pendulum scale](#Pendulum-scale)\n", + " * [3.3.8.1 ATK statistics](#ATK-statistics)\n", + " * [3.3.8.2 DEF statistics](#DEF-statistics)\n", + " * [3.3.8.3 Level/Rank statistics](#Level/Rank-statistics)\n", + " * [3.3.9 Link](#Link)\n", + " * [3.3.9.1 ATK statistics](#ATK-statistics)\n", + " * [3.3.10 Link 
Arrows](#Link-Arrows)\n", + " * [3.3.10.1 By combination](#By-combination)\n", + " * [3.3.10.2 By unique](#By-unique)\n", + " * [3.3.10.3 By link](#By-link)\n", + " * [3.4 Spell & Trap](#Spell-&-Trap)\n", + " * [3.4.1 Properties](#Properties)\n", + " * [3.5 Effect type](#Effect-type)\n", + " * [3.5.1 Card type discrimination](#Card-type-discrimination)\n", + " * [3.6 Archseries](#Archseries)\n", + " * [3.6.1 By card type](#By-card-type)\n", + " * [3.6.2 By primary type](#By-primary-type)\n", + " * [3.6.3 By secondary type](#By-secondary-type)\n", + " * [3.6.4 By monster type](#By-monster-type)\n", + " * [3.6.5 By property](#By-property)\n", + " * [3.7 Artworks](#Artworks)\n", + " * [3.7.1 By card type](#By-card-type)\n", + " * [3.7.2 By primary type](#By-primary-type)\n", + " * [3.8 Errata](#Errata)\n", + " * [3.8.1 By card type](#By-card-type)\n", + " * [3.8.2 By primary type](#By-primary-type)\n", + " * [3.8.3 By artwork](#By-artwork)\n", + " * [3.9 TCG & OCG status](#TCG-&-OCG-status)\n", + " * [3.9.1 TGC status](#TGC-status)\n", + " * [3.9.1.1 By card type](#By-card-type)\n", + " * [3.9.1.2 By monster type](#By-monster-type)\n", + " * [3.9.1.3 By archseries](#By-archseries)\n", + " * [3.9.2 OCG status](#OCG-status)\n", + " * [3.9.2.1 By card type](#By-card-type)\n", + " * [3.9.2.2 By monster type](#By-monster-type)\n", + " * [3.9.2.3 By archseries](#By-archseries)\n", + " * [3.9.3 TCG vs. 
OCG status](#TCG-vs.-OCG-status)\n", + "* [4 Epilogue](#Epilogue)\n", + " * [4.1 HTML export](#HTML-export)\n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "b60fae74", + "metadata": { + "papermill": { + "duration": 0.040783, + "end_time": "2023-02-22T22:24:18.272311", + "exception": false, + "start_time": "2023-02-22T22:24:18.231528", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data loading" + ] + }, + { + "cell_type": "markdown", + "id": "699f2460", + "metadata": { + "papermill": { + "duration": 0.041502, + "end_time": "2023-02-22T22:24:18.354961", + "exception": false, + "start_time": "2023-02-22T22:24:18.313459", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Read collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "135ab538", + "metadata": { + "papermill": { + "duration": 0.051187, + "end_time": "2023-02-22T22:24:18.447381", + "exception": false, + "start_time": "2023-02-22T22:24:18.396194", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Timestamp\n", + "timestamp = arrow.utcnow()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5ac8487-bb9d-472c-857a-409b7eed3e2c", + "metadata": { + "papermill": { + "duration": 84.538069, + "end_time": "2023-02-22T22:25:43.027320", + "exception": false, + "start_time": "2023-02-22T22:24:18.489251", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_path = os.path.join(PARENT_DIR, \"data/collection\")\n", + "for ext in [\"\",\".ydk\",\".txt\", \".csv\"]:\n", + " if os.path.isfile(collection_path + ext):\n", + " list_df = pd.read_table(collection_path + ext, header=None)\n", + " print(f\"Loaded {os.path.basename(collection_path + ext)}\")\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b087c257-8610-40d1-a742-6e664891ac15", + "metadata": {}, + "outputs": [], + "source": [ + "def split_column_patterns(df):\n", 
+ " series = df.stack().reset_index(drop=True).str.strip()\n", + " series = series.str.replace(\"\\u200b\",\"\")\n", + " df = pd.DataFrame()\n", + " df['Password'] = series[series.astype(str).str.match(r'^\\d+$')].reset_index(drop=True)\n", + " df['Card number'] = series[series.astype(str).str.match(r'^[A-Z0-9-]+-[A-Z]*\\d*$')].reset_index(drop=True)\n", + " df['Name'] = series[\n", + " ~(series.isin(df['Password']) | series.isin(df['Card number']) | series.str.startswith(\"#\"))\n", + " ].astype(str).reset_index(drop=True)\n", + " return df\n", + "\n", + "list_df = split_column_patterns(list_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74436586-62d0-4686-bfef-808eb8cae7f3", + "metadata": {}, + "outputs": [], + "source": [ + "# Get newest file if exist and correct tuples and timestamps\n", + "tuple_cols = [\n", + " \"Secondary type\",\n", + " \"Effect type\",\n", + " \"Link Arrows\",\n", + " \"Archseries\",\n", + " \"Artwork\",\n", + " \"Errata\",\n", + " \"Rarity\",\n", + " \"Cover card\",\n", + "]\n", + "\n", + "collection_df, _ = load_corrected_latest(\"cards\", tuple_cols)\n", + "\n", + "if not list_df[\"Card number\"].dropna().empty:\n", + " set_lists_df, _ = load_corrected_latest(\"sets\", tuple_cols)\n", + " set_lists_df = set_lists_df[set_lists_df[\"Card number\"].isin(list_df[\"Card number\"].dropna())]\n", + " extra_names = set_lists_df[set_lists_df[\"Card number\"].isin(list_df[\"Card number\"])][\"Name\"].unique()\n", + " list_df = (pd.concat([list_df.drop(columns=['Name', 'Card number']),\n", + " pd.concat([list_df['Name'].dropna(), pd.Series(extra_names).dropna()], ignore_index=True).drop_duplicates().to_frame('Name')], axis=1))\n", + " del set_lists_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4599452b-b7c6-40c4-8f84-84e1f478d5d3", + "metadata": {}, + "outputs": [], + "source": [ + "list_df = list_df.dropna(how=\"all\", axis=1).dropna(axis=0)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "e1bd7293-eef8-4cf4-92f1-593d1185ea02", + "metadata": {}, + "outputs": [], + "source": [ + "mask_list = [collection_df[col].isin(list_df[col]) for col in list_df.columns.intersection(collection_df.columns)]\n", + "mask = np.logical_or.reduce(mask_list)\n", + "collection_df=collection_df[mask]" + ] + }, + { + "cell_type": "markdown", + "id": "9ff0cf51", + "metadata": { + "papermill": { + "duration": 0.066959, + "end_time": "2023-02-22T22:26:28.492052", + "exception": false, + "start_time": "2023-02-22T22:26:28.425093", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Check changes" + ] + }, + { + "cell_type": "markdown", + "id": "233e965d", + "metadata": { + "papermill": { + "duration": 0.045648, + "end_time": "2023-02-22T22:26:28.584087", + "exception": false, + "start_time": "2023-02-22T22:26:28.538439", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Load previous data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb7dc6f8", + "metadata": { + "papermill": { + "duration": 0.66308, + "end_time": "2023-02-22T22:26:29.293911", + "exception": false, + "start_time": "2023-02-22T22:26:28.630831", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Get latest file if exist\n", + "tuple_cols = [\n", + " \"Effect type\",\n", + " \"Secondary type\",\n", + " \"Link Arrows\",\n", + " \"Archseries\",\n", + " \"Artwork\",\n", + " \"Errata\",\n", + "]\n", + "previous_df, previous_ts = load_corrected_latest(\"collection\", tuple_cols)\n", + "\n", + "if previous_df is not None:\n", + " previous_df = previous_df.astype(\n", + " collection_df[previous_df.columns.intersection(collection_df.columns)].dtypes.to_dict()\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "96ce5451", + "metadata": { + "papermill": { + "duration": 0.067311, + "end_time": "2023-02-22T22:26:29.405020", + "exception": false, + "start_time": "2023-02-22T22:26:29.337709", + 
"status": "completed" + }, + "tags": [] + }, + "source": [ + "## Generate changelog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fcca8f1", + "metadata": { + "papermill": { + "duration": 0.393628, + "end_time": "2023-02-22T22:26:29.843153", + "exception": false, + "start_time": "2023-02-22T22:26:29.449525", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "if previous_df is None:\n", + " changelog = None\n", + " print('Skipped')\n", + "else:\n", + " changelog = generate_changelog(previous_df, collection_df, col = 'Name')\n", + " if not changelog.empty:\n", + " display(changelog)\n", + " changelog.to_csv(\n", + " f'../data/{make_filename(report = \"collection\", timestamp = timestamp, previous_timestamp = previous_ts)}', \n", + " index = True\n", + " )\n", + " print('Changelog saved')" + ] + }, + { + "cell_type": "markdown", + "id": "9b15f8ec", + "metadata": { + "papermill": { + "duration": 0.142508, + "end_time": "2023-02-22T22:26:30.031104", + "exception": false, + "start_time": "2023-02-22T22:26:29.888596", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Save data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01e9eb77", + "metadata": { + "papermill": { + "duration": 0.331666, + "end_time": "2023-02-22T22:26:30.407431", + "exception": false, + "start_time": "2023-02-22T22:26:30.075765", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "if changelog is not None and changelog.empty:\n", + " print('No changes. 
New data not saved')\n", + "else: \n", + " collection_df.to_csv(\n", + " f'../data/{make_filename(report = \"collection\", timestamp = timestamp)}',\n", + " index=False\n", + " )\n", + " print('Data saved')" + ] + }, + { + "cell_type": "markdown", + "id": "c0ae1017", + "metadata": { + "papermill": { + "duration": 0.089761, + "end_time": "2023-02-22T22:26:30.542746", + "exception": false, + "start_time": "2023-02-22T22:26:30.452985", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data visualization" + ] + }, + { + "cell_type": "markdown", + "id": "5ad091af", + "metadata": { + "papermill": { + "duration": 0.044781, + "end_time": "2023-02-22T22:26:30.633464", + "exception": false, + "start_time": "2023-02-22T22:26:30.588683", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Full data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb024043", + "metadata": { + "papermill": { + "duration": 0.091385, + "end_time": "2023-02-22T22:26:30.769693", + "exception": false, + "start_time": "2023-02-22T22:26:30.678308", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df" + ] + }, + { + "cell_type": "markdown", + "id": "8a8cb7cd", + "metadata": { + "papermill": { + "duration": 0.045869, + "end_time": "2023-02-22T22:26:30.953962", + "exception": false, + "start_time": "2023-02-22T22:26:30.908093", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Card types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4ec32e1", + "metadata": { + "papermill": { + "duration": 0.06536, + "end_time": "2023-02-22T22:26:31.065344", + "exception": false, + "start_time": "2023-02-22T22:26:30.999984", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print('Total number of Card types:', collection_df['Card type'].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "120a72dc", + "metadata": { + 
"papermill": { + "duration": 0.354006, + "end_time": "2023-02-22T22:26:31.466241", + "exception": false, + "start_time": "2023-02-22T22:26:31.112235", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "card_type_colors = [colors_dict[i] for i in collection_df['Card type'].value_counts().index]\n", + "collection_df['Card type'].value_counts().plot.bar(figsize = (18,6), grid = True, rot=0, color = card_type_colors)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "3b85eaee-9664-4e56-841b-f6849c99d55c", + "metadata": { + "papermill": { + "duration": 0.071833, + "end_time": "2023-12-04T15:46:27.660299", + "exception": false, + "start_time": "2023-12-04T15:46:27.588466", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Monsters" + ] + }, + { + "cell_type": "markdown", + "id": "6b6eb0fc-2901-4a90-8171-154e8df0bc9d", + "metadata": { + "papermill": { + "duration": 0.074412, + "end_time": "2023-12-04T15:46:27.807604", + "exception": false, + "start_time": "2023-12-04T15:46:27.733192", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Attributes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1d455bf-4d62-4f6e-bf57-8686a34095b5", + "metadata": { + "papermill": { + "duration": 0.093107, + "end_time": "2023-12-04T15:46:27.980289", + "exception": false, + "start_time": "2023-12-04T15:46:27.887182", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of attributes:\", collection_df[\"Attribute\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3a1ebdb-0362-40ce-88f9-e19a9691be54", + "metadata": { + "papermill": { + "duration": 0.17771, + "end_time": "2023-12-04T15:46:28.237718", + "exception": false, + "start_time": "2023-12-04T15:46:28.060008", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Card type\", \"Page name\", 
\"Page URL\"]).groupby(\n", + " \"Attribute\"\n", + ").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8e19767-271e-469d-bf01-b17e01c15dd4", + "metadata": { + "papermill": { + "duration": 0.39166, + "end_time": "2023-12-04T15:46:28.704839", + "exception": false, + "start_time": "2023-12-04T15:46:28.313179", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "attribute_colors = [\n", + " colors_dict[i] for i in collection_df[\"Attribute\"].value_counts().index\n", + "]\n", + "collection_df[\"Attribute\"].value_counts().plot.bar(\n", + " figsize=(18, 6), grid=True, rot=0, color=attribute_colors\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e7374fe5-8761-4963-b480-ac7734a6f7e3", + "metadata": { + "papermill": { + "duration": 0.072325, + "end_time": "2023-12-04T15:46:28.932397", + "exception": false, + "start_time": "2023-12-04T15:46:28.860072", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Primary types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e0e87c2-3c99-43d6-ba65-e1b7d317e07a", + "metadata": { + "papermill": { + "duration": 0.180779, + "end_time": "2023-12-04T15:46:29.355043", + "exception": false, + "start_time": "2023-12-04T15:46:29.174264", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of primary types:\", collection_df[\"Primary type\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7df046c2-1175-4449-a8c7-6aa55c140dd8", + "metadata": { + "papermill": { + "duration": 0.211471, + "end_time": "2023-12-04T15:46:29.653863", + "exception": false, + "start_time": "2023-12-04T15:46:29.442392", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Card type\", \"Page name\", \"Page URL\"]).groupby(\n", + " \"Primary type\"\n", + ").nunique()" + ] + }, + { + "cell_type": 
"markdown", + "id": "9d001dda-edea-4768-b64e-5dadddddd6dc", + "metadata": { + "papermill": { + "duration": 0.12162, + "end_time": "2023-12-04T15:46:29.859912", + "exception": false, + "start_time": "2023-12-04T15:46:29.738292", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Has effect discrimination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91f0adc5-a6be-4b48-aa85-6e3be05cc79e", + "metadata": { + "papermill": { + "duration": 0.326807, + "end_time": "2023-12-04T15:46:30.301499", + "exception": false, + "start_time": "2023-12-04T15:46:29.974692", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "effect = pd.crosstab(\n", + " collection_df[\"Primary type\"],\n", + " pd.isna(collection_df[\"Effect type\"]),\n", + " rownames=[\"Primary type\"],\n", + " colnames=[\"Has effect\"],\n", + ").rename(columns={True: \"No Effect\", False: \"Effect\"})\n", + "effect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc52326-fb66-4f67-85f8-cfdc0f41938c", + "metadata": { + "papermill": { + "duration": 0.386946, + "end_time": "2023-12-04T15:46:30.784990", + "exception": false, + "start_time": "2023-12-04T15:46:30.398044", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "monster_type_colors = {\n", + " \"No Effect\": colors_dict[\"Normal Monster\"],\n", + " \"Effect\": [colors_dict[i] for i in effect.index],\n", + "}\n", + "effect.plot.bar(\n", + " figsize=(18, 6),\n", + " stacked=True,\n", + " grid=True,\n", + " rot=0,\n", + " legend=True,\n", + " color=monster_type_colors,\n", + ")\n", + "# plt.yscale('log')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "63ba0fdb-da74-482e-b10e-60cf37e52f78", + "metadata": { + "papermill": { + "duration": 0.085381, + "end_time": "2023-12-04T15:46:30.983026", + "exception": false, + "start_time": "2023-12-04T15:46:30.897645", + "status": "completed" + }, + "tags": [] + }, + "source": [ + 
"Obs: Normal monster can have effect if it is pendulum" + ] + }, + { + "cell_type": "markdown", + "id": "b2bc12ad-f5a6-4fdc-b0a2-c8642654616c", + "metadata": { + "papermill": { + "duration": 0.078801, + "end_time": "2023-12-04T15:46:31.146844", + "exception": false, + "start_time": "2023-12-04T15:46:31.068043", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Is pendulum discrimination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe6cea37-b921-479e-a571-19e22ed1d093", + "metadata": { + "papermill": { + "duration": 0.10735, + "end_time": "2023-12-04T15:46:31.334289", + "exception": false, + "start_time": "2023-12-04T15:46:31.226939", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "pendulum = pd.crosstab(\n", + " collection_df[\"Primary type\"],\n", + " pd.isna(collection_df[\"Pendulum Scale\"]),\n", + " rownames=[\"Primary type\"],\n", + " colnames=[\"Is Pendulum\"],\n", + ").rename(columns={True: \"Not Pendulum\", False: \"Pendulum\"})\n", + "pendulum" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e690a15-1dd5-4a08-a08b-6a4ccf1ad567", + "metadata": { + "papermill": { + "duration": 0.39728, + "end_time": "2023-12-04T15:46:31.811406", + "exception": false, + "start_time": "2023-12-04T15:46:31.414126", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "monster_type_colors_b = {\n", + " \"Pendulum\": colors_dict[\"Pendulum Monster\"],\n", + " \"Not Pendulum\": [colors_dict[i] for i in pendulum.index],\n", + "}\n", + "pendulum.plot.bar(\n", + " figsize=(18, 6),\n", + " stacked=True,\n", + " grid=True,\n", + " rot=0,\n", + " color=monster_type_colors_b,\n", + " legend=True,\n", + " title=\"Primary types - Is pendulum\",\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "132ea0fa-13f5-40ea-83d7-b05f13476d19", + "metadata": { + "papermill": { + "duration": 0.076539, + "end_time": "2023-12-04T15:46:31.995855", + 
"exception": false, + "start_time": "2023-12-04T15:46:31.919316", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By attribute" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9a9063a-a019-4945-8374-3689789ba45d", + "metadata": { + "papermill": { + "duration": 0.141105, + "end_time": "2023-12-04T15:46:32.227700", + "exception": false, + "start_time": "2023-12-04T15:46:32.086595", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "primmary_crosstab = pd.crosstab(collection_df[\"Primary type\"], collection_df[\"Attribute\"])\n", + "primmary_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73e1e09b-9f01-4d12-b9d0-d7f825cff775", + "metadata": { + "papermill": { + "duration": 1.148479, + "end_time": "2023-12-04T15:46:33.458269", + "exception": false, + "start_time": "2023-12-04T15:46:32.309790", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(16, 10))\n", + "sns.heatmap(\n", + " primmary_crosstab.T,\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "9de13f38-c84d-4d5e-8bde-b9c142b42772", + "metadata": { + "papermill": { + "duration": 0.078557, + "end_time": "2023-12-04T15:46:33.647694", + "exception": false, + "start_time": "2023-12-04T15:46:33.569137", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Secondary types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed6fb23f-7173-443d-935e-6c493fe6a2b7", + "metadata": { + "papermill": { + "duration": 0.184716, + "end_time": "2023-12-04T15:46:33.917134", + "exception": false, + "start_time": "2023-12-04T15:46:33.732418", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "exploded_secondary_type = collection_df.explode(\"Secondary type\")\n", + "print(\n", 
+ " \"Total number of secondary types:\",\n", + " exploded_secondary_type[\"Secondary type\"].nunique(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cc3c790-9c18-4089-9893-83703498c8e3", + "metadata": { + "papermill": { + "duration": 0.168618, + "end_time": "2023-12-04T15:46:34.274585", + "exception": false, + "start_time": "2023-12-04T15:46:34.105967", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "exploded_secondary_type.drop(\n", + " columns=[\"Card type\", \"Link\", \"Link Arrows\", \"Page name\", \"Page URL\"]\n", + ").groupby(\"Secondary type\").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34934a8d-f8f7-4222-99f1-5ab2dc55a74c", + "metadata": { + "papermill": { + "duration": 0.343478, + "end_time": "2023-12-04T15:46:34.705797", + "exception": false, + "start_time": "2023-12-04T15:46:34.362319", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "secondary_type_colors = colors_dict[\"Effect Monster\"]\n", + "exploded_secondary_type[\"Secondary type\"].value_counts().plot.bar(\n", + " figsize=(18, 6),\n", + " stacked=True,\n", + " grid=True,\n", + " rot=0,\n", + " color=secondary_type_colors,\n", + " legend=False,\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "261dcb4a-08e4-4ffb-b219-3ad7a43d55e6", + "metadata": { + "papermill": { + "duration": 0.123896, + "end_time": "2023-12-04T15:46:34.945641", + "exception": false, + "start_time": "2023-12-04T15:46:34.821745", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By attribute" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b6783fb-8ec4-48a4-90a3-c257d49a4b54", + "metadata": { + "papermill": { + "duration": 0.177366, + "end_time": "2023-12-04T15:46:35.235395", + "exception": false, + "start_time": "2023-12-04T15:46:35.058029", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": 
[ + "secondary_crosstab = pd.crosstab(\n", + " exploded_secondary_type[\"Secondary type\"], exploded_secondary_type[\"Attribute\"]\n", + ")\n", + "secondary_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35d72d01-2264-46a3-a941-4e210cc9f579", + "metadata": { + "papermill": { + "duration": 0.56671, + "end_time": "2023-12-04T15:46:35.886384", + "exception": false, + "start_time": "2023-12-04T15:46:35.319674", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 6))\n", + "sns.heatmap(\n", + " secondary_crosstab[secondary_crosstab > 0],\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e0852798-703f-4416-9854-4791f9aa8469", + "metadata": { + "papermill": { + "duration": 0.37611, + "end_time": "2023-12-04T15:46:36.374756", + "exception": false, + "start_time": "2023-12-04T15:46:35.998646", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By primary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b198c90b-8d3f-42a4-aab8-deceabd685e7", + "metadata": { + "papermill": { + "duration": 0.138562, + "end_time": "2023-12-04T15:46:36.609801", + "exception": false, + "start_time": "2023-12-04T15:46:36.471239", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "secondary_crosstab_b = pd.crosstab(\n", + " exploded_secondary_type[\"Primary type\"],\n", + " exploded_secondary_type[\"Secondary type\"],\n", + " margins=True,\n", + ")\n", + "secondary_crosstab_b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2dc3ff1-939b-4a84-9233-63004a50814d", + "metadata": { + "papermill": { + "duration": 0.705098, + "end_time": "2023-12-04T15:46:37.399300", + "exception": false, + "start_time": "2023-12-04T15:46:36.694202", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + 
"source": [ + "plt.figure(figsize=(10, 4))\n", + "sns.heatmap(\n", + " secondary_crosstab_b[secondary_crosstab_b>0],\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " # norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "5458e1e0-04b1-4f40-95aa-4be6543667f4", + "metadata": { + "papermill": { + "duration": 0.079839, + "end_time": "2023-12-04T15:46:37.590383", + "exception": false, + "start_time": "2023-12-04T15:46:37.510544", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Monster types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b20e2634-4f6a-4632-858b-d4884cc2e840", + "metadata": { + "papermill": { + "duration": 0.098346, + "end_time": "2023-12-04T15:46:37.780279", + "exception": false, + "start_time": "2023-12-04T15:46:37.681933", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of monster types:\", collection_df[\"Monster type\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1aa4862-46fb-4bc4-9619-f848027cbed8", + "metadata": { + "papermill": { + "duration": 0.204201, + "end_time": "2023-12-04T15:46:38.084505", + "exception": false, + "start_time": "2023-12-04T15:46:37.880304", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Card type\", \"Page name\", \"Page URL\"]).groupby(\n", + " \"Monster type\"\n", + ").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e24a3a4b-c5dd-447a-b678-51e829084e42", + "metadata": { + "papermill": { + "duration": 0.701448, + "end_time": "2023-12-04T15:46:38.874047", + "exception": false, + "start_time": "2023-12-04T15:46:38.172599", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "monster_type_colors = colors_dict[\"Monster Card\"]\n", + "collection_df[\"Monster 
type\"].value_counts().plot.bar(\n", + " figsize=(18, 6), grid=True, rot=45, color=monster_type_colors\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "15a62e4b-d3fb-4381-b998-4ba504bbdf0c", + "metadata": { + "papermill": { + "duration": 0.090956, + "end_time": "2023-12-04T15:46:39.104857", + "exception": false, + "start_time": "2023-12-04T15:46:39.013901", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By Attribute" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ba5a89b-2b8c-4b33-b1c1-52260c4cae36", + "metadata": { + "papermill": { + "duration": 0.130927, + "end_time": "2023-12-04T15:46:39.329786", + "exception": false, + "start_time": "2023-12-04T15:46:39.198859", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "monster_crosstab = pd.crosstab(collection_df[\"Monster type\"], collection_df[\"Attribute\"])\n", + "monster_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a62373e4-1255-478f-9622-0037e52e1a6a", + "metadata": { + "papermill": { + "duration": 1.066977, + "end_time": "2023-12-04T15:46:40.488156", + "exception": false, + "start_time": "2023-12-04T15:46:39.421179", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 5))\n", + "sns.heatmap(\n", + " monster_crosstab[monster_crosstab > 0].T,\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " # norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6af3d325-e43c-4803-a2a0-e1043c2e6115", + "metadata": { + "papermill": { + "duration": 0.083864, + "end_time": "2023-12-04T15:46:40.692837", + "exception": false, + "start_time": "2023-12-04T15:46:40.608973", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By primary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"e56ac97e-53d8-4e57-af91-9881d0c102db", + "metadata": { + "papermill": { + "duration": 0.146767, + "end_time": "2023-12-04T15:46:40.931183", + "exception": false, + "start_time": "2023-12-04T15:46:40.784416", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "monster_crosstab_b = pd.crosstab(\n", + " collection_df[\"Monster type\"][collection_df[\"Monster type\"].notna()], collection_df[\"Primary type\"][collection_df[\"Monster type\"].notna()], dropna=False\n", + ")\n", + "monster_crosstab_b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8d89fa3-8ef1-4449-b0aa-a49a5ee0891b", + "metadata": { + "papermill": { + "duration": 1.486928, + "end_time": "2023-12-04T15:46:42.509838", + "exception": false, + "start_time": "2023-12-04T15:46:41.022910", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 5))\n", + "sns.heatmap(\n", + " monster_crosstab_b[monster_crosstab_b > 0].T,\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e095e940-2e46-4547-84fb-7de4846f325f", + "metadata": { + "papermill": { + "duration": 0.088063, + "end_time": "2023-12-04T15:46:42.700542", + "exception": false, + "start_time": "2023-12-04T15:46:42.612479", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By secondary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8962506-06ab-46cc-917a-31cf13206059", + "metadata": { + "papermill": { + "duration": 0.16291, + "end_time": "2023-12-04T15:46:42.956002", + "exception": false, + "start_time": "2023-12-04T15:46:42.793092", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "monster_crosstab_c = pd.crosstab(\n", + " exploded_secondary_type[\"Monster type\"][exploded_secondary_type[\"Monster type\"].notna()],\n", + " 
exploded_secondary_type[\"Secondary type\"][exploded_secondary_type[\"Monster type\"].notna()],\n", + " dropna=False,\n", + ")\n", + "monster_crosstab_c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ceeee72-82b5-4ff3-83de-b11dcea1eeca", + "metadata": { + "papermill": { + "duration": 1.102182, + "end_time": "2023-12-04T15:46:44.295773", + "exception": false, + "start_time": "2023-12-04T15:46:43.193591", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 5))\n", + "sns.heatmap(\n", + " monster_crosstab_c[monster_crosstab_c > 0].T,\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f2b76b4c-6b98-459d-94b6-079cefe43faa", + "metadata": { + "papermill": { + "duration": 0.092124, + "end_time": "2023-12-04T15:46:46.026349", + "exception": false, + "start_time": "2023-12-04T15:46:45.934225", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### ATK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7584d80-c2bd-4478-8804-301b777b3c7f", + "metadata": { + "papermill": { + "duration": 0.118168, + "end_time": "2023-12-04T15:46:46.248320", + "exception": false, + "start_time": "2023-12-04T15:46:46.130152", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of ATK values:\", collection_df[\"ATK\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "026fe36a-76c9-4a53-8e68-df49fe5639f2", + "metadata": { + "papermill": { + "duration": 0.208889, + "end_time": "2023-12-04T15:46:46.554095", + "exception": false, + "start_time": "2023-12-04T15:46:46.345206", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Card type\", \"Page name\", \"Page URL\"]).groupby(\n", + " \"ATK\"\n", + 
").nunique().sort_index(key=lambda x: pd.to_numeric(x, errors=\"coerce\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84a47008-885b-425b-bca4-f231d81043f3", + "metadata": { + "papermill": { + "duration": 0.999517, + "end_time": "2023-12-04T15:46:47.654938", + "exception": false, + "start_time": "2023-12-04T15:46:46.655421", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "atk_colors = colors_dict[\"Monster Card\"]\n", + "collection_df[\"ATK\"].value_counts().sort_index(\n", + " key=lambda x: pd.to_numeric(x, errors=\"coerce\")\n", + ").plot.bar(figsize=(18, 6), grid=True, color=atk_colors)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "0f2b0eb1-c908-4851-b751-f1043b927b7d", + "metadata": { + "papermill": { + "duration": 0.091302, + "end_time": "2023-12-04T15:46:47.884739", + "exception": false, + "start_time": "2023-12-04T15:46:47.793437", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### DEF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3b80710-cf0f-419c-a373-730114e629ef", + "metadata": { + "papermill": { + "duration": 0.391416, + "end_time": "2023-12-04T15:46:48.382950", + "exception": false, + "start_time": "2023-12-04T15:46:47.991534", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of DEF values:\", collection_df[\"DEF\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cfc41de-f2e7-4575-a8ce-394ad02fe0af", + "metadata": { + "papermill": { + "duration": 0.209406, + "end_time": "2023-12-04T15:46:48.695656", + "exception": false, + "start_time": "2023-12-04T15:46:48.486250", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Card type\", \"Page name\", \"Page URL\"]).groupby(\n", + " \"DEF\"\n", + ").nunique().sort_index(key=lambda x: pd.to_numeric(x, errors=\"coerce\"))" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "d4cf8b59-df19-486f-988d-c3c8635f99e1", + "metadata": { + "papermill": { + "duration": 1.163212, + "end_time": "2023-12-04T15:46:49.964649", + "exception": false, + "start_time": "2023-12-04T15:46:48.801437", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def_colors = colors_dict[\"Monster Card\"]\n", + "collection_df[\"DEF\"].value_counts().sort_index(\n", + " key=lambda x: pd.to_numeric(x, errors=\"coerce\")\n", + ").plot.bar(figsize=(18, 6), grid=True, color=def_colors)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "152e955f-40fe-49d0-98ee-024e5b7fe5b7", + "metadata": { + "papermill": { + "duration": 0.093727, + "end_time": "2023-12-04T15:46:50.187008", + "exception": false, + "start_time": "2023-12-04T15:46:50.093281", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Level/Rank" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "905da500-aee6-4748-9e6b-da91497c12c1", + "metadata": { + "papermill": { + "duration": 0.218493, + "end_time": "2023-12-04T15:46:50.508589", + "exception": false, + "start_time": "2023-12-04T15:46:50.290096", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(\n", + " columns=[\"Card type\", \"Link\", \"Link Arrows\", \"Page name\", \"Page URL\"]\n", + ").groupby(\"Level/Rank\").nunique().sort_index(\n", + " key=lambda x: pd.to_numeric(x, errors=\"coerce\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "482f7235-32c6-4517-8067-eed27687ac75", + "metadata": { + "papermill": { + "duration": 0.485102, + "end_time": "2023-12-04T15:46:51.099295", + "exception": false, + "start_time": "2023-12-04T15:46:50.614193", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "stars_colors = colors_dict[\"Level\"]\n", + "collection_df[\"Level/Rank\"].value_counts().sort_index(\n", + " key=lambda 
x: pd.to_numeric(x, errors=\"coerce\")\n", + ").plot.bar(figsize=(18, 6), grid=True, rot=0, color=stars_colors)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "741a0df3-64cd-4343-b19d-a5015908f9af", + "metadata": { + "papermill": { + "duration": 0.094947, + "end_time": "2023-12-04T15:46:51.304278", + "exception": false, + "start_time": "2023-12-04T15:46:51.209331", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### ATK statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d700cb0b-3631-4b5f-8829-fc058d35c92e", + "metadata": { + "papermill": { + "duration": 0.208463, + "end_time": "2023-12-04T15:46:51.612617", + "exception": false, + "start_time": "2023-12-04T15:46:51.404154", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Level/Rank\", \"ATK\"]].apply(pd.to_numeric, errors=\"coerce\").dropna().astype(\n", + " int\n", + ").groupby(\"Level/Rank\").describe().round(1)" + ] + }, + { + "cell_type": "markdown", + "id": "d5b41e00-8eb7-46f8-8122-e5f6f3f5f9b8", + "metadata": { + "papermill": { + "duration": 0.105274, + "end_time": "2023-12-04T15:46:51.828673", + "exception": false, + "start_time": "2023-12-04T15:46:51.723399", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### DEF statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b2299d8-0b98-42ed-90e0-b27a0a61f5f7", + "metadata": { + "papermill": { + "duration": 0.202546, + "end_time": "2023-12-04T15:46:52.135902", + "exception": false, + "start_time": "2023-12-04T15:46:51.933356", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Level/Rank\", \"DEF\"]].apply(pd.to_numeric, errors=\"coerce\").dropna().astype(\n", + " int\n", + ").groupby(\"Level/Rank\").describe().round(1)" + ] + }, + { + "cell_type": "markdown", + "id": "fa85d96a-8877-42c6-a6a6-b48d33f2281f", + "metadata": { + "papermill": { + "duration": 
0.096126, + "end_time": "2023-12-04T15:46:52.331370", + "exception": false, + "start_time": "2023-12-04T15:46:52.235244", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Pendulum scale" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1eba71e-4127-437e-b316-c985b20ab9bf", + "metadata": { + "papermill": { + "duration": 0.202621, + "end_time": "2023-12-04T15:46:52.649206", + "exception": false, + "start_time": "2023-12-04T15:46:52.446585", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(\n", + " columns=[\"Card type\", \"Link\", \"Link Arrows\", \"Page name\", \"Page URL\"]\n", + ").groupby(\"Pendulum Scale\").nunique().sort_index(\n", + " key=lambda x: pd.to_numeric(x, errors=\"coerce\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "610a288a-d93f-4880-830c-dd5e4a309c4b", + "metadata": { + "papermill": { + "duration": 0.548143, + "end_time": "2023-12-04T15:46:53.317457", + "exception": false, + "start_time": "2023-12-04T15:46:52.769314", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "scales_colors = colors_dict[\"Pendulum Monster\"]\n", + "collection_df[\"Pendulum Scale\"].value_counts().sort_index(\n", + " key=lambda x: pd.to_numeric(x, errors=\"coerce\")\n", + ").plot.bar(\n", + " figsize=(18, 6), grid=True, rot=0, color=scales_colors\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d1ce766b-efab-4355-ae3f-0a7e9ab5d8c7", + "metadata": { + "papermill": { + "duration": 0.171643, + "end_time": "2023-12-04T15:46:53.713491", + "exception": false, + "start_time": "2023-12-04T15:46:53.541848", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### ATK statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85d931e6-f6c4-4745-8326-7d0fd3fc2665", + "metadata": { + "papermill": { + "duration": 0.188642, + "end_time": 
"2023-12-04T15:46:54.044500", + "exception": false, + "start_time": "2023-12-04T15:46:53.855858", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Pendulum Scale\", \"ATK\"]].apply(\n", + " pd.to_numeric, errors=\"coerce\"\n", + ").dropna().astype(int).groupby(\"Pendulum Scale\").describe().round(1)" + ] + }, + { + "cell_type": "markdown", + "id": "eda66a9d-530e-4504-b7a9-ffc625332d62", + "metadata": { + "papermill": { + "duration": 0.115467, + "end_time": "2023-12-04T15:46:54.291641", + "exception": false, + "start_time": "2023-12-04T15:46:54.176174", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### DEF statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2cb7f1cf-5f84-40c0-b0d0-41c88b116bb9", + "metadata": { + "papermill": { + "duration": 0.195269, + "end_time": "2023-12-04T15:46:54.804536", + "exception": false, + "start_time": "2023-12-04T15:46:54.609267", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Pendulum Scale\", \"DEF\"]].apply(\n", + " pd.to_numeric, errors=\"coerce\"\n", + ").dropna().astype(int).groupby(\"Pendulum Scale\").describe().round(1)" + ] + }, + { + "cell_type": "markdown", + "id": "5aa46ac8-86a3-483d-89a6-8d58267f6a91", + "metadata": { + "papermill": { + "duration": 0.123537, + "end_time": "2023-12-04T15:46:55.059406", + "exception": false, + "start_time": "2023-12-04T15:46:54.935869", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### Level/Rank statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cecce34-3a54-4f39-a58c-65eb8ae97f38", + "metadata": { + "papermill": { + "duration": 0.18283, + "end_time": "2023-12-04T15:46:55.352416", + "exception": false, + "start_time": "2023-12-04T15:46:55.169586", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Pendulum Scale\", \"Level/Rank\"]].apply(\n", + 
" pd.to_numeric, errors=\"coerce\"\n", + ").dropna().astype(int).groupby(\"Pendulum Scale\").describe().round(1)" + ] + }, + { + "cell_type": "markdown", + "id": "a7482a66-a39b-470e-853c-6ffc08270518", + "metadata": { + "papermill": { + "duration": 0.101782, + "end_time": "2023-12-04T15:46:55.567214", + "exception": false, + "start_time": "2023-12-04T15:46:55.465432", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Link" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42fdb885-759a-4687-bad8-0cc4a2e3b11f", + "metadata": { + "papermill": { + "duration": 0.184768, + "end_time": "2023-12-04T15:46:55.857375", + "exception": false, + "start_time": "2023-12-04T15:46:55.672607", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(\n", + " columns=[\n", + " \"Card type\",\n", + " \"Primary type\",\n", + " \"Secondary type\",\n", + " \"Level/Rank\",\n", + " \"DEF\",\n", + " \"Pendulum Scale\",\n", + " \"Page name\",\n", + " \"Page URL\",\n", + " ]\n", + ").groupby(\"Link\").nunique().sort_index(key=lambda x: pd.to_numeric(x, errors=\"coerce\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f8c8cdb-3876-48d6-be88-f91f9f90f59a", + "metadata": { + "papermill": { + "duration": 0.379899, + "end_time": "2023-12-04T15:46:56.348036", + "exception": false, + "start_time": "2023-12-04T15:46:55.968137", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "link_colors = colors_dict[\"Link Monster\"]\n", + "collection_df[\"Link\"].value_counts().sort_index(\n", + " key=lambda x: pd.to_numeric(x, errors=\"coerce\")\n", + ").plot.bar(figsize=(18, 6), grid=True, rot=0, color=link_colors)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "99f763bc-bc75-4e01-b121-f804c9049a97", + "metadata": { + "papermill": { + "duration": 0.098435, + "end_time": "2023-12-04T15:46:56.579313", + "exception": false, + "start_time": 
"2023-12-04T15:46:56.480878", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### ATK statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86a7c082-b348-41c7-b063-45e3e6dbd51a", + "metadata": { + "papermill": { + "duration": 0.169583, + "end_time": "2023-12-04T15:46:56.858115", + "exception": false, + "start_time": "2023-12-04T15:46:56.688532", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Link\", \"ATK\"]].apply(pd.to_numeric, errors=\"coerce\").dropna().astype(\n", + " int\n", + ").groupby(\"Link\").describe().round(1)" + ] + }, + { + "cell_type": "markdown", + "id": "8bb2f0c0-5eaa-4c06-b140-3e86c99f2d0f", + "metadata": { + "papermill": { + "duration": 0.109511, + "end_time": "2023-12-04T15:46:57.077784", + "exception": false, + "start_time": "2023-12-04T15:46:56.968273", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Link Arrows" + ] + }, + { + "cell_type": "markdown", + "id": "47dbd933-9471-404f-b0a1-6e663255dfb6", + "metadata": { + "papermill": { + "duration": 0.103368, + "end_time": "2023-12-04T15:46:57.284988", + "exception": false, + "start_time": "2023-12-04T15:46:57.181620", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By combination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca628104-acf1-4e33-867c-dc5bf2dd3bb0", + "metadata": { + "papermill": { + "duration": 0.124539, + "end_time": "2023-12-04T15:46:57.524937", + "exception": false, + "start_time": "2023-12-04T15:46:57.400398", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of link arrow combinations:\", collection_df[\"Link Arrows\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1549f0de-d925-4e87-b660-5431ef84b61d", + "metadata": { + "papermill": { + "duration": 0.195287, + "end_time": "2023-12-04T15:46:57.838180", + "exception": false, + 
"start_time": "2023-12-04T15:46:57.642893", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(\n", + " columns=[\n", + " \"Card type\",\n", + " \"Primary type\",\n", + " \"Level/Rank\",\n", + " \"Pendulum Scale\",\n", + " \"Link\",\n", + " \"Secondary type\",\n", + " \"DEF\",\n", + " \"Page name\",\n", + " \"Page URL\",\n", + " ]\n", + ").groupby(\"Link Arrows\").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec44ac2b-f53a-4e48-afaa-0fd9146674e1", + "metadata": { + "papermill": { + "duration": 1.187565, + "end_time": "2023-12-04T15:46:59.136682", + "exception": false, + "start_time": "2023-12-04T15:46:57.949117", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "arrows_colors = colors_dict[\"Link Monster\"]\n", + "collection_df[\"Link Arrows\"].value_counts().plot.barh(\n", + " figsize=(10, 20), grid=True, color=arrows_colors, title=\"Link arrows combinations\"\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "4dfe4740-a7e2-435c-b30d-b083bd6d86cd", + "metadata": { + "papermill": { + "duration": 0.102629, + "end_time": "2023-12-04T15:46:59.440607", + "exception": false, + "start_time": "2023-12-04T15:46:59.337978", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By unique" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0b72784-cc52-4604-9830-812be9db002d", + "metadata": { + "papermill": { + "duration": 0.167098, + "end_time": "2023-12-04T15:46:59.720603", + "exception": false, + "start_time": "2023-12-04T15:46:59.553505", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[collection_df[\"Link Arrows\"].notna()].drop(\n", + " columns=[\n", + " \"Card type\",\n", + " \"Primary type\",\n", + " \"Level/Rank\",\n", + " \"Pendulum Scale\",\n", + " \"Secondary type\",\n", + " \"DEF\",\n", + " \"Page name\",\n", + " \"Page URL\",\n", + " ]\n", 
+ ").explode(\"Link Arrows\").groupby(\"Link Arrows\").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a089a3e-c6fb-4fce-9701-6c259be51d68", + "metadata": { + "papermill": { + "duration": 1.417547, + "end_time": "2023-12-04T15:47:01.250536", + "exception": false, + "start_time": "2023-12-04T15:46:59.832989", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "arrow_plot(collection_df[\"Link Arrows\"].explode(\"Link Arrows\"))" + ] + }, + { + "cell_type": "markdown", + "id": "4bc88a0b-2104-4550-a12e-4ec80c122910", + "metadata": { + "papermill": { + "duration": 0.103766, + "end_time": "2023-12-04T15:47:01.487315", + "exception": false, + "start_time": "2023-12-04T15:47:01.383549", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By link" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24f29ca8-3215-42f0-8850-81ec24ca7a87", + "metadata": { + "papermill": { + "duration": 0.191304, + "end_time": "2023-12-04T15:47:01.799691", + "exception": false, + "start_time": "2023-12-04T15:47:01.608387", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "arrow_per_link = collection_df[[\"Link Arrows\", \"Link\"]].explode(\"Link Arrows\").dropna()\n", + "arrow_crosstab = pd.crosstab(arrow_per_link[\"Link Arrows\"], arrow_per_link[\"Link\"])\n", + "arrow_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ef628d3-84a9-4a68-b4f2-e05f57ac4f73", + "metadata": { + "papermill": { + "duration": 0.65487, + "end_time": "2023-12-04T15:47:02.681315", + "exception": false, + "start_time": "2023-12-04T15:47:02.026445", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "sns.heatmap(\n", + " arrow_crosstab[arrow_crosstab > 0].T,\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " # norm=mc.LogNorm(),\n", + ")\n", + 
"plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "53b81ad3-20ea-4991-bd20-63b1a2f87144", + "metadata": { + "papermill": { + "duration": 0.104945, + "end_time": "2023-12-04T15:47:02.912973", + "exception": false, + "start_time": "2023-12-04T15:47:02.808028", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Spell & Trap" + ] + }, + { + "cell_type": "markdown", + "id": "93d26517-e0cf-4d4e-a7b5-7c2f0a49efd7", + "metadata": { + "papermill": { + "duration": 0.108835, + "end_time": "2023-12-04T15:47:03.139312", + "exception": false, + "start_time": "2023-12-04T15:47:03.030477", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Properties" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df2d449d-1997-4cb7-a778-573e1c84d02f", + "metadata": { + "papermill": { + "duration": 0.121239, + "end_time": "2023-12-04T15:47:03.372789", + "exception": false, + "start_time": "2023-12-04T15:47:03.251550", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of properties:\", collection_df[\"Property\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c102bde-6fd0-4dff-b39d-dc08c438f8a5", + "metadata": { + "papermill": { + "duration": 0.184655, + "end_time": "2023-12-04T15:47:03.672977", + "exception": false, + "start_time": "2023-12-04T15:47:03.488322", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Card type\", \"Page name\", \"Page URL\"]).groupby(\"Property\").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b68e31b1-7c10-4ec7-90a5-48849992c079", + "metadata": { + "papermill": { + "duration": 0.434558, + "end_time": "2023-12-04T15:47:04.250828", + "exception": false, + "start_time": "2023-12-04T15:47:03.816270", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "st_colors = [\n", + " colors_dict[i]\n", + 
" for i in collection_df[[\"Card type\", \"Property\"]]\n", + " .value_counts()\n", + " .index.get_level_values(0)\n", + "]\n", + "collection_df[\"Property\"].value_counts().plot.bar(\n", + " figsize=(18, 6), grid=True, rot=45, color=st_colors\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "4a6503bc-a133-48d3-9db2-d5a7b7143393", + "metadata": { + "papermill": { + "duration": 0.105349, + "end_time": "2023-12-04T15:47:04.500491", + "exception": false, + "start_time": "2023-12-04T15:47:04.395142", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Effect type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54001ebd-796e-4b93-a3b3-c1b927c1d95d", + "metadata": { + "papermill": { + "duration": 0.13678, + "end_time": "2023-12-04T15:47:04.761415", + "exception": false, + "start_time": "2023-12-04T15:47:04.624635", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of effect types:\", collection_df[\"Effect type\"].explode().nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15efbccf-bf35-48e3-8169-a934fb236fd0", + "metadata": { + "papermill": { + "duration": 0.242257, + "end_time": "2023-12-04T15:47:05.119088", + "exception": false, + "start_time": "2023-12-04T15:47:04.876831", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.explode(\"Effect type\").groupby(\"Effect type\").nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "c8410365-dd99-4f91-a3c9-849cde8b40cd", + "metadata": { + "papermill": { + "duration": 0.113659, + "end_time": "2023-12-04T15:47:05.394121", + "exception": false, + "start_time": "2023-12-04T15:47:05.280462", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Card type discrimination" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3df12dc8-1a37-4279-90c0-75e7339d75e6", + "metadata": { + "papermill": { + 
"duration": 0.17391, + "end_time": "2023-12-04T15:47:05.684273", + "exception": false, + "start_time": "2023-12-04T15:47:05.510363", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "st_diff = (\n", + " collection_df[[\"Card type\", \"Effect type\"]]\n", + " .explode(\"Effect type\")\n", + " .value_counts()\n", + " .unstack(0)\n", + " .fillna(0)\n", + " .astype(int)\n", + ")\n", + "st_diff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cb4d10e-b4f6-4f10-80a2-56f1e2128124", + "metadata": { + "papermill": { + "duration": 0.51611, + "end_time": "2023-12-04T15:47:06.317544", + "exception": false, + "start_time": "2023-12-04T15:47:05.801434", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "st_diff_colors = {\n", + " \"Monster Card\": colors_dict[\"Monster Card\"],\n", + " \"Spell Card\": colors_dict[\"Spell Card\"],\n", + " \"Trap Card\": colors_dict[\"Trap Card\"],\n", + "}\n", + "st_diff.plot.bar(figsize=(18, 6), stacked=True, grid=True, rot=45, color=st_diff_colors)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "506273b0-2f42-4ee4-aa59-37841e6e544d", + "metadata": { + "papermill": { + "duration": 0.111449, + "end_time": "2023-12-04T15:47:06.612115", + "exception": false, + "start_time": "2023-12-04T15:47:06.500666", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Archseries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c875342-4c62-4d5c-82e1-78c496e1030d", + "metadata": { + "papermill": { + "duration": 0.246182, + "end_time": "2023-12-04T15:47:06.991247", + "exception": false, + "start_time": "2023-12-04T15:47:06.745065", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "exploded_archseries = collection_df.explode(\"Archseries\")\n", + "print(\"Total number of Archseries:\", exploded_archseries[\"Archseries\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "e4d3880f-b8f6-41f6-9936-656f447db6cd", + "metadata": { + "papermill": { + "duration": 0.322809, + "end_time": "2023-12-04T15:47:07.511961", + "exception": false, + "start_time": "2023-12-04T15:47:07.189152", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "exploded_archseries.groupby(\"Archseries\").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0dd0ac4-b05d-47a7-88b5-c2eeffdc0037", + "metadata": { + "papermill": { + "duration": 10.030345, + "end_time": "2023-12-04T15:47:17.682051", + "exception": false, + "start_time": "2023-12-04T15:47:07.651706", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "exploded_archseries[\"Archseries\"].value_counts().plot.barh(\n", + " figsize=(10, 100), grid=True, title=\"Archtypes/Series\"\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d5cc3b2f-08fd-4da9-b045-eafce75ae32f", + "metadata": { + "papermill": { + "duration": 0.139227, + "end_time": "2023-12-04T15:47:17.996350", + "exception": false, + "start_time": "2023-12-04T15:47:17.857123", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By card type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf57a506-deef-4142-a55f-5a80ecd33d0b", + "metadata": { + "papermill": { + "duration": 0.314749, + "end_time": "2023-12-04T15:47:18.465910", + "exception": false, + "start_time": "2023-12-04T15:47:18.151161", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "archseries_crosstab = pd.crosstab(\n", + " exploded_archseries[\"Archseries\"], exploded_archseries[\"Card type\"], margins=True\n", + ")\n", + "archseries_crosstab" + ] + }, + { + "cell_type": "markdown", + "id": "df601a59-c84d-4415-b0df-1503ca38dc13", + "metadata": { + "papermill": { + "duration": 0.521155, + "end_time": "2023-12-04T15:47:19.176685", + "exception": false, + "start_time": "2023-12-04T15:47:18.655530", 
+ "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By primary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1e88a20-5059-4a35-81cf-7bdea25a77d7", + "metadata": { + "papermill": { + "duration": 0.281694, + "end_time": "2023-12-04T15:47:19.719463", + "exception": false, + "start_time": "2023-12-04T15:47:19.437769", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "archseries_crosstab_b = pd.crosstab(\n", + " exploded_archseries[\"Archseries\"], exploded_archseries[\"Primary type\"], margins=True\n", + ")\n", + "archseries_crosstab_b" + ] + }, + { + "cell_type": "markdown", + "id": "eb356b20-8309-4c65-9704-1cdfbe2da998", + "metadata": { + "papermill": { + "duration": 0.140602, + "end_time": "2023-12-04T15:47:20.016539", + "exception": false, + "start_time": "2023-12-04T15:47:19.875937", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "\n", + "\n", + "### By secondary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cbe789e-7311-4a68-9690-adaeca142d9d", + "metadata": { + "papermill": { + "duration": 0.265793, + "end_time": "2023-12-04T15:47:20.443159", + "exception": false, + "start_time": "2023-12-04T15:47:20.177366", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "exploded_archseries_secondary_type = exploded_archseries[\n", + " [\"Archseries\", \"Secondary type\"]\n", + "].explode(\"Secondary type\")\n", + "archseries_crosstab_c = pd.crosstab(\n", + " exploded_archseries_secondary_type[\"Archseries\"],\n", + " exploded_archseries_secondary_type[\"Secondary type\"],\n", + " margins=True,\n", + ")\n", + "archseries_crosstab_c" + ] + }, + { + "cell_type": "markdown", + "id": "d6e2f113-b89d-4d63-8ca2-1756695129eb", + "metadata": { + "papermill": { + "duration": 0.142693, + "end_time": "2023-12-04T15:47:20.772973", + "exception": false, + "start_time": "2023-12-04T15:47:20.630280", + "status": "completed" + }, + 
"tags": [] + }, + "source": [ + "### By monster type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4e5a47f-6cb8-4070-a105-9d82083251b3", + "metadata": { + "papermill": { + "duration": 0.341591, + "end_time": "2023-12-04T15:47:21.268488", + "exception": false, + "start_time": "2023-12-04T15:47:20.926897", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "archseries_crosstab_d = pd.crosstab(\n", + " exploded_archseries[\"Archseries\"], exploded_archseries[\"Monster type\"], margins=True\n", + ")\n", + "archseries_crosstab_d" + ] + }, + { + "cell_type": "markdown", + "id": "bd9da03b-506d-47f8-94b4-72ce216c5673", + "metadata": { + "papermill": { + "duration": 0.143243, + "end_time": "2023-12-04T15:47:21.576003", + "exception": false, + "start_time": "2023-12-04T15:47:21.432760", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By property" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be64eaa9-e879-4188-a2ef-ea94cd1127dd", + "metadata": { + "papermill": { + "duration": 0.280021, + "end_time": "2023-12-04T15:47:22.034299", + "exception": false, + "start_time": "2023-12-04T15:47:21.754278", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "archseries_crosstab_e = pd.crosstab(\n", + " exploded_archseries[\"Archseries\"], exploded_archseries[\"Property\"], margins=True\n", + ")\n", + "archseries_crosstab_e" + ] + }, + { + "cell_type": "markdown", + "id": "965a5b43-2e30-4dca-b0db-d88a80119377", + "metadata": { + "papermill": { + "duration": 0.14324, + "end_time": "2023-12-04T15:47:22.353871", + "exception": false, + "start_time": "2023-12-04T15:47:22.210631", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Artworks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09abf3e8-890b-4c68-8207-c7bafb162e37", + "metadata": { + "papermill": { + "duration": 0.160522, + "end_time": 
"2023-12-04T15:47:22.681301", + "exception": false, + "start_time": "2023-12-04T15:47:22.520779", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\n", + " \"Total number of cards with edited or alternate artworks:\",\n", + " collection_df[\"Artwork\"].count(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e12cafd-50d0-44c9-8320-35e41cc97cf1", + "metadata": { + "papermill": { + "duration": 0.290304, + "end_time": "2023-12-04T15:47:23.126525", + "exception": false, + "start_time": "2023-12-04T15:47:22.836221", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Name\", \"Password\", \"TCG status\", \"OCG status\", \"Artwork\"]][\n", + " collection_df[\"Artwork\"].notna()\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8ec3625-8be2-4929-9586-cb1a69fb9734", + "metadata": { + "papermill": { + "duration": 0.290688, + "end_time": "2023-12-04T15:47:23.606005", + "exception": false, + "start_time": "2023-12-04T15:47:23.315317", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "artwork_value_counts = collection_df[\"Artwork\"].value_counts()\n", + "plt.figure(figsize=(20, 8))\n", + "plt.title(\"Artworks\")\n", + "venn2(\n", + " subsets=(\n", + " artwork_value_counts[(\"Alternate\",)],\n", + " artwork_value_counts[(\"Edited\",)],\n", + " artwork_value_counts[(\"Alternate\", \"Edited\")],\n", + " ),\n", + " set_labels=(\"Alternate artwork\", \"Edited artwork\"),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "fd787eb6-335a-4573-bec6-98111552ec32", + "metadata": { + "papermill": { + "duration": 0.149124, + "end_time": "2023-12-04T15:47:23.941322", + "exception": false, + "start_time": "2023-12-04T15:47:23.792198", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By card type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"5136bdf7-d27d-4caf-98f5-1142f6aef3e2", + "metadata": { + "papermill": { + "duration": 0.309997, + "end_time": "2023-12-04T15:47:24.752179", + "exception": false, + "start_time": "2023-12-04T15:47:24.442182", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "artwork_crosstab = pd.crosstab(collection_df[\"Artwork\"], collection_df[\"Card type\"])\n", + "artwork_crosstab" + ] + }, + { + "cell_type": "markdown", + "id": "36290764-7f36-4e09-86bc-b944533c2520", + "metadata": { + "papermill": { + "duration": 0.15031, + "end_time": "2023-12-04T15:47:25.081330", + "exception": false, + "start_time": "2023-12-04T15:47:24.931020", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By primary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "befa27f7-360e-40c1-a7cc-71246968c858", + "metadata": { + "papermill": { + "duration": 0.184641, + "end_time": "2023-12-04T15:47:25.438021", + "exception": false, + "start_time": "2023-12-04T15:47:25.253380", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "artwork_crosstab_b = pd.crosstab(collection_df[\"Artwork\"], collection_df[\"Primary type\"])\n", + "artwork_crosstab_b" + ] + }, + { + "cell_type": "markdown", + "id": "4ad4356a-02e4-44cb-ab5d-a3c521822d92", + "metadata": { + "papermill": { + "duration": 0.147757, + "end_time": "2023-12-04T15:47:25.746531", + "exception": false, + "start_time": "2023-12-04T15:47:25.598774", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "More granularity is unnecessary" + ] + }, + { + "cell_type": "markdown", + "id": "728c0248-718d-4b49-9749-f48b765fa28c", + "metadata": { + "papermill": { + "duration": 0.147776, + "end_time": "2023-12-04T15:47:26.053229", + "exception": false, + "start_time": "2023-12-04T15:47:25.905453", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Errata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"b41746c0-1258-4e2a-aa84-41f7ce72df66", + "metadata": { + "papermill": { + "duration": 0.17064, + "end_time": "2023-12-04T15:47:26.381729", + "exception": false, + "start_time": "2023-12-04T15:47:26.211089", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of cards with errata:\", collection_df[\"Errata\"].count())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c98bfa2-425d-4d30-98ab-cfdc98ee754b", + "metadata": { + "papermill": { + "duration": 0.437268, + "end_time": "2023-12-04T15:47:26.976046", + "exception": false, + "start_time": "2023-12-04T15:47:26.538778", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[[\"Name\", \"Password\", \"TCG status\", \"OCG status\", \"Errata\"]][\n", + " collection_df[\"Errata\"].notna()\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc39a95b-1e73-40dc-9dfd-50eb4d826902", + "metadata": { + "papermill": { + "duration": 0.31186, + "end_time": "2023-12-04T15:47:27.453824", + "exception": false, + "start_time": "2023-12-04T15:47:27.141964", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "errata_counts = (\n", + " collection_df.groupby(\"Errata\").nunique().sort_values(\"Name\", ascending=False)\n", + ")\n", + "errata_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4af77f81-799e-4e49-8477-89bae1a9e7bf", + "metadata": { + "papermill": { + "duration": 0.31122, + "end_time": "2023-12-04T15:47:27.952095", + "exception": false, + "start_time": "2023-12-04T15:47:27.640875", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 8))\n", + "plt.title(\"Errata\")\n", + "sorted_errata_name_counts = (\n", + " errata_counts[\"Name\"]\n", + " .drop((\"Any\",))\n", + " .sort_index(key=lambda x: [(len(i), i) for i in x])\n", + ")\n", + "venn2(\n", + " 
subsets=sorted_errata_name_counts,\n", + " set_labels=sorted_errata_name_counts.index[:-1].str[0],\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "86f52c23-a1f1-4f07-8bab-c548121eb983", + "metadata": { + "papermill": { + "duration": 0.149681, + "end_time": "2023-12-04T15:47:28.287301", + "exception": false, + "start_time": "2023-12-04T15:47:28.137620", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By card type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "812262a5-4a29-40f0-976c-4bb677fe964f", + "metadata": { + "papermill": { + "duration": 0.197182, + "end_time": "2023-12-04T15:47:28.658212", + "exception": false, + "start_time": "2023-12-04T15:47:28.461030", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "errata_crosstab = pd.crosstab(collection_df[\"Errata\"], collection_df[\"Card type\"])\n", + "errata_crosstab.sort_values(by=errata_crosstab.columns.tolist(), ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "59d93b47-22ec-455f-9a45-3a3cd0a8213a", + "metadata": { + "papermill": { + "duration": 0.173092, + "end_time": "2023-12-04T15:47:29.021303", + "exception": false, + "start_time": "2023-12-04T15:47:28.848211", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By primary type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5bb580a-1677-44ee-b6f1-775257c28b98", + "metadata": { + "papermill": { + "duration": 0.192112, + "end_time": "2023-12-04T15:47:29.487659", + "exception": false, + "start_time": "2023-12-04T15:47:29.295547", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "errata_crosstab_b = pd.crosstab(collection_df[\"Errata\"], collection_df[\"Primary type\"])\n", + "errata_crosstab_b.sort_values(by=errata_crosstab_b.columns.tolist(), ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "6bdca09f-c91e-499e-92fa-cbc24f1b561e", + "metadata": { + 
"papermill": { + "duration": 0.155143, + "end_time": "2023-12-04T15:47:29.843684", + "exception": false, + "start_time": "2023-12-04T15:47:29.688541", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "More granularity is unnecessary" + ] + }, + { + "cell_type": "markdown", + "id": "f26113c5-3dfa-445d-9ffb-5c8f16c6a949", + "metadata": { + "papermill": { + "duration": 0.36699, + "end_time": "2023-12-04T15:47:30.395448", + "exception": false, + "start_time": "2023-12-04T15:47:30.028458", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### By artwork" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82cc1938-7c37-424f-9462-815c56a93812", + "metadata": { + "papermill": { + "duration": 0.212769, + "end_time": "2023-12-04T15:47:30.826154", + "exception": false, + "start_time": "2023-12-04T15:47:30.613385", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "errata_crosstab_c = pd.crosstab(collection_df[\"Artwork\"], collection_df[\"Errata\"])\n", + "errata_crosstab_c.sort_values(by=errata_crosstab_c.columns.tolist(), ascending=False)" + ] + }, + { + "cell_type": "markdown", + "id": "f4d000ed-bcc4-4e6f-b8d6-adb507170757", + "metadata": { + "papermill": { + "duration": 0.39526, + "end_time": "2023-12-04T15:47:31.409523", + "exception": false, + "start_time": "2023-12-04T15:47:31.014263", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## TCG & OCG status" + ] + }, + { + "cell_type": "markdown", + "id": "aab7a880-16db-4897-83fc-db62a0740c16", + "metadata": { + "papermill": { + "duration": 0.158153, + "end_time": "2023-12-04T15:47:31.756665", + "exception": false, + "start_time": "2023-12-04T15:47:31.598512", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### TGC status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c7d9256-7618-4ba1-b9ed-ece1f961c766", + "metadata": { + "papermill": { + "duration": 0.169231, + "end_time": 
"2023-12-04T15:47:32.097232", + "exception": false, + "start_time": "2023-12-04T15:47:31.928001", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of TCG status:\", collection_df[\"TCG status\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dff2bc3d-68d4-4ea4-997b-f7b0ec9df441", + "metadata": { + "papermill": { + "duration": 0.293454, + "end_time": "2023-12-04T15:47:32.560808", + "exception": false, + "start_time": "2023-12-04T15:47:32.267354", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Page name\", \"Page URL\"]).groupby(\n", + " \"TCG status\", dropna=False\n", + ").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87d68f74-7674-44fd-ab2f-9cafbfd9e522", + "metadata": { + "papermill": { + "duration": 0.759329, + "end_time": "2023-12-04T15:47:33.501005", + "exception": false, + "start_time": "2023-12-04T15:47:32.741676", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[\"TCG status\"].value_counts(dropna=False).plot.bar(\n", + " figsize=(18, 6), logy=True, grid=True, rot=45\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "db11164d-f521-492e-b063-7c738a63ea5f", + "metadata": { + "papermill": { + "duration": 0.159807, + "end_time": "2023-12-04T15:47:33.849038", + "exception": false, + "start_time": "2023-12-04T15:47:33.689231", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By card type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02c7debb-c9e5-474b-9f74-65b3f2732195", + "metadata": { + "papermill": { + "duration": 0.195538, + "end_time": "2023-12-04T15:47:34.217702", + "exception": false, + "start_time": "2023-12-04T15:47:34.022164", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Remove unlimited\n", + "tcg_crosstab = 
pd.crosstab(collection_df[\"Card type\"], collection_df[\"TCG status\"]).drop(\n", + " [\"Unlimited\"], axis=1\n", + ")\n", + "tcg_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39422447-2c64-4c64-a554-29924e60e6e9", + "metadata": { + "papermill": { + "duration": 0.57199, + "end_time": "2023-12-04T15:47:34.963744", + "exception": false, + "start_time": "2023-12-04T15:47:34.391754", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 6))\n", + "sns.heatmap(\n", + " tcg_crosstab[tcg_crosstab > 0],\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " # norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e34b5dde-d333-444f-907c-9e3f7407f1f8", + "metadata": { + "papermill": { + "duration": 0.159156, + "end_time": "2023-12-04T15:47:35.337567", + "exception": false, + "start_time": "2023-12-04T15:47:35.178411", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By monster type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5989a50-13d9-44fb-80d1-aa12be8a8902", + "metadata": { + "papermill": { + "duration": 0.202355, + "end_time": "2023-12-04T15:47:35.727244", + "exception": false, + "start_time": "2023-12-04T15:47:35.524889", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Remove unlimited\n", + "tcg_crosstab_b = pd.crosstab(collection_df[\"Monster type\"], collection_df[\"TCG status\"]).drop(\n", + " [\"Unlimited\"], axis=1\n", + ")\n", + "tcg_crosstab_b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b021905a-49bd-4dd6-9ca7-f8d4c24b7d2e", + "metadata": { + "papermill": { + "duration": 1.031321, + "end_time": "2023-12-04T15:47:36.945591", + "exception": false, + "start_time": "2023-12-04T15:47:35.914270", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 4))\n", + 
"sns.heatmap(\n", + " tcg_crosstab_b[tcg_crosstab_b > 0].T.dropna(how=\"all\",axis=1),\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "a4920f6c-dde2-4f3d-9ca3-afbddbd4bc92", + "metadata": { + "papermill": { + "duration": 0.303242, + "end_time": "2023-12-04T15:47:37.432676", + "exception": false, + "start_time": "2023-12-04T15:47:37.129434", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By archseries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97ec29bd-9f13-4682-a7ec-08d0fe56286f", + "metadata": { + "papermill": { + "duration": 0.234063, + "end_time": "2023-12-04T15:47:37.849190", + "exception": false, + "start_time": "2023-12-04T15:47:37.615127", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Remove unlimited\n", + "tcg_crosstab_c = pd.crosstab(\n", + " exploded_archseries[\"Archseries\"].where(\n", + " exploded_archseries[\"OCG status\"] != \"Unlimited\"\n", + " ),\n", + " exploded_archseries[\"TCG status\"],\n", + " margins=True,\n", + ")\n", + "tcg_crosstab_c" + ] + }, + { + "cell_type": "markdown", + "id": "586af0fd-91d2-44d0-9e9f-87c7ce9121f4", + "metadata": { + "papermill": { + "duration": 0.159223, + "end_time": "2023-12-04T15:47:38.199942", + "exception": false, + "start_time": "2023-12-04T15:47:38.040719", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### OCG status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b6ecb22-3020-460b-98fb-883c29e60f1d", + "metadata": { + "papermill": { + "duration": 0.177811, + "end_time": "2023-12-04T15:47:38.565700", + "exception": false, + "start_time": "2023-12-04T15:47:38.387889", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "print(\"Total number of OCG status:\", collection_df[\"OCG status\"].nunique())" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "3d7000a8-86b0-4a00-8cf8-1a040a0e891b", + "metadata": { + "papermill": { + "duration": 0.323846, + "end_time": "2023-12-04T15:47:39.082984", + "exception": false, + "start_time": "2023-12-04T15:47:38.759138", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df.drop(columns=[\"Page name\", \"Page URL\"]).groupby(\n", + " \"OCG status\", dropna=False\n", + ").nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d3c61d0-dad2-48da-a214-15f1b3170231", + "metadata": { + "papermill": { + "duration": 0.714169, + "end_time": "2023-12-04T15:47:40.017703", + "exception": false, + "start_time": "2023-12-04T15:47:39.303534", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "collection_df[\"OCG status\"].value_counts(dropna=False).plot.bar(\n", + " figsize=(18, 6), logy=True, grid=True, rot=45\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "391c2d7e-1d24-4831-91d5-29205050d065", + "metadata": { + "papermill": { + "duration": 0.161988, + "end_time": "2023-12-04T15:47:40.372081", + "exception": false, + "start_time": "2023-12-04T15:47:40.210093", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By card type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29c2bc7f-1b6e-4356-9231-a7007c83a1cb", + "metadata": { + "papermill": { + "duration": 0.203154, + "end_time": "2023-12-04T15:47:40.766273", + "exception": false, + "start_time": "2023-12-04T15:47:40.563119", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Remove unlimited\n", + "ocg_crosstab = pd.crosstab(collection_df[\"Card type\"], collection_df[\"OCG status\"]).drop(\n", + " [\"Unlimited\"], axis=1\n", + ")\n", + "ocg_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50a7e74b-884b-42c9-9d2d-8f85f527a30b", + "metadata": { + "papermill": { + "duration": 
0.522394, + "end_time": "2023-12-04T15:47:41.484695", + "exception": false, + "start_time": "2023-12-04T15:47:40.962301", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(12, 6))\n", + "sns.heatmap(ocg_crosstab[ocg_crosstab > 0], annot=True, fmt=\"g\", cmap=\"viridis\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "ae506de9-9e25-4e86-b90a-daef2e2b7d34", + "metadata": { + "papermill": { + "duration": 0.1683, + "end_time": "2023-12-04T15:47:41.839470", + "exception": false, + "start_time": "2023-12-04T15:47:41.671170", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By monster type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "314b3483-2dc6-402f-be7c-5d567d289f7e", + "metadata": { + "papermill": { + "duration": 0.202634, + "end_time": "2023-12-04T15:47:42.235020", + "exception": false, + "start_time": "2023-12-04T15:47:42.032386", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "ocg_crosstab_b = pd.crosstab(collection_df[\"Monster type\"], collection_df[\"OCG status\"]).drop(\n", + " [\"Unlimited\"], axis=1\n", + ")\n", + "ocg_crosstab_b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcb19d36-efbe-4dab-ba05-673c8a55c003", + "metadata": { + "papermill": { + "duration": 0.865688, + "end_time": "2023-12-04T15:47:43.269769", + "exception": false, + "start_time": "2023-12-04T15:47:42.404081", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(20, 4))\n", + "sns.heatmap(\n", + " ocg_crosstab_b[ocg_crosstab_b > 0].T.dropna(how=\"all\",axis=1),\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "1447349a-a239-4129-ae7f-89c47fc1528c", + "metadata": { + "papermill": { + "duration": 0.277018, + "end_time": "2023-12-04T15:47:43.724449", + 
"exception": false, + "start_time": "2023-12-04T15:47:43.447431", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "#### By archseries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec832d96-d1e0-4c99-bf99-3e8449ad2c7b", + "metadata": { + "papermill": { + "duration": 0.276812, + "end_time": "2023-12-04T15:47:44.435923", + "exception": false, + "start_time": "2023-12-04T15:47:44.159111", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Remove unlimited\n", + "ocg_crosstab_c = pd.crosstab(\n", + " exploded_archseries[\"Archseries\"].where(\n", + " exploded_archseries[\"OCG status\"] != \"Unlimited\"\n", + " ),\n", + " exploded_archseries[\"OCG status\"],\n", + " margins=True,\n", + ")\n", + "ocg_crosstab_c" + ] + }, + { + "cell_type": "markdown", + "id": "14a12acc-5dd0-4ec7-8e55-34b81c778fd8", + "metadata": { + "papermill": { + "duration": 0.167196, + "end_time": "2023-12-04T15:47:44.793952", + "exception": false, + "start_time": "2023-12-04T15:47:44.626756", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### TCG vs. 
OCG status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21cc588e-cf78-45f1-93c7-2b448888ae64", + "metadata": { + "papermill": { + "duration": 0.210368, + "end_time": "2023-12-04T15:47:45.202870", + "exception": false, + "start_time": "2023-12-04T15:47:44.992502", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "cg_crosstab = pd.crosstab(\n", + " collection_df[\"OCG status\"], collection_df[\"TCG status\"], dropna=False, margins=False\n", + ")\n", + "cg_crosstab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa6e9e43-33c4-426b-8a09-8015d1100899", + "metadata": { + "papermill": { + "duration": 0.806324, + "end_time": "2023-12-04T15:47:46.201316", + "exception": false, + "start_time": "2023-12-04T15:47:45.394992", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 8))\n", + "sns.heatmap(\n", + " cg_crosstab[cg_crosstab > 0],\n", + " annot=True,\n", + " fmt=\"g\",\n", + " cmap=\"viridis\",\n", + " square=True,\n", + " norm=mc.LogNorm(),\n", + ")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "4058f0b9-3031-4daa-8a4a-2dddf8c74c07", + "metadata": { + "papermill": { + "duration": 0.150264, + "end_time": "2023-02-22T22:27:41.665707", + "exception": false, + "start_time": "2023-02-22T22:27:41.515443", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Epilogue" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "871cb3eb-a0fc-42e8-b516-6d524deba2fc", + "metadata": { + "papermill": { + "duration": 0.165205, + "end_time": "2023-02-22T22:27:41.981675", + "exception": false, + "start_time": "2023-02-22T22:27:41.816470", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# benchmark(report='Collection',timestamp=timestamp)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "310f7d3e-ab76-4c5b-bf3d-f6898d559118", + "metadata": { + 
"papermill": { + "duration": 0.17123, + "end_time": "2023-02-22T22:27:42.303245", + "exception": false, + "start_time": "2023-02-22T22:27:42.132015", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "footer(timestamp)" + ] + }, + { + "cell_type": "markdown", + "id": "fade81da", + "metadata": { + "papermill": { + "duration": 0.161301, + "end_time": "2023-02-22T22:27:42.621788", + "exception": false, + "start_time": "2023-02-22T22:27:42.460487", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## HTML export" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6715b83e-9a4e-423c-81b9-33a81603acd4", + "metadata": { + "papermill": { + "duration": 0.166335, + "end_time": "2023-02-22T22:27:42.941056", + "exception": false, + "start_time": "2023-02-22T22:27:42.774721", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Save notebook on disck before generating HTML report\n", + "save_notebook()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de556e8b", + "metadata": { + "papermill": { + "duration": 3.911313, + "end_time": "2023-02-22T22:27:47.005324", + "exception": false, + "start_time": "2023-02-22T22:27:43.094011", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "! 
jupyter nbconvert Collection.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" + ] + }, + { + "cell_type": "markdown", + "id": "641a7018-0f50-4ab6-b8f3-119a279cbc65", + "metadata": { + "papermill": { + "duration": 0.172809, + "end_time": "2023-02-22T22:27:47.330627", + "exception": false, + "start_time": "2023-02-22T22:27:47.157818", + "status": "completed" + }, + "tags": [ + "exclude" + ] + }, + "source": [ + "## Git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33f041fb-5042-464e-8992-c76484bcde77", + "metadata": {}, + "outputs": [], + "source": [ + "commit(\"*[Cc]olection*\", f\"Collection update - {timestamp.isoformat()}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + }, + "papermill": { + "default_parameters": {}, + "duration": 216.133721, + "end_time": "2023-02-22T22:27:50.787255", + "environment_variables": {}, + "exception": null, + "input_path": "Cards.ipynb", + "output_path": "Cards.ipynb", + "parameters": {}, + "start_time": "2023-02-22T22:24:14.653534", + "version": "2.4.0" + }, + "toc": { + "base_numbering": 1, + "nav_menu": { + "height": "599px", + "width": "228px" + }, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "353px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "toc-autonumbering": false, + "toc-showcode": false, + "toc-showmarkdowntxt": false + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/assets/Template.ipynb b/assets/notebook/Template.ipynb similarity index 98% rename from assets/Template.ipynb rename to assets/notebook/Template.ipynb index d6175a95..345abf8c 100644 --- a/assets/Template.ipynb +++ b/assets/notebook/Template.ipynb @@ -54,7 +54,7 @@ "toc": true }, "source": [ - "Table of Contents\n", + "Table of Contents \n", "=================\n", "\n", "* [1 Data aquisition](#Data-aquisition)\n", @@ -604,7 +604,7 @@ }, "outputs": [], "source": [ - "# ! jupyter nbconvert Template.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "# ! jupyter nbconvert Template.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/Bandai.ipynb b/source/Bandai.ipynb index d3f21e44..deec129f 100644 --- a/source/Bandai.ipynb +++ b/source/Bandai.ipynb @@ -40,6 +40,35 @@ "---" ] }, + { + "cell_type": "markdown", + "id": "e61188c8-73f1-48e5-a165-8fcc66d30f47", + "metadata": {}, + "source": [ + "Table of Contents \n", + "=================\n", + "\n", + "* [1 Data aquisition](#Data-aquisition)\n", + " * [1.1 Fetch online data](#Fetch-online-data)\n", + "* [2 Check changes](#Check-changes)\n", + " * [2.1 Load previous data](#Load-previous-data)\n", + " * [2.2 Generate changelog](#Generate-changelog)\n", + " * [2.3 Save data](#Save-data)\n", + "* [3 Data visualization](#Data-visualization)\n", + " * [3.1 Card type](#Card-type)\n", + " * [3.2 ATK](#ATK)\n", + " * [3.3 DEF](#DEF)\n", + " * [3.4 Level](#Level)\n", + " * [3.4.1 ATK statistics](#ATK-statistics)\n", + " * [3.4.2 DEF statistics](#DEF-statistics)\n", + " * [3.4.3 Level](#Level)\n", + " * [3.5 Monster type](#Monster-type)\n", + " * [3.6 Set](#Set)\n", + "* [4 Epilogue](#Epilogue)\n", + " * [4.1 HTML export](#HTML-export)\n", + "" + ] + }, { "cell_type": "markdown", "id": 
"ddf5afac-0c61-47d8-a0ad-f441a705eb88", @@ -61,6 +90,7 @@ "cell_type": "markdown", "id": "cc75b862-8058-4db2-9499-446fd0d3042e", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "papermill": { "duration": 0.022206, "end_time": "2023-12-18T05:01:31.489359", @@ -124,6 +154,7 @@ "cell_type": "markdown", "id": "ae8c018f-312e-4582-b1ab-af7d06e3e9d1", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "papermill": { "duration": 0.064848, "end_time": "2023-12-18T05:01:32.664431", @@ -308,6 +339,7 @@ "cell_type": "markdown", "id": "f31c671e-6e38-4f7c-b2e2-40a434e444d1", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "papermill": { "duration": 0.025519, "end_time": "2023-12-18T05:01:33.600942", @@ -367,6 +399,7 @@ "cell_type": "markdown", "id": "00c6b243-67ad-42c5-9189-4d2d33c5372c", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "papermill": { "duration": 0.041289, "end_time": "2023-12-18T05:01:34.480531", @@ -447,6 +480,7 @@ "cell_type": "markdown", "id": "8c7f10ac-6c4e-42fe-be55-c2ea61abb7ae", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "papermill": { "duration": 0.064626, "end_time": "2023-12-18T05:01:35.223137", @@ -527,6 +561,7 @@ "cell_type": "markdown", "id": "02b9970b-696d-4ef0-b01d-0385e6c53575", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "papermill": { "duration": 0.029757, "end_time": "2023-12-18T05:01:36.097334", @@ -903,7 +938,7 @@ }, "outputs": [], "source": [ - "! jupyter nbconvert Bandai.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "! 
jupyter nbconvert Bandai.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/Cards.ipynb b/source/Cards.ipynb index a3af4faf..07af5fe7 100644 --- a/source/Cards.ipynb +++ b/source/Cards.ipynb @@ -55,7 +55,7 @@ "toc": true }, "source": [ - "Table of Contents\n", + "Table of Contents \n", "=================\n", "\n", "* [1 Data aquisition](#Data-aquisition)\n", @@ -4101,7 +4101,7 @@ }, "outputs": [], "source": [ - "! jupyter nbconvert Cards.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "! jupyter nbconvert Cards.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/Rush.ipynb b/source/Rush.ipynb index 92727505..154290d2 100644 --- a/source/Rush.ipynb +++ b/source/Rush.ipynb @@ -54,7 +54,7 @@ "tags": [] }, "source": [ - "Table of Contents\n", + "Table of Contents \n", "=================\n", "\n", "* [1 Data aquisition](#Data-aquisition)\n", @@ -2635,7 +2635,7 @@ }, "outputs": [], "source": [ - "! jupyter nbconvert Rush.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "! jupyter nbconvert Rush.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/Sets.ipynb b/source/Sets.ipynb index 0cedb1be..c605fb8c 100644 --- a/source/Sets.ipynb +++ b/source/Sets.ipynb @@ -55,7 +55,7 @@ "toc": true }, "source": [ - "Table of Contents\n", + "Table of Contents \n", "=================\n", "\n", "* [1 Data aquisition](#Data-aquisition)\n", @@ -2109,7 +2109,7 @@ }, "outputs": [], "source": [ - "! 
jupyter nbconvert Sets.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "! jupyter nbconvert Sets.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/Speed.ipynb b/source/Speed.ipynb index e5f9ae47..9fb88078 100644 --- a/source/Speed.ipynb +++ b/source/Speed.ipynb @@ -54,7 +54,7 @@ "tags": [] }, "source": [ - "Table of Contents\n", + "Table of Contents \n", "=================\n", "\n", "* [1 Data aquisition](#Data-aquisition)\n", @@ -1147,7 +1147,7 @@ }, "outputs": [], "source": [ - "! jupyter nbconvert Speed.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "! jupyter nbconvert Speed.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/Timeline.ipynb b/source/Timeline.ipynb index 4a9f476b..89ea3a11 100644 --- a/source/Timeline.ipynb +++ b/source/Timeline.ipynb @@ -55,7 +55,7 @@ "toc": true }, "source": [ - "Table of Contents\n", + "Table of Contents \n", "=================\n", "\n", "* [1 Data preparation](#Data-preparation)\n", @@ -155,7 +155,7 @@ "outputs": [], "source": [ "# Load list of important dates\n", - "with open(\"../assets/dates.json\", \"r\") as f:\n", + "with open(\"../assets/json/dates.json\", \"r\") as f:\n", " dates_json = json.load(f)\n", " anime_df = (\n", " pd.DataFrame(dates_json[\"anime\"][\"series\"])\n", @@ -1406,7 +1406,7 @@ }, "outputs": [], "source": [ - "! 
jupyter nbconvert Timeline.ipynb --output-dir='../' --to=HTML --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags='exclude' --TemplateExporter.exclude_input=True --TemplateExporter.exclude_input_prompt=True --TemplateExporter.exclude_output_prompt=True" + "! jupyter nbconvert Timeline.ipynb --output-dir='../' --to=HTML --template=labdynamic --no-input" ] }, { diff --git a/source/bot.py b/source/bot.py index ae7bd0e0..0cdbfd73 100644 --- a/source/bot.py +++ b/source/bot.py @@ -59,6 +59,8 @@ from discord.ext import commands from tqdm.contrib.discord import tqdm as discord_pbar +# Silence discord.py pynacl optional dependency warning. +discord.VoiceClient.warn_nacl = False # ============== # # Helper methods # diff --git a/source/install.sh b/source/install.sh index 2a41d118..0f53f23c 100644 --- a/source/install.sh +++ b/source/install.sh @@ -1,10 +1,48 @@ #! /bin/bash CURRENT_DIR=$PWD cd "$(dirname "$0")" + +# Install Python packages pip3 install -U pip pip3 install -r requirements.txt pip3 install git+https://github.com/guigoruiz1/halo.git pip3 install git+https://github.com/guigoruiz1/tqdm.git pip3 install -U pynacl pip3 install -U nbstripout + +# Install nbconvert template + +# Get the second line after "data:" from jupyter --paths output +config_directories=$(jupyter --paths | awk '/data:/ {getline; getline; print}') + +# Check if a valid config directory is found +if [ -n "$config_directories" ]; then + templates_directory="$config_directories/nbconvert/templates" + + # Check if the nbconvert templates directory exists + if [ -d "$templates_directory" ]; then + # Create the destination folder if it does not exist + mkdir -p "$templates_directory/yugiquery" + + # Copy the folder to nbconvert templates directory + cp -r "../assets/nbconvert"/* "$templates_directory/yugiquery" + + # Check if the copy was successful + if [ $? 
-eq 0 ]; then + echo "nbconvert template successfully installed in $templates_directory" + else + echo "Error: Failed to install nbconvert template." + echo "Be sure to install it manually or change the template used when generating the HTML report." + fi + + else + echo "Error: Nbconvert templates directory not found in the specified Jupyter data directory." + echo "Be sure to install nbconvert and try again or install the template manually." + fi +else + echo "Error: Data directory not found in the Jupyter paths output." + echo "Make sure Jupyter is installed and configured correctly." +fi +# Finish + cd $CURRENT_DIR \ No newline at end of file diff --git a/source/requirements.txt b/source/requirements.txt index 20c8852b..682ba7ef 100644 --- a/source/requirements.txt +++ b/source/requirements.txt @@ -7,6 +7,7 @@ ipylab>=0.6.0 matplotlib>=3.7.2 matplotlib_venn>=0.11.9 nbformat>=5.9.1 +nbconvert>=7.11.0 numpy>=1.24.4 pandas>=2.0.3 papermill>=2.4.0 @@ -17,4 +18,5 @@ seaborn>=0.12.2 # tqdm>=4.65.0 wikitextparser>=0.53.0 itables>=1.6.2 -sphinx_rtd_theme>=2.0.0 \ No newline at end of file +sphinx_rtd_theme>=2.0.0 +nbstripout>=0.6.1 \ No newline at end of file diff --git a/source/yugiquery.py b/source/yugiquery.py index 2b5b6aca..591ae32c 100644 --- a/source/yugiquery.py +++ b/source/yugiquery.py @@ -267,7 +267,7 @@ def make_filename( str: The generated filename. """ if previous_timestamp is None: - return f"all_{report}_{timestamp.isoformat(timespec='minutes').replace('+00:00', 'Z')}.bz2" + return f"{report}_data_{timestamp.isoformat(timespec='minutes').replace('+00:00', 'Z')}.bz2" else: return f"{report}_changelog_{previous_timestamp.isoformat(timespec='minutes').replace('+00:00', 'Z')}_{timestamp.isoformat(timespec='minutes').replace('+00:00', 'Z')}.bz2" @@ -696,7 +696,7 @@ def load_corrected_latest(name_pattern: str, tuple_cols: List[str] = []): Tuple[pd.DataFrame, arrow.Arrow]: A tuple containing the loaded dataframe and the timestamp of the file. 
""" files = sorted( - glob.glob(f"../data/all_{name_pattern}_*.bz2"), + glob.glob(f"../data/{name_pattern}_data_*.bz2"), key=os.path.getctime, reverse=True, ) @@ -1318,8 +1318,8 @@ def update_index(): # Handle index and readme properly index_file_name = "index.md" readme_file_name = "README.md" - index_input_path = os.path.join(PARENT_DIR, "assets", index_file_name) - readme_input_path = os.path.join(PARENT_DIR, "assets", readme_file_name) + index_input_path = os.path.join(PARENT_DIR, "assets/markdown", index_file_name) + readme_input_path = os.path.join(PARENT_DIR, "assets/markdown", readme_file_name) index_output_path = os.path.join(PARENT_DIR, index_file_name) readme_output_path = os.path.join(PARENT_DIR, readme_file_name) @@ -1375,7 +1375,7 @@ def header(name: str = None): except: name = "" - with open(os.path.join(PARENT_DIR, "assets/header.md")) as f: + with open(os.path.join(PARENT_DIR, "assets/markdown/header.md")) as f: header = f.read() header = header.replace( "@TIMESTAMP@", @@ -1395,7 +1395,7 @@ def footer(timestamp: arrow.Arrow = None): Returns: Markdown: The generated Markdown footer. 
""" - with open(os.path.join(PARENT_DIR, "assets/footer.md")) as f: + with open(os.path.join(PARENT_DIR, "assets/markdown/footer.md")) as f: footer = f.read() now = arrow.utcnow() footer = footer.replace("@TIMESTAMP@", now.strftime("%d/%m/%Y %H:%M %Z")) @@ -2464,7 +2464,7 @@ def fetch_set_lists(titles: List[str], **kwargs): # Separate formating function print(f"{len(titles)} sets requested") titles = up.quote("|".join(titles)) - rarity_dict = load_json(os.path.join(PARENT_DIR, "assets/rarities.json")) + rarity_dict = load_json(os.path.join(PARENT_DIR, "assets/json/rarities.json")) set_lists_df = pd.DataFrame( columns=[ "Set", @@ -2709,7 +2709,7 @@ def fetch_set_info( if debug: print(f"{len(titles)} sets requested") - regions_dict = load_json(os.path.join(PARENT_DIR, "assets/regions.json")) + regions_dict = load_json(os.path.join(PARENT_DIR, "assets/json/regions.json")) # Info to ask info = extra_info + ["Series", "Set type", "Cover card"] # Release to ask @@ -2758,7 +2758,7 @@ def fetch_set_info( # Variables colors_dict = load_json( - os.path.join(PARENT_DIR, "assets/colors.json") + os.path.join(PARENT_DIR, "assets/json/colors.json") ) # Colors dictionary to associate to series and cards # Functions