Updating PaliGemma notebooks (#543)
* Updating PaliGemma notebooks

* Notebook format updates.
joefernandez authored Dec 5, 2024
1 parent ce40377 commit 87e7cde
Showing 2 changed files with 151 additions and 146 deletions.
49 changes: 29 additions & 20 deletions site/en/gemma/docs/paligemma/fine-tuning-paligemma.ipynb
@@ -6,8 +6,8 @@
"id": "G3MMAcssHTML"
},
"source": [
"<link rel=\"stylesheet\" href=\"/site-assets/css/gemma.css\">\n",

This comment has been minimized.

Copy link
@Cometacossin

Cometacossin Dec 10, 2024

"<link rel=\"stylesheet\" href=\"https://fonts.googleapis.com/css2?family=Google+Symbols:opsz,wght,FILL,GRAD@20..48,100..700,0..1,-50..200\" />"
"<link rel=\"stylesheet\" href=\"/site-assets/css/style.css\">\n",
"<link rel=\"stylesheet\" href=\"/site-assets/css/gemma.css\">\n"
]
},
{
@@ -59,15 +59,8 @@
"<td>\n",
"<a target=\"_blank\" href=\"https://github.com/google/generative-ai-docs/blob/main/site/en/gemma/docs/paligemma/fine-tuning-paligemma.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View source on GitHub</a>\n",
"</td>\n",
"</table>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wR53lePHuiP-"
},
"source": [
"</table>\n",
"\n",
"This notebook shows how to fine-tune [PaliGemma](https://ai.google.dev/gemma/docs/paligemma) on a vision-language task with [JAX](https://jax.readthedocs.io/en/latest/index.html). *Fine-tuning* is a process that can improve your model's performance on specific tasks or help the model adhere to specific output requirements when instructions aren't sufficient and you have a set of examples that demonstrate the outputs you want. Gemma-based models like PaliGemma require fine-tuning to produce expected results.\n",
"\n",
"### What's in this notebook\n",
@@ -172,7 +165,11 @@
"# vars as appropriate or make your credentials available in ~/.kaggle/kaggle.json\n",
"\n",
"os.environ[\"KAGGLE_USERNAME\"] = userdata.get('KAGGLE_USERNAME')\n",
"os.environ[\"KAGGLE_KEY\"] = userdata.get('KAGGLE_KEY')"
"os.environ[\"KAGGLE_KEY\"] = userdata.get('KAGGLE_KEY')\n",
"\n",
"# The T4 runtime is tight on memory to finetune this model. Preallocate\n",
"# all memory ahead of time to avoid out-of-memory due to fragmentation.\n",
"os.environ[\"XLA_PYTHON_CLIENT_MEM_FRACTION\"] = \"1.0\""
]
},
{
@@ -265,7 +262,7 @@
"tf.config.set_visible_devices([], \"GPU\")\n",
"tf.config.set_visible_devices([], \"TPU\")\n",
"\n",
"backend = jax.lib.xla_bridge.get_backend()\n",
"backend = jax.extend.backend.get_backend()\n",
"print(f\"JAX version: {jax.__version__}\")\n",
"print(f\"JAX platform: {backend.platform}\")\n",
"print(f\"JAX devices: {jax.device_count()}\")"
@@ -292,7 +289,7 @@
"\n",
"PaliGemma includes several model variations. For this tutorial, you'll use the base [JAX/FLAX PaliGemma 3B weight model](https://www.kaggle.com/models/google/paligemma/jax/paligemma-3b-pt-224).\n",
"\n",
"Download the `float16` version of the model checkpoint from Kaggle by running the following code. This process takes several minutes to complete."
"Download the model checkpoint from Kaggle by running the following code. This process takes several minutes to complete."
]
},
{
@@ -306,12 +303,19 @@
"import os\n",
"import kagglehub\n",
"\n",
"MODEL_PATH = \"./pt_224_128.params.f16.npz\"\n",
"# Use these for PaliGemma-2 3B 224px²\n",
"LLM_VARIANT = \"gemma2_2b\"\n",
"MODEL_PATH = \"./paligemma2-3b-pt-224.b16.npz\"\n",
"KAGGLE_HANDLE = \"google/paligemma-2/jax/paligemma2-3b-pt-224\" # Path to fetch from Kaggle.\n",
"\n",
"# Use these for PaliGemma 1:\n",
"# LLM_VARIANT = \"gemma_2b\"\n",
"# MODEL_PATH = \"./paligemma-3b-pt-224.f16.npz\"\n",
"# KAGGLE_HANDLE = \"google/paligemma/jax/paligemma-3b-pt-224\"\n",
"\n",
"if not os.path.exists(MODEL_PATH):\n",
" print(\"Downloading the checkpoint from Kaggle, this could take a few minutes....\")\n",
" # Note: kaggle archive contains the same checkpoint in multiple formats.\n",
" # Download only the float16 model.\n",
" MODEL_PATH = kagglehub.model_download('google/paligemma/jax/paligemma-3b-pt-224', 'paligemma-3b-pt-224.f16.npz')\n",
" MODEL_PATH = kagglehub.model_download(KAGGLE_HANDLE, MODEL_PATH)\n",
" print(f\"Model path: {MODEL_PATH}\")\n",
"\n",
"TOKENIZER_PATH = \"./paligemma_tokenizer.model\"\n",
@@ -360,8 +364,11 @@
"outputs": [],
"source": [
"# Define model\n",
"\n",
"# IMPORTANT: Gemma-2 has a \"final_logits_softcap\" property, we set it to 0.0\n",
"# for better transfer results.\n",
"model_config = ml_collections.FrozenConfigDict({\n",
" \"llm\": {\"vocab_size\": 257_152},\n",
" \"llm\": {\"vocab_size\": 257_152, \"variant\": LLM_VARIANT, \"final_logits_softcap\": 0.0},\n",
" \"img\": {\"variant\": \"So400m/14\", \"pool_type\": \"none\", \"scan\": True, \"dtype_mm\": \"float16\"}\n",
"})\n",
"model = paligemma.Model(**model_config)\n",
Expand Down Expand Up @@ -420,7 +427,9 @@
"\n",
"@functools.partial(jax.jit, donate_argnums=(0,), static_argnums=(1,))\n",
"def maybe_cast_to_f32(params, trainable):\n",
" return jax.tree.map(lambda p, m: p.astype(jnp.float32) if m else p,\n",
" # Cast others to float16, since some GPUs don't support bf16.\n",
" return jax.tree.map(lambda p, m: p.astype(jnp.float32)\n",
" if m else p.astype(jnp.float16),\n",
" params, trainable)\n",
"\n",
"# Loading all params in simultaneous - albeit much faster and more succinct -\n",

