From 092cc7b83b0b9d68559c65c1bd95885301834f8d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 15 Mar 2024 15:03:10 +0800 Subject: [PATCH] Created using Colaboratory --- featurize_news.ipynb | 3480 ++++++++++++++++++++++-------------------- 1 file changed, 1866 insertions(+), 1614 deletions(-) diff --git a/featurize_news.ipynb b/featurize_news.ipynb index c6e5c66..71079a3 100644 --- a/featurize_news.ipynb +++ b/featurize_news.ipynb @@ -1,508 +1,1318 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "mount_file_id": "1m9v35TGO0OOMzlT4exu7ahfYXXRampiz", - "authorship_tag": "ABX9TyPyYfgejErg0T3/L14UPodC", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "aa44fa70cbf043ea9109a44c1348cb2b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_755207a276dc423e982fd2433783aca9", - "IPY_MODEL_34a1c3fafe3840f8aef86fc0ce16d9d4", - "IPY_MODEL_09d934d3d6664434880da3f52cb52efc" - ], - "layout": "IPY_MODEL_d849f9c8e894473eb054103b6c0965cc" - } + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 351, + "referenced_widgets": [ + "67945d3f3b26409a98f6a4e91dd77380", + "fe0fdd93415c4970b6428e0b66c30681", + "69318b3a6a6749dbbbbdb87b506e94cb", + "ddacb928a856483481546b838556ac42", + "4377321d9ca04dd4ab10fe6ff82cf160", + "cef9e8b68be64849bcc259f7b5dc5e1f", + "d9da04e84c9144b6aabd6e5f23d1614a", + "1ee8c124f9494caf886773be63a2bfd7", + "2bcf94618d054443b3671bc38881af6d", + "120086e5de314c8b84dfa6d5bcaac2bc", + "b14c3547ac08454db2d01b25aa7dfddf", + "33f1dc7b277d4ec5b07aad03cd914c52", + "9038781a628241e191b7ed46cb3a6650", + "6f6a1228068b40e3b2417bd8c4b885cc", + "33e4be715486454389f2607defbd4550", + "4a8724d7531b494db9db7e66b2535244", + "62d0aabee46f40ea97ffae6f2bce5e40", + "63ea2925b4c34ae28af663b19e2ddf67", + "ccf0ea6bd9e140a6a24ac62efe0bdd67", + "7ececbbebe5f48fb81d77a8b91d48064", + "964d5ad0e62f43fda6bd9dae591d397c", + "76b2e19c188c4612afcb34bdc0ff58a2", + "f9fa031e785f4335a3fc4a36d611e207", + "a739a10becc8457b8179a669b4e72bf2", + "f17c69db88f646ca9c2bf1cc354fc907", + "94af3b28929a481abf532b49c3990691", + "ed26469a161c42df9f6c1070b84be68b", + "613313e837544f1fac967279c5746c9f", + "67488582ce1940cfbf07c577758f0b1d", + "605d95933565413886ff6508b7d20453", + "d3981ae1e68d4ad0abd704ea785df995", + "877edc5bb3894dc3935edfeaaf726a8f", + "c2123ea20a684903b82dc376a409c682", + "c58ce8753ead4874b0d81bab8f70a7bd", + "7cabc65f15ef4aedb3a7ce30fb4aac8d", + "96e4be4c35cc45feac1461ae5271bdc4", + "63e14cc87e1946a8876a4a9f2d1281e8", + "dd844b2e73f74885a1f9a336fbffa0e7", + "f3b91c2405d4476bb05d23ffec46c3c0", + "ec83790b87be401286f894faf430a8c4", + "f029693037ed49049bcd87b3d9683b2d", + "2623c225378c495485e0f63d38cc7925", + "bd79b8f8fdbb487f9142bf2090729f1b", + "eb545ae5ddf8409883198d2e5bab0745", + "30725e31e2c84e40bfc1b36f85a52c99", + "edb2ec97694341d19092a23c8be4e0bf", + "7073179f100644718ee8c660da9a1b2d", + "b59153f234b84236bf9fad4100c30b4c", + "7a761207d5e34d55b955587d247458bf", + "b0446147f04946e490e51e1f353fcd51", + "1594def1db68427b92d67948fb2c0b61", + "a5497d52e9b2414fb5624bd5a6caa51a", + "c4dce35597ce42b58b3734d5f9e11b59", + "9f69be914ec84dbdabb5766cb4148cba", + "7f1cc79f9f9847c58d92c055213f4588", + "64f3f713a80e4d67b7cdb012c7a1b73d" + ] }, - "755207a276dc423e982fd2433783aca9": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0e51337eb87e430b8e477488361c52e4", - "placeholder": "​", - "style": "IPY_MODEL_977f5053e46a432fb4d446ce55b13ac1", - "value": "config.json: 100%" - } + "id": "XNPa1N_VETtt", + "outputId": "d681349b-2436-4e85-d28c-3ca6ca3ef946" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] }, - "34a1c3fafe3840f8aef86fc0ce16d9d4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_91b626119aab4cdab59a02609a8caa49", - "max": 480, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_c3fe50a7e98347fe8fb51c7692c87d4f", - "value": 480 - } + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "67945d3f3b26409a98f6a4e91dd77380", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/480 [00:00 tag. You might need to adjust this depending on the webpage structure.\n", + " story = soup.find_all('p')\n", + "\n", + " # Extract the text from the story\n", + " full_story = ' '.join(p.text for p in story)\n", + " full_stories.append(full_story)\n", + " return full_stories\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EnFhQIkrEZs5" + }, + "outputs": [], + "source": [ + "def chunk_text(text, max_len):\n", + " # Tokenize the text into tokens\n", + " tokens = nltk.word_tokenize(text)\n", + "\n", + " # Calculate the number of chunks and the size of the final chunk\n", + " num_chunks = len(tokens) // max_len\n", + " final_chunk_size = len(tokens) % max_len\n", + "\n", + " # If the final chunk is too small, distribute its tokens among the other chunks\n", + " if final_chunk_size < max_len / 2:\n", + " num_chunks += 1\n", + " chunk_sizes = [len(tokens) // num_chunks + (1 if i < len(tokens) % num_chunks else 0) for i in range(num_chunks)]\n", + " chunks = [tokens[sum(chunk_sizes[:i]):sum(chunk_sizes[:i+1])] for i in range(num_chunks)]\n", + " else:\n", + " chunks = [tokens[i:i + max_len] for i in range(0, len(tokens), max_len)]\n", + "\n", + " return chunks\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RR_2MZ5nEZvO" + }, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader\n", + "\n", + "def featurize_stories(text, max_len, top_k):\n", + " # Extract candidate words/phrases\n", + " count = CountVectorizer(ngram_range=n_gram_range, stop_words=stop_words).fit([text])\n", + " all_candidates = count.get_feature_names_out()\n", + " doc = nlp(text)\n", + " noun_phrases = set(chunk.text.strip().lower() for chunk in doc.noun_chunks)\n", + " nouns = set()\n", + " for token in doc:\n", + " if token.pos_ == \"NOUN\":\n", + " nouns.add(token.text)\n", + "\n", + " all_nouns = nouns.union(noun_phrases)\n", + " candidates = list(filter(lambda candidate: candidate in all_nouns, all_candidates))\n", + " candidate_tokens = tokenizer(candidates, padding=True, return_tensors=\"pt\")\n", + " candidate_tokens = {k: v.to(device) for k, v in (candidate_tokens).items()}\n", + " candidate_embeddings = model(**candidate_tokens)[\"pooler_output\"]\n", + " candidate_embeddings = candidate_embeddings.detach()#.to_numpy()\n", + "\n", + " # words = nltk.word_tokenize(text)\n", + " # chunks = [words[i:i + 512] for i in range(0, len(words), 512)]\n", + " chunks = chunk_text(text, max_len) # use this to chunk better and use less padding thus less memory but also less affect from averging\n", + "\n", + " for chunk in chunks:\n", + " text_tokens = tokenizer(chunk, padding=True, return_tensors=\"pt\")\n", + " text_tokens = {k: v.to(device) for k, v in (text_tokens).items()}\n", + " text_embedding = model(**text_tokens)[\"pooler_output\"]\n", + " text_embedding = text_embedding.detach()#.to_numpy()\n", + " embeddings.append(text_embedding)\n", + " max_emb_shape = max(embedding.shape[0] for embedding in embeddings)\n", + " padded_embeddings = [np.pad(embedding.cpu(), ((0, max_emb_shape - embedding.shape[0]), (0, 0))) for embedding in embeddings]\n", + " avg_embedding = np.min(padded_embeddings, axis=0)\n", + " distances = cosine_similarity(avg_embedding, candidate_embeddings.cpu())\n", + " torch.cuda.empty_cache()\n", + " return [candidates[index] for index in distances.argsort()[0][::-1][-top_k:]]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3mlI9cOaEsnS" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "# data=pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet.csv')\n", + "data=pd.read_csv('/content/drive/MyDrive/consult/Louie_california_weather.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VaqzAonQGa11", + "outputId": "52055c96-4aa9-43cc-84ae-426a86df31ab" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 California Snow Storm Latest as Drivers Trappe...\n", + "1 Large, illegal marijuana grow posed ‘extreme f...\n", + "2 @nytimes The classic California extreme weathe...\n", + "3 extreme weather https://t.co/hp9yOVBkMR \n", + "4 Extreme weather https://t.co/T5QtBjx3z4 \n", + " ... \n", + "9995 @cryptojack Conquer the web3 platform with $BL...\n", + "9996 it’s a hockey game not desert storm\n", + "9997 @NHLFlames But we are having fun and we have a...\n", + "9998 @BlastRunnersPVP 3 days past after last announ...\n", + "9999 @Balkaur07527817 Ok then?\n", + "Name: text, Length: 10000, dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['text']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "e5VsbTP7WaSH" + }, + "outputs": [], + "source": [ + "# df = pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features.txt',sep='\\t')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FjoOGK0LCj32" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" + }, + "id": "ta3pWa5BEZx2", + "outputId": "0ea02337-2c56-4903-e553-1462080bc098" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 2%|▏ | 161/10000 [02:28<2:07:04, 1.29it/s]" + ] + } + ], + "source": [ + "rank_articles=[]\n", + "from tqdm import tqdm\n", + "# for i in tqdm(range(len(df),len(data['text']))):\n", + "dataloader = DataLoader(data['text'], batch_size=32, shuffle=True, num_workers=4)\n", + "for i in tqdm(range(len(dataloader))):\n", + " try:\n", + " cc=featurize_stories(data['text'][i], max_len=512, top_k=4)\n", + " # print(cc)\n", + " rank_articles.append(cc)\n", + " except IndexError:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q3pN0rHtfWon" + }, + "outputs": [], + "source": [ + "flattened_list = [item for sublist in rank_articles for item in sublist]\n", + "from collections import Counter\n", + "counter = Counter(flattened_list)\n", + "df = pd.DataFrame.from_dict(counter, orient='index', columns=['Count'])\n", + "\n", + "df = df.sort_values(by='Count',ascending=False)\n", + "# df.to_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features2.txt',sep='\\t')\n", + "df.to_csv('/content/drive/MyDrive/consult/california_weather_tweet_features.txt',sep='\\t')\n", + "\n", + "print(len(df))\n", + "# df[:25]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "perIfMb9CUcm" + }, + "outputs": [], + "source": [ + "len(rank_articles)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rmd7fnVDebrq" + }, + "outputs": [], + "source": [ + "# df = pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features.txt',sep='\\t')\n", + "# df2=pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features2.txt',sep='\\t')\n", + "\n", + "# df=pd.concat([df,df2])\n", + "# print(df['Unnamed: 0'])\n", + "# df = df.groupby('Unnamed: 0').sum().sort_values(by='Count',ascending=False)\n", + "# df=df[df['Count']>int(np.round(len(df)*.001))]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yLeHkpHP_2aj" + }, + "outputs": [], + "source": [ + "import spacy\n", + "\n", + "nlp = spacy.load('en_core_web_sm')\n", + "\n", + "# nouns = ['apple', 'John', 'London', 'dog', 'Mary', 'Paris', 'banana']\n", + "nouns= df.reset_index()['Unnamed: 0'].to_list()\n", + "doc = nlp(' '.join(nouns))\n", + "\n", + "proper_nouns = [token.text for token in doc if token.pos_ == 'PROPN']\n", + "\n", + "print(proper_nouns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YGiVzRMvFX3X" + }, + "outputs": [], + "source": [ + "print(len(proper_nouns))\n", + "proper_nouns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IDJUwNpyFYSl" + }, + "outputs": [], + "source": [ + "len(nouns)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# run from module" + ], + "metadata": { + "id": "6W7czIuqXnGv" + } + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "Nxgc3toYFllR", + "colab": { + "base_uri": "https://localhost:8080/" }, - "96b1e718addb4128944636e5c55bd67f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "outputId": "7bdb39f7-e67f-46a3-ac1b-a1f3a7a14619" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m166.0/166.0 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m51.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for mlx-grph (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "CPU times: user 563 ms, sys: 68.6 ms, total: 631 ms\n", + "Wall time: 1min 21s\n" + ] + } + ], + "source": [ + "# %%time\n", + "# !pip install --quiet git+https://github.com/dcolinmorgan/mlx_grph.git" + ] + }, + { + "cell_type": "code", + "source": [ + "import sys,os,argparse,csv\n", + "sys.argv = ['DT_feat.py', '-n', '100', '-f', '3', '-o', 'OS_feats.csv', '-s', '1']\n", + "parser = argparse.ArgumentParser(description='Process OS data for dynamic features.')\n", + "parser.add_argument('-n', type=int, default=10, help='Number of data items to get')\n", + "parser.add_argument('-f', type=int, default=3, help='Number of features per item to get')\n", + "parser.add_argument('-o', type=str, default='OS_feats.csv', help='Output file name')\n", + "parser.add_argument('-s', type=int, default=1, help='Parallelize requests')\n", + "args, unknown = parser.parse_known_args()\n", + "\n", + "from mlx_grph.DT_feat import featurize_stories, process_data, get_data, process_hit" + ], + "metadata": { + "id": "5-qGJ8Ba8soe" + }, + "execution_count": 26, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import userdata\n", + "ost=userdata.get('OS_TOKEN')\n", + "text_file = open(\"/usr/local/lib/python3.10/dist-packages/mlx_grph/.env\", \"w\")\n", + "text_file.write('OS_TOKEN='+ost)\n", + "text_file.close()" + ], + "metadata": { + "id": "EOshLVZf93fW" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "from tqdm import tqdm\n", + "logging.basicConfig(level=logging.INFO)" + ], + "metadata": { + "id": "odrq4fpMa726" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "data = get_data(100)\n", + "articles = process_data(data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sJ-RIyki7A3r", + "outputId": "232d0318-7c10-4803-ea46-bbccc3d10ba5" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "grabbing text from url: 100%|██████████| 100/100 [01:22<00:00, 1.22it/s]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "rank_articles=[]\n", + "for i in tqdm(articles):\n", + " parts=str(i).split('[', 3)\n", + " try:\n", + " cc=featurize_stories(str(i), top_k = args.f, max_len=512)\n", + " cc.append(parts[1])\n", + " rank_articles.append(cc)\n", + " except Exception as e:\n", + " logging.error(f\"Failed to process article: {e}\")\n", + "with open(args.o, 'w', newline='') as file:\n", + " writer = csv.writer(file)\n", + " writer.writerows(rank_articles)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mXdybPuG3xSB", + "outputId": "318f3163-0f94-44ec-9685-b1c0eefc5661" + }, + "execution_count": 42, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + " 0%| | 0/98 [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Count
firefighters13
afternoon6
disease5
temperatures5
rebels4
emergency services4
morning4
hearst television3
ceasefire3
heavy snow3
evacuation3
cease3
freezing rain3
floor2
watsonville2
refugees2
machines2
heavy snowfall2
parents2
winter weather2
widespread snow2
beach2
'Gaza Strip', '16-01-2024',2
quake2
rescue2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + " \n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df[:25]\",\n \"rows\": 25,\n \"fields\": [\n {\n \"column\": \"Count\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 2,\n \"max\": 13,\n \"num_unique_values\": 6,\n \"samples\": [\n 13,\n 6,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "filtered_list = [sublist for sublist in rank_articles if any('fire'.lower() in s.lower() for s in sublist)]\n" + ], + "metadata": { + "id": "_HJ0Wh8svPyU" + }, + "execution_count": 66, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "filtered_list" + ], + "metadata": { + "id": "emvYxi15u9TF", + "outputId": "0bb0db6c-82bd-4c6b-e5e3-273e257a65ed", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 73, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[['emergency services',\n", + " 'firefighters',\n", + " 'afternoon',\n", + " \"'Corofin, Clare, Ireland', '16-01-2024', \"],\n", + " ['defying order', 'councillors', 'ceasefire', \"None, '16-01-2024', \"],\n", + " ['firefighters', 'soups', 'flashing staff', \"None, '16-01-2024', \"],\n", + " ['unwanted wildfires',\n", + " 'foster parents',\n", + " 'disease',\n", + " \"'California, United States', '16-01-2024', \"],\n", + " ['highway oo', 'firefighters', 'temperatures', \"None, '16-01-2024', \"],\n", + " ['deputies',\n", + " 'united states',\n", + " 'firefighters',\n", + " \"'Lewis County, New York, United States', '16-01-2024', \"],\n", + " ['super tuesday', 'voters', 'firefighters', \"None, '16-01-2024', \"],\n", + " ['firefighters', 'okanagan lake', 'emergency crews', \"None, '16-01-2024', \"],\n", + " ['ceremony',\n", + " 'ceasefire',\n", + " 'minneapolis',\n", + " \"'Philadelphia, Pennsylvania, United States', '16-01-2024', \"],\n", + " ['firefighters', 'lamp', 'nominee', \"None, '16-01-2024', \"],\n", + " ['morning',\n", + " 'parents',\n", + " 'firefighters',\n", + " \"'Kaitangata, New Zealand (General), New Zealand', '16-01-2024', \"],\n", + " ['morning', 'firefighters', 'muscatine firefighters', \"None, '17-01-2024', \"],\n", + " ['emergency services',\n", + " 'firefighters',\n", + " 'afternoon',\n", + " \"'Corofin, Clare, Ireland', '17-01-2024', \"],\n", + " ['firefighters', 'rescue', 'kaylee shipley', \"None, '17-01-2024', \"],\n", + " ['united states',\n", + " 'children',\n", + " 'firefighters',\n", + " \"'Mercy Hospital, Kansas, United States', '17-01-2024', \"],\n", + " ['firefighters',\n", + " 'temperatures',\n", + " 'freezing temperatures',\n", + " \"'Tangipahoa Parish, Louisiana, United States', '17-01-2024', \"],\n", + " ['joyce karam', 'ceasefire', 'hamas terrorists', \"'Israel', '17-01-2024', \"]]" + ] + }, + "metadata": {}, + "execution_count": 73 + } + ] + }, + { + "cell_type": "code", + "source": [ + "!git clone https://github.com/dcolinmorgan/mlx_grph.git\n", + "# !python mlx_grph/mlx_grph/DT_feat.py -n 100 -f 5 -s 1 -o OS_feat.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kpQ2qVisXmsh", + "outputId": "ff939f66-f058-48ac-8041-988a78d85b3f" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'mlx_grph'...\n", + "remote: Enumerating objects: 312, done.\u001b[K\n", + "remote: Counting objects: 100% (312/312), done.\u001b[K\n", + "remote: Compressing objects: 100% (196/196), done.\u001b[K\n", + "remote: Total 312 (delta 155), reused 228 (delta 78), pack-reused 0\u001b[K\n", + "Receiving objects: 100% (312/312), 8.53 MiB | 12.08 MiB/s, done.\n", + "Resolving deltas: 100% (155/155), done.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pytest mlx_grph/mlx_grph/test_DT_feat.py" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3w85wj4Ca1q6", + "outputId": "6b06191b-9eae-4544-cfa0-f1ae6b4a30a9" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m======================================= test session starts ========================================\u001b[0m\n", + "platform linux -- Python 3.10.12, pytest-7.4.4, pluggy-1.4.0\n", + "rootdir: /content/mlx_grph\n", + "plugins: anyio-3.7.1\n", + "collected 3 items \u001b[0m\n", + "\n", + "mlx_grph/mlx_grph/test_DT_feat.py \u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m [100%]\u001b[0m\n", + "\n", + "\u001b[32m======================================== \u001b[32m\u001b[1m3 passed\u001b[0m\u001b[32m in 15.56s\u001b[0m\u001b[32m ========================================\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "VKpI-uF_oOuY" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [], + "mount_file_id": "1m9v35TGO0OOMzlT4exu7ahfYXXRampiz", + "authorship_tag": "ABX9TyPekrRBu9rVgKwSugzuoqLS", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "120086e5de314c8b84dfa6d5bcaac2bc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "2451525358b046faa4dd7755c3ca7d3b": { + "1594def1db68427b92d67948fb2c0b61": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -551,10 +1361,10 @@ "width": null } }, - "832bd5ed2a884f749d5813e817a66cfe": { + "1ee8c124f9494caf886773be63a2bfd7": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -566,10 +1376,10 @@ "description_width": "" } }, - "1763d95c639e4cf2b1ec9d9a0126162a": { + "2623c225378c495485e0f63d38cc7925": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -618,26 +1428,10 @@ "width": null } }, - "5bb4ae3ced224927a201dbd623de447c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "03e314ef83374ef8a2c3c8ce85768195": { + "2bcf94618d054443b3671bc38881af6d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -686,10 +1480,10 @@ "width": null } }, - "9ff5ea43dcd84849884a0e2b4e1e861c": { + "30725e31e2c84e40bfc1b36f85a52c99": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", @@ -701,77 +1495,70 @@ "description_width": "" } }, - "fc41cbce687b497c980eec0189346674": { + "33e4be715486454389f2607defbd4550": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_8ab857952d494d17953ffb62d58cdf21", - "IPY_MODEL_7490938cd39940f0ad27ea6212e6cec5", - "IPY_MODEL_a7bdd3b0bb4d480b974267a59dfa560f" - ], - "layout": "IPY_MODEL_e0ab210dca0e4d1190f28d847a57723d" + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ececbbebe5f48fb81d77a8b91d48064", + "max": 25, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_964d5ad0e62f43fda6bd9dae591d397c", + "value": 25 } }, - "8ab857952d494d17953ffb62d58cdf21": { + "33f1dc7b277d4ec5b07aad03cd914c52": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d08a6e7cb2174fff986a3a6957f2bf87", - "placeholder": "​", - "style": "IPY_MODEL_1edee7236c504b288b11526cc44163b0", - "value": "tokenizer_config.json: 100%" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "7490938cd39940f0ad27ea6212e6cec5": { + "4377321d9ca04dd4ab10fe6ff82cf160": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", + "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_c892dc7fdf1240dba9d708582cdf6582", - "max": 25, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_3a3633bda29e4c50b30fcc0d55f874d9", - "value": 25 + "layout": "IPY_MODEL_b14c3547ac08454db2d01b25aa7dfddf", + "placeholder": "​", + "style": "IPY_MODEL_33f1dc7b277d4ec5b07aad03cd914c52", + "value": " 331M/331M [00:18<00:00, 20.3MB/s]" } }, - "a7bdd3b0bb4d480b974267a59dfa560f": { + "4a8724d7531b494db9db7e66b2535244": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -783,16 +1570,31 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_42c3a045420f4d69b4a1a164ea2b6cd1", + "layout": "IPY_MODEL_76b2e19c188c4612afcb34bdc0ff58a2", "placeholder": "​", - "style": "IPY_MODEL_031687badc5942e8983d2349adae05d3", - "value": " 25.0/25.0 [00:00<00:00, 1.18kB/s]" + "style": "IPY_MODEL_f9fa031e785f4335a3fc4a36d611e207", + "value": " 25.0/25.0 [00:00<00:00, 1.49kB/s]" + } + }, + "605d95933565413886ff6508b7d20453": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "e0ab210dca0e4d1190f28d847a57723d": { + "613313e837544f1fac967279c5746c9f": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -841,10 +1643,10 @@ "width": null } }, - "d08a6e7cb2174fff986a3a6957f2bf87": { + "62d0aabee46f40ea97ffae6f2bce5e40": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -893,25 +1695,34 @@ "width": null } }, - "1edee7236c504b288b11526cc44163b0": { + "63e14cc87e1946a8876a4a9f2d1281e8": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2623c225378c495485e0f63d38cc7925", + "max": 456318, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bd79b8f8fdbb487f9142bf2090729f1b", + "value": 456318 } }, - "c892dc7fdf1240dba9d708582cdf6582": { + "63ea2925b4c34ae28af663b19e2ddf67": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -960,26 +1771,25 @@ "width": null } }, - "3a3633bda29e4c50b30fcc0d55f874d9": { + "64f3f713a80e4d67b7cdb012c7a1b73d": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", - "bar_color": null, "description_width": "" } }, - "42c3a045420f4d69b4a1a164ea2b6cd1": { + "67488582ce1940cfbf07c577758f0b1d": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1028,47 +1838,10 @@ "width": null } }, - "031687badc5942e8983d2349adae05d3": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "79bd89ae105b4ce48af51a1b75e9b626": { + "69318b3a6a6749dbbbbdb87b506e94cb": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_9a98b52fbc384a8ea21ffe6c1f2a2844", - "IPY_MODEL_71d872ef603d428a8be6e57e707d30da", - "IPY_MODEL_1e346bbc0076472baa2d2c43ab75269c" - ], - "layout": "IPY_MODEL_c551bbce23b74bd28fb0b23c9d841aaf" - } - }, - "9a98b52fbc384a8ea21ffe6c1f2a2844": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1080,40 +1853,16 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_adb22d35bf2146b89f14db3b7fca1d12", + "layout": "IPY_MODEL_d9da04e84c9144b6aabd6e5f23d1614a", "placeholder": "​", - "style": "IPY_MODEL_4ff99aa6ee2545f3acc547e347587a4e", - "value": "vocab.json: 100%" + "style": "IPY_MODEL_1ee8c124f9494caf886773be63a2bfd7", + "value": "model.safetensors: 100%" } }, - "71d872ef603d428a8be6e57e707d30da": { + "6f6a1228068b40e3b2417bd8c4b885cc": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_63ff02d360d94d7db392d587719a57b9", - "max": 898823, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e770d02b60b6425fa3e48647be7f4f7a", - "value": 898823 - } - }, - "1e346bbc0076472baa2d2c43ab75269c": { - "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", - "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1125,68 +1874,37 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_f0050e2b77d94707ab50252d0b266a9e", + "layout": "IPY_MODEL_63ea2925b4c34ae28af663b19e2ddf67", "placeholder": "​", - "style": "IPY_MODEL_58cf82cb3c264cefaf3c1de74e20b2e6", - "value": " 899k/899k [00:00<00:00, 11.3MB/s]" + "style": "IPY_MODEL_ccf0ea6bd9e140a6a24ac62efe0bdd67", + "value": "tokenizer_config.json: 100%" } }, - "c551bbce23b74bd28fb0b23c9d841aaf": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", + "7073179f100644718ee8c660da9a1b2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1594def1db68427b92d67948fb2c0b61", + "placeholder": "​", + "style": "IPY_MODEL_a5497d52e9b2414fb5624bd5a6caa51a", + "value": "tokenizer.json: 100%" } }, - "adb22d35bf2146b89f14db3b7fca1d12": { + "76b2e19c188c4612afcb34bdc0ff58a2": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1235,25 +1953,53 @@ "width": null } }, - "4ff99aa6ee2545f3acc547e347587a4e": { + "7a761207d5e34d55b955587d247458bf": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7f1cc79f9f9847c58d92c055213f4588", + "placeholder": "​", + "style": "IPY_MODEL_64f3f713a80e4d67b7cdb012c7a1b73d", + "value": " 1.36M/1.36M [00:00<00:00, 1.69MB/s]" + } + }, + "7cabc65f15ef4aedb3a7ce30fb4aac8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_96e4be4c35cc45feac1461ae5271bdc4", + "IPY_MODEL_63e14cc87e1946a8876a4a9f2d1281e8", + "IPY_MODEL_dd844b2e73f74885a1f9a336fbffa0e7" + ], + "layout": "IPY_MODEL_f3b91c2405d4476bb05d23ffec46c3c0" } }, - "63ff02d360d94d7db392d587719a57b9": { + "7ececbbebe5f48fb81d77a8b91d48064": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1302,26 +2048,10 @@ "width": null } }, - "e770d02b60b6425fa3e48647be7f4f7a": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "f0050e2b77d94707ab50252d0b266a9e": { + "7f1cc79f9f9847c58d92c055213f4588": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1370,25 +2100,26 @@ "width": null } }, - "58cf82cb3c264cefaf3c1de74e20b2e6": { + "877edc5bb3894dc3935edfeaaf726a8f": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "7925f35106e54808b39676248845606e": { + "9038781a628241e191b7ed46cb3a6650": { "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", "model_module_version": "1.5.0", + "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1400,62 +2131,57 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_7564f92351a9458982081a56423d16aa", - "IPY_MODEL_428fe61993834427a3f147c1e1c45675", - "IPY_MODEL_c22df7d5847343f5a2b084aa6068d947" + "IPY_MODEL_6f6a1228068b40e3b2417bd8c4b885cc", + "IPY_MODEL_33e4be715486454389f2607defbd4550", + "IPY_MODEL_4a8724d7531b494db9db7e66b2535244" ], - "layout": "IPY_MODEL_e829d043339f4a85a01a4e4622e43287" + "layout": "IPY_MODEL_62d0aabee46f40ea97ffae6f2bce5e40" } }, - "7564f92351a9458982081a56423d16aa": { + "94af3b28929a481abf532b49c3990691": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", - "_view_name": "HTMLView", + "_view_name": "ProgressView", + "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_79494f58d1114bcdbb0e11ad6d47935b", - "placeholder": "​", - "style": "IPY_MODEL_0406336821fb4e0b9746fde65e46564d", - "value": "merges.txt: 100%" + "layout": "IPY_MODEL_d3981ae1e68d4ad0abd704ea785df995", + "max": 898823, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_877edc5bb3894dc3935edfeaaf726a8f", + "value": 898823 } }, - "428fe61993834427a3f147c1e1c45675": { + "964d5ad0e62f43fda6bd9dae591d397c": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "ProgressStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ef2fc4ceeea3407abab1f625b7142700", - "max": 456318, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_f01fa8f2e0684f999655f49069bbb9a7", - "value": 456318 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" } }, - "c22df7d5847343f5a2b084aa6068d947": { + "96e4be4c35cc45feac1461ae5271bdc4": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", @@ -1467,16 +2193,69 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_3c7beadee46d46a4b5979b0fa4b6a10a", + "layout": "IPY_MODEL_ec83790b87be401286f894faf430a8c4", "placeholder": "​", - "style": "IPY_MODEL_82af0d071a664eb98b276b5999c4ea8c", - "value": " 456k/456k [00:00<00:00, 15.2MB/s]" + "style": "IPY_MODEL_f029693037ed49049bcd87b3d9683b2d", + "value": "merges.txt: 100%" + } + }, + "9f69be914ec84dbdabb5766cb4148cba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a5497d52e9b2414fb5624bd5a6caa51a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a739a10becc8457b8179a669b4e72bf2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f17c69db88f646ca9c2bf1cc354fc907", + "IPY_MODEL_94af3b28929a481abf532b49c3990691", + "IPY_MODEL_ed26469a161c42df9f6c1070b84be68b" + ], + "layout": "IPY_MODEL_613313e837544f1fac967279c5746c9f" } }, - "e829d043339f4a85a01a4e4622e43287": { + "b0446147f04946e490e51e1f353fcd51": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1525,10 +2304,10 @@ "width": null } }, - "79494f58d1114bcdbb0e11ad6d47935b": { + "b14c3547ac08454db2d01b25aa7dfddf": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1577,25 +2356,50 @@ "width": null } }, - "0406336821fb4e0b9746fde65e46564d": { + "b59153f234b84236bf9fad4100c30b4c": { "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c4dce35597ce42b58b3734d5f9e11b59", + "max": 1355863, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9f69be914ec84dbdabb5766cb4148cba", + "value": 1355863 + } + }, + "bd79b8f8fdbb487f9142bf2090729f1b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", + "bar_color": null, "description_width": "" } }, - "ef2fc4ceeea3407abab1f625b7142700": { + "c2123ea20a684903b82dc376a409c682": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1644,26 +2448,10 @@ "width": null } }, - "f01fa8f2e0684f999655f49069bbb9a7": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "3c7beadee46d46a4b5979b0fa4b6a10a": { + "c4dce35597ce42b58b3734d5f9e11b59": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1704,121 +2492,48 @@ "order": null, "overflow": null, "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "82af0d071a664eb98b276b5999c4ea8c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "b29bc8f017f44a65ae4879ee47572cd4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_2a76d3ddf5d84977852a35b845099a81", - "IPY_MODEL_5532152755cd43b2b3d15b2e23fefdcd", - "IPY_MODEL_7a22178f7dce4a1b83e72ce3b2459aa5" - ], - "layout": "IPY_MODEL_4c283b0a9fed49d0b7734b6890b06615" - } - }, - "2a76d3ddf5d84977852a35b845099a81": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_c0798292b1bc4cae83b9a994e39687c5", - "placeholder": "​", - "style": "IPY_MODEL_96c0b204d2554218a730a97e5d71450c", - "value": "tokenizer.json: 100%" + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null } }, - "5532152755cd43b2b3d15b2e23fefdcd": { + "c58ce8753ead4874b0d81bab8f70a7bd": { "model_module": "@jupyter-widgets/controls", - "model_name": "FloatProgressModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_99e9fdc30f8f4eebb35118204a31ec8a", - "max": 1355863, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e8e88807dab74b05b12772d1bd979958", - "value": 1355863 + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "7a22178f7dce4a1b83e72ce3b2459aa5": { + "ccf0ea6bd9e140a6a24ac62efe0bdd67": { "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", "state": { - "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", + "_model_name": "DescriptionStyleModel", "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_679c6eba72214e518555653366ab91c0", - "placeholder": "​", - "style": "IPY_MODEL_ca4f9c42162340c5a239e1a57d3f3da6", - "value": " 1.36M/1.36M [00:00<00:00, 42.6MB/s]" + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" } }, - "4c283b0a9fed49d0b7734b6890b06615": { + "cef9e8b68be64849bcc259f7b5dc5e1f": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1867,10 +2582,10 @@ "width": null } }, - "c0798292b1bc4cae83b9a994e39687c5": { + "d3981ae1e68d4ad0abd704ea785df995": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1919,25 +2634,10 @@ "width": null } }, - "96c0b204d2554218a730a97e5d71450c": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "99e9fdc30f8f4eebb35118204a31ec8a": { + "d9da04e84c9144b6aabd6e5f23d1614a": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -1986,26 +2686,55 @@ "width": null } }, - "e8e88807dab74b05b12772d1bd979958": { + "dd844b2e73f74885a1f9a336fbffa0e7": { "model_module": "@jupyter-widgets/controls", - "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", + "model_name": "HTMLModel", "state": { + "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", + "_model_name": "HTMLModel", "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eb545ae5ddf8409883198d2e5bab0745", + "placeholder": "​", + "style": "IPY_MODEL_30725e31e2c84e40bfc1b36f85a52c99", + "value": " 456k/456k [00:00<00:00, 765kB/s]" + } + }, + "ddacb928a856483481546b838556ac42": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2bcf94618d054443b3671bc38881af6d", + "max": 331055963, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_120086e5de314c8b84dfa6d5bcaac2bc", + "value": 331055963 } }, - "679c6eba72214e518555653366ab91c0": { + "eb545ae5ddf8409883198d2e5bab0745": { "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", "model_module_version": "1.2.0", + "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", @@ -2039,721 +2768,244 @@ "margin": null, "max_height": null, "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "ca4f9c42162340c5a239e1a57d3f3da6": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - } - } - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 336, - "referenced_widgets": [ - "aa44fa70cbf043ea9109a44c1348cb2b", - "755207a276dc423e982fd2433783aca9", - "34a1c3fafe3840f8aef86fc0ce16d9d4", - "09d934d3d6664434880da3f52cb52efc", - "d849f9c8e894473eb054103b6c0965cc", - "0e51337eb87e430b8e477488361c52e4", - "977f5053e46a432fb4d446ce55b13ac1", - "91b626119aab4cdab59a02609a8caa49", - "c3fe50a7e98347fe8fb51c7692c87d4f", - "00eee16604c6408483e154b4df8a7595", - "9e5d9dc852074d589a2692db4222dbd7", - "94928fd867d94a228be7035b5632b8ab", - "62ec1379185246b89888f0f85966de71", - "f88535ec5fd74d849bb4e508a92ec281", - "5a79919899c04baea70463a684eb3459", - "96b1e718addb4128944636e5c55bd67f", - "2451525358b046faa4dd7755c3ca7d3b", - "832bd5ed2a884f749d5813e817a66cfe", - "1763d95c639e4cf2b1ec9d9a0126162a", - "5bb4ae3ced224927a201dbd623de447c", - "03e314ef83374ef8a2c3c8ce85768195", - "9ff5ea43dcd84849884a0e2b4e1e861c", - "fc41cbce687b497c980eec0189346674", - "8ab857952d494d17953ffb62d58cdf21", - "7490938cd39940f0ad27ea6212e6cec5", - "a7bdd3b0bb4d480b974267a59dfa560f", - "e0ab210dca0e4d1190f28d847a57723d", - "d08a6e7cb2174fff986a3a6957f2bf87", - "1edee7236c504b288b11526cc44163b0", - "c892dc7fdf1240dba9d708582cdf6582", - "3a3633bda29e4c50b30fcc0d55f874d9", - "42c3a045420f4d69b4a1a164ea2b6cd1", - "031687badc5942e8983d2349adae05d3", - "79bd89ae105b4ce48af51a1b75e9b626", - "9a98b52fbc384a8ea21ffe6c1f2a2844", - "71d872ef603d428a8be6e57e707d30da", - "1e346bbc0076472baa2d2c43ab75269c", - "c551bbce23b74bd28fb0b23c9d841aaf", - "adb22d35bf2146b89f14db3b7fca1d12", - "4ff99aa6ee2545f3acc547e347587a4e", - "63ff02d360d94d7db392d587719a57b9", - "e770d02b60b6425fa3e48647be7f4f7a", - "f0050e2b77d94707ab50252d0b266a9e", - "58cf82cb3c264cefaf3c1de74e20b2e6", - "7925f35106e54808b39676248845606e", - "7564f92351a9458982081a56423d16aa", - "428fe61993834427a3f147c1e1c45675", - "c22df7d5847343f5a2b084aa6068d947", - "e829d043339f4a85a01a4e4622e43287", - "79494f58d1114bcdbb0e11ad6d47935b", - "0406336821fb4e0b9746fde65e46564d", - "ef2fc4ceeea3407abab1f625b7142700", - "f01fa8f2e0684f999655f49069bbb9a7", - "3c7beadee46d46a4b5979b0fa4b6a10a", - "82af0d071a664eb98b276b5999c4ea8c", - "b29bc8f017f44a65ae4879ee47572cd4", - "2a76d3ddf5d84977852a35b845099a81", - "5532152755cd43b2b3d15b2e23fefdcd", - "7a22178f7dce4a1b83e72ce3b2459aa5", - "4c283b0a9fed49d0b7734b6890b06615", - "c0798292b1bc4cae83b9a994e39687c5", - "96c0b204d2554218a730a97e5d71450c", - "99e9fdc30f8f4eebb35118204a31ec8a", - "e8e88807dab74b05b12772d1bd979958", - "679c6eba72214e518555653366ab91c0", - "ca4f9c42162340c5a239e1a57d3f3da6" - ] - }, - "id": "XNPa1N_VETtt", - "outputId": "2f9de0bd-2426-4678-c5be-024daf0492bc" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "config.json: 0%| | 0.00/480 [00:00 tag. You might need to adjust this depending on the webpage structure.\n", - " story = soup.find_all('p')\n", - "\n", - " # Extract the text from the story\n", - " full_story = ' '.join(p.text for p in story)\n", - " full_stories.append(full_story)\n", - " return full_stories\n" - ], - "metadata": { - "id": "RJjNm4qvEZoD" - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def chunk_text(text, max_len):\n", - " # Tokenize the text into tokens\n", - " tokens = nltk.word_tokenize(text)\n", - "\n", - " # Calculate the number of chunks and the size of the final chunk\n", - " num_chunks = len(tokens) // max_len\n", - " final_chunk_size = len(tokens) % max_len\n", - "\n", - " # If the final chunk is too small, distribute its tokens among the other chunks\n", - " if final_chunk_size < max_len / 2:\n", - " num_chunks += 1\n", - " chunk_sizes = [len(tokens) // num_chunks + (1 if i < len(tokens) % num_chunks else 0) for i in range(num_chunks)]\n", - " chunks = [tokens[sum(chunk_sizes[:i]):sum(chunk_sizes[:i+1])] for i in range(num_chunks)]\n", - " else:\n", - " chunks = [tokens[i:i + max_len] for i in range(0, len(tokens), max_len)]\n", - "\n", - " return chunks\n" - ], - "metadata": { - "id": "EnFhQIkrEZs5" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from torch.utils.data import DataLoader\n", - "\n", - "def featurize_stories(text, max_len, top_k):\n", - " # Extract candidate words/phrases\n", - " count = CountVectorizer(ngram_range=n_gram_range, stop_words=stop_words).fit([text])\n", - " all_candidates = count.get_feature_names_out()\n", - " doc = nlp(text)\n", - " noun_phrases = set(chunk.text.strip().lower() for chunk in doc.noun_chunks)\n", - " nouns = set()\n", - " for token in doc:\n", - " if token.pos_ == \"NOUN\":\n", - " nouns.add(token.text)\n", - "\n", - " all_nouns = nouns.union(noun_phrases)\n", - " candidates = list(filter(lambda candidate: candidate in all_nouns, all_candidates))\n", - " candidate_tokens = tokenizer(candidates, padding=True, return_tensors=\"pt\")\n", - " candidate_tokens = {k: v.to(device) for k, v in (candidate_tokens).items()}\n", - " candidate_embeddings = model(**candidate_tokens)[\"pooler_output\"]\n", - " candidate_embeddings = candidate_embeddings.detach()#.to_numpy()\n", - "\n", - " # words = nltk.word_tokenize(text)\n", - " # chunks = [words[i:i + 512] for i in range(0, len(words), 512)]\n", - " chunks = chunk_text(text, max_len) # use this to chunk better and use less padding thus less memory but also less affect from averging\n", - "\n", - " for chunk in chunks:\n", - " text_tokens = tokenizer(chunk, padding=True, return_tensors=\"pt\")\n", - " text_tokens = {k: v.to(device) for k, v in (text_tokens).items()}\n", - " text_embedding = model(**text_tokens)[\"pooler_output\"]\n", - " text_embedding = text_embedding.detach()#.to_numpy()\n", - " embeddings.append(text_embedding)\n", - " max_emb_shape = max(embedding.shape[0] for embedding in embeddings)\n", - " padded_embeddings = [np.pad(embedding.cpu(), ((0, max_emb_shape - embedding.shape[0]), (0, 0))) for embedding in embeddings]\n", - " avg_embedding = np.min(padded_embeddings, axis=0)\n", - " distances = cosine_similarity(avg_embedding, candidate_embeddings.cpu())\n", - " torch.cuda.empty_cache()\n", - " return [candidates[index] for index in distances.argsort()[0][::-1][-top_k:]]\n", - "\n" - ], - "metadata": { - "id": "RR_2MZ5nEZvO" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import pandas as pd\n", - "# data=pd.read_csv('/content/drive/MyDrive/consult/Louie_disaster_tweets.csv',header=None)\n", - "data=pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet.csv')" - ], - "metadata": { - "id": "3mlI9cOaEsnS" - }, - "execution_count": 14, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "data['text']" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "ec83790b87be401286f894faf430a8c4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - "id": "VaqzAonQGa11", - "outputId": "eb0ccd0e-95d9-4ecb-ff5a-bd4fc20cae1d" - }, - "execution_count": 15, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 #DoNothingDeSantis\\nWith Hurricane Season Loom...\n", - "1 @drscribblesmd I'll be so glad when hurricane ...\n", - "2 @wideawake_media @useful_eater The Florida hur...\n", - "3 @135knots The hurricane landfall better be awa...\n", - "4 @WSJ It’s Hurricane season. \n", - " ... \n", - "8881 @Shiri8580 @Chandrakbose Ur story of Savarkar ...\n", - "8882 Accuracy of labeling refers to the ingredients...\n", - "8883 @citizentvkenya Is she even still a Bishop? An...\n", - "8884 🔥 New RetroDrop : DistricOne x OpenLaverage \\n...\n", - "8885 @alikous I think what makes him more willing i...\n", - "Name: text, Length: 8886, dtype: object" - ] - }, - "metadata": {}, - "execution_count": 15 - } - ] - }, - { - "cell_type": "code", - "source": [ - "df = pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features.txt',sep='\\t')" - ], - "metadata": { - "id": "e5VsbTP7WaSH" - }, - "execution_count": 16, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "rank_articles=[]\n", - "from tqdm import tqdm\n", - "for i in tqdm(range(len(df),len(data['text']))):\n", - " try:\n", - " cc=featurize_stories(data['text'][i], max_len=512, top_k=4)\n", - " # print(cc)\n", - " rank_articles.append(cc)\n", - " except IndexError:\n", - " pass" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "ed26469a161c42df9f6c1070b84be68b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c2123ea20a684903b82dc376a409c682", + "placeholder": "​", + "style": "IPY_MODEL_c58ce8753ead4874b0d81bab8f70a7bd", + "value": " 899k/899k [00:00<00:00, 1.13MB/s]" + } }, - "id": "ta3pWa5BEZx2", - "outputId": "57d328e4-81f7-4aa0-c629-afe56889c4ad" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - " 83%|████████▎ | 6098/7321 [2:06:39<48:04, 2.36s/it]" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "flattened_list = [item for sublist in rank_articles for item in sublist]\n", - "from collections import Counter\n", - "counter = Counter(flattened_list)\n", - "df = pd.DataFrame.from_dict(counter, orient='index', columns=['Count'])\n", - "\n", - "df = df.sort_values(by='Count',ascending=False)\n", - "df.to_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features2.txt',sep='\\t')\n", - "print(len(df))\n", - "# df[:25]" - ], - "metadata": { - "id": "Q3pN0rHtfWon" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "df = pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features.txt',sep='\\t')\n", - "df2=pd.read_csv('/content/drive/MyDrive/consult/florida-hurricane-tweet_features2.txt',sep='\\t')\n", - "\n", - "df=pd.concat([df,df2])\n", - "print(df['Unnamed: 0'])\n", - "df = df.groupby('Unnamed: 0').sum().sort_values(by='Count',ascending=False)\n", - "df=df[df['Count']>int(np.round(len(df)*.001))]" - ], - "metadata": { - "id": "rmd7fnVDebrq", - "colab": { - "base_uri": "https://localhost:8080/" + "edb2ec97694341d19092a23c8be4e0bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7073179f100644718ee8c660da9a1b2d", + "IPY_MODEL_b59153f234b84236bf9fad4100c30b4c", + "IPY_MODEL_7a761207d5e34d55b955587d247458bf" + ], + "layout": "IPY_MODEL_b0446147f04946e490e51e1f353fcd51" + } }, - "outputId": "7bc9282e-cdf8-4fa3-f8de-b3c52b10f825" - }, - "execution_count": 45, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "0 storm\n", - "1 tornado\n", - "2 weather\n", - "3 snow\n", - "4 day\n", - " ... \n", - "6020 witnesses\n", - "6021 fleece\n", - "6022 fleas\n", - "6023 ensemble members\n", - "6024 walkway\n", - "Name: Unnamed: 0, Length: 7590, dtype: object\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "import spacy\n", - "\n", - "nlp = spacy.load('en_core_web_sm')\n", - "\n", - "# nouns = ['apple', 'John', 'London', 'dog', 'Mary', 'Paris', 'banana']\n", - "nouns= df.reset_index()['Unnamed: 0'].to_list()\n", - "doc = nlp(' '.join(nouns))\n", - "\n", - "proper_nouns = [token.text for token in doc if token.pos_ == 'PROPN']\n", - "\n", - "print(proper_nouns)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "f029693037ed49049bcd87b3d9683b2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - "id": "yLeHkpHP_2aj", - "outputId": "9a041c28-2188-40aa-ff4c-81d78e83e78c" - }, - "execution_count": 46, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "['tornado', 'lol', 'tokens', 'march', 'partnership', 'horizon', 'coast', 'times', 'texas', 'california', 'desantis', 'feet', 'saturday', 'israel', 'tornado', 'february', 'god', 'friday', 'crime', 'america', 'neil', 'oliver', 'couple', 'st', 'gaza', 'outlook', 'anticipation', 'umbrella', 'flwx', 'house', 'drought', 'canada', 'sun', 'sunday', 'hell', 'games', 'fans', 'illinois', 'space', 'party', 'mother', 'chicago', 'new', 'england', 'baby', 'mother', 'ٺون', 'tuesday', 'biden', 'russia', 'terrorists', 'county', 'thursday', 'rainbow', 'disaster', 'luck', 'nevada', 'women', 'ocean', 'kids', 'sierra', 'nevada', 'warmth', 'mar', 'safety', 'youtube', 'wednesday', 'hamas', 'mexico', 'miami', 'deluge', 'anxiety', 'supercell', 'counties', 'southern', 'california', 'air', 'hope', 'yield', 'monday', 'txwx', 'ca', 'seas', 'ilwx', 'movie', 'round', 'kansas', 'fan']\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "print(len(proper_nouns))\n", - "proper_nouns" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "f17c69db88f646ca9c2bf1cc354fc907": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_67488582ce1940cfbf07c577758f0b1d", + "placeholder": "​", + "style": "IPY_MODEL_605d95933565413886ff6508b7d20453", + "value": "vocab.json: 100%" + } }, - "id": "YGiVzRMvFX3X", - "outputId": "adc2b1a6-c0cc-40c6-bee5-003b6839c87d" - }, - "execution_count": 47, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "88\n" - ] + "f3b91c2405d4476bb05d23ffec46c3c0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['tornado',\n", - " 'lol',\n", - " 'tokens',\n", - " 'march',\n", - " 'partnership',\n", - " 'horizon',\n", - " 'coast',\n", - " 'times',\n", - " 'texas',\n", - " 'california',\n", - " 'desantis',\n", - " 'feet',\n", - " 'saturday',\n", - " 'israel',\n", - " 'tornado',\n", - " 'february',\n", - " 'god',\n", - " 'friday',\n", - " 'crime',\n", - " 'america',\n", - " 'neil',\n", - " 'oliver',\n", - " 'couple',\n", - " 'st',\n", - " 'gaza',\n", - " 'outlook',\n", - " 'anticipation',\n", - " 'umbrella',\n", - " 'flwx',\n", - " 'house',\n", - " 'drought',\n", - " 'canada',\n", - " 'sun',\n", - " 'sunday',\n", - " 'hell',\n", - " 'games',\n", - " 'fans',\n", - " 'illinois',\n", - " 'space',\n", - " 'party',\n", - " 'mother',\n", - " 'chicago',\n", - " 'new',\n", - " 'england',\n", - " 'baby',\n", - " 'mother',\n", - " 'ٺون',\n", - " 'tuesday',\n", - " 'biden',\n", - " 'russia',\n", - " 'terrorists',\n", - " 'county',\n", - " 'thursday',\n", - " 'rainbow',\n", - " 'disaster',\n", - " 'luck',\n", - " 'nevada',\n", - " 'women',\n", - " 'ocean',\n", - " 'kids',\n", - " 'sierra',\n", - " 'nevada',\n", - " 'warmth',\n", - " 'mar',\n", - " 'safety',\n", - " 'youtube',\n", - " 'wednesday',\n", - " 'hamas',\n", - " 'mexico',\n", - " 'miami',\n", - " 'deluge',\n", - " 'anxiety',\n", - " 'supercell',\n", - " 'counties',\n", - " 'southern',\n", - " 'california',\n", - " 'air',\n", - " 'hope',\n", - " 'yield',\n", - " 'monday',\n", - " 'txwx',\n", - " 'ca',\n", - " 'seas',\n", - " 'ilwx',\n", - " 'movie',\n", - " 'round',\n", - " 'kansas',\n", - " 'fan']" - ] - }, - "metadata": {}, - "execution_count": 47 - } - ] - }, - { - "cell_type": "code", - "source": [ - "len(nouns)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "f9fa031e785f4335a3fc4a36d611e207": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - "id": "IDJUwNpyFYSl", - "outputId": "4e50b25b-5b07-4bc6-b431-afa792c26c0c" - }, - "execution_count": 48, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "438" - ] - }, - "metadata": {}, - "execution_count": 48 + "fe0fdd93415c4970b6428e0b66c30681": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_69318b3a6a6749dbbbbdb87b506e94cb", + "IPY_MODEL_ddacb928a856483481546b838556ac42", + "IPY_MODEL_4377321d9ca04dd4ab10fe6ff82cf160" + ], + "layout": "IPY_MODEL_cef9e8b68be64849bcc259f7b5dc5e1f" + } } - ] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "Nxgc3toYFllR" - }, - "execution_count": null, - "outputs": [] + } } - ] + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file