0.0.4 (update)
JuanBindez committed Jan 9, 2025
1 parent dacdada commit e66e711
Showing 3 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion build.sh
@@ -2,7 +2,7 @@

 VERSION=0
 MINOR=0
-PATCH=3
+PATCH=4
 EXTRAVERSION=""

 NOTES="(update)"
19 changes: 13 additions & 6 deletions ctesibioAI_colab.ipynb
@@ -4,7 +4,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# CtesibioAI v0.0.2"
+"# CtesibioAI v0.0.4"
 ]
 },
 {
@@ -56,11 +56,18 @@
 },
 "outputs": [],
 "source": [
-"datas = [\n",
-" {\"pergunta\": \"Qual é a capital do Brasil?\", \"resposta\": \"A capital do Brasil é Brasília.\"},\n",
-" {\"pergunta\": \"Quem descobriu o Brasil?\", \"resposta\": \"O Brasil foi descoberto por Pedro Álvares Cabral.\"},\n",
-" {\"pergunta\": \"Qual é a maior floresta tropical do mundo?\", \"resposta\": \"A maior floresta tropical do mundo é a Floresta Amazônica.\"},\n",
-"]"
+"data = [\n",
+" {\"question\": \"What is a large language model (LLM)?\", \"answer\": \"An LLM is an artificial intelligence model trained on large volumes of text to understand and generate natural language.\"},\n",
+" {\"question\": \"What are some examples of famous LLMs?\", \"answer\": \"Examples of famous LLMs include GPT, BERT, T5, and ChatGPT.\"},\n",
+" {\"question\": \"What are LLMs used for?\", \"answer\": \"LLMs are used in tasks such as text generation, machine translation, text summarization, question answering, and more.\"},\n",
+" {\"question\": \"What does the term 'fine-tuning' mean?\", \"answer\": \"Fine-tuning is the process of adjusting a pre-trained LLM on a specific dataset for a particular application or domain.\"},\n",
+" {\"question\": \"What is 'tokenization'?\", \"answer\": \"Tokenization is the process of breaking text into smaller units, called tokens, which can be words, subwords, or characters.\"},\n",
+" {\"question\": \"How do LLMs learn to generate text?\", \"answer\": \"LLMs learn to generate text by predicting the next word or token in a sequence based on patterns in the training text.\"},\n",
+" {\"question\": \"What is the main challenge in training?\", \"answer\": \"The main challenges include the high computational cost, the need for large datasets, and the difficulty of avoiding biases in the models.\"},\n",
+" {\"question\": \"What are parameters?\", \"answer\": \"Parameters are adjustable values in the model that determine how it processes and generates text based on input data.\"},\n",
+" {\"question\": \"What is a transformer?\", \"answer\": \"Transformers are a neural network architecture that uses attention mechanisms to process data sequences, such as text.\"},\n",
+" {\"question\": \"What is the role of 'pre-training'?\", \"answer\": \"'Pre-training' involves training the model on a large corpus of text to learn general language patterns before fine-tuning it for specific tasks.\"},\n",
+"]\n"
 ]
 },
 {
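For readers following along, here is a minimal sketch of how question/answer pairs like the ones added above are commonly flattened into training strings for causal-LM fine-tuning. The formatting step itself is not shown in this hunk, so the helper below is a hypothetical illustration; only the `<BOS>`/`<EOS>` markers are taken from prompt.py.

```python
# Hypothetical helper, not code from this repository: joins each
# question/answer pair into one training string, delimited by the same
# <BOS>/<EOS> special tokens that prompt.py assigns to the tokenizer.
data = [
    {"question": "What is a large language model (LLM)?",
     "answer": "An LLM is an artificial intelligence model trained on "
               "large volumes of text to understand and generate natural language."},
]

def format_example(item: dict) -> str:
    # One causal-LM sample per pair; the model learns to continue the
    # question with its answer and to stop at <EOS>.
    return f"<BOS>{item['question']} {item['answer']}<EOS>"

train_texts = [format_example(item) for item in data]
print(train_texts[0])
```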
2 changes: 1 addition & 1 deletion prompt.py
@@ -7,7 +7,7 @@
 tokenizer.bos_token = "<BOS>"
 tokenizer.eos_token = "<EOS>"

-input_text = "<BOS>capital do brasil?"
+input_text = "<BOS>capital of Brazil?"
 inputs = tokenizer.encode(input_text, return_tensors="pt")

 output = model.generate(
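The hunk cuts off before `model.generate`'s arguments, so for completeness here is a self-contained sketch of what the surrounding script plausibly looks like. The checkpoint name and the sampling parameters are assumptions, not this repository's values; only the special-token setup, the prompt, and the encode call appear in the diff.

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Assumed checkpoint: the real model path is outside this hunk.
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# These three lines mirror the diff above.
tokenizer.bos_token = "<BOS>"
tokenizer.eos_token = "<EOS>"
input_text = "<BOS>capital of Brazil?"

inputs = tokenizer.encode(input_text, return_tensors="pt")

# Plausible sampling settings; the diff is truncated before the real ones.
output = model.generate(
    inputs,
    max_length=50,
    do_sample=True,
    top_k=50,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```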
