0.0.4 (update)
JuanBindez committed Jan 9, 2025
1 parent dacdada commit e66e711
Showing 3 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion build.sh
@@ -2,7 +2,7 @@

 VERSION=0
 MINOR=0
-PATCH=3
+PATCH=4
 EXTRAVERSION=""

 NOTES="(update)"
19 changes: 13 additions & 6 deletions ctesibioAI_colab.ipynb
@@ -4,7 +4,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# CtesibioAI v0.0.2"
+"# CtesibioAI v0.0.4"
 ]
 },
 {
@@ -56,11 +56,18 @@
 },
 "outputs": [],
 "source": [
-"datas = [\n",
-" {\"pergunta\": \"Qual é a capital do Brasil?\", \"resposta\": \"A capital do Brasil é Brasília.\"},\n",
-" {\"pergunta\": \"Quem descobriu o Brasil?\", \"resposta\": \"O Brasil foi descoberto por Pedro Álvares Cabral.\"},\n",
-" {\"pergunta\": \"Qual é a maior floresta tropical do mundo?\", \"resposta\": \"A maior floresta tropical do mundo é a Floresta Amazônica.\"},\n",
-"]"
+"data = [\n",
+" {\"question\": \"What is a large language model (LLM)?\", \"answer\": \"An LLM is an artificial intelligence model trained on large volumes of text to understand and generate natural language.\"},\n",
+" {\"question\": \"What are some examples of famous LLMs?\", \"answer\": \"Examples of famous LLMs include GPT, BERT, T5, and ChatGPT.\"},\n",
+" {\"question\": \"What are LLMs used for?\", \"answer\": \"LLMs are used in tasks such as text generation, machine translation, text summarization, question answering, and more.\"},\n",
+" {\"question\": \"What does the term 'fine-tuning' mean?\", \"answer\": \"Fine-tuning is the process of adjusting a pre-trained LLM on a specific dataset for a particular application or domain.\"},\n",
+" {\"question\": \"What is 'tokenization'?\", \"answer\": \"Tokenization is the process of breaking text into smaller units, called tokens, which can be words, subwords, or characters.\"},\n",
+" {\"question\": \"How do LLMs learn to generate text?\", \"answer\": \"LLMs learn to generate text by predicting the next word or token in a sequence based on patterns in the training text.\"},\n",
+" {\"question\": \"What is the main challenge in training?\", \"answer\": \"The main challenges include the high computational cost, the need for large datasets, and the difficulty of avoiding biases in the models.\"},\n",
+" {\"question\": \"What are parameters?\", \"answer\": \"Parameters are adjustable values in the model that determine how it processes and generates text based on input data.\"},\n",
+" {\"question\": \"What is a transformer?\", \"answer\": \"Transformers are a neural network architecture that uses attention mechanisms to process data sequences, such as text.\"},\n",
+" {\"question\": \"What is the role of 'pre-training'?\", \"answer\": \"'Pre-training' involves training the model on a large corpus of text to learn general language patterns before fine-tuning it for specific tasks.\"},\n",
+"]\n"
 ]
 },
 {
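For readers following along, here is a minimal sketch of how question/answer pairs like the ones added above are commonly flattened into training strings for causal-LM fine-tuning. The formatting step itself is not shown in this hunk, so the helper below is a hypothetical illustration; only the `<BOS>`/`<EOS>` markers are taken from prompt.py.

```python
# Hypothetical helper, not code from this repository: joins each
# question/answer pair into one training string, delimited by the same
# <BOS>/<EOS> special tokens that prompt.py assigns to the tokenizer.
data = [
    {"question": "What is a large language model (LLM)?",
     "answer": "An LLM is an artificial intelligence model trained on "
               "large volumes of text to understand and generate natural language."},
]

def format_example(item: dict) -> str:
    # One causal-LM sample per pair; the model learns to continue the
    # question with its answer and to stop at <EOS>.
    return f"<BOS>{item['question']} {item['answer']}<EOS>"

train_texts = [format_example(item) for item in data]
print(train_texts[0])
```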
2 changes: 1 addition & 1 deletion prompt.py
@@ -7,7 +7,7 @@
 tokenizer.bos_token = "<BOS>"
 tokenizer.eos_token = "<EOS>"

-input_text = "<BOS>capital do brasil?"
+input_text = "<BOS>capital of Brazil?"
 inputs = tokenizer.encode(input_text, return_tensors="pt")

 output = model.generate(
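The hunk cuts off before `model.generate`'s arguments, so for completeness here is a self-contained sketch of what the surrounding script plausibly looks like. The checkpoint name and the sampling parameters are assumptions, not this repository's values; only the special-token setup, the prompt, and the encode call appear in the diff.

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Assumed checkpoint: the real model path is outside this hunk.
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# These three lines mirror the diff above.
tokenizer.bos_token = "<BOS>"
tokenizer.eos_token = "<EOS>"
input_text = "<BOS>capital of Brazil?"

inputs = tokenizer.encode(input_text, return_tensors="pt")

# Plausible sampling settings; the diff is truncated before the real ones.
output = model.generate(
    inputs,
    max_length=50,
    do_sample=True,
    top_k=50,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```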
