update openai, datasets, transformers, peft, and trl packages
jojortz committed Dec 22, 2023
1 parent 649cc2c commit 1fd8e63
Showing 3 changed files with 41 additions and 77 deletions.
68 changes: 21 additions & 47 deletions pykoi/rlhf/config.py
@@ -19,9 +19,7 @@ class RLHFConfig:

base_model_path: str = field(
default="elinas/llama-7b-hf-transformers-4.29",
metadata={
"help": "Huggingface model name or a local path to the base model."
},
metadata={"help": "Huggingface model name or a local path to the base model."},
)
dataset_type: Optional[str] = field(
default="local_db",
@@ -68,7 +66,7 @@ class RLHFConfig:
# default=8,
# metadata={"help": "Batch size."})
per_device_train_batch_size: Optional[int] = field(
default=2, metadata={"help": "Batch size per device for training."}
default=1, metadata={"help": "Batch size per device for training."}
)
per_device_eval_batch_size: Optional[int] = field(
default=8, metadata={"help": "Batch size per device for evaluation."}
@@ -89,12 +87,8 @@ class RLHFConfig:
local_rank: Optional[int] = field(
default=-1, metadata={"help": "Used for multi-gpu."}
)
fp16: Optional[bool] = field(
default=True, metadata={"help": "Enable FP16."}
)
bf16: Optional[bool] = field(
default=False, metadata={"help": "Enable BF16."}
)
fp16: Optional[bool] = field(default=True, metadata={"help": "Enable FP16."})
bf16: Optional[bool] = field(default=False, metadata={"help": "Enable BF16."})
load_in_8bit: Optional[bool] = field(
default=True,
metadata={"help": "Whether load the model weights in 8-bit or not."},
@@ -113,6 +107,9 @@ class RLHFConfig:
gradient_checkpointing: Optional[bool] = field(
default=False, metadata={"help": "Enable gradient checkpointing."}
)
gradient_checkpointing_use_reentrant: Optional[bool] = field(
default=True, metadata={"help": "Enable reentrant for gradient checkpointing."}
)
seed: Optional[int] = field(default=0, metadata={"help": "Random seed."})
num_workers: Optional[int] = field(
default=None, metadata={"help": "Number of workers."}
@@ -121,9 +118,7 @@ class RLHFConfig:
default="./rlhf_checkpoints",
metadata={"help": "Output directory for all model weights."},
)
log_freq: Optional[int] = field(
default=1, metadata={"help": "Logging frequency."}
)
log_freq: Optional[int] = field(default=1, metadata={"help": "Logging frequency."})
eval_freq: Optional[int] = field(
default=1000, metadata={"help": "Evaluation frequency."}
)
@@ -135,7 +130,7 @@ class RLHFConfig:
metadata={"help": "Whether push to Huggingface Hub or not."},
)

## Step 1 SFT parameters
# Step 1 SFT parameters
max_steps: Optional[int] = field(
default=5, metadata={"help": "Maximum number of training steps."}
)
@@ -145,9 +140,7 @@ class RLHFConfig:
)
dataset_subset_sft_train: Optional[int] = field(
default=10000,
metadata={
"help": "The size of the subset of the training data to use."
},
metadata={"help": "The size of the subset of the training data to use."},
)
split: Optional[str] = field(
default="train", metadata={"help": "Dataset split to use."}
@@ -167,8 +160,7 @@ class RLHFConfig:
default="step1_supervised_finetuning_lora_final/",
metadata={
"help": (
"Output directory for step 1 supervised finetuning's Lora"
" weights."
"Output directory for step 1 supervised finetuning's Lora" " weights."
)
},
)
@@ -194,17 +186,14 @@ class RLHFConfig:
reward_model_path: Optional[str] = field(
default="databricks/dolly-v2-3b",
metadata={
"help": (
"Huggingface model name or a local path to the reward model."
)
"help": ("Huggingface model name or a local path to the reward model.")
},
)
reward_lora_path: Optional[str] = field(
default="step2_reward_finetuning_lora_final/",
metadata={
"help": (
"Output directory for step 1 supervised finetuning's Lora"
" weights."
"Output directory for step 1 supervised finetuning's Lora" " weights."
)
},
)
@@ -222,9 +211,7 @@ class RLHFConfig:
)
reward_num_of_data: Optional[int] = field(
default=1000,
metadata={
"help": "The size of the subset of the training data to use."
},
metadata={"help": "The size of the subset of the training data to use."},
)
max_seq_length_reward: Optional[int] = field(
default=512, metadata={"help": "Maximum sequence length."}
@@ -246,9 +233,7 @@ class RLHFConfig:
)
label_names: Optional[List[str]] = field(
default_factory=list,
metadata={
"help": "List of column names in the dataset to use as labels."
},
metadata={"help": "List of column names in the dataset to use as labels."},
)
logging_strategy: Optional[str] = field(
default="steps",
@@ -284,20 +269,14 @@ class RLHFConfig:
)
dataset_subset_rl_train: Optional[int] = field(
default=10000,
metadata={
"help": "The size of the subset of the training data to use."
},
metadata={"help": "The size of the subset of the training data to use."},
)
adafactor: Optional[bool] = field(
default=False,
metadata={"help": "whether to use the adafactor optimizer"},
)
top_k: Optional[float] = field(
default=0.0, metadata={"help": "Value for top_k"}
)
top_p: Optional[float] = field(
default=1.0, metadata={"help": "Value for top_p"}
)
top_k: Optional[float] = field(default=0.0, metadata={"help": "Value for top_k"})
top_p: Optional[float] = field(default=1.0, metadata={"help": "Value for top_p"})
do_sample: Optional[bool] = field(
default=True, metadata={"help": "Flag for sampling"}
)
@@ -318,9 +297,7 @@ class RLHFConfig:
)
ppo_epochs: Optional[int] = field(
default=10,
metadata={
"help": "the number of optimisation epochs per batch of samples"
},
metadata={"help": "the number of optimisation epochs per batch of samples"},
)
total_epochs: Optional[int] = field(
default=100, metadata={"help": "number of total epochs"}
@@ -333,9 +310,7 @@ class RLHFConfig:
)
reward_baseline: Optional[float] = field(
default=0.0,
metadata={
"help": "a baseline value that is subtracted from the reward"
},
metadata={"help": "a baseline value that is subtracted from the reward"},
)
init_kl_coef: Optional[float] = field(
default=0.2,
@@ -354,8 +329,7 @@ class RLHFConfig:
default="step3_reinforcement_learning_final_lora_weights/",
metadata={
"help": (
"Output directory for step 3 reinforcement learning's Lora"
" weights."
"Output directory for step 3 reinforcement learning's Lora" " weights."
)
},
)
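The config.py hunks above are mostly black-style reformatting, but two are behavioral: the default per_device_train_batch_size drops from 2 to 1, and a new gradient_checkpointing_use_reentrant flag (default True) is added and forwarded to the trainer in supervised_finetuning.py below. A minimal usage sketch, not part of this diff, assuming RLHFConfig is imported as in supervised_finetuning.py:

    from pykoi.rlhf.config import RLHFConfig

    # Override only the fields this commit touches; everything else keeps the
    # dataclass defaults shown in the diff above.
    config = RLHFConfig(
        per_device_train_batch_size=1,               # new default in this commit
        gradient_checkpointing=True,                  # checkpointing is off by default
        gradient_checkpointing_use_reentrant=False,   # hypothetical override of the new flag
    )
    print(config.per_device_train_batch_size, config.gradient_checkpointing_use_reentrant)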
40 changes: 15 additions & 25 deletions pykoi/rlhf/supervised_finetuning.py
@@ -1,10 +1,10 @@
"""superised_finetuning."""
import os
from typing import Optional
import torch
import time

from datetime import datetime
from typing import Optional

import torch
from datasets import Dataset, load_dataset
from peft import PeftConfig, PeftModel
from transformers import (
@@ -13,22 +13,19 @@
AutoTokenizer,
TrainingArguments,
)

from trl import SFTTrainer
from trl.trainer.utils import ConstantLengthDataset

from pykoi.chat.db.constants import (
QA_CSV_HEADER_ANSWER,
QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_ANSWER,
QA_CSV_HEADER_VOTE_STATUS,
)
from pykoi.chat.db.qa_database import QuestionAnswerDatabase
from pykoi.rlhf.config import RLHFConfig
from pykoi.telemetry.events import SFTStartEvent, SFTStopEvent
from pykoi.telemetry.telemetry import Telemetry
from pykoi.telemetry.events import (
SFTStartEvent,
SFTStopEvent,
)


class SupervisedFinetuning:
@@ -46,9 +43,7 @@ class SupervisedFinetuning:
trainer (SFTTrainer): The trainer object used for training the model.
"""

def __init__(self,
rlhf_config: RLHFConfig,
enable_telemetry: bool = True) -> None:
def __init__(self, rlhf_config: RLHFConfig, enable_telemetry: bool = True) -> None:
"""
Initializes the SFTTrainer object.
@@ -58,18 +53,12 @@ def __init__(self,
"""
self._telemetry = Telemetry(enable_telemetry)
self._rlhf_config = rlhf_config
self.tokenizer = AutoTokenizer.from_pretrained(
rlhf_config.base_model_path
)
self.tokenizer = AutoTokenizer.from_pretrained(rlhf_config.base_model_path)
self.num_proc = (
self._rlhf_config.num_workers
if not self._rlhf_config.streaming
else None
self._rlhf_config.num_workers if not self._rlhf_config.streaming else None
)
self.dataset = self.create_datasets(self.tokenizer, self._rlhf_config)
self.torch_dtype = (
torch.bfloat16 if self._rlhf_config.bf16 else torch.float16
)
self.torch_dtype = torch.bfloat16 if self._rlhf_config.bf16 else torch.float16
# self.torch_dtype = torch.bfloat16 if bf16 else (torch.float16 if fp16 else torch.float32)
self.training_args = TrainingArguments(
output_dir=self._rlhf_config.output_dir,
@@ -86,6 +75,9 @@ def __init__(self,
warmup_steps=self._rlhf_config.num_warmup_steps,
gradient_accumulation_steps=self._rlhf_config.gradient_accumulation_steps,
gradient_checkpointing=self._rlhf_config.gradient_checkpointing,
gradient_checkpointing_kwargs={
"use_reentrant": self._rlhf_config.gradient_checkpointing_use_reentrant
},
fp16=self._rlhf_config.fp16,
bf16=self._rlhf_config.bf16,
weight_decay=self._rlhf_config.weight_decay,
@@ -158,7 +150,7 @@ def save(self, output_path=None):
def train_and_save(self, output_path=None):
start_event = SFTStartEvent(
start_time=time.time(), date_time=datetime.utcfromtimestamp(time.time())
)
)
self._telemetry.capture(start_event)
self.trainer.train()
self.save(output_path)
@@ -182,9 +174,7 @@ def create_datasets(self, tokenizer, args):
if args.dataset_type == "local_db":
qa_database = QuestionAnswerDatabase()
my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
my_data_pd = my_data_pd[
my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"
]
my_data_pd = my_data_pd[my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"]
my_data_pd = my_data_pd[
[QA_CSV_HEADER_ID, QA_CSV_HEADER_QUESTION, QA_CSV_HEADER_ANSWER]
]
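The supervised_finetuning.py changes reorder imports and wire the new flag into TrainingArguments via gradient_checkpointing_kwargs, which needs a recent transformers release; the 4.36.2 pin below appears to cover it. A rough end-to-end sketch, not part of this diff, assuming a local question-answer database has already been populated so the default local_db path in create_datasets has data to load:

    from pykoi.rlhf.config import RLHFConfig
    from pykoi.rlhf.supervised_finetuning import SupervisedFinetuning

    # dataset_type="local_db" pulls up-voted Q&A pairs from QuestionAnswerDatabase,
    # as in create_datasets above.
    config = RLHFConfig(dataset_type="local_db", gradient_checkpointing=True)
    sft = SupervisedFinetuning(config, enable_telemetry=False)
    sft.train_and_save("./sft_output")  # train, then save weights under the given path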
10 changes: 5 additions & 5 deletions pyproject.toml
@@ -14,15 +14,15 @@ pydantic = "2.5.2"
starlette = "0.27.0"
uvicorn = "0.23.1"
scipy = "1.11.1"
openai = "^1.2.4"
openai = "1.6.1"
passlib = "1.7.4"
bcrypt = "4.0.1"
posthog = "3.0.1"
pynvml = "11.5.0"
pandas = "2.0.3"
python-dotenv = "^1.0.0"

transformers = { version = "4.35.0", optional = true }
transformers = { version = "4.36.2", optional = true }
einops = { version = "0.6.1", optional = true }
accelerate = { version = "0.21.0", optional = true }
bitsandbytes = { version = "0.40.2", optional = true }
@@ -37,10 +37,10 @@ python-multipart = { version = "0.0.6", optional = true }
tiktoken = { version = "0.4.0", optional = true }
sentence-transformers = { version = "2.2.2", optional = true }

datasets = { version = "2.14.5", optional = true }
datasets = { version = "2.15.0", optional = true }
evaluate = { version = "0.4.0", optional = true }
peft = { version = "0.5.0", optional = true }
trl = { version = "0.4.7", optional = true }
peft = { version = "0.7.1", optional = true }
trl = { version = "0.7.4", optional = true }

[tool.poetry.extras]
huggingface = [
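The pyproject.toml hunks bump openai to 1.6.1 and the RLHF stack to transformers 4.36.2, datasets 2.15.0, peft 0.7.1, and trl 0.7.4. A small verification sketch, not part of this diff, assuming the optional RLHF dependencies have been installed:

    from importlib.metadata import version

    # Pins introduced by this commit in pyproject.toml.
    expected = {
        "openai": "1.6.1",
        "transformers": "4.36.2",
        "datasets": "2.15.0",
        "peft": "0.7.1",
        "trl": "0.7.4",
    }
    for pkg, want in expected.items():
        have = version(pkg)
        print(f"{pkg}: installed {have}, pinned {want}", "" if have == want else "(mismatch)")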
