diff --git a/pykoi/rlhf/config.py b/pykoi/rlhf/config.py
index d413fe7..c37b05f 100644
--- a/pykoi/rlhf/config.py
+++ b/pykoi/rlhf/config.py
@@ -19,9 +19,7 @@ class RLHFConfig:
     base_model_path: str = field(
         default="elinas/llama-7b-hf-transformers-4.29",
-        metadata={
-            "help": "Huggingface model name or a local path to the base model."
-        },
+        metadata={"help": "Huggingface model name or a local path to the base model."},
     )
     dataset_type: Optional[str] = field(
         default="local_db",
@@ -68,7 +66,7 @@ class RLHFConfig:
     #     default=8,
     #     metadata={"help": "Batch size."})
     per_device_train_batch_size: Optional[int] = field(
-        default=2, metadata={"help": "Batch size per device for training."}
+        default=1, metadata={"help": "Batch size per device for training."}
     )
     per_device_eval_batch_size: Optional[int] = field(
         default=8, metadata={"help": "Batch size per device for evaluation."}
     )
@@ -89,12 +87,8 @@ class RLHFConfig:
     local_rank: Optional[int] = field(
         default=-1, metadata={"help": "Used for multi-gpu."}
     )
-    fp16: Optional[bool] = field(
-        default=True, metadata={"help": "Enable FP16."}
-    )
-    bf16: Optional[bool] = field(
-        default=False, metadata={"help": "Enable BF16."}
-    )
+    fp16: Optional[bool] = field(default=True, metadata={"help": "Enable FP16."})
+    bf16: Optional[bool] = field(default=False, metadata={"help": "Enable BF16."})
     load_in_8bit: Optional[bool] = field(
         default=True,
         metadata={"help": "Whether load the model weights in 8-bit or not."},
@@ -113,6 +107,9 @@ class RLHFConfig:
     gradient_checkpointing: Optional[bool] = field(
         default=False, metadata={"help": "Enable gradient checkpointing."}
     )
+    gradient_checkpointing_use_reentrant: Optional[bool] = field(
+        default=True, metadata={"help": "Enable reentrant for gradient checkpointing."}
+    )
     seed: Optional[int] = field(default=0, metadata={"help": "Random seed."})
     num_workers: Optional[int] = field(
         default=None, metadata={"help": "Number of workers."}
     )
@@ -121,9 +118,7 @@ class RLHFConfig:
         default="./rlhf_checkpoints",
         metadata={"help": "Output directory for all model weights."},
     )
-    log_freq: Optional[int] = field(
-        default=1, metadata={"help": "Logging frequency."}
-    )
+    log_freq: Optional[int] = field(default=1, metadata={"help": "Logging frequency."})
     eval_freq: Optional[int] = field(
         default=1000, metadata={"help": "Evaluation frequency."}
     )
@@ -135,7 +130,7 @@ class RLHFConfig:
         metadata={"help": "Whether push to Huggingface Hub or not."},
     )

-    ## Step 1 SFT parameters
+    # Step 1 SFT parameters
     max_steps: Optional[int] = field(
         default=5, metadata={"help": "Maximum number of training steps."}
     )
@@ -145,9 +140,7 @@ class RLHFConfig:
     )
     dataset_subset_sft_train: Optional[int] = field(
         default=10000,
-        metadata={
-            "help": "The size of the subset of the training data to use."
-        },
+        metadata={"help": "The size of the subset of the training data to use."},
     )
     split: Optional[str] = field(
         default="train", metadata={"help": "Dataset split to use."}
     )
@@ -167,8 +160,7 @@ class RLHFConfig:
         default="step1_supervised_finetuning_lora_final/",
         metadata={
             "help": (
-                "Output directory for step 1 supervised finetuning's Lora"
-                " weights."
+                "Output directory for step 1 supervised finetuning's Lora" " weights."
             )
         },
     )
@@ -194,17 +186,14 @@ class RLHFConfig:
     reward_model_path: Optional[str] = field(
         default="databricks/dolly-v2-3b",
         metadata={
-            "help": (
-                "Huggingface model name or a local path to the reward model."
-            )
+            "help": ("Huggingface model name or a local path to the reward model.")
         },
     )
     reward_lora_path: Optional[str] = field(
         default="step2_reward_finetuning_lora_final/",
         metadata={
             "help": (
-                "Output directory for step 1 supervised finetuning's Lora"
-                " weights."
+                "Output directory for step 1 supervised finetuning's Lora" " weights."
             )
         },
     )
@@ -222,9 +211,7 @@ class RLHFConfig:
     )
     reward_num_of_data: Optional[int] = field(
         default=1000,
-        metadata={
-            "help": "The size of the subset of the training data to use."
-        },
+        metadata={"help": "The size of the subset of the training data to use."},
     )
     max_seq_length_reward: Optional[int] = field(
         default=512, metadata={"help": "Maximum sequence length."}
     )
@@ -246,9 +233,7 @@ class RLHFConfig:
     )
     label_names: Optional[List[str]] = field(
         default_factory=list,
-        metadata={
-            "help": "List of column names in the dataset to use as labels."
-        },
+        metadata={"help": "List of column names in the dataset to use as labels."},
     )
     logging_strategy: Optional[str] = field(
         default="steps",
@@ -284,20 +269,14 @@ class RLHFConfig:
     )
     dataset_subset_rl_train: Optional[int] = field(
         default=10000,
-        metadata={
-            "help": "The size of the subset of the training data to use."
-        },
+        metadata={"help": "The size of the subset of the training data to use."},
     )
     adafactor: Optional[bool] = field(
         default=False,
         metadata={"help": "whether to use the adafactor optimizer"},
     )
-    top_k: Optional[float] = field(
-        default=0.0, metadata={"help": "Value for top_k"}
-    )
-    top_p: Optional[float] = field(
-        default=1.0, metadata={"help": "Value for top_p"}
-    )
+    top_k: Optional[float] = field(default=0.0, metadata={"help": "Value for top_k"})
+    top_p: Optional[float] = field(default=1.0, metadata={"help": "Value for top_p"})
     do_sample: Optional[bool] = field(
         default=True, metadata={"help": "Flag for sampling"}
     )
@@ -318,9 +297,7 @@ class RLHFConfig:
     )
     ppo_epochs: Optional[int] = field(
         default=10,
-        metadata={
-            "help": "the number of optimisation epochs per batch of samples"
-        },
+        metadata={"help": "the number of optimisation epochs per batch of samples"},
    )
     total_epochs: Optional[int] = field(
         default=100, metadata={"help": "number of total epochs"}
     )
@@ -333,9 +310,7 @@ class RLHFConfig:
     )
     reward_baseline: Optional[float] = field(
         default=0.0,
-        metadata={
-            "help": "a baseline value that is subtracted from the reward"
-        },
+        metadata={"help": "a baseline value that is subtracted from the reward"},
     )
     init_kl_coef: Optional[float] = field(
         default=0.2,
@@ -354,8 +329,7 @@ class RLHFConfig:
         default="step3_reinforcement_learning_final_lora_weights/",
         metadata={
             "help": (
-                "Output directory for step 3 reinforcement learning's Lora"
-                " weights."
+                "Output directory for step 3 reinforcement learning's Lora" " weights."
             )
         },
     )
diff --git a/pykoi/rlhf/supervised_finetuning.py b/pykoi/rlhf/supervised_finetuning.py
index ba6016c..fd98d46 100644
--- a/pykoi/rlhf/supervised_finetuning.py
+++ b/pykoi/rlhf/supervised_finetuning.py
@@ -1,10 +1,10 @@
 """superised_finetuning."""
 import os
-from typing import Optional
-import torch
 import time
-
 from datetime import datetime
+from typing import Optional
+
+import torch
 from datasets import Dataset, load_dataset
 from peft import PeftConfig, PeftModel
 from transformers import (
@@ -13,22 +13,19 @@
     AutoTokenizer,
     TrainingArguments,
 )
-
 from trl import SFTTrainer
 from trl.trainer.utils import ConstantLengthDataset
+
 from pykoi.chat.db.constants import (
+    QA_CSV_HEADER_ANSWER,
     QA_CSV_HEADER_ID,
     QA_CSV_HEADER_QUESTION,
-    QA_CSV_HEADER_ANSWER,
     QA_CSV_HEADER_VOTE_STATUS,
 )
 from pykoi.chat.db.qa_database import QuestionAnswerDatabase
 from pykoi.rlhf.config import RLHFConfig
+from pykoi.telemetry.events import SFTStartEvent, SFTStopEvent
 from pykoi.telemetry.telemetry import Telemetry
-from pykoi.telemetry.events import (
-    SFTStartEvent,
-    SFTStopEvent,
-)


 class SupervisedFinetuning:
@@ -46,9 +43,7 @@ class SupervisedFinetuning:
         trainer (SFTTrainer): The trainer object used for training the model.
     """

-    def __init__(self,
-                 rlhf_config: RLHFConfig,
-                 enable_telemetry: bool = True) -> None:
+    def __init__(self, rlhf_config: RLHFConfig, enable_telemetry: bool = True) -> None:
         """
         Initializes the SFTTrainer object.

@@ -58,18 +53,12 @@ def __init__(self,
         """
         self._telemetry = Telemetry(enable_telemetry)
         self._rlhf_config = rlhf_config
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            rlhf_config.base_model_path
-        )
+        self.tokenizer = AutoTokenizer.from_pretrained(rlhf_config.base_model_path)
         self.num_proc = (
-            self._rlhf_config.num_workers
-            if not self._rlhf_config.streaming
-            else None
+            self._rlhf_config.num_workers if not self._rlhf_config.streaming else None
         )
         self.dataset = self.create_datasets(self.tokenizer, self._rlhf_config)
-        self.torch_dtype = (
-            torch.bfloat16 if self._rlhf_config.bf16 else torch.float16
-        )
+        self.torch_dtype = torch.bfloat16 if self._rlhf_config.bf16 else torch.float16
         # self.torch_dtype = torch.bfloat16 if bf16 else (torch.float16 if fp16 else torch.float32)
         self.training_args = TrainingArguments(
             output_dir=self._rlhf_config.output_dir,
@@ -86,6 +75,9 @@ def __init__(self,
             warmup_steps=self._rlhf_config.num_warmup_steps,
             gradient_accumulation_steps=self._rlhf_config.gradient_accumulation_steps,
             gradient_checkpointing=self._rlhf_config.gradient_checkpointing,
+            gradient_checkpointing_kwargs={
+                "use_reentrant": self._rlhf_config.gradient_checkpointing_use_reentrant
+            },
             fp16=self._rlhf_config.fp16,
             bf16=self._rlhf_config.bf16,
             weight_decay=self._rlhf_config.weight_decay,
@@ -158,7 +150,7 @@ def save(self, output_path=None):
     def train_and_save(self, output_path=None):
         start_event = SFTStartEvent(
             start_time=time.time(), date_time=datetime.utcfromtimestamp(time.time())
-            )
+        )
         self._telemetry.capture(start_event)
         self.trainer.train()
         self.save(output_path)
@@ -182,9 +174,7 @@ def create_datasets(self, tokenizer, args):
         if args.dataset_type == "local_db":
             qa_database = QuestionAnswerDatabase()
             my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
-            my_data_pd = my_data_pd[
-                my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"
-            ]
+            my_data_pd = my_data_pd[my_data_pd[QA_CSV_HEADER_VOTE_STATUS] == "up"]
             my_data_pd = my_data_pd[
                 [QA_CSV_HEADER_ID, QA_CSV_HEADER_QUESTION, QA_CSV_HEADER_ANSWER]
             ]
diff --git a/pyproject.toml b/pyproject.toml
index b7100b3..b2054e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ pydantic = "2.5.2"
 starlette = "0.27.0"
 uvicorn = "0.23.1"
 scipy = "1.11.1"
-openai = "^1.2.4"
+openai = "1.6.1"
 passlib = "1.7.4"
 bcrypt = "4.0.1"
 posthog = "3.0.1"
@@ -22,7 +22,7 @@ pynvml = "11.5.0"
 pandas = "2.0.3"
 python-dotenv = "^1.0.0"

-transformers = { version = "4.35.0", optional = true }
+transformers = { version = "4.36.2", optional = true }
 einops = { version = "0.6.1", optional = true }
 accelerate = { version = "0.21.0", optional = true }
 bitsandbytes = { version = "0.40.2", optional = true }
@@ -37,10 +37,10 @@
 python-multipart = { version = "0.0.6", optional = true }
 tiktoken = { version = "0.4.0", optional = true }
 sentence-transformers = { version = "2.2.2", optional = true }
-datasets = { version = "2.14.5", optional = true }
+datasets = { version = "2.15.0", optional = true }
 evaluate = { version = "0.4.0", optional = true }
-peft = { version = "0.5.0", optional = true }
-trl = { version = "0.4.7", optional = true }
+peft = { version = "0.7.1", optional = true }
+trl = { version = "0.7.4", optional = true }

 [tool.poetry.extras]
 huggingface = [
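
Note (editorial addition, not part of the patch): the substantive change above is the new gradient_checkpointing_use_reentrant field on RLHFConfig, which SupervisedFinetuning now forwards to TrainingArguments through gradient_checkpointing_kwargs; the transformers version pinned here (4.36.2) accepts that argument. A minimal sketch of how the flag is expected to flow through, using illustrative values rather than anything taken from the repo:

    from transformers import TrainingArguments

    from pykoi.rlhf.config import RLHFConfig

    # Illustrative overrides; the patch defaults use_reentrant to True,
    # which matches PyTorch's historical checkpointing behaviour.
    config = RLHFConfig(
        gradient_checkpointing=True,
        gradient_checkpointing_use_reentrant=False,
    )

    # Mirrors what SupervisedFinetuning.__init__ does after this patch:
    # the flag reaches the trainer via gradient_checkpointing_kwargs.
    training_args = TrainingArguments(
        output_dir=config.output_dir,
        gradient_checkpointing=config.gradient_checkpointing,
        gradient_checkpointing_kwargs={
            "use_reentrant": config.gradient_checkpointing_use_reentrant
        },
    )

Passing use_reentrant=False selects PyTorch's non-reentrant checkpointing implementation; recent torch releases ask callers to set this argument explicitly, which is presumably why the option is now exposed in the config.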