Commit

fix:conflict
Aries-ckt committed Jun 11, 2024
2 parents 9a715b5 + e11087a commit d759d78
Showing 11 changed files with 143 additions and 23 deletions.
6 changes: 6 additions & 0 deletions README.ja.md
@@ -154,6 +154,12 @@ The architecture of DB-GPT is shown in the figure below:
We support a wide range of models, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, and Zhipu.

- News
- 🔥🔥🔥 [Qwen2-57B-A14B-Instruct](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct)
- 🔥🔥🔥 [Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct)
- 🔥🔥🔥 [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)
- 🔥🔥🔥 [Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct)
- 🔥🔥🔥 [Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct)
- 🔥🔥🔥 [glm-4-9b-chat](https://huggingface.co/THUDM/glm-4-9b-chat)
- 🔥🔥🔥 [Phi-3](https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3)
- 🔥🔥🔥 [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat)
- 🔥🔥🔥 [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)
6 changes: 6 additions & 0 deletions README.md
@@ -158,6 +158,12 @@ At present, we have introduced several key features to showcase our current capabilities:
We offer extensive model support, including dozens of large language models (LLMs) from both open-source and API agents, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, Zhipu, and many more.

- News
- 🔥🔥🔥 [Qwen2-57B-A14B-Instruct](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct)
- 🔥🔥🔥 [Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct)
- 🔥🔥🔥 [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)
- 🔥🔥🔥 [Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct)
- 🔥🔥🔥 [Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct)
- 🔥🔥🔥 [glm-4-9b-chat](https://huggingface.co/THUDM/glm-4-9b-chat)
- 🔥🔥🔥 [Phi-3](https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3)
- 🔥🔥🔥 [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat)
- 🔥🔥🔥 [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)
6 changes: 6 additions & 0 deletions README.zh.md
@@ -152,6 +152,12 @@
Extensive model support, covering dozens of large language models from open-source and API proxies, such as LLaMA/LLaMA2, Baichuan, ChatGLM, Wenxin, Tongyi, and Zhipu. The following models are currently supported:

- Newly supported models
- 🔥🔥🔥 [Qwen2-57B-A14B-Instruct](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct)
- 🔥🔥🔥 [Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct)
- 🔥🔥🔥 [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)
- 🔥🔥🔥 [Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct)
- 🔥🔥🔥 [Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct)
- 🔥🔥🔥 [glm-4-9b-chat](https://huggingface.co/THUDM/glm-4-9b-chat)
- 🔥🔥🔥 [Phi-3](https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3)
- 🔥🔥🔥 [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat)
- 🔥🔥🔥 [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)
Binary file modified assets/wechat.jpg
39 changes: 39 additions & 0 deletions dbgpt/configs/model_config.py
@@ -52,6 +52,9 @@ def get_device() -> str:
"chatglm2-6b-int4": os.path.join(MODEL_PATH, "chatglm2-6b-int4"),
# https://huggingface.co/THUDM/chatglm3-6b
"chatglm3-6b": os.path.join(MODEL_PATH, "chatglm3-6b"),
# https://huggingface.co/THUDM/glm-4-9b-chat
"glm-4-9b-chat": os.path.join(MODEL_PATH, "glm-4-9b-chat"),
"glm-4-9b-chat-1m": os.path.join(MODEL_PATH, "glm-4-9b-chat-1m"),
"guanaco-33b-merged": os.path.join(MODEL_PATH, "guanaco-33b-merged"),
"falcon-40b": os.path.join(MODEL_PATH, "falcon-40b"),
"gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
@@ -122,6 +125,42 @@ def get_device() -> str:
"codeqwen1.5-7b-chat": os.path.join(MODEL_PATH, "CodeQwen1.5-7B-Chat"),
# https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat
"qwen1.5-moe-a2.7b-chat": os.path.join(MODEL_PATH, "Qwen1.5-MoE-A2.7B-Chat"),
"qwen2-57b-a14b-instruct": os.path.join(MODEL_PATH, "Qwen2-57B-A14B-Instruct"),
"qwen2-57b-a14b-instruct-gptq-int4": os.path.join(
MODEL_PATH, "Qwen2-57B-A14B-Instruct-GPTQ-Int4"
),
"qwen2-72b-instruct": os.path.join(MODEL_PATH, "Qwen2-72B-Instruct"),
"qwen2-72b-instruct-awq": os.path.join(MODEL_PATH, "Qwen2-72B-Instruct-AWQ"),
"qwen2-72b-instruct-gptq-int8": os.path.join(
MODEL_PATH, "Qwen2-72B-Instruct-GPTQ-Int8"
),
"qwen2-72b-instruct-gptq-int4": os.path.join(
MODEL_PATH, "Qwen2-72B-Instruct-GPTQ-Int4"
),
"qwen2-7b-instruct": os.path.join(MODEL_PATH, "Qwen2-7B-Instruct"),
"qwen2-7b-instruct-awq": os.path.join(MODEL_PATH, "Qwen2-7B-Instruct-AWQ"),
"qwen2-7b-instruct-gptq-int8": os.path.join(
MODEL_PATH, "Qwen2-7B-Instruct-GPTQ-Int8"
),
"qwen2-7b-instruct-gptq-int4": os.path.join(
MODEL_PATH, "Qwen2-7B-Instruct-GPTQ-Int4"
),
"qwen2-1.5b-instruct": os.path.join(MODEL_PATH, "Qwen2-1.5B-Instruct"),
"qwen2-1.5b-instruct-awq": os.path.join(MODEL_PATH, "Qwen2-1.5B-Instruct-AWQ"),
"qwen2-1.5b-instruct-gptq-int8": os.path.join(
MODEL_PATH, "Qwen2-1.5B-Instruct-GPTQ-Int8"
),
"qwen2-1.5b-instruct-gptq-int4": os.path.join(
MODEL_PATH, "Qwen2-1.5B-Instruct-GPTQ-Int4"
),
"qwen2-0.5b-instruct": os.path.join(MODEL_PATH, "Qwen2-0.5B-Instruct"),
"qwen2-0.5b-instruct-awq": os.path.join(MODEL_PATH, "Qwen2-0.5B-Instruct-AWQ"),
"qwen2-0.5b-instruct-gptq-int8": os.path.join(
MODEL_PATH, "Qwen2-0.5B-Instruct-GPTQ-Int8"
),
"qwen2-0.5b-instruct-gptq-int4": os.path.join(
MODEL_PATH, "Qwen2-0.5B-Instruct-GPTQ-Int4"
),
# (Llama2 based) We only support WizardLM-13B-V1.2 for now, which is trained from Llama-2 13b, see https://huggingface.co/WizardLM/WizardLM-13B-V1.2
"wizardlm-13b": os.path.join(MODEL_PATH, "WizardLM-13B-V1.2"),
# wget https://huggingface.co/TheBloke/vicuna-13B-v1.5-GGUF/resolve/main/vicuna-13b-v1.5.Q4_K_M.gguf -O models/ggml-model-q4_0.gguf
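These entries extend DB-GPT's name-to-path mapping for locally downloaded weights. A minimal sketch of how such a lookup can be used, assuming the dictionary is named LLM_MODEL_CONFIG as above; the env-based MODEL_PATH default and the fallback to treating an unknown name as a literal path are illustrative assumptions, not the project's actual logic:

import os

# Sketch only: MODEL_PATH definition is an assumption for this example.
MODEL_PATH = os.getenv("MODEL_PATH", "./models")

# Two of the new entries, mirrored from the diff above.
LLM_MODEL_CONFIG = {
    "glm-4-9b-chat": os.path.join(MODEL_PATH, "glm-4-9b-chat"),
    "qwen2-7b-instruct": os.path.join(MODEL_PATH, "Qwen2-7B-Instruct"),
}


def resolve_model_path(model_name: str) -> str:
    """Return the configured local path, or fall back to the raw name."""
    return LLM_MODEL_CONFIG.get(model_name, model_name)


print(resolve_model_path("qwen2-7b-instruct"))  # ./models/Qwen2-7B-Instruct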
40 changes: 38 additions & 2 deletions dbgpt/model/adapter/hf_adapter.py
@@ -18,6 +18,8 @@ class NewHFChatModelAdapter(LLMModelAdapter, ABC):
prompt template for this model
"""

trust_remote_code: bool = True

def new_adapter(self, **kwargs) -> "NewHFChatModelAdapter":
return self.__class__()

@@ -77,13 +79,18 @@ def load(self, model_path: str, from_pretrained_kwargs: dict):
model_path,
use_fast=self.use_fast_tokenizer(),
revision=revision,
trust_remote_code=True,
trust_remote_code=self.trust_remote_code,
)
except TypeError:
tokenizer = AutoTokenizer.from_pretrained(
model_path, use_fast=False, revision=revision, trust_remote_code=True
model_path,
use_fast=False,
revision=revision,
trust_remote_code=self.trust_remote_code,
)
try:
if "trust_remote_code" not in from_pretrained_kwargs:
from_pretrained_kwargs["trust_remote_code"] = self.trust_remote_code
model = AutoModelForCausalLM.from_pretrained(
model_path, low_cpu_mem_usage=True, **from_pretrained_kwargs
)
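Promoting trust_remote_code to a class attribute keeps the old default (True) but lets an adapter opt out for model families that ship no custom modelling code, and the `if "trust_remote_code" not in from_pretrained_kwargs` guard means an explicit caller-supplied value still wins. A hedged sketch of such a subclass; the adapter name and its matching rule are made up for illustration and are not part of this commit:

class PlainLlamaAdapter(NewHFChatModelAdapter):
    # Hypothetical adapter: standard Llama checkpoints need no remote code,
    # so load() will pass trust_remote_code=False unless the caller overrides it.
    trust_remote_code: bool = False

    def do_match(self, lower_model_name_or_path=None):
        return lower_model_name_or_path and "llama" in lower_model_name_or_path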
@@ -303,6 +310,20 @@ def do_match(self, lower_model_name_or_path: Optional[str] = None):
and "qwen" in lower_model_name_or_path
and "1.5" in lower_model_name_or_path
and "moe" not in lower_model_name_or_path
and "qwen2" not in lower_model_name_or_path
)


class Qwen2Adapter(QwenAdapter):

support_4bit: bool = True
support_8bit: bool = True

def do_match(self, lower_model_name_or_path: Optional[str] = None):
return (
lower_model_name_or_path
and "qwen2" in lower_model_name_or_path
and "instruct" in lower_model_name_or_path
)
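The new `"qwen2" not in lower_model_name_or_path` condition added to the Qwen1.5 adapter is what keeps the two adapters disjoint: a name such as "qwen2-1.5b-instruct" contains both "qwen" and "1.5", so without the exclusion it would be claimed by the older adapter before Qwen2Adapter is ever consulted. Illustrative checks, assuming do_match receives the lowercased model name or path:

qwen1_5 = QwenAdapter()
qwen2 = Qwen2Adapter()

assert qwen1_5.do_match("qwen1.5-7b-chat")          # still handled by the Qwen1.5 adapter
assert not qwen1_5.do_match("qwen2-1.5b-instruct")  # rejected by the new "qwen2" exclusion
assert qwen2.do_match("qwen2-1.5b-instruct")        # picked up by Qwen2Adapter instead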


@@ -480,6 +501,19 @@ def do_match(self, lower_model_name_or_path: Optional[str] = None):
)


class GLM4Aapter(NewHFChatModelAdapter):
"""
https://huggingface.co/THUDM/glm-4-9b-chat
"""

def do_match(self, lower_model_name_or_path: Optional[str] = None):
return (
lower_model_name_or_path
and "glm-4" in lower_model_name_or_path
and "chat" in lower_model_name_or_path
)


# The following code is used to register the model adapter
# The last registered model adapter is matched first
register_model_adapter(YiAdapter)
@@ -496,3 +530,5 @@ def do_match(self, lower_model_name_or_path: Optional[str] = None):
register_model_adapter(PhiAdapter)
register_model_adapter(SQLCoderAdapter)
register_model_adapter(OpenChatAdapter)
register_model_adapter(GLM4Aapter)
register_model_adapter(Qwen2Adapter)
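Because the comment above states that the last registered adapter is matched first, appending GLM4Aapter and Qwen2Adapter at the end gives them priority over the earlier, more generic entries. A toy sketch of that selection rule; the registry internals shown here are an assumption for illustration, not code from the project:

def pick_adapter(registered_adapter_classes, lower_model_name_or_path):
    # Assumed behaviour: scan newest-first; the first do_match() hit wins.
    for adapter_cls in reversed(registered_adapter_classes):
        adapter = adapter_cls()
        if adapter.do_match(lower_model_name_or_path):
            return adapter
    return None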
8 changes: 6 additions & 2 deletions dbgpt/model/adapter/vllm_adapter.py
@@ -40,7 +40,7 @@ def model_param_class(self, model_type: str = None) -> BaseModelParameters:
help="local model path of the huggingface model to use",
)
parser.add_argument("--model_type", type=str, help="model type")
parser.add_argument("--device", type=str, default=None, help="device")
# parser.add_argument("--device", type=str, default=None, help="device")
# TODO parse prompt template from `model_name` and `model_path`
parser.add_argument(
"--prompt_template",
@@ -76,7 +76,11 @@ def load_from_params(self, params):
# Set the attributes from the parsed arguments.
engine_args = AsyncEngineArgs(**vllm_engine_args_dict)
engine = AsyncLLMEngine.from_engine_args(engine_args)
return engine, engine.engine.tokenizer
tokenizer = engine.engine.tokenizer
if hasattr(tokenizer, "tokenizer"):
# vllm >= 0.2.7
tokenizer = tokenizer.tokenizer
return engine, tokenizer

def support_async(self) -> bool:
return True
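Recent vLLM releases wrap the Hugging Face tokenizer in a TokenizerGroup, so engine.engine.tokenizer is no longer the tokenizer itself; the hasattr check unwraps it while remaining compatible with older releases that return the tokenizer directly. A self-contained illustration of the same defensive pattern, using stand-in classes rather than vllm imports:

class FakeHFTokenizer:
    def __call__(self, text):
        return {"input_ids": list(range(len(text.split())))}


class FakeTokenizerGroup:
    # Stand-in for the wrapper object newer vLLM versions return.
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer


def unwrap_tokenizer(tokenizer):
    if hasattr(tokenizer, "tokenizer"):  # vllm >= 0.2.7 hands back a wrapper
        tokenizer = tokenizer.tokenizer
    return tokenizer


hf_tok = FakeHFTokenizer()
assert unwrap_tokenizer(FakeTokenizerGroup(hf_tok)) is hf_tok  # new behaviour
assert unwrap_tokenizer(hf_tok) is hf_tok                      # old behaviour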
4 changes: 1 addition & 3 deletions dbgpt/model/llm_out/vllm_llm.py
@@ -61,9 +61,7 @@ async def generate_stream(
**gen_params
)

results_generator = model.generate(
prompt, sampling_params, request_id, prompt_token_ids=prompt_token_ids
)
results_generator = model.generate(prompt, sampling_params, request_id)
async for request_output in results_generator:
prompt = request_output.prompt
if echo:
46 changes: 30 additions & 16 deletions dbgpt/model/proxy/llms/zhipu.py
@@ -1,3 +1,4 @@
import os
from concurrent.futures import Executor
from typing import Iterator, Optional

@@ -37,23 +38,37 @@ def __init__(
self,
model: Optional[str] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
model_alias: Optional[str] = "zhipu_proxyllm",
context_length: Optional[int] = 8192,
executor: Optional[Executor] = None,
):
try:
import zhipuai
from zhipuai import ZhipuAI

except ImportError as exc:
raise ValueError(
"Could not import python package: zhipuai "
"Please install dashscope by command `pip install zhipuai"
) from exc
if (
"No module named" in str(exc)
or "cannot find module" in str(exc).lower()
):
raise ValueError(
"The python package 'zhipuai' is not installed. "
"Please install it by running `pip install zhipuai`."
) from exc
else:
raise ValueError(
"Could not import python package: zhipuai "
"This may be due to a version that is too low. "
"Please upgrade the zhipuai package by running `pip install --upgrade zhipuai`."
) from exc
if not model:
model = CHATGLM_DEFAULT_MODEL
if api_key:
zhipuai.api_key = api_key
if not api_key:
# Compatible with DB-GPT's config
api_key = os.getenv("ZHIPU_PROXY_API_KEY")

self._model = model
self.client = ZhipuAI(api_key=api_key, base_url=api_base)

super().__init__(
model_names=[model, model_alias],
@@ -84,26 +99,25 @@ def sync_generate_stream(
request: ModelRequest,
message_converter: Optional[MessageConverter] = None,
) -> Iterator[ModelOutput]:
import zhipuai

request = self.local_covert_message(request, message_converter)

messages = request.to_common_messages(support_system_role=False)

model = request.model or self._model
try:
res = zhipuai.model_api.sse_invoke(
response = self.client.chat.completions.create(
model=model,
prompt=messages,
messages=messages,
temperature=request.temperature,
# top_p=params.get("top_p"),
incremental=False,
stream=True,
)
for r in res.events():
if r.event == "add":
yield ModelOutput(text=r.data, error_code=0)
elif r.event == "error":
yield ModelOutput(text=r.data, error_code=1)
partial_text = ""
for chunk in response:
delta_content = chunk.choices[0].delta.content
partial_text += delta_content
yield ModelOutput(text=partial_text, error_code=0)
except Exception as e:
return ModelOutput(
text=f"**LLMServer Generate Error, Please CheckErrorInfo.**: {e}",
10 changes: 10 additions & 0 deletions dbgpt/rag/knowledge/datasource.py
@@ -52,6 +52,7 @@ def support_chunk_strategy(cls) -> List[ChunkStrategy]:
return [
ChunkStrategy.CHUNK_BY_SIZE,
ChunkStrategy.CHUNK_BY_SEPARATOR,
ChunkStrategy.CHUNK_BY_PAGE,
]

@classmethod
@@ -63,3 +64,12 @@ def type(cls) -> KnowledgeType:
def document_type(cls) -> DocumentType:
"""Return document type."""
return DocumentType.DATASOURCE

@classmethod
def default_chunk_strategy(cls) -> ChunkStrategy:
"""Return default chunk strategy.
Returns:
ChunkStrategy: default chunk strategy
"""
return ChunkStrategy.CHUNK_BY_PAGE
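With CHUNK_BY_PAGE both added to the supported strategies and returned as the default, datasource documents are page-chunked when no strategy is specified. A small hedged sketch of that fallback; the helper and the stub classes are illustrative stand-ins, not the project's chunk-manager code:

from enum import Enum


class ChunkStrategyStub(Enum):
    CHUNK_BY_SIZE = "chunk_by_size"
    CHUNK_BY_SEPARATOR = "chunk_by_separator"
    CHUNK_BY_PAGE = "chunk_by_page"


class DatasourceKnowledgeStub:
    @classmethod
    def support_chunk_strategy(cls):
        return [
            ChunkStrategyStub.CHUNK_BY_SIZE,
            ChunkStrategyStub.CHUNK_BY_SEPARATOR,
            ChunkStrategyStub.CHUNK_BY_PAGE,
        ]

    @classmethod
    def default_chunk_strategy(cls):
        return ChunkStrategyStub.CHUNK_BY_PAGE


def choose_strategy(knowledge_cls, requested=None):
    """Use the requested strategy when supported, otherwise the class default."""
    if requested in knowledge_cls.support_chunk_strategy():
        return requested
    return knowledge_cls.default_chunk_strategy()


assert choose_strategy(DatasourceKnowledgeStub) is ChunkStrategyStub.CHUNK_BY_PAGE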
1 change: 1 addition & 0 deletions dbgpt/rag/summary/db_summary_client.py
@@ -104,6 +104,7 @@ def init_db_profile(self, db_summary_client, dbname):
connector=db_summary_client.db,
index_store=vector_connector.index_client,
)

if len(db_assembler.get_chunks()) > 0:
db_assembler.persist()
else:
