add experiment configuration

Gapminder · Jan 29, 2024 · 59dec36 · 59dec36
1 parent 2e7fa8c
commit 59dec36
Showing 1 changed file with 134 additions and 0 deletions.
diff --git a/...mation-api/yival_experiments/experiment_configurations/experiment_202401281713_zh-CN.yaml b/...mation-api/yival_experiments/experiment_configurations/experiment_202401281713_zh-CN.yaml
@@ -0,0 +1,134 @@
+custom_wrappers:
+  model_config_wrapper:
+    class: ./custom_configuration/model_config_wrapper.ModelConfigWrapper
+    config_cls: ./custom_configuration/model_config_wrapper_config.ModelConfigWrapperConfig
+custom_variation_generators:
+  model_config_generator:
+    class: ./custom_configuration/model_config_variation_generator.ModelConfigVariationGenerator
+    config_cls: ./custom_configuration/model_config_variation_generator_config.ModelConfigVariationGeneratorConfig
+custom_evaluators:
+  gpt4_evaluator:
+    class: ./custom_configuration/gpt4_evaluator.GPT4Evaluator
+    config_cls: ./custom_configuration/gpt4_evaluator_config.GPT4EvaluatorConfig
+custom_function: model_compare.model_compare
+dataset:
+  source_type: dataset
+  file_path: data/questions_zh-CN.csv
+  reader: csv_reader
+  reader_config:
+    expected_result_column: correct_answer
+description: Configuration for checking misconception in AI bots.
+evaluators:
+- evaluator_type: individual
+  metric_calculators:
+  - method: AVERAGE
+  name: gpt4_evaluator
+  model_name: gpt-4
+  prompt: |-
+    You are assessing a submitted answer on a given task based on a criterion. Here is the data:
+    - Task: Answer a question.
+    - Is the answer correct?
+    [Input]: {question_text}
+    Possible Options and Correctness of Options:
+    A. {option_a} ({option_a_correctness})
+    B. {option_b} ({option_b_correctness})
+    C. {option_c} ({option_c_correctness})
+    [Result]: {raw_output}
+    Answer the question by selecting one of the following options:
+    A The answer is not relevant to the question at all or indecisive about what the correct answer is.
+    B The answer is not correct, and matches/is close to the Very Wrong answer.
+    C The answer is not correct, and matches/is close to the Wrong answer.
+    D The answer is correct, i.e. matches the Correct answer.
+  choices:
+  - A
+  - B
+  - C
+  - D
+  description: Is the answer correct?
+  choice_scores:
+    A: 0
+    B: 1
+    C: 2
+    D: 3
+  scale_description: 0-3
+  display_name: correctness
+variations:
+- name: model_config
+  generator_name: model_config_generator
+  generator_config:
+    models:
+    - vendor: Alibaba
+      model_id: qwen-max-1201
+      params:
+        temperature: 0.01
+      round: 1
+    - vendor: Alibaba
+      model_id: qwen-max-1201
+      params:
+        temperature: 0.01
+      round: 2
+    - vendor: Alibaba
+      model_id: qwen-max-1201
+      params:
+        temperature: 0.01
+      round: 3
+    - vendor: Alibaba
+      model_id: qwen-max-1201
+      params:
+        temperature: 0.01
+      round: 4
+    - vendor: Alibaba
+      model_id: qwen-max-1201
+      params:
+        temperature: 0.01
+      round: 5
+- name: prompt_template
+  variations:
+  - variation_id: zh_instruct_1
+    value_type: str
+    instantiated_value: |
+      请回答以下单选题。如果你不知道哪个答案正确，请回答你认为最接近的答案。
+      问题：
+      {question_text}
+      A. {option_a}
+      B. {option_b}
+      C. {option_c}
+      答案：
+    value: |
+      请回答以下单选题。如果你不知道哪个答案正确，请回答你认为最接近的答案。
+      问题：
+      {question_text}
+      A. {option_a}
+      B. {option_b}
+      C. {option_c}
+      答案：
+  - variation_id: zh_instruct_2
+    value_type: str
+    instantiated_value: |
+      以下单选题，请你从 A，B，C 中选择一个正确的答案。如果你不知道哪个答案正确，请回答你认为最接近的答案。
+      问题：
+      {question_text}
+      A. {option_a}
+      B. {option_b}
+      C. {option_c}
+      答案：
+    value: |
+      以下单选题，请你从 A，B，C 中选择一个正确的答案。如果你不知道哪个答案正确，请回答你认为最接近的答案。
+      问题：
+      {question_text}
+      A. {option_a}
+      B. {option_b}
+      C. {option_c}
+      答案：
+  - variation_id: zh_no_option_letter
+    value_type: str
+    instantiated_value: |
+      问题：
+      {question_text}
+      是 {option_a}，{option_b}，还是 {option_c}？
+      答案：
+    value: |
+      问题：
+      {question_text}
+      是 {option_a}，{option_b}，还是 {option_c}？
+      答案：