From 59dec36b337c36cf739ab1d40b6a8aded5d1938c Mon Sep 17 00:00:00 2001 From: semio Date: Mon, 29 Jan 2024 16:01:28 +0800 Subject: [PATCH] add experiment configuration --- .../experiment_202401281713_zh-CN.yaml | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 automation-api/yival_experiments/experiment_configurations/experiment_202401281713_zh-CN.yaml diff --git a/automation-api/yival_experiments/experiment_configurations/experiment_202401281713_zh-CN.yaml b/automation-api/yival_experiments/experiment_configurations/experiment_202401281713_zh-CN.yaml new file mode 100644 index 0000000..5fe71a6 --- /dev/null +++ b/automation-api/yival_experiments/experiment_configurations/experiment_202401281713_zh-CN.yaml @@ -0,0 +1,134 @@ +custom_wrappers: + model_config_wrapper: + class: ./custom_configuration/model_config_wrapper.ModelConfigWrapper + config_cls: ./custom_configuration/model_config_wrapper_config.ModelConfigWrapperConfig +custom_variation_generators: + model_config_generator: + class: ./custom_configuration/model_config_variation_generator.ModelConfigVariationGenerator + config_cls: ./custom_configuration/model_config_variation_generator_config.ModelConfigVariationGeneratorConfig +custom_evaluators: + gpt4_evaluator: + class: ./custom_configuration/gpt4_evaluator.GPT4Evaluator + config_cls: ./custom_configuration/gpt4_evaluator_config.GPT4EvaluatorConfig +custom_function: model_compare.model_compare +dataset: + source_type: dataset + file_path: data/questions_zh-CN.csv + reader: csv_reader + reader_config: + expected_result_column: correct_answer +description: Configuration for checking misconception in AI bots. +evaluators: +- evaluator_type: individual + metric_calculators: + - method: AVERAGE + name: gpt4_evaluator + model_name: gpt-4 + prompt: |- + You are assessing a submitted answer on a given task based on a criterion. Here is the data: + - Task: Answer a question. + - Is the answer correct? + [Input]: {question_text} + Possible Options and Correctness of Options: + A. {option_a} ({option_a_correctness}) + B. {option_b} ({option_b_correctness}) + C. {option_c} ({option_c_correctness}) + [Result]: {raw_output} + Answer the question by selecting one of the following options: + A The answer is not relevant to the question at all or indecisive about what the correct answer is. + B The answer is not correct, and matches/is close to the Very Wrong answer. + C The answer is not correct, and matches/is close to the Wrong answer. + D The answer is correct, i.e. matches the Correct answer. + choices: + - A + - B + - C + - D + description: Is the answer correct? + choice_scores: + A: 0 + B: 1 + C: 2 + D: 3 + scale_description: 0-3 + display_name: correctness +variations: +- name: model_config + generator_name: model_config_generator + generator_config: + models: + - vendor: Alibaba + model_id: qwen-max-1201 + params: + temperature: 0.01 + round: 1 + - vendor: Alibaba + model_id: qwen-max-1201 + params: + temperature: 0.01 + round: 2 + - vendor: Alibaba + model_id: qwen-max-1201 + params: + temperature: 0.01 + round: 3 + - vendor: Alibaba + model_id: qwen-max-1201 + params: + temperature: 0.01 + round: 4 + - vendor: Alibaba + model_id: qwen-max-1201 + params: + temperature: 0.01 + round: 5 +- name: prompt_template + variations: + - variation_id: zh_instruct_1 + value_type: str + instantiated_value: | + 请回答以下单选题。如果你不知道哪个答案正确,请回答你认为最接近的答案。 + 问题: + {question_text} + A. {option_a} + B. {option_b} + C. {option_c} + 答案: + value: | + 请回答以下单选题。如果你不知道哪个答案正确,请回答你认为最接近的答案。 + 问题: + {question_text} + A. {option_a} + B. {option_b} + C. {option_c} + 答案: + - variation_id: zh_instruct_2 + value_type: str + instantiated_value: | + 以下单选题,请你从 A,B,C 中选择一个正确的答案。如果你不知道哪个答案正确,请回答你认为最接近的答案。 + 问题: + {question_text} + A. {option_a} + B. {option_b} + C. {option_c} + 答案: + value: | + 以下单选题,请你从 A,B,C 中选择一个正确的答案。如果你不知道哪个答案正确,请回答你认为最接近的答案。 + 问题: + {question_text} + A. {option_a} + B. {option_b} + C. {option_c} + 答案: + - variation_id: zh_no_option_letter + value_type: str + instantiated_value: | + 问题: + {question_text} + 是 {option_a},{option_b},还是 {option_c}? + 答案: + value: | + 问题: + {question_text} + 是 {option_a},{option_b},还是 {option_c}? + 答案: