Skip to content

Commit

Permalink
update notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
semio committed Jun 7, 2024
1 parent a89d5f5 commit 1c49fb8
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions automation-api/yival_experiments/notebooks/result_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,53 @@



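# # For double-checking the evaluators
# Check the top 10 and bottom 10 questions per model, ranked by `ai_wrong_percentage`.
# NOTE(review): the bottom-10 cutoff (`rank >= 275`) is hard-coded in the `to_check` query below and
# depends on how many questions are ranked per model — confirm it whenever the question set changes.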

# + magic_args="--save double_check_results" language="sql"
# select
# question_id,
# model_configuration_id,
# (100 - correct_rate) as ai_wrong_percentage,
# human_wrong_percentage,
# ai_wrong_percentage - human_wrong_percentage as diff,
# sdg_topic,
# other_topics
# from model_topic_stat
# -- where diff > 0
# order by
# "sdg_topic",
# cast(other_topics as varchar),
# "model_configuration_id"

# + language="sql"
# select *
# from double_check_results
# where model_configuration_id = 'mc026' AND ai_wrong_percentage = 0
# order by question_id

# + magic_args="--save double_check_results_1" language="sql"
# select
# model_configuration_id,
# question_id,
# ai_wrong_percentage,
# rank() over (partition by model_configuration_id order by ai_wrong_percentage) as rank
# from double_check_results
# order by model_configuration_id, rank, question_id

# + magic_args="to_check <<" language="sql"
#
# select * from double_check_results_1 where rank <= 10 OR rank >= 275
# -

# Convert the `to_check` SQL result (captured by the `to_check <<` cell above) into a DataFrame.
to_check_df = to_check.DataFrame()

# Show only the rows that belong to model configuration 'mc026'.
to_check_df.loc[to_check_df["model_configuration_id"] == "mc026"]







# # for climate study questions
Expand Down Expand Up @@ -814,6 +861,10 @@



# Write `outputs` to a parquet file; the path is relative to the current working directory.
# NOTE(review): presumably `outputs` is a pandas DataFrame built earlier in this notebook — confirm.
outputs.to_parquet("./data/outputs/latest_results.parquet")



# alibaba = %sql select * from outputs where model_id = 'qwen-max-0403'
# err = %sql select * from outputs where model_id = 'qwen-max-0403' and raw_output like '%Error%'

Expand Down

0 comments on commit 1c49fb8

Please sign in to comment.