Commit

chore: readjust structure
taharallouche committed Oct 26, 2024
1 parent 51a7fa3 commit 9baccc8
Showing 4 changed files with 194 additions and 66 deletions.
23 changes: 23 additions & 0 deletions hakeem/core/utils/coerce.py
@@ -0,0 +1,23 @@
import pandas as pd


def coerce_schema(
    annotations: pd.DataFrame, task_column: str, worker_column: str
) -> pd.DataFrame:
    all_columns = annotations.reset_index().columns
    required = [task_column, worker_column]

    if missing := set(required) - set(all_columns):
        raise ValueError(
            f"Annotations should have {task_column} and"
            f" {worker_column} as columns or index levels, missing {missing}."
        )

    if set(all_columns) == set(required):
        raise ValueError("Annotations should have at least one label column")

    annotations = annotations.reset_index().set_index(required)[
        [column for column in annotations.columns if column not in required]
    ]

    return annotations
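
As a quick orientation (not part of the commit), here is a minimal usage sketch of coerce_schema; the annotation frame, column names and label columns below are made up for illustration:

import pandas as pd

from hakeem.core.utils.coerce import coerce_schema

# Hypothetical annotations: "question"/"voter" are plain columns, "a"/"b" are labels.
annotations = pd.DataFrame(
    {
        "question": ["q1", "q1", "q2"],
        "voter": ["v1", "v2", "v1"],
        "a": [1, 0, 1],
        "b": [0, 1, 1],
    }
)

# Returns a frame indexed by ("question", "voter") holding only the label columns.
coerced = coerce_schema(annotations, task_column="question", worker_column="voter")
print(coerced)
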
132 changes: 66 additions & 66 deletions hakeem/paper_results/evaluation/accuracy.py
@@ -10,92 +10,92 @@
[The hunk re-indents the module throughout; the only non-whitespace change moves the
get_mean_confidence_interval import from hakeem.core.utils.utils to
hakeem.paper_results.evaluation.utils. The updated code reads:]

from tqdm import tqdm

from hakeem.core.aggregation.aggregators.condorcet import (
    CondorcetAggregator,
)
from hakeem.core.aggregation.aggregators.mallows import (
    DiceAggregator,
    EuclidAggregator,
    JaccardAggregator,
    StandardApprovalAggregator,
)
from hakeem.core.aggregation.base import Aggregator
from hakeem.paper_results.evaluation.utils import get_mean_confidence_interval
from hakeem.paper_results.inventory import COLUMNS

logging.basicConfig(
    level=logging.INFO, format="'%(asctime)s - %(levelname)s - %(message)s'"
)


def compare_methods(
    annotations: pd.DataFrame,
    groundtruth: pd.DataFrame,
    max_voters: int,
    n_batch: int,
    aggregators: Mapping[str, Aggregator] = {
        "Standard Approval Aggregator": StandardApprovalAggregator(),
        "Euclidean Mallow Aggregator": EuclidAggregator(),
        "Jaccard Mallow Aggregator": JaccardAggregator(),
        "Dice Mallow Aggregator": DiceAggregator(),
        "Condorcet Aggregator": CondorcetAggregator(),
    },
) -> dict[str, NDArray]:
    accuracy = {
        aggregator: np.zeros([n_batch, max_voters - 1]) for aggregator in aggregators
    }
    confidence_intervals = {
        aggregator: np.zeros([max_voters - 1, 3]) for aggregator in aggregators
    }

    logging.info("Experiment started : running the different aggregators ...")

    for num in tqdm(
        range(1, max_voters), desc="Number of voters", position=0, leave=True
    ):
        for batch in tqdm(range(n_batch), desc="Batch", position=1, leave=False):
            voters = sample(
                list(annotations.index.get_level_values(COLUMNS.voter).unique()), num
            )
            annotations_batch = annotations[
                annotations.index.get_level_values(COLUMNS.voter).isin(voters)
            ]

            for name, aggregator in aggregators.items():
                aggregated_labels = aggregator.fit_predict(annotations_batch)
                accuracy[name][batch, num - 1] = accuracy_score(
                    groundtruth, aggregated_labels
                )

        for name in aggregators:
            confidence_intervals[name][num - 1, :] = get_mean_confidence_interval(
                accuracy[name][:, num - 1]
            )

    logging.info("Experiment completed, gathering the results ..")

    return confidence_intervals


def plot_accuracies(confidence_intervals: Mapping[str, NDArray]) -> None:
    fig = plt.figure()  # noqa: F841
    x_limit = (
        max(accuracies.shape[0] for accuracies in confidence_intervals.values()) + 1
    )

    for aggregator, confidence_interval in confidence_intervals.items():
        plt.errorbar(
            range(1, x_limit),
            confidence_interval[:, 0],
            label=aggregator,
        )
        plt.fill_between(
            range(1, x_limit),
            confidence_interval[:, 1],
            confidence_interval[:, 2],
            alpha=0.2,
        )

    plt.legend()
    plt.xlabel("Number of voters")
    plt.ylabel("Accuracy")
    plt.savefig("results.png")
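
As a rough end-to-end sketch (not part of the commit), compare_methods and plot_accuracies might be driven as below. The file names, column names and parameter values are hypothetical, the index level names are assumed to match COLUMNS from hakeem.paper_results.inventory, and get_mean_confidence_interval is assumed to return a (mean, lower bound, upper bound) triple, since the plotting code reads columns 0, 1 and 2 of each array.

import pandas as pd

from hakeem.paper_results.evaluation.accuracy import compare_methods, plot_accuracies

# Hypothetical input files: annotations indexed by (task, voter) with one column per
# candidate label, and a ground truth indexed by task with the same label columns.
annotations = pd.read_csv("annotations.csv", index_col=["Question", "Voter"])
groundtruth = pd.read_csv("groundtruth.csv", index_col="Question")

# Run every default aggregator on 1 .. max_voters - 1 randomly sampled voters,
# n_batch times per voter count, collecting the mean accuracy and its interval.
confidence_intervals = compare_methods(
    annotations, groundtruth, max_voters=20, n_batch=50
)

# Plot one accuracy curve per aggregator with a shaded band and save results.png.
plot_accuracies(confidence_intervals)
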
File renamed without changes.
105 changes: 105 additions & 0 deletions tests/core/utils/test_coerce.py
@@ -0,0 +1,105 @@
import pandas as pd
import pytest


@pytest.mark.ut
@pytest.mark.parametrize(
    ["annotations", "task_column", "worker_column", "expected_result"],
    [
        (
            pd.DataFrame(
                {
                    "task": ["q1", "q1"],
                    "worker": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index(["task", "worker"]),
            "task",
            "worker",
            pd.DataFrame(
                {
                    "task": ["q1", "q1"],
                    "worker": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index(["task", "worker"]),
        ),
        (
            pd.DataFrame(
                {
                    "question": ["q1", "q1"],
                    "voter": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ),
            "question",
            "voter",
            pd.DataFrame(
                {
                    "question": ["q1", "q1"],
                    "voter": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index(["question", "voter"]),
        ),
        (
            pd.DataFrame(
                {
                    "question": ["q1", "q1"],
                    "voter": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index("question"),
            "question",
            "voter",
            pd.DataFrame(
                {
                    "question": ["q1", "q1"],
                    "voter": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index(["question", "voter"]),
        ),
        (
            pd.DataFrame(
                {
                    "extra_index_level": ["l1", "l2"],
                    "question": ["q1", "q1"],
                    "voter": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index(["extra_index_level", "question"]),
            "question",
            "voter",
            pd.DataFrame(
                {
                    "question": ["q1", "q1"],
                    "voter": ["v1", "v2"],
                    "a": [1, 0],
                    "b": [0, 1],
                }
            ).set_index(["question", "voter"]),
        ),
    ],
)
def test_coerce_schema(
    annotations: pd.DataFrame,
    task_column: str,
    worker_column: str,
    expected_result: pd.DataFrame,
):
    # Given
    from hakeem.core.utils.coerce import coerce_schema

    # When
    result = coerce_schema(annotations, task_column, worker_column)

    # Then
    pd.testing.assert_frame_equal(expected_result, result)
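
The parametrized cases above only exercise the happy path. Two additional cases covering the ValueError branches of coerce_schema could look like the following sketch (hypothetical tests, not part of the commit):

import pandas as pd
import pytest

from hakeem.core.utils.coerce import coerce_schema


@pytest.mark.ut
def test_coerce_schema_raises_on_missing_required_column():
    # Given: the worker column is absent from both the columns and the index.
    annotations = pd.DataFrame({"task": ["q1"], "a": [1]})

    # When / Then
    with pytest.raises(ValueError, match="missing"):
        coerce_schema(annotations, "task", "worker")


@pytest.mark.ut
def test_coerce_schema_raises_when_no_label_column():
    # Given: the frame is indexed by the required columns and has no label column.
    annotations = pd.DataFrame({"task": ["q1"], "worker": ["v1"]}).set_index(
        ["task", "worker"]
    )

    # When / Then
    with pytest.raises(ValueError, match="at least one label column"):
        coerce_schema(annotations, "task", "worker")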
