Skip to content

Commit

Permalink
v0.2.6 (#8)
Browse files Browse the repository at this point in the history
* TopBottomR similarity
* Update centrality tool
* Update ConceptTypicality for new version of fcapsy
* Centrality html export
* Black formatter
  • Loading branch information
mikulatomas authored Feb 23, 2022
1 parent 5c69685 commit 7536c9c
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 53 deletions.
4 changes: 2 additions & 2 deletions fcapsy_experiments/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .correlation_table import CorrelationTable
from .correlations_box_plot import correlations_boxplots

__version__ = "0.2.5"
__version__ = "0.2.6"
__author__ = "Tomáš Mikula"
__email__ = "mail@tomasmikula.cz"
__license__ = "MIT license"
__license__ = "MIT license"
14 changes: 12 additions & 2 deletions fcapsy_experiments/_styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,20 @@

css_corr = [
{"selector": "td, th", "props": [("text-align", "center")]},
# {"selector": "thead th", "props": [("text-align", "center !important")]},
{"selector": "thead th", "props": [("height", "50px"), ("text-align", "left")]},
{"selector": "tbody th", "props": [("text-align", "right")]},
{
"selector": "th.col_heading",
"props": [("writing-mode", "vertical-rl"), ("transform", "rotateZ(-90deg)")],
"props": [
("writing-mode", "vertical-rl"),
("transform", "rotateZ(-90deg)"),
("text-align", "left"),
],
},
]

css_centrality = [
{"selector": "td, th", "props": [("text-align", "center")]},
{"selector": "thead th", "props": [("text-align", "center")]},
{"selector": "tbody th", "props": [("text-align", "right")]},
]
2 changes: 1 addition & 1 deletion fcapsy_experiments/centrality/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .centrality import Centrality
from .centrality import Centrality
4 changes: 2 additions & 2 deletions fcapsy_experiments/centrality/centrality.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd

from fcapsy.centrality import centrality
from fcapsy_experiments._styles import css, css_corr
from fcapsy_experiments._styles import css, css_centrality


class Centrality:
Expand Down Expand Up @@ -95,6 +95,6 @@ def to_html(self, include_core_flag: bool = False, quantile: float = 0.75) -> st

final_table = final_table.style.format(precision=3)
final_table.background_gradient(cmap="RdYlGn")
final_table.set_table_styles(css + css_corr)
final_table.set_table_styles(css + css_centrality)

return final_table.to_html()
2 changes: 1 addition & 1 deletion fcapsy_experiments/mca/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .mca_concept import MCAConcept
from .mca_concept import MCAConcept
30 changes: 25 additions & 5 deletions fcapsy_experiments/mca/mca_concept.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,22 @@ def to_plotly(self) -> "go.Figure":
data = self.concept_df_transformed.copy()

hover_data = {
"x": False,
"y": False,
}
"x": False,
"y": False,
}

if self._color_by is not None:
color_name = self._color_by[0]
data[color_name] = self._color_by[1]
hover_data[color_name] = ":.2f"
else:
color_name = None

if self.n_components == 3:
data = data.rename(columns={0: "x", 1: "y", 2: "z"})

hover_data["z"] = False

fig = px.scatter_3d(
data,
x="x",
Expand Down Expand Up @@ -107,3 +107,23 @@ def to_plotly(self) -> "go.Figure":
)

return fig

def to_plotly_html(
    self, default_width: int = 700, default_height: int = 390
) -> str:
    """Export the plotly figure of this object as an HTML string.

    The figure is obtained from ``self.to_plotly()`` and serialised with its
    ``to_html`` method. ``full_html=False`` is passed, and both plotly.js and
    MathJax are requested with the ``"cdn"`` option.

    Args:
        default_width (int, optional): default graph width. Defaults to 700.
        default_height (int, optional): default graph height. Defaults to 390.

    Returns:
        str: graph html
    """
    figure = self.to_plotly()

    # Collected in one place so the export configuration is easy to scan.
    export_options = {
        "full_html": False,
        "include_plotlyjs": "cdn",
        "include_mathjax": "cdn",
        "default_width": default_width,
        "default_height": default_height,
    }

    return figure.to_html(**export_options)
1 change: 1 addition & 0 deletions fcapsy_experiments/typicality/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .concept_typicality import ConceptTypicality
from .top_r_similarity import TopRSimilarity
from .top_bottom_r_similarity import TopBottomRSimilarity
12 changes: 8 additions & 4 deletions fcapsy_experiments/typicality/concept_typicality.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ def __init__(
"typ_avg": {
# must be callable
"func": typicality_avg,
"args": {"J": [jaccard], "SMC": [smc], "R": [russell_rao]},
"args": {
"J": {"similarity": jaccard},
"SMC": {"similarity": smc},
"R": {"similarity": russell_rao},
},
}
}

Expand Down Expand Up @@ -82,10 +86,10 @@ def _init(self, concept, count, typicality_functions, extra_columns):
for typicality in typicality_functions.values():
function = typicality["func"]
args = typicality["args"].values()

if args:
row = []
for name, arg in typicality["args"].items():
row.append(function(item, concept, *arg))
for _, arg in typicality["args"].items():
row.append(function(item, concept, **arg))
else:
row.append(function(item, concept))

Expand Down
49 changes: 49 additions & 0 deletions fcapsy_experiments/typicality/top_bottom_r_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import typing

import pandas as pd

from itertools import combinations

from .top_r_similarity import TopRSimilarity


class TopBottomRSimilarity(TopRSimilarity):
    """TopR similarity evaluated on both ends of each pair of orderings.

    For every compared pair of columns and every ``r``, the similarity of the
    first ``r`` items of both orderings is multiplied by the similarity of the
    first ``r`` items of both *reversed* orderings.
    """

    @staticmethod
    def _init(inst, to_columns):
        """Build the long-format table of top-bottom-r similarities.

        Args:
            inst: instance providing ``_source``, ``_context``, ``_similarity``
                and the ``_get_column_orders`` / ``_top_r_similarity`` helpers.
            to_columns: columns against which every column of ``inst._source``
                is compared (a column is never paired with itself).

        Returns:
            pd.DataFrame: one row per (pair, r) with columns ``r``,
            ``top_bottom_r_similarity`` and ``label``.
        """
        source = inst._source

        # r runs from 1 up to (but not including) the number of rows.
        r_values = range(1, len(source.index))

        column_pairs = [
            (first, second)
            for first in source.columns
            for second in to_columns
            if first != second
        ]
        orderings, labels = inst._get_column_orders(source, column_pairs)

        rows = []

        for (order_1, order_2), label in zip(orderings, labels):
            # Reversed orderings put the lowest ranked items first, so the
            # same helper can score the "bottom r" part.
            order_1_reversed = order_1[::-1]
            order_2_reversed = order_2[::-1]

            for r in r_values:
                top_score = inst._top_r_similarity(
                    inst._context, inst._similarity, order_1, order_2, r
                )
                bottom_score = inst._top_r_similarity(
                    inst._context,
                    inst._similarity,
                    order_1_reversed,
                    order_2_reversed,
                    r,
                )
                rows.append([r, top_score * bottom_score, label])

        return pd.DataFrame(rows, columns=["r", "top_bottom_r_similarity", "label"])
84 changes: 49 additions & 35 deletions fcapsy_experiments/typicality/top_r_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,26 @@ def __init__(
source: "pd.DataFrame",
context: "concepts.Context",
similarity: typing.Callable = jaccard,
ignore_columns: typing.List[str] = None,
to_columns: typing.List[str] = None,
) -> None:
"""Calculates TopR similarities for given dataframe.
Args:
source (pd.DataFrame): source data (usually objects ordered by multiple metrics)
context (concepts.Context): source formal context
similarity (typing.Callable, optional): similarity which should be used. Defaults to jaccard.
ignore_columns (typing.List[str], optional): which columns to ignore from source dataframe. Defaults to None.
to_columns (typing.List[str], optional): which columns to compare every other from source dataframe. Defaults to None (means all).
"""
if ignore_columns is None:
ignore_columns = []
if to_columns is None:
to_columns = source.columns

self._similarity = similarity
self._source = source
self._context = context
self.df = self._init(self, ignore_columns)
self.df = self._init(self, to_columns)

@staticmethod
def _k_values_or_until_differs(iterator, r):
def _r_values_or_until_differs(iterator, r):
max_idx = len(iterator) - 1

if r == 0:
Expand All @@ -47,7 +47,26 @@ def _k_values_or_until_differs(iterator, r):
break

@staticmethod
def _top_r_similarity(inst, metric_1_order, metric_2_order, r):
def _get_column_orders(df, tuples):
results = []
labels = []

for column1, column2 in tuples:
column1_order = list(
df.sort_values(column1, ascending=False, kind="mergesort").index
)

column2_order = list(
df.sort_values(column2, ascending=False, kind="mergesort").index
)

results.append([column1_order, column2_order])
labels.append(f"{column1}-{column2}")

return results, labels

@staticmethod
def _top_r_similarity(context, similarity, metric_1_order, metric_2_order, r):
def _get_vectors(context, items):
try:
label_domain = context._extents
Expand All @@ -62,56 +81,50 @@ def _get_vectors(context, items):
return vectors

vectors_1 = _get_vectors(
inst._context, list(inst._k_values_or_until_differs(metric_1_order, r))
context, list(TopRSimilarity._r_values_or_until_differs(metric_1_order, r))
)
vectors_2 = _get_vectors(
inst._context, list(inst._k_values_or_until_differs(metric_2_order, r))
context, list(TopRSimilarity._r_values_or_until_differs(metric_2_order, r))
)

i1 = mean(
(max((inst._similarity(b1, b2) for b2 in vectors_1)) for b1 in vectors_2)
)
i1 = mean((max((similarity(b1, b2) for b2 in vectors_1)) for b1 in vectors_2))

i2 = mean(
(max((inst._similarity(b1, b2) for b2 in vectors_2)) for b1 in vectors_1)
)
i2 = mean((max((similarity(b1, b2) for b2 in vectors_2)) for b1 in vectors_1))

return min(i1, i2)

@staticmethod
def _init(inst, ignore_columns):
def _init(inst, to_columns):
r_range = range(1, len(inst._source.index))

results = []

filtered_columns = filter(
lambda c: c not in ignore_columns, inst._source.columns
)

for column1, column2 in combinations(filtered_columns, 2):
column1_order = list(
inst._source.sort_values(
column1, ascending=False, kind="mergesort"
).index
)
column2_order = list(
inst._source.sort_values(
column2, ascending=False, kind="mergesort"
).index
)
# filtered_columns = filter(
# lambda c: c not in ignore_columns, inst._source.columns
# )

label = f"{column1}-{column2}"
columns_tuples, labels = inst._get_column_orders(
inst._source,
[(x, y) for x in inst._source.columns for y in to_columns if x != y],
)

for (column1_order, column2_order), label in zip(columns_tuples, labels):
for r in r_range:
results.append(
[
r,
inst._top_r_similarity(inst, column1_order, column2_order, r),
inst._top_r_similarity(
inst._context,
inst._similarity,
column1_order,
column2_order,
r,
),
label,
]
)

return pd.DataFrame(results, columns=["r", "top_k_similarity", "label"])
return pd.DataFrame(results, columns=["r", "top_r_similarity", "label"])

def to_plotly(self) -> "go.Figure":
"""Generates plotly figure.
Expand All @@ -122,7 +135,7 @@ def to_plotly(self) -> "go.Figure":
fig = px.line(
self.df,
x="r",
y="top_k_similarity",
y=self.df.columns[1],
color="label",
labels={"label": "Legend"},
line_dash="label",
Expand All @@ -148,6 +161,7 @@ def to_plotly(self) -> "go.Figure":
title="S",
mirror=True,
ticks="inside",
range=[0, 1],
showline=True,
linecolor="black",
linewidth=1,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="fcapsy-experiments",
version="0.2.4",
version="0.2.5",
author="Tomáš Mikula",
author_email="mail@tomasmikula.cz",
description="Package of experiments for fcapsy library.",
Expand Down

0 comments on commit 7536c9c

Please sign in to comment.