Merge pull request #70 from causy-dev/refactor_generators_and_keep_track_of_dependent_triples

Keep track of dependent triples
this-is-sofia authored Jan 16, 2025
2 parents 161d0d0 + bc4c55a commit 55f0669
Showing 4 changed files with 315 additions and 18 deletions.
38 changes: 26 additions & 12 deletions causy/causal_discovery/constraint/independence_tests/common.py
@@ -96,12 +96,12 @@ def process(
"""
results = []
already_deleted_edges = set()
for nodes in itertools.permutations(nodes):
x: NodeInterface = graph.nodes[nodes[0]]
y: NodeInterface = graph.nodes[nodes[1]]
z: NodeInterface = graph.nodes[nodes[2]]
for node in nodes:
remaining_nodes = [n for n in nodes if n != node]
x: NodeInterface = graph.nodes[remaining_nodes[0]]
y: NodeInterface = graph.nodes[remaining_nodes[1]]
z: NodeInterface = graph.nodes[node]

# Avoid division by zero
if x is None or y is None or z is None:
return

@@ -144,7 +144,8 @@ def process(
u=x,
v=y,
action=TestResultAction.REMOVE_EDGE_UNDIRECTED,
data={"separatedBy": [z]},
data={"separatedBy": [z],
"triple": [x, y, [z]]},
)
)
already_deleted_edges.add((x, y))
@@ -155,6 +156,7 @@ def process(
u=x,
v=y,
action=TestResultAction.DO_NOTHING,
data={"triple": [x, y, [z]]},
)
)

@@ -236,26 +238,34 @@ def process(
p_value = 2 * (1 - stats.norm.cdf(z_value))

# If the p value is smaller than the threshold, the null hypothesis (conditional independence) is rejected, otherwise we accept it and delete the edge
nodes_set = set([graph.nodes[n] for n in nodes])
if p_value > self.threshold:
logger.debug(
f"Nodes {graph.nodes[nodes[0]].name} and {graph.nodes[nodes[1]].name} are uncorrelated given nodes {','.join([graph.nodes[on].name for on in other_neighbours])}"
)
nodes_set = set([graph.nodes[n] for n in nodes])
return TestResult(
u=graph.nodes[nodes[0]],
v=graph.nodes[nodes[1]],
action=TestResultAction.REMOVE_EDGE_UNDIRECTED,
data={
"separatedBy": list(
nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}
)
},
),
"triple": [graph.nodes[nodes[0]], graph.nodes[nodes[1]], nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}],
}
)
else:
return TestResult(
u=graph.nodes[nodes[0]],
v=graph.nodes[nodes[1]],
action=TestResultAction.DO_NOTHING,
data={
"separatedBy": list(
nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}
),
"triple": [graph.nodes[nodes[0]], graph.nodes[nodes[1]],
nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}],
}
)


@@ -317,25 +327,29 @@ def process(
partial_correlation.item(),
self.threshold,
)

nodes_set = set([graph.nodes[n] for n in nodes])
if abs(t) < critical_t:
logger.debug(
f"Nodes {graph.nodes[nodes[0]].name} and {graph.nodes[nodes[1]].name} are uncorrelated given nodes {','.join([graph.nodes[on].name for on in other_neighbours])}"
)
nodes_set = set([graph.nodes[n] for n in nodes])
return TestResult(
u=graph.nodes[nodes[0]],
v=graph.nodes[nodes[1]],
action=TestResultAction.REMOVE_EDGE_UNDIRECTED,
data={
"separatedBy": list(
nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}
)
),
"triple": [graph.nodes[nodes[0]], graph.nodes[nodes[1]], nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}]
},
)
else:
return TestResult(
u=graph.nodes[nodes[0]],
v=graph.nodes[nodes[1]],
action=TestResultAction.DO_NOTHING,
data={
"triple": [graph.nodes[nodes[0]], graph.nodes[nodes[1]],
nodes_set - {graph.nodes[nodes[0]], graph.nodes[nodes[1]]}]
},
)
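
The net effect of the hunks above: every `TestResult` these independence tests emit, whether it removes an edge or does nothing, now carries a `"triple"` entry of the form `[u, v, conditioning_set]` (in the edge-removal branches alongside `"separatedBy"`). The sketch below shows how downstream code might read that entry; it is not part of the commit, and the assumption that a `TestResult` exposes its constructor arguments (`u`, `v`, `action`, `data`) as attributes is inferred only from the calls visible in this diff.

```python
# Hypothetical helper (not in this commit): collect the dependent triples that
# the independence tests above now record on each TestResult.
def collect_dependent_triples(results):
    """Return every [u, v, conditioning_set] triple attached to a result."""
    triples = []
    for result in results:
        # `data` may be absent or None for results produced by other steps.
        data = getattr(result, "data", None) or {}
        if "triple" in data:
            triples.append(data["triple"])
    return triples
```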
233 changes: 229 additions & 4 deletions tests/test_generators.py
@@ -1,6 +1,6 @@
from causy.causal_discovery.constraint.algorithms.pc import PC_EDGE_TYPES
from causy.common_pipeline_steps.calculation import CalculatePearsonCorrelations
from causy.generators import PairsWithNeighboursGenerator
from causy.generators import PairsWithNeighboursGenerator, AllCombinationsGenerator
from causy.graph_model import graph_model_factory
from causy.causal_discovery.constraint.independence_tests.common import (
CorrelationCoefficientTest,
@@ -14,14 +14,125 @@
class GeneratorsTestCase(CausyTestCase):
SEED = 1

def test_pairs_with_neighbours_generator(self):
# TODO, wip
def test_pairs_with_neighbours_generator_two_nodes(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
],
random=lambda: rdnv(0, 1),
)

algo = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(),
CorrelationCoefficientTest(threshold=0.005),
PartialCorrelationTest(threshold=0.005),
],
edge_types=PC_EDGE_TYPES,
extensions=[],
name="PC",
)
)
test_data, graph = sample_generator.generate(1000)
tst = algo()
tst.create_graph_from_data(test_data)
tst.create_all_possible_edges()
tst.execute_pipeline_steps()
result = PairsWithNeighboursGenerator(
comparison_settings=ComparisonSettings(min=2, max=4)
).generate(tst.graph.graph, tst)
all_results = []

for i in result:
all_results.append(i)
pass

def test_pairs_with_neighbours_generator_three_nodes_one_neighbour(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
SampleEdge(NodeReference("X"), NodeReference("Z"), 1),
],
random=lambda: rdnv(0, 1),
)

algo = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(),
CorrelationCoefficientTest(threshold=0.005),
PartialCorrelationTest(threshold=0.005),
],
edge_types=PC_EDGE_TYPES,
extensions=[],
name="PC",
)
)
test_data, graph = sample_generator.generate(1000)
tst = algo()
tst.create_graph_from_data(test_data)
tst.create_all_possible_edges()
tst.execute_pipeline_steps()
result = PairsWithNeighboursGenerator(
comparison_settings=ComparisonSettings(min=3, max=4)
).generate(tst.graph.graph, tst)
all_results = []

for i in result:
all_results.append(i)
pass

def test_pairs_with_neighbours_generator_three_nodes_two_neighbours(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
SampleEdge(NodeReference("X"), NodeReference("Z"), 1),
SampleEdge(NodeReference("Y"), NodeReference("Z"), 1),
],
random=lambda: rdnv(0, 1),
)

algo = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(),
CorrelationCoefficientTest(threshold=0.005),
PartialCorrelationTest(threshold=0.005),
],
edge_types=PC_EDGE_TYPES,
extensions=[],
name="PC",
)
)
test_data, graph = sample_generator.generate(1000)
tst = algo()
tst.create_graph_from_data(test_data)
tst.create_all_possible_edges()
tst.execute_pipeline_steps()
result = PairsWithNeighboursGenerator(
comparison_settings=ComparisonSettings(min=3, max=4)
).generate(tst.graph.graph, tst)
all_results = []

for i in result:
all_results.append(i)
pass

def test_pairs_with_neighbours_generator_four_nodes_fully_connected(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
SampleEdge(NodeReference("X"), NodeReference("Z"), 1),
SampleEdge(NodeReference("X"), NodeReference("W"), 1),
SampleEdge(NodeReference("Y"), NodeReference("Z"), 1),
SampleEdge(NodeReference("Z"), NodeReference("W"), 1),
SampleEdge(NodeReference("Y"), NodeReference("W"), 1),
SampleEdge(NodeReference("Z"), NodeReference("W"), 1),
],
random=lambda: rdnv(0, 1),
)
@@ -49,4 +160,118 @@ def test_pairs_with_neighbours_generator(self):
all_results = []

for i in result:
all_results.extend(i)
all_results.append(i)
pass

def test_all_combinations_generator_two_nodes(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
],
random=lambda: rdnv(0, 1),
)

algo = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(),
CorrelationCoefficientTest(threshold=0.005),
PartialCorrelationTest(threshold=0.005),
],
edge_types=PC_EDGE_TYPES,
extensions=[],
name="PC",
)
)

test_data, graph = sample_generator.generate(1000)
tst = algo()
tst.create_graph_from_data(test_data)
tst.create_all_possible_edges()
tst.execute_pipeline_steps()
result = AllCombinationsGenerator(
comparison_settings=ComparisonSettings(min=2, max=2)
).generate(tst.graph.graph, tst)
all_results = []

for i in result:
all_results.append(i)
pass

def test_all_combinations_generator(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
SampleEdge(NodeReference("X"), NodeReference("Z"), 1),
],
random=lambda: rdnv(0, 1),
)

algo = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(),
CorrelationCoefficientTest(threshold=0.005),
PartialCorrelationTest(threshold=0.005),
],
edge_types=PC_EDGE_TYPES,
extensions=[],
name="PC",
)
)
test_data, graph = sample_generator.generate(1000)
tst = algo()
tst.create_graph_from_data(test_data)
tst.create_all_possible_edges()
tst.execute_pipeline_steps()
result = AllCombinationsGenerator(
comparison_settings=ComparisonSettings(min=3, max=3)
).generate(tst.graph.graph, tst)
all_results = []

for i in result:
all_results.append(i)

pass

def test_all_combinations_generator_four_nodes_fully_connected(self):
rdnv = self.seeded_random.normalvariate
sample_generator = IIDSampleGenerator(
edges=[
SampleEdge(NodeReference("X"), NodeReference("Y"), 1),
SampleEdge(NodeReference("X"), NodeReference("Z"), 1),
SampleEdge(NodeReference("X"), NodeReference("W"), 1),
SampleEdge(NodeReference("Y"), NodeReference("Z"), 1),
SampleEdge(NodeReference("Y"), NodeReference("W"), 1),
SampleEdge(NodeReference("Z"), NodeReference("W"), 1),
],
random=lambda: rdnv(0, 1),
)

algo = graph_model_factory(
Algorithm(
pipeline_steps=[
CalculatePearsonCorrelations(),
CorrelationCoefficientTest(threshold=0.005),
PartialCorrelationTest(threshold=0.005),
],
edge_types=PC_EDGE_TYPES,
extensions=[],
name="PC",
)
)
test_data, graph = sample_generator.generate(1000)
tst = algo()
tst.create_graph_from_data(test_data)
tst.create_all_possible_edges()
tst.execute_pipeline_steps()
result = AllCombinationsGenerator(
comparison_settings=ComparisonSettings(min=2, max=4)
).generate(tst.graph.graph, tst)
all_results = []

for i in result:
all_results.append(i)
pass
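
Every new test above follows the same driver pattern: build a PC model via `graph_model_factory`, fit it on `IIDSampleGenerator` data, then hand `tst.graph.graph` and the model itself to a generator and exhaust what it yields. Below is a compact sketch of that pattern, using only calls already present in the tests; treat it as illustrative, not as the project's recommended API.

```python
# Sketch of the pattern shared by the tests above: drive a generator manually
# and materialise its output. Uses only calls that appear in this diff.
def collect_generator_output(generator, model):
    """Run `generator` against a fitted model and collect everything it yields."""
    # The tests pass the underlying graph plus the model instance itself.
    return list(generator.generate(model.graph.graph, model))

# Usage, mirroring test_all_combinations_generator_two_nodes
# (the ComparisonSettings import path is not shown in this diff):
#   results = collect_generator_output(
#       AllCombinationsGenerator(comparison_settings=ComparisonSettings(min=2, max=2)),
#       tst,
#   )
```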
