Skip to content

Commit

Permalink
Merge pull request #311 from choubeyy/main
Browse files Browse the repository at this point in the history
Added more comments
  • Loading branch information
udayRage authored Mar 5, 2024
2 parents 9671440 + e21b6fd commit 32808ae
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 11 deletions.
8 changes: 1 addition & 7 deletions PAMI/subgraphMining/basic/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,9 @@
class _gSpan(ABC):

@abstractmethod
def run(self, inPath, outPath, minSupport, outputSingleVertices, maxNumberOfEdges, outputGraphIds):
def run(self):
"""
Run the gSpan algorithm.
:param inPath: Path to the input file containing the graphs.
:param outPath: Path to the output file for the frequent subgraphs.
:param minSupport: Minimum support threshold as a fraction.
:param outputSingleVertices: Boolean indicating whether to output single vertices as subgraphs.
:param maxNumberOfEdges: Maximum number of edges for the subgraphs to find.
:param outputGraphIds: Boolean indicating whether to include graph IDs in the output.
"""
pass

Expand Down
11 changes: 11 additions & 0 deletions PAMI/subgraphMining/basic/dfsCode.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,18 @@ def __init__(self):
# return pickle.loads(pickle.dumps(self))

def notPreOfRm(self, v):
"""
Check whether the vertex `v` is not the second-to-last vertex on `rightMostPath`.
Returns True when `rightMostPath` has at most one element.
"""
if len(self.rightMostPath) <= 1:
return True
return v != self.rightMostPath[-2]

def getAllVLabels(self):
"""
This function retrieves all vertex labels from the extended edge list and returns them in a list.
"""
labels = []
vertexMap = {}
for ee in self.eeList:
Expand All @@ -29,6 +36,10 @@ def getAllVLabels(self):
return labels

def add(self, ee):
"""
The `add` function adds the given element `ee` to the EE list while updating the rightmost element and path
based on certain conditions.
"""
if self.size == 0:
self.rightMost = 1
self.rightMostPath.extend([0, 1])
Expand Down
17 changes: 17 additions & 0 deletions PAMI/subgraphMining/basic/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ class Graph:
emptyIntegerArray = []

def __init__(self, id, vMap=None, dfsCode=None):
"""
The `__init__` function initializes a graph object with optional parameters for vertex mapping and
DFS code.
"""
self.vMap = {}
self.id = id
if vMap is not None:
Expand Down Expand Up @@ -38,6 +42,9 @@ def getId(self):
return self.id

def removeInfrequentLabel(self, label):
"""
The function removes vertices with a specific label from the graph and updates the edges accordingly.
"""
toRemove = [key for key, vertex in self.vMap.items() if vertex.getLabel() == label]
for key in toRemove:
del self.vMap[key]
Expand All @@ -50,6 +57,9 @@ def removeInfrequentLabel(self, label):
vertex.getEdgeList().remove(edge)

def precalculateVertexNeighbors(self):
"""
The function precalculates the neighbors of each vertex in a graph and stores them in a cache.
"""
self.neighborCache = {}
self.edgeCount = 0

Expand All @@ -68,11 +78,18 @@ def precalculateVertexNeighbors(self):
self.edgeCount //= 2

def precalculateVertexList(self):
"""
The function precalculateVertexList rebuilds `self.vertices` as a list of all vertex objects
stored as values in the `self.vMap` dictionary.
"""
self.vertices = []
for _, vertex in self.vMap.items():
self.vertices.append(vertex)

def precalculateLabelsToVertices(self):
"""
This function precalculates and stores mappings of vertex labels to their corresponding vertex IDs.
"""
self.mapLabelToVertexIds = {}
for vertex in self.vertices:
label = vertex.getLabel()
Expand Down
36 changes: 32 additions & 4 deletions PAMI/subgraphMining/basic/gspan.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class GSpan(_ab._gSpan):
edge_count_pruning = True

def __init__(self, inPath, minSupport, outputSingleVertices=True, maxNumberOfEdges=float('inf'), outputGraphIds=True) -> None:
"""
Store the minimum support threshold and initialize the result list and runtime statistics.
"""

self.minSup = minSupport
self.frequentSubgraphs = []
self._runtime = 0
Expand Down Expand Up @@ -86,6 +90,7 @@ def run(self):

t2 = _ab.time.time()

#Calculate runtime
self._runtime = (t2 - t1)

process = _ab._psutil.Process(_ab._os.getpid())
Expand Down Expand Up @@ -207,16 +212,21 @@ def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph):
"""
isoms = []
startLabel = c.getEeList()[0].getVLabel1()

# Find all vertices in the graph that match the start label and initialize isomorphisms with them
for vId in g.findAllWithLabel(startLabel):
hMap = {}
hMap[0] = vId
isoms.append(hMap)

# For each edge in the DFS code, try to extend each partial isomorphism
for ee in c.getEeList():
v1, v2, v2Label, eLabel = ee.getV1(), ee.getV2(), ee.getVLabel2(), ee.getEdgeLabel()
updateIsoms = []

# Try to extend each current isomorphism with the current edge
for iso in isoms:
mappedV1 = iso.get(v1)
# Forward edge
if v1 < v2:
mappedVertices = list(iso.values())
for mappedV2 in g.getAllNeighbors(mappedV1):
Expand All @@ -229,8 +239,10 @@ def subgraphIsomorphisms(self, c: _ab.DFSCode, g: _ab.Graph):

updateIsoms.append(tempM)

# Backward edge
else:
mappedV2 = iso.get(v2)
# Check if the backward edge exists in the graph matching the DFS code edge
if g.isNeighboring(mappedV1, mappedV2) and eLabel == g.getEdgeLabel(mappedV1, mappedV2):
updateIsoms.append(iso)

Expand All @@ -253,26 +265,33 @@ def rightMostPathExtensionsFromSingle(self, c: _ab.DFSCode, g: _ab.Graph):
:return: The function `rightMostPathExtensionsFromSingle` returns a dictionary `extensions`
containing extended edges as keys and sets of graph IDs as values.
"""
# Get the unique identifier for the given graph
gid = g.getId()
# Initialize a dictionary to store potential extensions
extensions = {}

# If the DFS code is empty, consider all edges of the graph for extension
if c.isEmpty():
for vertex in g.vertices:
for e in vertex.getEdgeList():
# Determine the order of vertex labels to maintain consistency
v1Label = g.getVLabel(e.v1)
v2Label = g.getVLabel(e.v2)
if v1Label < v2Label:
ee1 = _ab.ExtendedEdge(0, 1, v1Label, v2Label, e.getEdgeLabel())
else:
ee1 = _ab.ExtendedEdge(0, 1, v2Label, v1Label, e.getEdgeLabel())

# Update the extensions dictionary with new or existing extended edges
setOfGraphIds = extensions.get(ee1, set())
setOfGraphIds.add(gid)
extensions[ee1] = setOfGraphIds
else:
# For non-empty DFS code, focus on extending from the rightmost path
rightMost = c.getRightMost()
isoms = self.subgraphIsomorphisms(c, g)

# Iterate through all isomorphisms to find valid extensions
for isom in isoms:
invertedIsom = {v: k for k, v in isom.items()}
mappedRm = isom[rightMost]
Expand Down Expand Up @@ -318,22 +337,26 @@ def rightMostPathExtensions(self, c: _ab.DFSCode, graphDb, graphIds):
if c.isEmpty():
for id in graphIds:
g = graphDb[id]
# Skip graphs if pruning based on edge count is enabled and applicable
if GSpan.edge_count_pruning and c.size >= g.getEdgeCount():
self.pruneByEdgeCount += 1
continue
for v in g.vertices:
for e in v.getEdgeList():
# Organize the vertex labels to maintain consistent ordering
v1L = g.getVLabel(e.v1)
v2L = g.getVLabel(e.v2)
if v1L < v2L:
ee1 = _ab.ExtendedEdge(0, 1, v1L, v2L, e.getEdgeLabel())
else:
ee1 = _ab.ExtendedEdge(0, 1, v2L, v1L, e.getEdgeLabel())


# Add the new or existing extensions to the dictionary
setOfGraphIds = extensions.get(ee1, set())
setOfGraphIds.add(id)
extensions[ee1] = setOfGraphIds
else:
# For non-empty DFS codes, extend based on the rightmost path of each graph
rightMost = c.getRightMost()
for id in graphIds:
g = graphDb[id]
Expand Down Expand Up @@ -508,6 +531,7 @@ def findAllOnlyOneVertex(self, graphDb, outputFrequentVertices):
if v.getEdgeList():
vLabel = v.getLabel()
labelM.setdefault(vLabel, set()).add(g.getId())
# Check each label for frequency against the minimum support threshold
for label, tempSupG in labelM.items():
sup = len(tempSupG)
if sup >= self.minSup:
Expand All @@ -533,28 +557,31 @@ def removeInfrequentVertexPairs(self, graphDb):
"""
if GSpan.eliminate_infrequent_edge_labels:
matrix = _ab.SparseTriangularMatrix()
alreadySeenPair = set()
alreadySeenPair = set() # To avoid double counting pairs in the same graph

if GSpan.eliminate_infrequent_edge_labels:
mapEdgeLabelToSupport = {}
alreadySeenEdgeLabel = set()
alreadySeenEdgeLabel = set() # To avoid double counting edge labels in the same graph

for g in graphDb:
vertices = g.getAllVertices()

# Check each vertex and its edges for infrequent pairs and labels
for v1 in vertices:
labelV1 = v1.getLabel()

for edge in v1.getEdgeList():
v2 = edge.another(v1.getId())
labelV2 = g.getVLabel(v2)

# Track vertex label pairs for infrequency analysis
if GSpan.eliminate_infrequent_edge_labels:
pair = self.Pair(labelV1, labelV2)
if pair not in alreadySeenPair:
matrix.incrementCount(labelV1, labelV2)
alreadySeenPair.add(pair)

# Track edge labels for infrequency analysis
if GSpan.eliminate_infrequent_edge_labels:
edgeLabel = edge.getEdgeLabel()
if edgeLabel not in alreadySeenEdgeLabel:
Expand All @@ -581,6 +608,7 @@ def removeInfrequentVertexPairs(self, graphDb):
labelV2 = g.getVLabel(v2)
count = matrix.getSupportForItems(v1.getLabel(), labelV2)

# Remove edges based on infrequency criteria
if GSpan.eliminate_infrequent_vertex_pairs and count < self.minSup:
v1.removeEdge(edge)
self.infrequentVertexPairsRemoved += 1
Expand Down
3 changes: 3 additions & 0 deletions PAMI/subgraphMining/basic/sparseTriangularMatrix.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# The `SparseTriangularMatrix` class represents a matrix with sparse triangular structure and provides
# methods for incrementing counts, getting support for items, setting support values, and removing
# infrequent entries based on a minimum support threshold.
class SparseTriangularMatrix:
def __init__(self):
self.matrix = {}
Expand Down

0 comments on commit 32808ae

Please sign in to comment.