-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #489 from choubeyy/main
Added method to convert graph transactions to a new format and added Synthetic Graph Generator
- Loading branch information
Showing
2 changed files
with
139 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Usage | ||
# obj = ConvertFormat('iFile', 'oFile') | ||
# | ||
class ConvertFormat: | ||
def __init__(self, iFile, oFile): | ||
self.iFile = iFile | ||
self.oFile = oFile | ||
self.convert() | ||
|
||
def _writeGraphToFile(self, graph, oFile): | ||
node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes'])) | ||
edge_str = ' '.join(f"{u} {v} {label}" for u, v, label in graph['edges']) | ||
oFile.write(f"{node_str} : {edge_str}\n") | ||
|
||
def convert(self): | ||
graph = {} | ||
with open(self.iFile, 'r') as iFile, open(self.oFile, 'w') as oFile: | ||
for line in iFile: | ||
parts = line.strip().split() | ||
if not parts: | ||
continue | ||
if parts[0] == 't': | ||
if graph: | ||
self._writeGraphToFile(graph, oFile) | ||
graph = {'nodes': [], 'edges': []} | ||
elif parts[0] == 'v': | ||
graph['nodes'].append((int(parts[1]), parts[2])) | ||
elif parts[0] == 'e': | ||
graph['edges'].append((int(parts[1]), int(parts[2]), parts[3])) | ||
if graph: | ||
self._writeGraphToFile(graph, oFile) |
108 changes: 108 additions & 0 deletions
108
PAMI/extras/syntheticDataGenerator/createSyntheticGraphTransactions.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# | ||
# obj = SyntheticGraphGenerator(numOfGraphTransactions, avgNumVertices, avgNumEdges, distinctVertexlLabels, distinctEdgeLabels, 'oFile', graphFormat) | ||
# graphFormat = 'old' or 'new', default is 'old' | ||
# | ||
import random | ||
from PAMI.extras.visualize import graphs | ||
|
||
class SyntheticGraphGenerator: | ||
def __init__(self, numGraphs, avgNumVertices, avgNumEdges, numVertexLabels, numEdgeLabels, outputFileName, format='old'): | ||
self.numGraphs = numGraphs | ||
self.avgNumVertices = avgNumVertices | ||
self.avgNumEdges = avgNumEdges | ||
self.numVertexLabels = numVertexLabels | ||
self.numEdgeLabels = numEdgeLabels | ||
self.outputFileName = outputFileName | ||
self.format = format | ||
self._validate() | ||
self.generate() | ||
|
||
def _validate(self): | ||
if self.avgNumVertices < 1: | ||
raise ValueError("Average number of vertices should be greater than 0") | ||
if self.avgNumEdges < 0: | ||
raise ValueError("Average number of edges should be greater than or equal to 0") | ||
if self.numVertexLabels < 1: | ||
raise ValueError("Number of labels should be greater than 0") | ||
if self.numEdgeLabels < 1: | ||
raise ValueError("Number of labels should be greater than 0") | ||
if self.numGraphs < 1: | ||
raise ValueError("Number of graphs should be greater than 0") | ||
|
||
if self.avgNumVertices < self.avgNumEdges: | ||
raise ValueError("Average number of vertices should be greater than or equal to average number of edges") | ||
|
||
if self.avgNumEdges > self.avgNumVertices * (self.avgNumVertices - 1) / 2: | ||
raise ValueError("Average number of edges should be less than or equal to n(n-1)/2") | ||
|
||
if self.avgNumVertices < self.numVertexLabels: | ||
raise ValueError("Average number of vertices should be greater than or equal to number of vertex labels") | ||
|
||
if self.avgNumEdges < self.numEdgeLabels: | ||
raise ValueError("Average number of edges should be greater than or equal to number of edge labels") | ||
|
||
def generate(self): | ||
with open(self.outputFileName, 'w') as oFile: | ||
for i in range(self.numGraphs): | ||
numVertices = random.randint(max(self.avgNumVertices-3, 1), self.avgNumVertices+3) | ||
numEdges = random.randint(max(self.avgNumEdges-3, 0), self.avgNumEdges+3) | ||
|
||
if numVertices < numEdges: | ||
numVertices = numEdges + 1 | ||
|
||
if numEdges > numVertices * (numVertices - 1) / 2: | ||
numEdges = numVertices * (numVertices - 1) // 2 | ||
|
||
if numVertices < self.numVertexLabels: | ||
numVertices = self.numVertexLabels | ||
|
||
if numEdges < self.numEdgeLabels: | ||
numEdges = self.numEdgeLabels | ||
|
||
graph = {'nodes': [], 'edges': []} | ||
|
||
# Add vertices | ||
for j in range(numVertices): | ||
graph['nodes'].append((j, random.randint(0, self.numVertexLabels-1))) | ||
|
||
# Ensure connectivity by creating a spanning tree first | ||
connectedNodes = set() | ||
connectedNodes.add(0) | ||
while len(connectedNodes) < numVertices: | ||
u = random.choice(list(connectedNodes)) | ||
v = random.choice([node for node in range(numVertices) if node not in connectedNodes]) | ||
graph['edges'].append((u, v, random.randint(0, self.numEdgeLabels-1))) | ||
connectedNodes.add(v) | ||
|
||
# Add remaining edges randomly | ||
additionalEdges = numEdges - (numVertices - 1) | ||
for _ in range(additionalEdges): | ||
while True: | ||
u = random.randint(0, numVertices-1) | ||
v = random.randint(0, numVertices-1) | ||
if u != v and (u, v, _) not in graph['edges'] and (v, u, _) not in graph['edges']: | ||
graph['edges'].append((u, v, random.randint(0, self.numEdgeLabels-1))) | ||
break | ||
|
||
if self.format == 'new': | ||
self._writeGraphToFileNewFormat(graph, oFile) | ||
else: | ||
self._writeGraphToFile(graph, oFile, i) | ||
|
||
|
||
def _writeGraphToFile(self, graph, oFile, i): | ||
oFile.write(f't # {i}\n') | ||
for node in graph['nodes']: | ||
oFile.write(f"v {node[0]} {node[1]}\n") | ||
for edge in graph['edges']: | ||
oFile.write(f"e {edge[0]} {edge[1]} {edge[2]}\n") | ||
|
||
def _writeGraphToFileNewFormat(self, graph, oFile): | ||
node_str = ' '.join(f"{node} {label}" for node, label in sorted(graph['nodes'])) | ||
edge_str = ' '.join(f"{u} {v} {label}" for u, v, label in graph['edges']) | ||
oFile.write(f"{node_str} : {edge_str}\n") | ||
|
||
if __name__ == "__main__": | ||
obj = SyntheticGraphGenerator(10, 2, 1, 1, 1, 'synthetic_graphs.txt', 'old') | ||
vis = graphs.graphDatabase('synthetic_graphs.txt') | ||
vis.plot() |