Commit
Merge pull request #561 from vanithakattumuri/main
#2 updated the UtilityDatabase.py
udayRage authored Dec 13, 2024
2 parents 39478f5 + 88d099c commit 8310abe
Showing 3 changed files with 75 additions and 62 deletions.
@@ -1,15 +1,15 @@
# generateTransactionalDatabase is a code used to convert the database into Temporal database.
#
# **Importing this algorithm into a python program**
# --------------------------------------------------------
#
# from PAMI.extras.generateDatabase import generateTransactionalDatabase as db
# obj = db(10, 5, 10)
# obj.create()
# obj.save('db.txt')
# print(obj.getTransactions()) to get the transactional database as a pandas dataframe

# **Running the code from the command line**
# --------------------------------------------------------
#
# python generateDatabase.py 10 5 10 db.txt
# cat db.txt
#
@@ -121,16 +121,15 @@ def tuning(self, array, sumRes) -> list:
"""

        while np.sum(array) != sumRes:
-            # get index of largest value
-            randIndex = np.random.randint(0, len(array))
            # if sum is too large, decrease the largest value

            if np.sum(array) > sumRes:
-                array[randIndex] -= 1
-            # if sum is too small, increase the smallest value
+                maxIndex = np.argmax(array)
+                array[maxIndex] -= 1
+            # if sum is too small, increase the smallest value
            else:
-                array[randIndex] += 1
-        return array
+                minIndex = np.argmin(array)
+                array[minIndex] += 1
+        return array

def generateArray(self, nums, avg, maxItems) -> list:
"""
@@ -154,7 +153,7 @@ def generateArray(self, nums, avg, maxItems) -> list:
"""

# generate n random values
-        values = np.random.randint(1, maxItems, nums)
+        values = np.random.randint(1, avg * 1.5, nums)

sumRes = nums * avg

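The revised tuning/generateArray pair above draws per-transaction sizes around the requested average and then nudges the largest or smallest entry until the sizes sum to exactly nums * avg. Below is a minimal standalone sketch of that behaviour, assuming only numpy; the function name and test values are illustrative and not part of this commit.

import numpy as np

def tune_to_sum(array, target):
    # Nudge the extremes until the sizes add up to the target:
    # shrink the current maximum while the sum is too large,
    # grow the current minimum while it is too small.
    while np.sum(array) != target:
        if np.sum(array) > target:
            array[np.argmax(array)] -= 1
        else:
            array[np.argmin(array)] += 1
    return array

nums, avg = 10, 5
sizes = np.random.randint(1, int(avg * 1.5), nums)  # draw sizes around the average, as generateArray now does
sizes = tune_to_sum(sizes, nums * avg)
print(sizes, np.sum(sizes))                          # the sum is exactly nums * avg = 50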
@@ -57,6 +57,7 @@ def __init__(
self.seperator = sep
self.occurrenceProbabilityOfSameTimestamp = occurrenceProbabilityOfSameTimestamp
self.occurrenceProbabilityToSkipSubsequentTimestamp = occurrenceProbabilityToSkipSubsequentTimestamp
+        self.current_timestamp = int()
self._startTime = float()
self._endTime = float()
self._memoryUSS = float()
@@ -76,7 +77,7 @@ def __init__(

def getPoint(self, x1, y1, x2, y2):

-        return (np.random.randint(x1, x2), np.random.randint(y1, y2))
+        return (np.random.randint(x1, x2),np.random.randint(y1, y2))

def performCoinFlip(self, probability: float) -> bool:
"""
@@ -86,7 +87,7 @@ def performCoinFlip(self, probability: float) -> bool:
:return: True if the coin lands heads, False otherwise.
"""
result = np.random.choice([0, 1], p=[1 - probability, probability])
-        return result == 1
+        return result

def tuning(self, array, sumRes) -> list:
"""
@@ -106,16 +107,14 @@ def tuning(self, array, sumRes) -> list:
"""

        while np.sum(array) != sumRes:
-            # get index of largest value
-            randIndex = np.random.randint(0, len(array))
            # if sum is too large, decrease the largest value
            if np.sum(array) > sumRes:
-                array[randIndex] -= 1
-            # if sum is too small, increase the smallest value
+                maxIndex = np.argmax(array)
+                array[maxIndex] -= 1
+            # if sum is too small, increase the smallest value
            else:
-                array[randIndex] += 1
-        return array
+                minIndex = np.argmin(array)
+                array[minIndex] += 1
+        return array

def generateArray(self, nums, avg, maxItems) -> list:
"""
@@ -139,7 +138,7 @@ def generateArray(self, nums, avg, maxItems) -> list:
"""

# generate n random values
-        values = np.random.randint(1, maxItems, nums)
+        values = np.random.randint(1, avg*1.5, nums)

sumRes = nums * avg

@@ -172,39 +171,29 @@ def create(self) -> None:
"""
self._startTime = time.time()
db = set()
-        lineSize = [] #may be error. need to check it.
-        sumRes = self.databaseSize * self.avgItemsPerTransaction  # Total number of items

+        values = self.generateArray(self.databaseSize, self.avgItemsPerTransaction, self.numItems)

for i in range(self.databaseSize):
# Determine the timestamp
if self.performCoinFlip(self.occurrenceProbabilityOfSameTimestamp):
timestamp = self.current_timestamp
else:
-                if self.performCoinFlip(self.occurrenceProbabilityToSkipSubsequentTimestamp):
+                if self.performCoinFlip(self.occurrenceProbabilityToSkipSubsequentTimestamp)==1:
self.current_timestamp += 2
else:
self.current_timestamp += 1
timestamp = self.current_timestamp

self.db.append([timestamp]) # Start the transaction with the timestamp

-            lineSize.append([i, 0])  # Initialize lineSize with 0 for each transaction
-
-        # Adjust lineSize to ensure sum of sizes equals sumRes
-        lineSize = self.tuning(lineSize, sumRes)
-
-        # For each transaction, generate items
-        for i in tqdm.tqdm(range(len(lineSize))):
-            transaction_index = lineSize[i][0]
-            num_items = lineSize[i][1]
-
-            if num_items > self.numItems:
-                raise ValueError(
-                    "Error: Either increase numItems or decrease avgItemsPerTransaction or modify percentage")
-            items = np.random.choice(range(1, self.numItems + 1), num_items, replace=False)
-            self.db[transaction_index].extend(items)
+        for i in tqdm.tqdm(range(self.databaseSize)):
+            items = np.random.choice(range(1, self.numItems + 1), values[i], replace=False)
+            nline = [self.itemPoint[i] for i in items]
+            self.db[i].extend(nline)

self._runTime = time.time() - self._startTime
self._endTime = time.time()
process = psutil.Process(os.getpid())
self._memoryUSS = process.memory_full_info().uss
self._memoryRSS = process.memory_info().rss
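The reworked create() above now takes each transaction's size from generateArray and decides timestamps with two coin flips: one to reuse the previous timestamp and one to skip ahead by an extra step. The following is a rough sketch of that timestamp logic only, assuming numpy; the probabilities, loop length, and helper name are illustrative rather than taken from the commit.

import numpy as np

def coin_flip(probability):
    # Mirrors the revised performCoinFlip: returns 0 or 1 instead of a bool.
    return np.random.choice([0, 1], p=[1 - probability, probability])

same_ts_prob, skip_ts_prob = 0.1, 0.1
current_timestamp = 0
timestamps = []
for _ in range(10):
    if coin_flip(same_ts_prob) == 1:
        timestamp = current_timestamp                          # reuse the previous timestamp
    else:
        # occasionally jump by two, otherwise advance by one
        current_timestamp += 2 if coin_flip(skip_ts_prob) == 1 else 1
        timestamp = current_timestamp
    timestamps.append(timestamp)
print(timestamps)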
@@ -1,51 +1,73 @@
import numpy as np
import pandas as pd
import random
+import psutil, os, time


-class UtilityDataGenerator:
-    def __init__(self, databaseSize, numberOfItems, averageLengthOfTransaction,
-                 minimumInternalUtilityValue, maximumInternalUtilityValue,
-                 minimumExternalUtilityValue, maximumExternalUtilityValue):
+class UtilityDatabase:
+    def __init__(self, databaseSize, numItems, avgItemsPerTransaction,
+                 minInternalUtilityValue, maxInternalUtilityValue,
+                 minExternalUtilityValue, maxExternalUtilityValue):
self.databaseSize = databaseSize
-        self.numberOfItems = numberOfItems
-        self.averageLengthOfTransaction = averageLengthOfTransaction
-        self.minInternalUtilityValue = minimumInternalUtilityValue
-        self.maxInternalUtilityValue = maximumInternalUtilityValue
-        self.minExternalUtilityValue = minimumExternalUtilityValue
-        self.maxExternalUtilityValue = maximumExternalUtilityValue
+        self.numItems = numItems
+        self.avgItemsPerTransaction = avgItemsPerTransaction
+        self.minInternalUtilityValue = minInternalUtilityValue
+        self.maxInternalUtilityValue = maxInternalUtilityValue
+        self.minExternalUtilityValue = minExternalUtilityValue
+        self.maxExternalUtilityValue = maxExternalUtilityValue
self.entries = []
self.ExternalUtilityData = self.GenerateExternalUtilityData()
+        self._startTime = float()
+        self._endTime = float()
+        self._memoryUSS = float()
+        self._memoryRSS = float()

def GenerateExternalUtilityData(self):
-        items = range(1, self.numberOfItems + 1)
+        items = range(1, self.numItems + 1)
ExternalUtilityData = {f'item{item}': random.randint(100, 900) for item in items}
return ExternalUtilityData

-    def Generate(self):
+    def create(self):
+        self._startTime = time.time()
for entry_id in range(1, self.databaseSize + 1):
-        entry_length = np.random.randint(1, self.averageLengthOfTransaction * 2)
+        entry_length = np.random.randint(1, self.avgItemsPerTransaction * 2)
entry = np.random.randint(self.minInternalUtilityValue, self.maxInternalUtilityValue + 1,
-                                      size=self.numberOfItems)
+                                      size=self.numItems)
entry_sum = entry.sum()
self.entries.append((entry, entry_sum))
+        self._endTime = time.time()

-    def Save(self, fileName):
+    def save(self, fileName):
with open(fileName, 'w') as file:
for idx, (entry, entry_sum) in enumerate(self.entries, start=1):
entry_str = '\t'.join(map(str, entry))
file.write(f'{idx}\t{entry_str}\t{entry_sum}\n')

+    def getMemoryUSS(self) -> float:
+
+        process = psutil.Process(os.getpid())
+        self._memoryUSS = process.memory_full_info().uss
+        return self._memoryUSS
+
+    def getMemoryRSS(self) -> float:
+
+        process = psutil.Process(os.getpid())
+        self._memoryRSS = process.memory_info().rss
+        return self._memoryRSS
+
+    def getRuntime(self) -> float:
+        return self._endTime - self._startTime

def SaveItemsInternalUtilityValues(self, fileName):
-        items = random.sample(range(1, self.numberOfItems + 1), self.numberOfItems)
+        items = random.sample(range(1, self.numItems + 1), self.numItems)
internal_utility_data = [np.random.randint(self.minInternalUtilityValue, self.maxInternalUtilityValue + 1) for _
in items]
data = {'Item': items, 'Internal Utility Value': internal_utility_data}
df = pd.DataFrame(data)
df.to_csv(fileName, sep='\t', index=False)

def Saveitemsexternalutilityvalues(self, fileName):
-        items = random.sample(range(1, self.numberOfItems + 1), self.numberOfItems)
+        items = random.sample(range(1, self.numItems + 1), self.numItems)
data = {'Item': [f'item{item}' for item in items],
'External Utility Value': list(self.ExternalUtilityData.values())}
df = pd.DataFrame(data)
@@ -59,22 +81,22 @@ def GetUtilityData(self):
return df

def GetInternalUtilityData(self):
-        items = random.sample(range(1, self.numberOfItems + 1), self.numberOfItems)
+        items = random.sample(range(1, self.numItems + 1), self.numItems)
InternalUtilityData = [np.random.randint(self.minInternalUtilityValue, self.maxInternalUtilityValue + 1) for _
in items]
data = {'Item': items, 'Internal Utility Value': InternalUtilityData}
df = pd.DataFrame(data)
return df

def GetExternalUtilityData(self):
-        items = random.sample(range(1, self.numberOfItems + 1), self.numberOfItems)
+        items = random.sample(range(1, self.numItems + 1), self.numItems)
data = {'Item': [f'item{item}' for item in items],
'External Utility Value': list(self.ExternalUtilityData.values())}
df = pd.DataFrame(data)
return df

def GenerateAndPrintItemPairs(self):
-        items = random.sample(range(1, self.numberOfItems + 1), 2)
+        items = random.sample(range(1, self.numItems + 1), 2)
item1_id = f'item{items[0]}'
item2_id = f'item{items[1]}'
item1_value = self.ExternalUtilityData[item1_id]
@@ -87,12 +109,15 @@ def GenerateAndPrintItemPairs(self):


if __name__ == "__main__":
-    data_generator = UtilityDataGenerator(100000, 2000, 10, 1, 100, 1, 10)
-    data_generator.Generate()
-    data_generator.Save("utility_data-6.csv")
+    data_generator = UtilityDatabase(100000, 2000, 10, 1, 100, 1, 10)
+    data_generator.create()
+    data_generator.save("utility_data-6.csv")
data_generator.SaveItemsInternalUtilityValues("items_internal_utility.csv")
data_generator.Saveitemsexternalutilityvalues("items_external_utility.csv")
-    utility_data = data_generator.GetUtilityData()
+    utilityDataFrame = data_generator.GetUtilityData()
+    print('Runtime: ' + str(data_generator.getRuntime()))
+    print('Memory (RSS): ' + str(data_generator.getMemoryRSS()))
+    print('Memory (USS): ' + str(data_generator.getMemoryUSS()))
InternalUtilityData = data_generator.GetInternalUtilityData()
ExternalUtilityData = data_generator.GetExternalUtilityData()

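The renamed UtilityDatabase class picks up the same runtime and memory bookkeeping pattern seen in the generator above: timestamps taken around create(), plus psutil queries for USS and RSS. Here is a small self-contained sketch of that pattern in isolation, assuming only psutil; the class name and workload are illustrative placeholders, not the PAMI API.

import os, time
import psutil

class TimedTask:
    def __init__(self):
        self._startTime = float()
        self._endTime = float()

    def create(self):
        self._startTime = time.time()
        _ = [i * i for i in range(100000)]   # stand-in workload
        self._endTime = time.time()

    def getRuntime(self):
        return self._endTime - self._startTime

    def getMemoryRSS(self):
        # resident set size of the current process
        return psutil.Process(os.getpid()).memory_info().rss

    def getMemoryUSS(self):
        # unique set size; memory_full_info() may need extra privileges on some platforms
        return psutil.Process(os.getpid()).memory_full_info().uss

task = TimedTask()
task.create()
print('Runtime:', task.getRuntime())
print('Memory (RSS):', task.getMemoryRSS())
print('Memory (USS):', task.getMemoryUSS())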
