
Commit 113dfdd

bring in main
1 parent 19a257a commit 113dfdd

File tree

7 files changed (+1154, -0 lines)


.gitattributes

+2
@@ -0,0 +1,2 @@
new_edited_loaded_data.csv filter=lfs diff=lfs merge=lfs -text
loaded_data.csv filter=lfs diff=lfs merge=lfs -text

.github/workflows/actions.yml

Whitespace-only changes.

.gitignore

+8
@@ -0,0 +1,8 @@
ConsultaData.csv
edited_purchase_history.csv
codarticu_idx_to_codarticu.json
test_data.csv
train_data.csv
your_pretrained_model.h5
loaded_data.csv
new_edited_loaded_data.csv

autotrain.py

+298
@@ -0,0 +1,298 @@
from http.server import BaseHTTPRequestHandler, HTTPServer
import time
import uvicorn
from fastapi import FastAPI
import numpy as np
import pandas as pd

import pyodbc
import warnings

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import TruncatedSVD
from typing import List

import pandas as pd

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

from keras.models import Model
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.layers import Dropout, BatchNormalization, Activation
from keras.regularizers import l2
from keras.optimizers import SGD, Adam

from keras.models import load_model

from sklearn.preprocessing import StandardScaler

from fastapi import Depends

import json

import traceback

hostName = "localhost"
serverPort = 8081
warnings.filterwarnings('ignore')
app = FastAPI()
hits = 0


# Define global variables to store data and model
trained_model = None
loaded_data = None

def obtener_datos():

    global trained_model, loaded_data, M, df_train, mu, movie_idx_to_movie_id

    print("init call")
    print("conectando...")

    #cnxn_str = ("Driver={SQL Server Native Client 11.0};"
    #
    cnxn_str = ("Driver={ODBC Driver 11 for SQL Server};"
                "Server=181.169.115.183,1433;"
                "Database=F_SISTEMA;"
                "UID=External;"
                "PWD=external2022_123!;")
    cnxn = pyodbc.connect(cnxn_str, timeout=50000)

    loaded_data = pd.read_sql("""
        select
            cli.CodCliente as CodCliente
            ,RTRIM(art.CodArticulo) as CodArticu
            ,cast((coalesce(SUM((reng.CantidadPedida+reng.CantPedidaDerivada)*reng.PrecioVenta),0)*1+(COUNT(reng.NroRenglon)/100)) as decimal) as Cantidad
        from f_central.dbo.ven_clientes as cli
        inner join f_central.dbo.StkFer_Articulos as art
            on 1 = 1
        left join F_CENTRAL.dbo.VenFer_PedidoReng as reng
            on reng.CodCliente = cli.CodCliente
            and reng.CodArticu = art.CodArticulo
        group by cli.CodCliente,art.CodArticulo
        order by cli.CodCliente
        """, cnxn)
    loaded_data.to_csv('new_edited_loaded_data.csv', index=False)

    if loaded_data is None:
        # Load data if not already loaded
        print("Using old data...")
        loaded_data = pd.read_csv('purchase_history.csv')

    ### PREPROCESSING STARTS HERE

    # Create a mapping for CodCliente using pandas factorize
    loaded_data['CodCliente_idx'], _ = pd.factorize(loaded_data['CodCliente'])

    # Create a mapping for CodArticu using pandas factorize
    loaded_data['CodArticu_idx'], _ = pd.factorize(loaded_data['CodArticu'])

    #loaded_data.to_csv('edited_loaded_data.csv', index=False)

    # Ensure there are no missing or invalid values in the dataset
    missing_values = loaded_data.isnull().values.any()

    # Verify that all user and movie indices are within the expected range (0 to N-1)
    valid_indices = (
        (loaded_data['CodCliente_idx'] >= 0) &
        (loaded_data['CodCliente_idx'] < loaded_data['CodCliente_idx'].nunique()) &
        (loaded_data['CodArticu_idx'] >= 0) &
        (loaded_data['CodArticu_idx'] < loaded_data['CodArticu_idx'].nunique())
    )

    if missing_values:
        print("Dataset contains missing or invalid values.")
        return "Hay datos invalidos, checkear"

    if valid_indices.all():
        print("All user and movie indices are within the expected range.")
    else:
        print("Some user or movie indices are out of the expected range.")
        return "Usuarios o Articulos estan fuera de rango"

    ## PREPROCESS2DICT STARTS HERE

    # Convert 'CodArticu' column to numeric (if it contains numeric values)
    loaded_data['CodArticu'] = pd.to_numeric(loaded_data['CodArticu'], errors='coerce')

    # Create a StandardScaler instance for 'Cantidad'
    scaler = StandardScaler()

    # Normalize the 'Cantidad' column
    loaded_data['Cantidad'] = scaler.fit_transform(loaded_data['Cantidad'].values.reshape(-1, 1))

    # split into train and test
    loaded_data = shuffle(loaded_data)
    cutoff = int(0.8*len(loaded_data))
    df_train = loaded_data.iloc[:cutoff]
    df_test = loaded_data.iloc[cutoff:]

    # Drop rows with NaN values in the train and test datasets
    df_train = df_train.dropna()
    df_test = df_test.dropna()

    # Check for NaN values in train and test datasets
    train_has_nan = df_train.isnull().values.any()
    test_has_nan = df_test.isnull().values.any()

    if train_has_nan:
        print("Train dataset contains NaN values.")
        return "Train_Data tiene nan values. checkear"

    if test_has_nan:
        print("Test dataset contains NaN values.")
        return "Test_Data tiene nan values. checkear"

    # Ensure all users are present in both sets
    all_users = set(loaded_data.CodCliente_idx.unique())
    users_in_train = set(df_train.CodCliente_idx.unique())
    users_in_test = set(df_test.CodCliente_idx.unique())
    missing_users_in_train = all_users - users_in_train
    missing_users_in_test = all_users - users_in_test

    # Add missing users to the training set
    missing_users_data = loaded_data[loaded_data.CodCliente_idx.isin(missing_users_in_train)]
    df_train = pd.concat([df_train, missing_users_data])

    # Add missing users to the test set
    missing_users_data = loaded_data[loaded_data.CodCliente_idx.isin(missing_users_in_test)]
    df_test = pd.concat([df_test, missing_users_data])

    # Now df_train and df_test contain all users
    df_train.to_csv('train_data.csv', index=False)
    df_test.to_csv('test_data.csv', index=False)

    # Create a mapping from movie index to movie ID
    movie_idx_to_movie_id = {}
    for index, row in loaded_data.iterrows():
        movie_idx_to_movie_id[row['CodArticu_idx']] = row['CodArticu']

    ### MF_KERAS STARTS HERE

    N = loaded_data.CodCliente_idx.max() + 1  # number of users
    M = loaded_data.CodArticu_idx.max() + 1  # number of movies

    # initialize variables
    K = 40  # latent dimensionality
    mu = df_train.Cantidad.mean()
    epochs = 5
    reg = 0.00001  # regularization penalty

    # keras model
    u = Input(shape=(1,))
    m = Input(shape=(1,))
    u_embedding = Embedding(N, K)(u)  # (N, 1, K)
    m_embedding = Embedding(M, K)(m)  # (N, 1, K)
    u_embedding = Flatten()(u_embedding)  # (N, K)
    m_embedding = Flatten()(m_embedding)  # (N, K)
    x = Concatenate()([u_embedding, m_embedding])  # (N, 2K)

    # the neural network
    x = Dense(400)(x)
    # x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    # x = Dense(100)(x)
    x = BatchNormalization()(x)
    # x = Activation('relu')(x)
    x = Dense(1)(x)

    model = Model(inputs=[u, m], outputs=x)
    model.compile(
        loss='mse',
        # optimizer='adam',
        # optimizer=Adam(lr=0.01),
        optimizer=SGD(lr=0.0005, momentum=0.3),
        metrics=['mse'],
    )

    r = model.fit(
        x=[df_train.CodCliente_idx.values, df_train.CodArticu_idx.values],
        y=df_train.Cantidad.values - mu,
        epochs=epochs,
        batch_size=128,
        validation_data=(
            [df_test.CodCliente_idx.values, df_test.CodArticu_idx.values],
            df_test.Cantidad.values - mu
        )
    )

    trained_model = model

    trained_model.save('your_pretrained_model.h5')

    return trained_model, loaded_data, M, df_train, mu, movie_idx_to_movie_id


auto_run = obtener_datos()


@app.get("/consulta/{CodCliente}")
async def recommend_top_10_items_for_user(CodCliente: int, top_N: int = 10):
    global trained_model, loaded_data, M, df_train, mu, movie_idx_to_movie_id

    if trained_model is None:
        trained_model = load_model('your_pretrained_model.h5')
        loaded_data = pd.read_csv('edited_loaded_data.csv')
        M = loaded_data.CodArticu_idx.max() + 1  # number of movies
        df_train = pd.read_csv('train_data.csv')
        mu = df_train.Cantidad.mean()

    # Check if CodCliente exists in loaded_data
    if CodCliente not in loaded_data['CodCliente'].values:
        return "Ese CodCliente no existe."  # Return a message indicating the UserID is not valid

    # Map the user ID to its corresponding index
    user_idx = loaded_data[loaded_data['CodCliente'] == CodCliente]['CodCliente_idx'].values[0]

    # Get the indices of all movies
    CodArticu_indices = np.arange(M)

    # Create an array with the user index repeated for all movies
    user_array = np.array([user_idx] * M)

    # Predict movie ratings for the user
    predicted_ratings = trained_model.predict([user_array, CodArticu_indices]) + mu

    # Create a DataFrame with movie indices, predicted ratings, and movie IDs
    movie_ratings = pd.DataFrame({
        'movie_index': CodArticu_indices,
        'predicted_rating': predicted_ratings.flatten(),
        'movie_id': [movie_idx_to_movie_id[i] for i in CodArticu_indices]
    })

    # Sort the DataFrame by predicted ratings in descending order
    top_movie_ratings = movie_ratings.sort_values(by='predicted_rating', ascending=False)

    # Get the top N recommended movie IDs
    top_movie_ids = top_movie_ratings.head(top_N)['movie_id'].values

    recommended_movie_ids = top_movie_ids

    print("Top {} recommended movies for user (CodCliente) {}:".format(top_N, CodCliente))
    for movie_id in recommended_movie_ids:
        print("Movie ID:", movie_id)

    return "salio por fin"


if __name__ == '__main__':
    uvicorn.run(app, host=hostName, port=serverPort)
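
Note on the optimizer used in model.compile above: newer Keras releases renamed the lr argument of SGD and Adam to learning_rate, and Keras 3 removes lr altogether, so depending on the Keras version pinned in requirements.txt (not shown here) the call may need the newer spelling. A minimal sketch with the same hyperparameters, assuming Keras 2.3 or later:

    from keras.optimizers import SGD

    # Same settings as the SGD(lr=0.0005, momentum=0.3) call in autotrain.py,
    # written with the newer argument name; Keras 3 rejects the old `lr=` form.
    optimizer = SGD(learning_rate=0.0005, momentum=0.3)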
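
For reference, a minimal sketch of how the /consulta/{CodCliente} endpoint defined above could be exercised once the service is running on the configured host and port (localhost:8081). The requests dependency and the client code 1234 are placeholders for illustration, not part of this commit:

    import requests

    # Hypothetical client code; use a CodCliente that exists in the loaded data.
    cod_cliente = 1234

    # top_N maps to the optional query parameter of recommend_top_10_items_for_user.
    response = requests.get(
        "http://localhost:8081/consulta/{}".format(cod_cliente),
        params={"top_N": 10},
    )
    print(response.status_code, response.json())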

requirements.txt

5.56 KB
Binary file not shown.
