from http.server import BaseHTTPRequestHandler, HTTPServer
import time
import uvicorn
from fastapi import FastAPI, Depends
import numpy as np
import pandas as pd

import pyodbc
import warnings

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from typing import List

import pickle
import json
import traceback
import matplotlib.pyplot as plt

from keras.models import Model, load_model
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from keras.layers import Dropout, BatchNormalization, Activation
from keras.regularizers import l2
from keras.optimizers import SGD, Adam

hostName = "localhost"
serverPort = 8081
warnings.filterwarnings('ignore')
app = FastAPI()
hits = 0


# Global variables that hold the loaded data and the trained model
trained_model = None
loaded_data = None

def obtener_datos():

    global trained_model, loaded_data, M, df_train, mu, movie_idx_to_movie_id

    print("init call")
    print("connecting...")

    # cnxn_str = ("Driver={SQL Server Native Client 11.0};"
    cnxn_str = ("Driver={ODBC Driver 11 for SQL Server};"
                "Server=181.169.115.183,1433;"
                "Database=F_SISTEMA;"
                "UID=External;"
                "PWD=external2022_123!;")

    # Pull the purchase history from SQL Server; fall back to the cached CSV
    # if the connection fails. (The original None-check below could never
    # fire, since read_sql either returns a DataFrame or raises.)
    try:
        cnxn = pyodbc.connect(cnxn_str, timeout=50000)

        loaded_data = pd.read_sql("""
            select
                cli.CodCliente as CodCliente
                ,RTRIM(art.CodArticulo) as CodArticu
                ,cast((coalesce(SUM((reng.CantidadPedida+reng.CantPedidaDerivada)*reng.PrecioVenta),0)*1+(COUNT(reng.NroRenglon)/100)) as decimal) as Cantidad
            from f_central.dbo.ven_clientes as cli
            inner join f_central.dbo.StkFer_Articulos as art
                on 1 = 1
            left join F_CENTRAL.dbo.VenFer_PedidoReng as reng
                on reng.CodCliente = cli.CodCliente
                and reng.CodArticu = art.CodArticulo
            group by cli.CodCliente,art.CodArticulo
            order by cli.CodCliente
        """, cnxn)
        loaded_data.to_csv('new_edited_loaded_data.csv', index=False)
    except pyodbc.Error:
        # Load previously exported data if the database is unreachable
        print("Using old data...")
        loaded_data = pd.read_csv('purchase_history.csv')

    ### Preprocessing stage starts here

    # Create a dense index for CodCliente using pandas factorize
    loaded_data['CodCliente_idx'], _ = pd.factorize(loaded_data['CodCliente'])

    # Create a dense index for CodArticu using pandas factorize
    loaded_data['CodArticu_idx'], _ = pd.factorize(loaded_data['CodArticu'])
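
    # As a reminder of factorize semantics (a fact about pandas, not about
    # this dataset): codes are assigned 0..n_unique-1 in order of first
    # appearance, e.g. pd.factorize(['b', 'a', 'b'])[0] -> array([0, 1, 0]),
    # which is what guarantees the range checks below can use nunique().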

    # Persist the indexed snapshot. This save was previously commented out,
    # but the cold-start path of the /consulta endpoint reads this exact
    # file, so it is re-enabled here.
    loaded_data.to_csv('edited_loaded_data.csv', index=False)

    # Ensure there are no missing or invalid values in the dataset
    missing_values = loaded_data.isnull().values.any()

    # Verify that all user and article indices are within the expected range (0 to N-1)
    valid_indices = (
        (loaded_data['CodCliente_idx'] >= 0) &
        (loaded_data['CodCliente_idx'] < loaded_data['CodCliente_idx'].nunique()) &
        (loaded_data['CodArticu_idx'] >= 0) &
        (loaded_data['CodArticu_idx'] < loaded_data['CodArticu_idx'].nunique())
    )

    if missing_values:
        print("Dataset contains missing or invalid values.")
        return "Invalid data found, please check"

    if valid_indices.all():
        print("All user and article indices are within the expected range.")
    else:
        print("Some user or article indices are out of the expected range.")
        return "Users or articles are out of range"

    ## preprocess2dict stage starts here

    # Convert the 'CodArticu' column to numeric (non-numeric codes become NaN)
    loaded_data['CodArticu'] = pd.to_numeric(loaded_data['CodArticu'], errors='coerce')

    # Create a StandardScaler instance for 'Cantidad'
    scaler = StandardScaler()

    # Normalize the 'Cantidad' column
    loaded_data['Cantidad'] = scaler.fit_transform(loaded_data['Cantidad'].values.reshape(-1, 1))

    # Split into train and test sets
    loaded_data = shuffle(loaded_data)
    cutoff = int(0.8 * len(loaded_data))
    df_train = loaded_data.iloc[:cutoff]
    df_test = loaded_data.iloc[cutoff:]

    # Drop rows with NaN values from the train and test datasets
    df_train = df_train.dropna()
    df_test = df_test.dropna()

    # Check for NaN values in train and test datasets
    train_has_nan = df_train.isnull().values.any()
    test_has_nan = df_test.isnull().values.any()

    if train_has_nan:
        print("Train dataset contains NaN values.")
        return "Train data has NaN values, please check"

    if test_has_nan:
        print("Test dataset contains NaN values.")
        return "Test data has NaN values, please check"
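
    # Note: the scaler above is fitted but never persisted, so the scores the
    # endpoint serves stay in the normalized 'Cantidad' scale. If raw
    # quantities were ever needed, the scaler could be saved and used to
    # inverse-transform predictions; a minimal sketch using the pickle module
    # already imported above ('cantidad_scaler.pkl' is a hypothetical
    # filename, not one this code uses):
    #
    #     with open('cantidad_scaler.pkl', 'wb') as f:
    #         pickle.dump(scaler, f)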

    # Ensure all users are present in both sets
    all_users = set(loaded_data.CodCliente_idx.unique())
    users_in_train = set(df_train.CodCliente_idx.unique())
    users_in_test = set(df_test.CodCliente_idx.unique())
    missing_users_in_train = all_users - users_in_train
    missing_users_in_test = all_users - users_in_test

    # Add missing users to the training set
    missing_users_data = loaded_data[loaded_data.CodCliente_idx.isin(missing_users_in_train)]
    df_train = pd.concat([df_train, missing_users_data])

    # Add missing users to the test set
    missing_users_data = loaded_data[loaded_data.CodCliente_idx.isin(missing_users_in_test)]
    df_test = pd.concat([df_test, missing_users_data])

    # Now df_train and df_test contain all users
    df_train.to_csv('train_data.csv', index=False)
    df_test.to_csv('test_data.csv', index=False)

    # Create a mapping from article index to article ID
    movie_idx_to_movie_id = {}
    for index, row in loaded_data.iterrows():
        movie_idx_to_movie_id[row['CodArticu_idx']] = row['CodArticu']
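
    # A vectorized equivalent (safe because factorize maps each CodArticu_idx
    # to exactly one CodArticu) would avoid the slow iterrows loop:
    # movie_idx_to_movie_id = dict(zip(loaded_data['CodArticu_idx'], loaded_data['CodArticu']))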

    ### MF_KERAS stage starts here

    N = loaded_data.CodCliente_idx.max() + 1  # number of users
    M = loaded_data.CodArticu_idx.max() + 1   # number of articles

    # Initialize hyperparameters
    K = 40         # latent dimensionality
    mu = df_train.Cantidad.mean()
    epochs = 5
    reg = 0.00001  # regularization penalty

    # The Keras model (note: reg is defined above but no l2 regularizer is
    # actually applied to the embeddings in this version)
    u = Input(shape=(1,))
    m = Input(shape=(1,))
    u_embedding = Embedding(N, K)(u)  # (N, 1, K)
    m_embedding = Embedding(M, K)(m)  # (N, 1, K)
    u_embedding = Flatten()(u_embedding)  # (N, K)
    m_embedding = Flatten()(m_embedding)  # (N, K)
    x = Concatenate()([u_embedding, m_embedding])  # (N, 2K)

    # The neural network
    x = Dense(400)(x)
    # x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    # x = Dense(100)(x)
    x = BatchNormalization()(x)
    # x = Activation('relu')(x)
    x = Dense(1)(x)

    model = Model(inputs=[u, m], outputs=x)
    model.compile(
        loss='mse',
        # optimizer='adam',
        # optimizer=Adam(learning_rate=0.01),
        optimizer=SGD(learning_rate=0.0005, momentum=0.3),  # 'lr' was renamed to 'learning_rate' in newer Keras
        metrics=['mse'],
    )

    r = model.fit(
        x=[df_train.CodCliente_idx.values, df_train.CodArticu_idx.values],
        y=df_train.Cantidad.values - mu,
        epochs=epochs,
        batch_size=128,
        validation_data=(
            [df_test.CodCliente_idx.values, df_test.CodArticu_idx.values],
            df_test.Cantidad.values - mu
        )
    )
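
    # Optional: inspect convergence via the History object returned by fit(),
    # using the matplotlib import above. Left commented out so the server does
    # not block on a figure at startup; 'losses.png' is a hypothetical
    # output filename.
    # plt.plot(r.history['loss'], label='train mse')
    # plt.plot(r.history['val_loss'], label='val mse')
    # plt.legend()
    # plt.savefig('losses.png')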

    trained_model = model

    trained_model.save('your_pretrained_model.h5')

    return trained_model, loaded_data, M, df_train, mu, movie_idx_to_movie_id


# Run the full load/train pipeline once at import time
auto_run = obtener_datos()
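# Note: obtener_datos() returns an error string instead of the usual tuple
# when validation fails, so auto_run is worth checking before serving.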


@app.get("/consulta/{CodCliente}")
async def recommend_top_10_items_for_user(CodCliente: int, top_N: int = 10):
    global trained_model, loaded_data, M, df_train, mu, movie_idx_to_movie_id

    if trained_model is None:
        trained_model = load_model('your_pretrained_model.h5')
        loaded_data = pd.read_csv('edited_loaded_data.csv')
        M = loaded_data.CodArticu_idx.max() + 1  # number of articles
        df_train = pd.read_csv('train_data.csv')
        mu = df_train.Cantidad.mean()
        # Rebuild the index-to-ID mapping, which this cold-start path never
        # defined in the original and would otherwise raise a NameError
        movie_idx_to_movie_id = dict(zip(loaded_data['CodArticu_idx'], loaded_data['CodArticu']))

    # Check whether CodCliente exists in loaded_data
    if CodCliente not in loaded_data['CodCliente'].values:
        return "That CodCliente does not exist."  # The UserID is not valid

    # Map the user ID to its corresponding index
    user_idx = loaded_data[loaded_data['CodCliente'] == CodCliente]['CodCliente_idx'].values[0]

    # Get the indices of all articles
    CodArticu_indices = np.arange(M)

    # Create an array with the user index repeated for all articles
    user_array = np.array([user_idx] * M)

    # Predict article scores for the user (adding back the mean mu that was
    # subtracted from the targets during training)
    predicted_ratings = trained_model.predict([user_array, CodArticu_indices]) + mu

    # Build a DataFrame with article indices, predicted scores, and article IDs
    movie_ratings = pd.DataFrame({
        'movie_index': CodArticu_indices,
        'predicted_rating': predicted_ratings.flatten(),
        'movie_id': [movie_idx_to_movie_id[i] for i in CodArticu_indices]
    })

    # Sort the DataFrame by predicted score in descending order
    top_movie_ratings = movie_ratings.sort_values(by='predicted_rating', ascending=False)

    # Get the top N recommended article IDs
    top_movie_ids = top_movie_ratings.head(top_N)['movie_id'].values

    recommended_movie_ids = top_movie_ids

    print("Top {} recommended articles for user (CodCliente) {}:".format(top_N, CodCliente))
    for movie_id in recommended_movie_ids:
        print("Article ID:", movie_id)

    return "done at last"


if __name__ == '__main__':
    uvicorn.run(app, host=hostName, port=serverPort)
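
# Example usage once the server is running (assumes the default host/port
# configured above; client ID 123 is illustrative, and top_N is optional,
# defaulting to 10):
#
#   curl "http://localhost:8081/consulta/123?top_N=10"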