@@ -80,7 +80,7 @@ def obtener_datos():
80
80
group by cli.CodCliente,art.CodArticulo
81
81
order by cli.CodCliente
82
82
""" , cnxn )
83
- loaded_data .to_csv ('new_edited_loaded_data.csv' , index = False )
83
+ # loaded_data.to_csv('new_edited_loaded_data.csv', index=False)
84
84
85
85
86
86
if loaded_data is None :
@@ -91,13 +91,13 @@ def obtener_datos():
91
91
###ACA ARRANCARIA PREPROCCES
92
92
93
93
94
- # Create a mapping for CodCliente using pandas factorize
94
+ # Creando index para CodCliente usando factorize de pandas
95
95
loaded_data ['CodCliente_idx' ], _ = pd .factorize (loaded_data ['CodCliente' ])
96
96
97
- # Create a mapping for CodArticu using pandas factorize
97
+ # Creando index para CodArticu
98
98
loaded_data ['CodArticu_idx' ], _ = pd .factorize (loaded_data ['CodArticu' ])
99
99
100
- #loaded_data.to_csv('edited_loaded_data .csv', index=False)
100
+ #loaded_data.to_csv('new_edited_loaded_data .csv', index=False)
101
101
102
102
# Ensure there are no missing or invalid values in the dataset
103
103
missing_values = loaded_data .isnull ().values .any ()
@@ -122,8 +122,8 @@ def obtener_datos():
122
122
123
123
##ACA ARRANCARIA PREPROCES2DICT
124
124
125
- # Convert 'CodArticu' column to numeric (if it contains numeric values)
126
- loaded_data ['CodArticu' ] = pd .to_numeric (loaded_data ['CodArticu' ], errors = 'coerce' )
125
+ # Convierto 'CodArticu' en numerico
126
+ loaded_data ['CodArticu' ] = pd .to_numeric (loaded_data ['CodArticu' ], errors = 'coerce' )
127
127
128
128
# Create a StandardScaler instance for 'Cantidad'
129
129
scaler = StandardScaler ()
@@ -154,29 +154,31 @@ def obtener_datos():
154
154
return "Test_Data tiene nan values. checkear"
155
155
156
156
157
- # Initialize dictionaries to ensure all users are present in both sets
157
+ # Asegurandonos que train y test tengan los mismos CodCliente
158
158
all_users = set (loaded_data .CodCliente_idx .unique ())
159
159
users_in_train = set (df_train .CodCliente_idx .unique ())
160
160
users_in_test = set (df_test .CodCliente_idx .unique ())
161
161
missing_users_in_train = all_users - users_in_train
162
162
missing_users_in_test = all_users - users_in_test
163
163
164
- # Add missing users to the training set
164
+ # Agregando CodClientes faltantes a training set
165
165
missing_users_data = loaded_data [loaded_data .CodCliente_idx .isin (missing_users_in_train )]
166
166
df_train = pd .concat ([df_train , missing_users_data ])
167
167
168
- # Add missing users to the test set
168
+ # Agregando CodClientes faltantes a test set
169
169
missing_users_data = loaded_data [loaded_data .CodCliente_idx .isin (missing_users_in_test )]
170
170
df_test = pd .concat ([df_test , missing_users_data ])
171
171
172
- # Now df_train and df_test contain all users
173
- df_train .to_csv ('train_data.csv' , index = False )
174
- df_test .to_csv ('test_data.csv' , index = False )
172
+ # Now df_train and df_test both contain every CodCliente_idx present in loaded_data
173
+ # df_train.to_csv('train_data.csv', index=False)
174
+ # df_test.to_csv('test_data.csv', index=False)
175
175
176
- # Create a mapping from movie index to movie ID
177
- movie_idx_to_movie_id = {}
178
- for index , row in loaded_data .iterrows ():
179
- movie_idx_to_movie_id [row ['CodArticu_idx' ]] = row ['CodArticu' ]
176
+ # Build the CodArticu_idx -> CodArticu mapping with dict(zip(...))
177
+ codarticu_idx_to_codarticu = dict (zip (loaded_data ['CodArticu_idx' ], loaded_data ['CodArticu' ]))
178
+
179
+ # Saving the mapping as a JSON file
180
+ with open ('codarticu_idx_to_codarticu.json' , 'w' ) as f :
181
+ json .dump (codarticu_idx_to_codarticu , f )
180
182
181
183
182
184
###ACA ARRANCA MF_KERAS
@@ -254,44 +256,47 @@ async def recommend_top_10_items_for_user(CodCliente: int, top_N: int = 10):
254
256
df_train = pd .read_csv ('train_data.csv' )
255
257
mu = df_train .Cantidad .mean ()
256
258
257
- # Check if CodCliente exists in loaded_data
259
+ # Se fija si existe el CodCliente ingresado
258
260
if CodCliente not in loaded_data ['CodCliente' ].values :
259
- return "Ese CodCliente no existe." # Return a message indicating the UserID is not valid
261
+ return "Ese CodCliente no existe."
260
262
261
- # Map the user ID to its corresponding index
263
+ # Mapea el CodCliente ingresado con su respectivo indice
262
264
user_idx = loaded_data [loaded_data ['CodCliente' ] == CodCliente ]['CodCliente_idx' ].values [0 ]
263
265
264
- # Get the indices of all movies
266
+ # Indices of all articles (0 .. M-1)
265
267
CodArticu_indices = np .arange (M )
266
268
267
- # Create an array with the user index repeated for all movies
269
+ # Crea array con el CodCliente ingresado y todas los articulos
268
270
user_array = np .array ([user_idx ] * M )
269
271
270
- # Predict movie ratings for the user
272
+ # Predice cuan buena es la recomendacion
271
273
predicted_ratings = trained_model .predict ([user_array , CodArticu_indices ]) + mu
272
274
275
+ # Carga diccionario CodArticu_idx - CodArticu
276
+ with open ('codarticu_idx_to_codarticu.json' , 'rb' ) as f :
277
+ codarticu_idx_to_codarticu = json .load (f )
273
278
274
-
275
- # Create a DataFrame with movie indices, predicted ratings, and movie IDs
276
- movie_ratings = pd .DataFrame ({
277
- 'movie_index' : CodArticu_indices ,
279
+ # Build a DataFrame with the article indices, predicted ratings, and CodArticu values
280
+ codarticu_ratings = pd .DataFrame ({
281
+ 'CodArticu_indices' : CodArticu_indices ,
278
282
'predicted_rating' : predicted_ratings .flatten (),
279
- 'movie_id ' : [movie_idx_to_movie_id [ i ] for i in CodArticu_indices ]
283
+ 'CodArticu ' : [codarticu_idx_to_codarticu [ str ( i ) ] for i in CodArticu_indices ]
280
284
})
281
285
282
- # Sort the DataFrame by predicted ratings in descending order
283
- top_movie_ratings = movie_ratings .sort_values (by = 'predicted_rating' , ascending = False )
284
286
285
- # Get the top N recommended movie IDs
286
- top_movie_ids = top_movie_ratings .head (top_N )['movie_id' ].values
287
+ # Lo ordena en orden descendente
288
+ top_codarticu_ratings = codarticu_ratings .sort_values (by = 'predicted_rating' , ascending = False )
289
+
290
+ # Keep the top_N highest-rated articles (top_N defaults to 10)
291
+ top_codarticu_ids = top_codarticu_ratings .head (top_N )['CodArticu' ].values
287
292
288
- recommended_movie_ids = top_movie_ids
293
+ recommended_codarticu_ids = top_codarticu_ids
289
294
290
- print ("Top {} recommended movies for user (CodCliente) {}:" .format (top_N , CodCliente ))
291
- for movie_id in recommended_movie_ids :
292
- print ("Movie ID :" , movie_id )
295
+ print ("Top {} articulos recomendados para cliente (CodCliente) {}:" .format (top_N , CodCliente ))
296
+ for codarticu_id in recommended_codarticu_ids :
297
+ print ("CodArticu :" , codarticu_id )
293
298
294
- return "salio por fin "
299
+ return "listas las recommendaciones "
295
300
296
301
297
302
if __name__ == '__main__' :
0 commit comments