-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrid_search_svc_kernel_pca.py
73 lines (62 loc) · 2.83 KB
/
grid_search_svc_kernel_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
import pandas as pd
from sklearn.decomposition import KernelPCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
import torch
from tqdm import tqdm
import joblib
# Three-stage estimator: standardize the features, project them with kernel
# PCA, then classify with a support vector machine.  The step names are the
# prefixes used to address each stage's hyper-parameters (e.g. "kpca__kernel").
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("kpca", KernelPCA()),
        ("svm", SVC()),
    ]
)
# Hyper-parameter search space, keyed by "<pipeline step>__<parameter>".
# Because this is a single dict, the search covers the full Cartesian product
# of every list below (48,960 candidates), including combinations in which a
# parameter is ignored by the selected kernel (e.g. `degree` with 'rbf').
param_grid = dict(
    # --- Kernel PCA stage ---
    kpca__kernel=["rbf", "poly", "sigmoid"],
    # Kernel coefficient; used by the rbf, poly and sigmoid kernels.
    kpca__gamma=[1e-3, 1e-2, 1e-1, 1],
    # Polynomial degree; only the poly kernel uses it.
    kpca__degree=[2, 3, 4],
    # Independent term; used by the poly and sigmoid kernels.
    kpca__coef0=[0, 1],
    # Number of components to keep after the projection.
    kpca__n_components=[
        5, 10, 20, 30, 40, 45, 50, 60, 70, 80, 120, 160, 200, 230, 270, 310, 350,
    ],
    # --- SVM stage ---
    # SVM kernels to explore.
    svm__kernel=["linear", "rbf", "poly", "sigmoid"],
    # SVM regularization parameter.
    svm__C=[0.01, 0.1, 1, 10, 100],
    # Kernel coefficient for the rbf, poly and sigmoid SVM kernels.
    svm__gamma=["scale", "auto"],
)
# Set up Stratified K-Fold cross-validation (5 folds).
cv = StratifiedKFold(n_splits=5)

# Load the saved training tensors and convert them to NumPy arrays for
# scikit-learn.
name_dataset = "caltech101"
# map_location='cpu' lets tensors that were saved from a CUDA device be loaded
# on a machine without a GPU; the trailing .cpu() is then a no-op but is kept
# so both lines read the same way.
X_train = torch.load(
    os.path.join(f'output/{name_dataset}', 'trainfeat.pth'),
    map_location='cpu',
).cpu().numpy()
# The labels get the same .cpu() treatment as the features: calling .numpy()
# directly on a CUDA tensor raises a TypeError.
y_train = torch.load(
    os.path.join(f'output/{name_dataset}', 'trainlabels.pth'),
    map_location='cpu',
).cpu().numpy()
# Run the grid search in parallel with a tqdm progress bar.
#
# GridSearchCV.fit() is called exactly once, so wrapping it would only move the
# bar a single step after the whole search has already finished.  Instead we
# hook joblib's batch-completion callback, which fires in the parent process
# every time a batch of (candidate, fold) fits comes back from a worker.

# Total number of fits = number of parameter combinations x number of CV folds.
# (The final refit on the full training set is not a joblib task and is
# therefore not counted.)
n_candidates = 1
for candidate_values in param_grid.values():
    n_candidates *= len(candidate_values)
total_fits = n_candidates * cv.get_n_splits()

# Set up the GridSearchCV; n_jobs is taken from the parallel_backend context.
grid_search = GridSearchCV(pipeline, param_grid, cv=cv, scoring='accuracy')

with joblib.parallel_backend('loky', n_jobs=-1):
    with tqdm(total=total_fits) as progress_bar:

        class _TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
            """Advance the progress bar whenever joblib finishes a batch."""

            def __call__(self, *args, **kwargs):
                # A batch may bundle several fits, so advance by its size.
                progress_bar.update(n=self.batch_size)
                return super().__call__(*args, **kwargs)

        # NOTE(review): this relies on joblib looking up
        # joblib.parallel.BatchCompletionCallBack at dispatch time (the
        # well-known "tqdm_joblib" recipe).  If only one worker is available
        # joblib may run sequentially and skip the callback, in which case the
        # bar will not advance - confirm against the installed joblib version.
        original_callback = joblib.parallel.BatchCompletionCallBack
        joblib.parallel.BatchCompletionCallBack = _TqdmBatchCompletionCallback
        try:
            # Fit the GridSearch to your data
            grid_search.fit(X_train, y_train)
        finally:
            # Always undo the patch, even if the search fails or is
            # interrupted, so later joblib users are not affected.
            joblib.parallel.BatchCompletionCallBack = original_callback
# Report the winning configuration and its mean cross-validated accuracy.
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_}")

# Persist the complete per-candidate results table (one row per parameter
# combination) as CSV, without the DataFrame index column.
results = pd.DataFrame(data=grid_search.cv_results_)
results.to_csv(path_or_buf='grid_search_results_kpca.csv', index=False)