
Commit b766232

committed Aug 21, 2024
take pure ReHLine class
1 parent 3d7112f commit b766232

File tree

7 files changed (+508, -236 lines)

Binary file not shown.

doc/source/index.rst

+1
@@ -78,5 +78,6 @@ If you use this code please star 🌟 the repository and cite the following pape
    :maxdepth: 2
    :hidden:
 
+   tutorials
    example
    benchmark

doc/source/tutorials.rst

+40 -2
@@ -19,5 +19,43 @@ and ridge regularized Huber minimization (RidgeHuber).
 
 .. image:: ./figs/tab.png
 
-Solving Custom ERMs
--------------------
+Solving PLQ ERMs
+-------------------
+
+Loss
+****
+
+.. code:: python
+
+   # name (str): name of the custom loss function
+   # loss_kwargs: more keys and values for loss parameters
+   loss = {'name': <loss_name>, <**loss_kwargs>}
+
+.. list-table::
+
+   * - **SVM**
+     - | ``loss_name``: 'hinge' / 'svm' / 'SVM'
+       |
+       | *Example:* ``loss = {'name': 'SVM'}``
+
+   * - **Quantile Reg**
+     - | ``loss_name``: 'check' / 'quantile' / 'quantile regression' / 'QR'
+       | ``qt`` (*list*): [q1, q2, ... qK]
+       |
+       | *Example:* ``loss = {'name': 'QR', 'qt': [0.25, 0.75]}``
+
+   * - **Smooth SVM**
+     - | ``loss_name``: 'sSVM' / 'smooth SVM' / 'smooth hinge'
+       |
+       | *Example:* ``loss = {'name': 'sSVM'}``
+
+   * - **Huber**
+     - | ``loss_name``: 'huber' / 'Huber'
+       |
+       | *Example:* ``loss = {'name': 'huber'}``
+
+   * - **SVR**
+     - | ``loss_name``: 'SVR' / 'svr'
+       | ``epsilon`` (*float*): 0.1
+       |
+       | *Example:* ``loss = {'name': 'svr', 'epsilon': 0.1}``
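
For reference, the loss dictionaries documented in the new table can be written out as plain Python, as in the minimal sketch below. It uses only the names and keys shown in the table; which estimator consumes the dictionary is outside this hunk.

    # Loss specifications built from the table above (names and keys as documented).
    svm_loss   = {'name': 'SVM'}                      # hinge loss
    qr_loss    = {'name': 'QR', 'qt': [0.25, 0.75]}   # quantile levels q1..qK
    ssvm_loss  = {'name': 'sSVM'}                     # smoothed hinge
    huber_loss = {'name': 'huber'}
    svr_loss   = {'name': 'svr', 'epsilon': 0.1}      # epsilon-insensitive loss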

rehline/__init__.py

+5 -4
@@ -1,8 +1,9 @@
 # Import from internal C++ module
-from ._base import make_fair_classification, rehu, relu
-from ._class import ReHLine, ReHLine_solver, ReHLineLinear
+from ._base import ReHLine_solver, _BaseReHLine
+from ._class import ReHLine
+from ._data import make_fair_classification
 from ._internal import rehline_internal, rehline_result
 
-__all__ = ("ReHLine",
-           "ReHLineLinear",
+__all__ = ("_BaseReHLine",
+           "ReHLine",
            "make_fair_classification")

rehline/_base.py

+122 -40
@@ -5,13 +5,123 @@
 
 # License: MIT License
 
+from abc import abstractmethod
+
 import numpy as np
 from scipy.special import huber
-from sklearn.datasets import make_classification
-from sklearn.preprocessing import StandardScaler
+from sklearn.base import BaseEstimator
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
+
+from ._internal import rehline_internal, rehline_result
+
+
+class _BaseReHLine(BaseEstimator):
+    r"""Base Class of ReHLine Formulation.
+
+    .. math::
+
+        \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2, \\ \text{ s.t. }
+        \mathbf{A} \mathbf{\beta} + \mathbf{b} \geq \mathbf{0},
+
+    where :math:`\mathbf{U} = (u_{li}),\mathbf{V} = (v_{li}) \in \mathbb{R}^{L \times n}`
+    and :math:`\mathbf{S} = (s_{hi}),\mathbf{T} = (t_{hi}),\mathbf{\tau} = (\tau_{hi}) \in \mathbb{R}^{H \times n}`
+    are the ReLU-ReHU loss parameters, and :math:`(\mathbf{A},\mathbf{b})` are the constraint parameters.
+
+    Parameters
+    ----------
 
+    C : float, default=1.0
+        Regularization parameter. The strength of the regularization is
+        inversely proportional to C. Must be strictly positive.
+
+    U, V: array of shape (L, n_samples), default=np.empty(shape=(0, 0))
+        The parameters pertaining to the ReLU part in the loss function.
+
+    Tau, S, T: array of shape (H, n_samples), default=np.empty(shape=(0, 0))
+        The parameters pertaining to the ReHU part in the loss function.
+
+    A: array of shape (K, n_features), default=np.empty(shape=(0, 0))
+        The coefficient matrix in the linear constraint.
+
+    b: array of shape (K, ), default=np.empty(shape=0)
+        The intercept vector in the linear constraint.
 
-def relu(x):
+    """
+
+    def __init__(self, C=1.,
+                 U=np.empty(shape=(0,0)), V=np.empty(shape=(0,0)),
+                 Tau=np.empty(shape=(0,0)),
+                 S=np.empty(shape=(0,0)), T=np.empty(shape=(0,0)),
+                 A=np.empty(shape=(0,0)), b=np.empty(shape=(0))):
+        self.C = C
+        self.U = U
+        self.V = V
+        self.S = S
+        self.T = T
+        self.Tau = Tau
+        self.A = A
+        self.b = b
+        self.L = U.shape[0]
+        self.n = U.shape[1]
+        self.H = S.shape[0]
+        self.K = A.shape[0]
+
+    def auto_shape(self):
+        """
+        Automatically generate the shape of the parameters of the ReHLine loss function.
+        """
+        self.L = self.U.shape[0]
+        self.n = self.U.shape[1]
+        self.H = self.S.shape[0]
+        self.K = self.A.shape[0]
+
+    def call_ReLHLoss(self, score):
+        """
+        Return the value of the ReHLine loss of the `score`.
+
+        Parameters
+        ----------
+        score : ndarray of shape (n_samples, )
+            The input score that will be evaluated through the ReHLine loss.
+
+        Returns
+        -------
+        float
+            ReHLine loss evaluation of the given score.
+        """
+
+        relu_input = np.zeros((self.L, self.n))
+        rehu_input = np.zeros((self.H, self.n))
+        if self.L > 0:
+            relu_input = (self.U.T * score[:,np.newaxis]).T + self.V
+        if self.H > 0:
+            rehu_input = (self.S.T * score[:,np.newaxis]).T + self.T
+        return np.sum(_relu(relu_input), 0) + np.sum(_rehu(rehu_input), 0)
+
+    @abstractmethod
+    def fit(self, X, y, sample_weight):
+        """Fit model."""
+
+    @abstractmethod
+    def decision_function(self, X):
+        """The decision function evaluated on the given dataset
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The data matrix.
+
+        Returns
+        -------
+        ndarray of shape (n_samples, )
+            Returns the decision function of the samples.
+        """
+        # Check if fit has been called
+        check_is_fitted(self)
+
+        X = check_array(X)
+
+def _relu(x):
     """
     Evaluation of ReLU given a vector.
 
@@ -31,7 +141,7 @@ def relu(x):
     return np.maximum(x, 0)
 
 
-def rehu(x, cut=1):
+def _rehu(x, cut=1):
     """
     Evaluation of ReHU given a vector.
 
@@ -64,39 +174,11 @@ def _check_rehu(rehu_coef, rehu_intercept, rehu_cut):
     if len(rehu_coef) > 0:
         assert (rehu_cut >= 0.0).all(), "`rehu_cut` must be non-negative!"
 
-def make_fair_classification(n_samples=100, n_features=5, ind_sensitive=0):
-    """
-    Generate a random binary fair classification problem.
-
-    Parameters
-    ----------
-    n_samples : int, default=100
-        The number of samples.
-
-    n_features : int, default=5
-        The total number of features.
-
-    ind_sensitive : int, default=0
-        The index of the sensitive feature.
-
-    Returns
-    -------
-    X : ndarray of shape (n_samples, n_features)
-        The generated samples.
-
-    y : ndarray of shape (n_samples,)
-        The +/- labels for class membership of each sample.
-
-    X_sen: ndarray of shape (n_samples,)
-        The centered samples of the sensitive feature.
-    """
-
-    X, y = make_classification(n_samples, n_features)
-    y = 2*y - 1
-
-    scaler = StandardScaler()
-    X = scaler.fit_transform(X)
-
-    X_sen = X[:, ind_sensitive]
-
-    return X, y, X_sen
+def ReHLine_solver(X, U, V,
+                   Tau=np.empty(shape=(0, 0)),
+                   S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)),
+                   A=np.empty(shape=(0, 0)), b=np.empty(shape=(0)),
+                   max_iter=1000, tol=1e-4, shrink=1, verbose=1, trace_freq=100):
+    result = rehline_result()
+    rehline_internal(result, X, A, b, U, V, S, T, Tau, max_iter, tol, shrink, verbose, trace_freq)
+    return result
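
For orientation, the sketch below reproduces the ReLU/ReHU primitives and the per-sample loss evaluation that ``call_ReLHLoss`` performs, outside the class. The piecewise ReHU formula (quadratic up to ``cut``, linear beyond) and the toy ``U, V, S, T`` values are illustrative assumptions rather than content of this diff; ``call_ReLHLoss`` itself calls the module-level ``_relu`` and ``_rehu`` with the default ``cut=1``.

    import numpy as np

    def relu(x):
        # ReLU(x) = max(x, 0), elementwise.
        return np.maximum(x, 0)

    def rehu(x, cut=1.0):
        # ReHU_cut(x): 0 for x <= 0, x**2/2 for 0 < x <= cut,
        # and cut*(x - cut/2) for x > cut (quadratic head, linear tail).
        z = np.maximum(x, 0)
        return np.where(z <= cut, 0.5 * z**2, cut * (z - 0.5 * cut))

    # Toy ReLU-ReHU parameters: L=1, H=1, n=2 samples (illustrative values only).
    U = np.array([[1.0, -1.0]]); V = np.array([[0.5, 0.5]])
    S = np.array([[1.0,  1.0]]); T = np.array([[0.0, 0.0]])
    score = np.array([0.3, -0.7])

    # Per-sample composite loss, mirroring call_ReLHLoss:
    # sum_l ReLU(u_l * score + v_l) + sum_h ReHU(s_h * score + t_h).
    loss_per_sample = relu(U * score + V).sum(axis=0) + rehu(S * score + T).sum(axis=0)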

rehline/_class.py

+298 -190
Large diffs are not rendered by default.

rehline/_data.py

+42
@@ -0,0 +1,42 @@
+
+import numpy as np
+from sklearn.datasets import make_classification
+from sklearn.preprocessing import StandardScaler
+
+
+def make_fair_classification(n_samples=100, n_features=5, ind_sensitive=0):
+    """
+    Generate a random binary fair classification problem.
+
+    Parameters
+    ----------
+    n_samples : int, default=100
+        The number of samples.
+
+    n_features : int, default=5
+        The total number of features.
+
+    ind_sensitive : int, default=0
+        The index of the sensitive feature.
+
+    Returns
+    -------
+    X : ndarray of shape (n_samples, n_features)
+        The generated samples.
+
+    y : ndarray of shape (n_samples,)
+        The +/- labels for class membership of each sample.
+
+    X_sen: ndarray of shape (n_samples,)
+        The centered samples of the sensitive feature.
+    """
+
+    X, y = make_classification(n_samples, n_features)
+    y = 2*y - 1
+
+    scaler = StandardScaler()
+    X = scaler.fit_transform(X)
+
+    X_sen = X[:, ind_sensitive]
+
+    return X, y, X_sen
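
A quick usage sketch for the relocated helper; the shapes follow the docstring, and since no random seed is exposed the exact values differ between runs.

    import numpy as np
    from rehline import make_fair_classification

    # 200 samples, 5 standardized features; feature 0 acts as the sensitive attribute.
    X, y, X_sen = make_fair_classification(n_samples=200, n_features=5, ind_sensitive=0)
    print(X.shape, y.shape, X_sen.shape)   # (200, 5) (200,) (200,)
    print(np.unique(y))                    # [-1  1]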
