Source code for discretefirstorder._discrete_first_order

"""
Discrete First-Order Method for Classification and Regression
"""

from abc import ABCMeta, abstractmethod

import numpy as np
from scipy.linalg import lstsq
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model._base import _preprocess_data
from sklearn.utils.validation import (
    check_array,
    check_is_fitted,
    check_random_state,
    check_X_y,
)

from ._dfo_optim import LOSSES, _solve_dfo, _threshold


# TODO consider inheriting from LinearModel
class BaseDFO(BaseEstimator, metaclass=ABCMeta):
    """Base class for Discrete First Order classification and regression."""

    def __init__(
        self,
        loss,
        learning_rate="auto",
        k=1,
        polish=True,
        n_runs=50,
        max_iter=100,
        tol=1e-3,
        fit_intercept=False,
        normalize=False,
        random_state=None,
    ):
        super(BaseDFO, self).__init__()
        self.loss = loss
        self.learning_rate = learning_rate
        self.k = k
        self.polish = polish
        self.n_runs = n_runs
        self.max_iter = max_iter
        self.tol = tol
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.random_state = random_state

        # check loss
        if loss not in LOSSES.keys():
            raise NotImplementedError(f"Loss '{loss}' not implemented!")

        # check learning rate
        if isinstance(learning_rate, str):
            if learning_rate != "auto":
                raise ValueError(
                    "If learning rate is a string it must be set to 'auto'."
                )

    @abstractmethod
    def fit(self, X, y):
        """Fit model."""


[docs]class DFORegressor(RegressorMixin, BaseDFO): """Discrete first-order regressor. Parameters ---------- loss : str type of loss to be minimized. One of 'mse' or 'mae'. learning_rate : str or float learning rate to be used. k : int number of non-zero features to keep. polish : bool whether to polish coefficients by running least squares on the active set. n_runs : int number of runs of the discrete first order optimization procedure. max_iter : int maximum number of steps to take during one run of the discrete first order optimization algorithm. tol : float tolerance below which the optimization algorithm stops. fit_intercept : bool whether to fit an intercept term normalize : bool whether to normalize the input data. Attributes ---------- coef_ : ndarray, shape (n_features,) coefficient vector. intercept_ : float intercept. Examples -------- >>> from discretefirstorder import DFORegressor >>> import numpy as np >>> X = np.arange(100).reshape(100, 1) >>> y = np.random.normal(size=(100, )) >>> estimator = DFORegressor() >>> estimator.fit(X, y) DFORegressor() """
[docs] def __init__( self, loss="mse", learning_rate="auto", k=1, polish=True, n_runs=50, max_iter=100, tol=1e-3, fit_intercept=True, normalize=True, random_state=None, ): super(DFORegressor, self).__init__( loss=loss, learning_rate=learning_rate, k=k, polish=polish, n_runs=n_runs, max_iter=max_iter, tol=tol, fit_intercept=fit_intercept, normalize=normalize, random_state=random_state, )
def _set_intercept(self, X_offset, y_offset, X_scale): """Set intercept (adapted from sklearn LinearModel) Parameters ---------- X_offset : ndarray of shape (n_features,) average (offset) value of each feature y_offset : float average (offset) target value X_scale : ndarray of shape (n_features,) scale of each feature """ if self.fit_intercept: self.coef_ = np.divide(self.coef_, X_scale) self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T) else: self.intercept_ = 0.0 # noinspection PyAttributeOutsideInit
[docs] def fit(self, X, y, coef_init=None): """Implementation of the fit method for the discrete first-order regressor. Parameters ---------- X : array-like of shape (n_samples, n_features) the training input samples. y : array-like of shape (n_samples,) the target values. coef_init : (optional) array-like of shape (n_features,) initial value of regression coefficients Returns ------- self : object Returns self. """ # check that X and y have correct shape X, y = check_X_y(X, y) n_samples, n_features = X.shape # other checks if self.k > X.shape[1]: raise ValueError( f"Parameter k with value {self.k} is greater than input number of features." ) self.random_state_ = check_random_state(self.random_state) # preprocess data (center and scale) # this is like in other linear models # by default we expect fit_intercept = False and normalize = False # therefore no preprocessing X, y, X_offset, y_offset, X_scale = _preprocess_data( X, y, self.fit_intercept, self.normalize ) # init coefficients if coef_init is None: # regardless of X.shape[1] we rely on scipy's routine # for undetermined cases, it will use the pseudo inverse of X.T X coef_init, _, _, _ = lstsq(X, y) coef_init = _threshold(coef_init, self.k) # optimize objective = float("inf") coef = coef_init coef_init_temp = coef_init n_iter = None for _ in range(self.n_runs): coef_temp, objective_temp, n_iter_temp = _solve_dfo( coef=coef_init_temp, X=X, y=y, learning_rate=self.learning_rate, k=self.k, loss_type=self.loss, polish=self.polish, max_iter=self.max_iter, tol=self.tol, ) if objective_temp < objective: coef = coef_temp objective = objective_temp n_iter = n_iter_temp coef_init_temp = coef_init + ( 2 * self.random_state_.rand(n_features) * np.max(np.abs(coef_init)) ) # coefficients for scaled features self.coef_ = coef # TODO consider using LinearModel's _set_intercept # rescale coefficients and set intercept self._set_intercept(X_offset, y_offset, X_scale) self.n_iter_ = n_iter # add fitted flag self.is_fitted_ = True return self
[docs] def predict(self, X): """Implementation of a prediction for the discrete first-order regressor. Parameters ---------- X : array-like, shape (n_samples, n_features) The input samples. Returns ------- y : ndarray, shape (n_samples,) The output corresponding to each input sample """ # check is fit had been called check_is_fitted(self, ["coef_", "intercept_"]) # input validation X = check_array(X) return X @ self.coef_ + self.intercept_