Source code for sparsereg.model.ffx

import warnings
from collections import namedtuple
from copy import deepcopy
from operator import attrgetter

import joblib
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.base import TransformerMixin
from sklearn.linear_model import ElasticNet
# NOTE: _pre_fit is a private sklearn helper; in newer sklearn releases it
# lives in sklearn.linear_model._coordinate_descent.
from sklearn.linear_model.coordinate_descent import _pre_fit
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.utils.validation import check_random_state
from sklearn.utils.validation import check_X_y

from sparsereg.model.base import PrintMixin
from sparsereg.model.base import RationalFunctionMixin
from sparsereg.preprocessing.symfeat import SymbolicFeatures
from sparsereg.util import aic
from sparsereg.util import nrmse
from sparsereg.util import pareto_front
from sparsereg.util.pipeline import ColumnSelector


class FFXModel(Pipeline):
    def __init__(self, strategy, **kw):
        self.strategy = strategy
        self.kw = kw
        super().__init__(
            steps=[
                ("selection", ColumnSelector(index=self.strategy.index)),
                (
                    "features",
                    SymbolicFeatures(
                        exponents=self.strategy.exponents,
                        operators=self.strategy.operators,
                        consider_products=self.strategy.consider_products,
                    ),
                ),
                ("regression", strategy.base(warm_start=True, **self.kw)),
            ]
        )

    def __hash__(self):
        return hash(joblib.hash((self._final_estimator.coef_, self._final_estimator.intercept_)))

    def __eq__(self, other):
        if self._final_estimator.coef_.shape != other._final_estimator.coef_.shape:
            return False
        return np.allclose(self._final_estimator.coef_, other._final_estimator.coef_) and np.allclose(
            self._final_estimator.intercept_, other._final_estimator.intercept_
        )

    def print_model(self, input_features=None):
        for step in self.steps[:-1]:
            input_features = step[1].get_feature_names(input_features)
        return self._final_estimator.print_model(input_features)

    def pre_compute(self, x, y):
        pass


class FFXElasticNet(PrintMixin, ElasticNet):
    """ElasticNet with model printing; overrides only the `score` method (nrmse)."""

    def score(self, x, y):
        """Score using the nrmse."""
        return nrmse(self.predict(x), y)


class FFXRationalElasticNet(RationalFunctionMixin, FFXElasticNet):
    pass


Strategy = namedtuple("Strategy", "exponents operators consider_products index base")
Strategy.__doc__ = """Class for holding strategy data.

:param iterable exponents: exponents to consider
:param dict operators: mapping str (name) to callable (operator)
:param bool consider_products: whether to consider products of features as features
:param slice index: which columns of the features provided to `fit` will be used
:param type base: type of FFX variant to use
"""
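

# Illustrative sketch (not part of the original module): building a Strategy by
# hand and wiring it into an FFXModel pipeline. The alpha/l1_ratio values and
# the x_train/y_train arrays are placeholders.
#
#   linear = Strategy(exponents=[1], operators={}, consider_products=False,
#                     index=slice(None), base=FFXElasticNet)
#   model = FFXModel(linear, alpha=0.1, l1_ratio=0.5)
#   model.fit(x_train, y_train)        # x_train: (n_samples, n_features)
#   print(model.print_model())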


def build_strategies(exponents, operators, rational=True):
    strategies = []
    linear = Strategy(
        exponents=[1], operators={}, consider_products=False, index=slice(None), base=FFXElasticNet
    )
    strategies.append(linear)
    if rational:
        linear_rational = Strategy(
            exponents=[1],
            operators={},
            consider_products=False,
            index=slice(None),
            base=FFXRationalElasticNet,
        )
        strategies.append(linear_rational)
    if sorted(exponents) != [1]:
        full_exponents = Strategy(
            exponents=exponents, operators={}, consider_products=True, index=slice(None), base=FFXElasticNet
        )
        strategies.insert(1, full_exponents)
        if rational:
            full_exponents_rational = Strategy(
                exponents=exponents,
                operators={},
                consider_products=True,
                index=slice(None),
                base=FFXRationalElasticNet,
            )
            strategies.append(full_exponents_rational)
    if operators:
        if exponents != [1]:
            simple_operators = Strategy(
                exponents=[1],
                operators=operators,
                consider_products=True,
                index=slice(None),
                base=FFXElasticNet,
            )
            strategies.append(simple_operators)
        full_operators = Strategy(
            exponents=exponents,
            operators=operators,
            consider_products=True,
            index=slice(None),
            base=FFXElasticNet,
        )
        strategies.append(full_operators)

    def strategy_generator(front):
        yield from strategies

    return strategy_generator
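

# Sketch of what build_strategies produces (illustrative): for exponents [1, 2]
# and an "abs" operator, with rational=False, the generated order is the plain
# linear strategy, the full-exponent strategy, then the two operator strategies.
#
#   strategies = build_strategies([1, 2], {"abs": np.abs}, rational=False)
#   for strategy in strategies([]):    # the `front` argument is ignored here
#       print(strategy.base.__name__, strategy.exponents, list(strategy.operators))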


def _get_alphas(alpha_max, num_alphas, eps):
    """Return a list of alphas between `alpha_max` and `alpha_max*eps` in descending order.

    The returned alphas are mostly logarithmically spaced. Close to `alpha_max`,
    approx. `num_alphas / 4` additional values with a 10x finer logarithmic
    spacing are inserted.
    """
    st, fin = np.log10(alpha_max * eps), np.log10(alpha_max)
    alphas1 = np.logspace(st, fin, num=num_alphas * 10)[::-1][: int(num_alphas / 4)]
    alphas2 = np.logspace(st, fin, num=num_alphas)
    return sorted(set(alphas1).union(alphas2), reverse=True)


def _path_is_saturated(models, n_tail=15, digits=4):
    if len(models) <= n_tail:
        return False
    else:
        return round(models[-1].train_score_, digits) == round(models[-n_tail].train_score_, digits)
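

# Quick numeric sketch for _get_alphas above (illustrative, not part of the
# original module): with alpha_max=1.0, num_alphas=20 and eps=1e-4, the grid
# runs from 1.0 down to 1e-4 with about num_alphas + num_alphas/4 values
# (duplicates removed), the extra quarter packed just below alpha_max.
#
#   alphas = _get_alphas(1.0, 20, 1e-4)
#   assert np.isclose(alphas[0], 1.0) and np.isclose(alphas[-1], 1e-4)
#   assert all(a > b for a, b in zip(alphas, alphas[1:]))   # strictly descending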


def enet_path(
    est, x_train, x_test, y_train, y_test, num_alphas, eps, l1_ratio, target_score, n_tail, max_complexity
):
    models = []
    trafo = Pipeline(steps=est.steps[:-1])
    final = est._final_estimator
    fit_intercept = final.fit_intercept
    normalize = final.normalize

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        features = trafo.fit_transform(x_train)

    if isinstance(final, RationalFunctionMixin):
        features = est._final_estimator._transform(features, y_train)

    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(
        features, y_train, None, True, normalize=normalize, fit_intercept=fit_intercept, copy=True
    )
    n_samples = X.shape[0]
    alpha_max = np.abs(np.nanmax(X.T @ y) / (n_samples * l1_ratio))

    est.set_params(
        regression__precompute=precompute,
        regression__fit_intercept=False,
        regression__normalize=False,
        regression__warm_start=True,
    )
    est_ = FFXElasticNet()
    est_.set_params(**final.get_params())

    for alpha in _get_alphas(alpha_max, num_alphas, eps):
        est_.set_params(l1_ratio=l1_ratio, alpha=alpha)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            est_.fit(X, y, check_input=False)

        model = deepcopy(est)
        model.set_params(
            regression__fit_intercept=fit_intercept,
            regression__normalize=normalize,
            regression__l1_ratio=l1_ratio,
            regression__alpha=alpha,
        )
        for attr in ["coef_", "intercept_", "n_iter_"]:
            setattr(model._final_estimator, attr, getattr(est_, attr))
        model._final_estimator._set_intercept(X_offset, y_offset, X_scale)
        if isinstance(model._final_estimator, RationalFunctionMixin):
            model._final_estimator._arrange_coef()
        model.train_score_ = model.score(x_train, y_train)
        model.test_score_ = model.score(x_test, y_test)
        model.complexity_ = np.count_nonzero(model._final_estimator.coef_)
        models.append(model)

        if model.train_score_ <= target_score:
            # reached target score
            break
        elif model.complexity_ >= max_complexity:
            # reached target complexity
            break
        elif _path_is_saturated(models, n_tail=n_tail):
            # stagnation in train score
            break

    return models


def run_strategy(
    strategy,
    x_train,
    x_test,
    y_train,
    y_test,
    num_alphas,
    eps,
    l1_ratios,
    target_score,
    n_tail,
    max_complexity,
    n_jobs,
    **kw,
):
    est = FFXModel(strategy, **kw)
    with joblib.Parallel(n_jobs=n_jobs) as parallel:
        paths = parallel(
            joblib.delayed(enet_path)(
                est,
                x_train,
                x_test,
                y_train,
                y_test,
                num_alphas,
                eps,
                l1_ratio,
                target_score,
                n_tail,
                max_complexity,
            )
            for l1_ratio in l1_ratios
        )
    return [model for path in paths for model in path]


def run_ffx(
    x_train,
    x_test,
    y_train,
    y_test,
    exponents,
    operators,
    num_alphas=100,
    l1_ratios=(0.1, 0.3, 0.5, 0.7, 0.9, 0.95),
    eps=1e-30,
    target_score=0.01,
    max_complexity=50,
    n_tail=15,
    random_state=None,
    strategies=None,
    n_jobs=1,
    rational=True,
    **kw,
):
    strategies = strategies or build_strategies(exponents, operators, rational=rational)
    non_dominated_models = []
    for strategy in strategies(non_dominated_models):
        models = run_strategy(
            strategy,
            x_train,
            x_test,
            y_train,
            y_test,
            num_alphas,
            eps,
            l1_ratios,
            target_score,
            n_tail,
            max_complexity,
            n_jobs,
            **kw,
        )
        front = pareto_front(models, "complexity_", "test_score_")
        non_dominated_models.extend(front)
        if any(model.test_score_ <= target_score for model in front):
            break
    return sorted(
        pareto_front(non_dominated_models, "complexity_", "test_score_"), key=attrgetter("complexity_")
    )
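

# Usage sketch for run_ffx (illustrative; x and y are placeholder arrays):
#
#   from sklearn.model_selection import train_test_split
#   x_train, x_test, y_train, y_test = train_test_split(x, y)
#   front = run_ffx(x_train, x_test, y_train, y_test, exponents=[1, 2], operators={})
#   for model in front:    # Pareto front, sorted by complexity
#       print(model.complexity_, model.test_score_, model.print_model())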


# Combines several fitted models into one predictor via a fixed weighted sum of
# their predictions (used by FFX when decision == 'weight').
class WeightedEnsembleEstimator(BaseEstimator, TransformerMixin):
    def __init__(self, estimators, weights):
        self.estimators = estimators
        self.weights = weights

    def fit(self, x, y=None):
        return self

    def predict(self, x):
        return np.sum([w * est.predict(x) for w, est in zip(self.weights, self.estimators)], axis=0)

    def print_model(self, input_features=None):
        return "+".join(
            [
                "{}*({})".format(w, est.print_model(input_features))
                for w, est in zip(self.weights, self.estimators)
            ]
        )
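

# Illustrative sketch: combining two already-fitted front models into a single
# weighted predictor (model_a, model_b, x_test and the weights are placeholders).
#
#   ensemble = WeightedEnsembleEstimator([model_a, model_b], [0.7, 0.3])
#   y_hat = ensemble.predict(x_test)   # 0.7 * model_a(x) + 0.3 * model_b(x)
#   print(ensemble.print_model())      # "0.7*(...)+0.3*(...)"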


class FFX(BaseEstimator, RegressorMixin):
    def __init__(
        self,
        l1_ratios=(0.4, 0.8, 0.95),
        num_alphas=30,
        eps=1e-5,
        random_state=None,
        strategies=None,
        target_score=0.01,
        n_tail=5,
        decision="min",
        max_complexity=50,
        exponents=[1, 2],
        operators={},
        n_jobs=1,
        rational=True,
        **kw,
    ):
        """Fast Function eXtraction model.

        :param iterable l1_ratios: ratios of the l1 to the l2 penalty term to
            consider (`0 <= l1_ratio <= 1`)
        :param int num_alphas: number of different regularization strengths
            `alpha` (weight of the penalty term relative to the cost function)
            to consider
        :param float eps: ratio of smallest to largest alpha considered
            (`0 < eps < 1`)
        :param int random_state: seed or RandomState for the internal
            train/test split
        :param iterable strategies: `Strategy` objects to consider
        :param float target_score: break condition on the cost function for the
            innermost loop
        :param int n_tail: length of the path tail (in alpha) checked for
            saturation
        :param str decision: one of ``'weight'`` or ``'min'``
        :param float max_complexity: break condition on model complexity for
            the innermost loop
        :param iterable exponents: can contain floats and negative values
        :param dict operators: mapping operator name to callable (of one
            variable)
        :param int n_jobs: number of parallel jobs (passed to joblib)
        :param kw: additional keyword arguments passed on to the base elastic
            net estimator

        The implemented algorithm is described in
        http://dx.doi.org/10.1007/978-1-4614-1770-5_13.

        A `Strategy` is determined by a set of nonlinear functions from which
        an extended set of features is generated by evaluating these functions
        on all given features. You can either supply the strategies directly
        via the `strategies` parameter or let them be generated. Generation is
        configured by the parameters `exponents`, `operators` and `rational`;
        when `strategies` is given, these three parameters have no effect.

        Strategy generation takes place in the following manner:

        `exponents`: orders of the monomials to consider for each single
        feature (no products between features at this stage). `exponents` is an
        iterable of numbers (floats and negative values are possible; 1 is
        always included automatically). The first step in strategy generation
        is calculating all these monomials.

        `operators`: mapping of str to callable taking one parameter. All
        callables in `operators` are evaluated on all monomials from the first
        step.

        products: not configurable. All products of each operator feature from
        the second step with each monomial feature from the first step are
        considered, as well as all products of monomial features with monomial
        features based on a different input feature (thus generating mixed
        products up to order `2*max(exponents)`).

        `rational`: if true, do not only consider generalized linear models in
        all basis functions, but also rational functions, using the
        rational-function trick described in
        http://dx.doi.org/10.1007/978-1-4614-1770-5_13.

        For each `Strategy`, an elastic net optimizer is run with many
        combinations of `l1_ratio` and `alpha`. An `l1_ratio` of 0 corresponds
        to ridge regression (only l2 penalty), an `l1_ratio` of 1 to LASSO
        regression (only l1 penalty). `alpha` determines the amount of
        regularization: `alpha = 0` would mean no regularization,
        `alpha -> infinity` only regularization. For details on the elastic net
        algorithm see :class:`sklearn.linear_model.ElasticNet`. The number of
        alphas is loosely determined by `num_alphas` (the actual number is
        close to it and never smaller).

        The maximum value of the considered `alpha` is determined dynamically,
        based on Tibshirani's "strong rules", see
        https://doi.org/10.1111/j.1467-9868.2011.01004.x. The rule gives an
        `alpha` for which the fitted model will (in most relevant cases) have a
        complexity of 0 (no nonzero terms). This maximum alpha also depends on
        the `l1_ratio`; therefore the iteration over alpha takes place in the
        innermost loop, which runs from the maximum alpha down to `eps` times
        the maximum alpha. As `alpha` decreases, the complexity (number of
        nonzero terms) is expected to increase, whereas the cost (nrmse
        evaluated on the training set) is expected to decrease.

        The innermost loop has three break conditions:

        1. train score: the cost is less than or equal to `target_score`.
        2. complexity: the complexity is greater than or equal to
           `max_complexity`.
        3. saturation: no significant improvement of the cost during the last
           `n_tail` iterations (significant -> last 4 decimal digits).

        To obtain a single model from the Pareto front of models, the Akaike
        information criterion (AIC) is used, see
        https://en.wikipedia.org/wiki/Akaike_information_criterion. How it is
        used is determined by the `decision` parameter: if `decision == 'min'`,
        the model with the smallest AIC is taken; if `decision == 'weight'`,
        the resulting model is a linear combination of all models on the front,
        weighted by `exp((min(AIC) - AIC) / 2)`.
        """
        self.l1_ratios = l1_ratios
        self.num_alphas = num_alphas
        self.eps = eps
        self.random_state = check_random_state(random_state)
        self.strategies = strategies
        self.target_score = target_score
        self.n_tail = n_tail
        self.exponents = exponents
        self.operators = operators
        self.kw = kw
        self.decision = decision
        self.max_complexity = max_complexity
        self.n_jobs = n_jobs
        self.rational = rational

    def fit(self, x, y=None):
        x, y = check_X_y(x, y)
        x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=self.random_state)
        self.front = run_ffx(
            x_train,
            x_test,
            y_train,
            y_test,
            self.exponents,
            self.operators,
            num_alphas=self.num_alphas,
            l1_ratios=self.l1_ratios,
            target_score=self.target_score,
            n_tail=self.n_tail,
            random_state=self.random_state,
            strategies=self.strategies,
            n_jobs=self.n_jobs,
            max_complexity=self.max_complexity,
            rational=self.rational,
            eps=self.eps,
            **self.kw,
        )
        self.make_model(x_test, y_test)
        return self

    def predict(self, x):
        return self._model.predict(x)

    def score(self, x, y):
        return self._model.score(x, y)

    def make_model(self, x_test, y_test):
        residuals = [y_test - est.predict(x_test) for est in self.front]
        complexities = [est.complexity_ for est in self.front]
        aic_scores = np.array([aic(res, c) for res, c in zip(residuals, complexities)])
        aic_scores -= np.min(aic_scores)
        if self.decision == "weight":
            weights = np.exp(-aic_scores / 2)
            weights /= np.sum(weights)
            self._model = WeightedEnsembleEstimator(self.front, weights)
        else:
            self._model = self.front[np.argmin(aic_scores)]
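
    # The 'weight' branch above computes standard Akaike weights: after
    # shifting so that min(AIC) == 0, each model is weighted by exp(-AIC/2)
    # and the weights are normalized. Standalone sketch with made-up AIC
    # values:
    #
    #   aic_scores = np.array([10.0, 12.0, 20.0])
    #   aic_scores -= aic_scores.min()            # [0.0, 2.0, 10.0]
    #   weights = np.exp(-aic_scores / 2)
    #   weights /= weights.sum()                  # approx. [0.73, 0.27, 0.005]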

    def print_model(self, input_features=None):
        return self._model.print_model(input_features)
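

# End-to-end usage sketch (illustrative only; parameter values and data are
# placeholders, not taken from the original module):
#
#   import numpy as np
#   from sparsereg.model.ffx import FFX
#
#   rng = np.random.RandomState(0)
#   x = rng.normal(size=(200, 2))
#   y = 1.5 * x[:, 0] ** 2 - 0.7 * x[:, 1] + 2.0
#
#   est = FFX(exponents=[1, 2], operators={}, decision="min")
#   est.fit(x, y)
#   print(est.print_model())   # symbolic form of the selected model
#   print(est.score(x, y))     # nrmse on the given data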