Group Lasso

Out:

defaultdict(<class 'list'>, {1: ['x_0', 'sin(x_0)', 'x_0**2*sin(x_0)'], 2: ['x_0**2'], 0: ['cos(x_0)', 'sin(x_0**2)', 'cos(x_0**2)', 'x_0*sin(x_0)', 'x_0*cos(x_0)', 'x_0**2*cos(x_0)', 'x_0*sin(x_0**2)', 'x_0**2*sin(x_0**2)', 'x_0*cos(x_0**2)', 'x_0**2*cos(x_0**2)'], 3: ['x_0**3']})
/home/docs/checkouts/readthedocs.org/user_builds/sparsereg/envs/latest/lib/python3.6/site-packages/sklearn/model_selection/_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
0.9977261167415085
0.640 x_0**2 + 0.975 x_0**3 + 0.968
/home/docs/checkouts/readthedocs.org/user_builds/sparsereg/envs/latest/lib/python3.6/site-packages/sklearn/model_selection/_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
0.9984230583925218
0.561 x_0**2 + -0.126 x_0**2*cos(x_0) + 0.975 x_0**3 + 1.046

from collections import defaultdict

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.linear_model import Lasso
from sklearn.metrics import explained_variance_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_random_state

from sparsereg.model.base import print_model
from sparsereg.model.group_lasso import SparseGroupLasso
from sparsereg.preprocessing.symfeat import SymbolicFeatures

# Example: compare SparseGroupLasso against a plain Lasso on a synthetic
# 1-D regression problem whose target is cos(x) + x**2 + x**3.

# Seeded random state so the data and the train/test split are reproducible.
rng = check_random_state(42)
x = rng.normal(size=(10000, 1))
# Noise-free target. To experiment with noisy data, add a term such as
# ``0.01 * rng.normal(size=y.shape)`` (the noise must match the sample count).
y = np.cos(x[:, 0]) + x[:, 0] ** 2 + x[:, 0] ** 3
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=rng)

# Expand the single input into a library of candidate symbolic terms
# (powers, sin/cos and their products); fitted on the training split only.
pre = SymbolicFeatures(exponents=[1, 2], operators={"sin": np.sin, "cos": np.cos}).fit(x_train)
features_train = pre.transform(x_train)
features_test = pre.transform(x_test)
feature_names = pre.get_feature_names()

# Cluster the *features* (hence the transpose) into four groups; the cluster
# ids are used as the group assignment for the sparse group lasso below.
km = AgglomerativeClustering(n_clusters=4).fit(features_train.T)
labels = defaultdict(list)
for name, group in zip(feature_names, km.labels_):
    labels[group].append(name)
print(labels)

# NOTE(review): ``normalize`` was deprecated in scikit-learn 1.0 and removed
# from ``Lasso`` in 1.2; this example targets the older API.
params = {"alpha": [0.001, 0.01, 0.02, 0.05], "normalize": [True]}
scorer = make_scorer(explained_variance_score)
sgl = SparseGroupLasso(groups=km.labels_, rho=0.3, alpha=0.02)
lasso = Lasso()

for model in (sgl, lasso):
    # ``cv=3`` is passed explicitly: it matches the default this example was
    # written against, silences the scikit-learn FutureWarning about the
    # default changing to 5, and keeps the results stable across versions.
    grid = GridSearchCV(model, params, cv=3, n_jobs=1, scoring=scorer, error_score=0).fit(
        features_train, y_train
    )
    # Held-out explained variance of the best model found by the search.
    print(grid.score(features_test, y_test))
    best = grid.best_estimator_
    # Human-readable form of the selected sparse model.
    print(print_model(best.coef_, feature_names, intercept=best.intercept_))

Total running time of the script: ( 0 minutes 2.273 seconds)

Gallery generated by Sphinx-Gallery