from itertools import chain
from operator import attrgetter
import numpy as np
from sklearn.exceptions import FitFailedWarning
from sklearn.linear_model.base import LinearModel
[docs]def dominates(a, b):
return all(ai <= bi for ai, bi in zip(a, b)) and not a == b
def _get_fit(m, attrs):
if attrs:
get_fit = attrgetter(*attrs)
elif isinstance(next(iter(m)), tuple):
get_fit = lambda x: x
else:
raise ValueError("No attributes given")
return get_fit
def _pareto_front(models, *attrs):
"""Helper function. Performs simple cull algorithm"""
get_fit = _get_fit(models, attrs)
front = set()
for m in models:
dominated = set()
for f in front:
fitf = get_fit(f)
fitm = get_fit(m)
if dominates(fitm, fitf):
dominated.add(f)
elif dominates(fitf, fitm):
break
else:
front.add(m)
front -= dominated
return sorted(front, key=get_fit)
[docs]def pareto_front(models, *attrs, all=False):
"""
Simple cull. Can recursively determine all fronts.
"""
if not all:
return _pareto_front(models, *attrs)
else:
fronts = []
models = set(models)
while models:
fronts.append(_pareto_front(models, *attrs))
models -= set(fronts[-1])
return fronts
[docs]def crowding_distance(models, *attrs):
"""
Assumes models in lexicographical sorted.
"""
get_fit = _get_fit(models, attrs)
f = np.array(sorted([get_fit(m) for m in models]))
scale = np.max(f, axis=0) - np.min(f, axis=0)
with np.errstate(invalid="ignore"):
dist = np.sum(abs(np.roll(f, 1, axis=0) - np.roll(f, -1, axis=0)) / scale, axis=1)
dist[0] = np.infty
dist[-1] = np.infty
return dist
[docs]def sort_non_dominated(models, *attrs, index=False):
"""
NSGA2 based sorting
"""
fronts = pareto_front(list(models), *attrs, all=True)
distances = [
crowding_distance(front, *attrs) for front in fronts
] # if len(front) > 2 else list(np.zeros_like(front))
# fd = (list of models, list of distances)
# convert that into (model, distance) tuples
# sort in descending order by distance
# resolve the nested chain
ranked = chain.from_iterable(sorted(zip(*fd), key=lambda x: x[1]) for fd in zip(fronts, distances))
ranked = [m for (m, d) in ranked] # discard the distance
if not index:
return ranked
else:
ind = models.index
return [ind(r) for r in ranked]
[docs]def normalize(x, order=2):
m = 1.0 / np.linalg.norm(x, ord=order, axis=0)
return m * x, m
[docs]def cardinality(x, null=1e-9):
return sum(map(lambda x: abs(x) >= null, x))
[docs]def rmse(x):
return np.sqrt(np.mean(x ** 2))
[docs]def nrmse(x, y):
scale = max(y) - min(y) or 1.0
return rmse(x - y) / scale
[docs]class ReducedLinearModel(LinearModel):
def __init__(self, mask, lm):
self.mask = mask
self.lm = lm
[docs] def fit(self, x, y):
mask = self.mask
if not x.shape[1] == mask.shape[0]:
raise FitFailedWarning
self.lm = self.lm.fit(x[:, mask], y)
self.coef_ = np.zeros(shape=mask.shape)
self.coef_[mask] = self.lm.coef_
return self
[docs] def predict(self, x):
return self.lm.predict(x[:, self.mask])
[docs] def scores(self, x, y):
return self.lm.scores(x[:, self.mask], y)
[docs]def aic(residuals, k, correct=False):
"""Akaike information criterion"""
n = len(residuals)
correction = 2 * (k + 1) * k / (n - k - 1) if correct else 0
return n * np.log(np.var(residuals)) + 2 * k + correction