Note
Click here to download the full example code
Group lasso with overlap¶
Comparison of solvers for a least squares with overlapping group lasso regularization.
References¶
This example is modeled after the experiments in Adaptive Three Operator Splitting, Appendix E.3.
import numpy as np
from sklearn import datasets, preprocessing
import pylab as plt
import copt as cp
np.random.seed(0)
n_samples, n_features = 100, 1002
# .. generate some data ..
# .. the first set of blocks is
groups = [np.arange(8 * i, 8 * i + 10) for i in range(125)]
ground_truth = np.zeros(n_features)
g = np.random.randint(0, len(groups), 10)
for i in g:
ground_truth[groups[i]] = np.random.randn()
A = np.random.randn(n_samples, n_features)
p = 0.95 # create a matrix with correlations between features
for i in range(1, n_features):
A[:, i] = p * A[:, i] + (1 - p) * A[:, i-1]
A[:, 0] /= np.sqrt(1 - p ** 2)
A = preprocessing.StandardScaler().fit_transform(A)
b = A.dot(ground_truth) + np.random.randn(n_samples)
# make labels in {0, 1}
b = np.sign(b)
b = (b + 1) // 2
# .. compute the step-size ..
max_iter = 5000
f = cp.utils.LogLoss(A, b)
step_size = 1. / f.lipschitz
# .. run the solver for different values ..
# .. of the regularization parameter beta ..
all_betas = np.logspace(-4, -1, 4)
all_trace_ls, all_trace_nols, all_trace_pdhg_nols, all_trace_pdhg = [], [], [], []
all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_nols_time, all_trace_pdhg_time = [], [], [], []
out_img = []
for i, beta in enumerate(all_betas):
print('beta = %s' % beta)
G1 = cp.utils.GroupL1(beta, groups[::2])
G2 = cp.utils.GroupL1(beta, groups[1::2])
def loss(x):
return f(x) + G1(x) + G2(x)
cb_tosls = cp.utils.Trace()
x0 = np.zeros(n_features)
tos_ls = cp.minimize_three_split(
f.f_grad, x0, G1.prox, G2.prox, step_size=10 * step_size,
max_iter=max_iter, tol=1e-14, verbose=1,
callback=cb_tosls, h_Lipschitz=beta)
trace_ls = np.array([loss(x) for x in cb_tosls.trace_x])
all_trace_ls.append(trace_ls)
all_trace_ls_time.append(cb_tosls.trace_time)
cb_tos = cp.utils.Trace()
x0 = np.zeros(n_features)
tos = cp.minimize_three_split(
f.f_grad, x0, G1.prox, G2.prox,
step_size=step_size,
max_iter=max_iter, tol=1e-14, verbose=1,
line_search=True, callback=cb_tos)
trace_nols = np.array([loss(x) for x in cb_tos.trace_x])
all_trace_nols.append(trace_nols)
all_trace_nols_time.append(cb_tos.trace_time)
out_img.append(tos.x)
cb_pdhg = cp.utils.Trace()
x0 = np.zeros(n_features)
pdhg = cp.minimize_primal_dual(
f.f_grad, x0, G1.prox, G2.prox,
callback=cb_pdhg, max_iter=max_iter,
step_size=step_size,
step_size2=(1. / step_size) / 2, tol=0, line_search=False)
trace_pdhg = np.array([loss(x) for x in cb_pdhg.trace_x])
all_trace_pdhg.append(trace_pdhg)
all_trace_pdhg_time.append(cb_pdhg.trace_time)
cb_pdhg_nols = cp.utils.Trace()
x0 = np.zeros(n_features)
pdhg_nols = cp.minimize_primal_dual(
f.f_grad, x0, G1.prox, G2.prox,
callback=cb_pdhg_nols, max_iter=max_iter,
step_size=step_size,
step_size2=(1. / step_size) / 2, tol=0, line_search=False)
trace_pdhg_nols = np.array([loss(x) for x in cb_pdhg_nols.trace_x])
all_trace_pdhg_nols.append(trace_pdhg_nols)
all_trace_pdhg_nols_time.append(cb_pdhg_nols.trace_time)
# .. plot the results ..
fig, ax = plt.subplots(2, 4, sharey=False)
xlim = [2000, 2000, 1000, 2000]
markevery = [x//5 for x in xlim]
for i, beta in enumerate(all_betas):
ax[0, i].set_title(r'$\lambda=%s$' % beta)
ax[0, i].set_title(r'$\lambda=%s$' % beta)
ax[0, i].plot(out_img[i] / np.max(out_img[i]))
ax[0, i].plot(ground_truth / np.max(ground_truth))
ax[0, i].set_xticks(())
ax[0, i].set_yticks(())
ax[0, i].set_ylim((-0.5, 1.5))
fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_nols[i]))
scale = 1. # all_trace_ls[i][0] - fmin
plot_tos, = ax[1, i].plot(
(all_trace_ls[i] - fmin) / scale, '--',
lw=2, marker='o', markevery=markevery[i],
markersize=5)
plot_nols, = ax[1, i].plot(
(all_trace_nols[i] - fmin) / scale,
lw=2, marker='h', markevery=markevery[i],
markersize=5)
plot_pdhg, = ax[1, i].plot(
(all_trace_pdhg[i] - fmin) / scale,
lw=2, marker='^', markevery=markevery[i],
markersize=5)
plot_pdhg_nols, = ax[1, i].plot(
(all_trace_pdhg_nols[i] - fmin) / scale,
lw=2, marker='d', markevery=markevery[i],
markersize=5)
ax[1, i].set_xlabel('Iterations')
ax[1, i].set_yscale('log')
ax[1, i].set_ylim((1e-10, None))
ax[1, i].set_xlim((0, xlim[i]))
ax[1, i].grid(True)
plt.gcf().subplots_adjust(bottom=0.25)
plt.figlegend(
(plot_tos, plot_nols, plot_pdhg, plot_pdhg_nols),
('TOS with line search', 'TOS without line search', 'PDHG with line search', 'PDHG without line search'), 'lower center', ncol=2,
scatterpoints=1, frameon=False,)
ax[1, 0].set_ylabel('Objective minus optimum')
plt.show()
Total running time of the script: ( 0 minutes 0.000 seconds)
Estimated memory usage: 0 MB