A toy mixed-norm regression problem

Let \(f_k(x)\) be the sum of the \(k\) largest entries of \(|x|\), the entrywise absolute value of a vector \(x\) with \(n\) entries.

It turns out that this function is a norm! Special cases:

  • \(f_1(x) = \|x\|_{\infty}\)

  • \(f_n(x) = \|x\|_1\).
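
These special cases are easy to check numerically; here is a minimal sketch (the helper name `f_k` and the test vector are ours):

import numpy as np

def f_k(x, k):
    # Sum of the k largest entries of |x|, computed directly by sorting.
    return np.sort(np.abs(x))[-k:].sum()

x_demo = np.array([3.0, -1.0, 4.0, -1.5])
assert np.isclose(f_k(x_demo, 1), np.linalg.norm(x_demo, np.inf))
assert np.isclose(f_k(x_demo, x_demo.size), np.linalg.norm(x_demo, 1))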

Consider the minimum-norm regression problem:

\(\qquad \operatorname{minimize} ~f_k(x) + \gamma f_n(x) ~\text{ s.t. }~ A x = y\).

Here \(A\) is a wide \(m \times n\) matrix (\(m < n\)), \(y\) is an \(m\)-vector, and \(\gamma > 0\) is a tuning parameter.

As \(\gamma\) gets smaller, the \(\ell_1\) term fades and the loss concentrates on the \(k\) largest entries, so the larger entries of \(x\) are “more penalized” relative to the rest.
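
Before implementing \(f_k\) in CVXPY, it helps to recall a standard variational formula: for any vector \(z\) with \(n\) entries, the sum of the \(k\) largest entries of \(z\) equals

\(\qquad \min_{t} ~ k t + \sum_{i=1}^{n} \max(z_i - t, 0)\).

Taking \(z = |x|\) makes \(f_k\) DCP-representable; the home-brewed implementation below uses exactly this trick.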

import cvxpy as cp

def sum_abs_largest(expr, k):
    # One standard implementation, via the variational formulation above:
    # f_k(x) = min_t { k*t + sum_i max(|x_i| - t, 0) }.
    # The auxiliary variable t is minimized away by the solver, so this
    # expression is valid wherever it is being minimized.
    t = cp.Variable()
    return k * t + cp.sum(cp.pos(cp.abs(expr) - t))
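
As a quick sanity check, CVXPY's DCP analyzer should classify the home-brewed expression as convex (a small sketch; the variable is arbitrary):

z = cp.Variable(5)
print(sum_abs_largest(z, 2).curvature)  # expect: CONVEX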
import numpy as np
import scipy.linalg as la
import time

def make_problem_data(n, m, true_k):
    # Planted model: x_ref has true_k nonzero entries, each in [10, 110).
    np.random.seed(1)
    x_ref = np.zeros(n)
    idxs = np.random.choice(np.arange(n), true_k, replace=False)
    x_ref[idxs] = 10 + 100*np.random.rand(true_k)
    A = np.random.randn(m, n)
    # Small noise vector, scaled to have norm 1/m.
    v = np.random.normal(0, 1.0, m)
    v /= (m*la.norm(v))
    y = A @ x_ref + v
    return A, x_ref, y

def make_and_solve(loss, x):
    # Note: A and y are read from the enclosing scope.
    prob = cp.Problem(cp.Minimize(loss), [A @ x == y])
    tic = time.time()
    prob.solve(solver='ECOS')
    toc = time.time()
    # Timing includes CVXPY's canonicalization, not just the solver.
    t = toc - tic
    x_est = x.value
    return prob, x_est, t
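
Note that `make_and_solve` hard-codes ECOS, which (depending on your CVXPY version) may need to be installed separately, e.g. with `pip install ecos`. A quick way to see which solvers are available:

print(cp.installed_solvers())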
n, m, true_k = 14, 7, 4
A, x_ref, y = make_problem_data(n, m, true_k)
x = cp.Variable(n)
gamma = cp.Parameter(pos=True)
gamma.value = 1e-3
k = 3
loss1 = sum_abs_largest(x, k) + gamma * cp.norm(x, 1)
prob1, x1, t1 = make_and_solve(loss1, x)
print(f'Solve time (home-brewed)\n\t{t1}')

loss2 = cp.sum_largest(cp.abs(x), k) + gamma * cp.norm(x, 1)
prob2, x2, t2 = make_and_solve(loss2, x)
print(f'Solve time (cvxpy atom)\n\t{t2}')

disc = la.norm(x1 - x2) / min(la.norm(x1), la.norm(x2))
print(f'Discrepancy between two solutions\n\t{disc}')
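
Since `gamma` is a `cp.Parameter`, the problem can be re-solved for several values without being rebuilt. The short sweep below (values chosen arbitrarily; it ends at the original \(10^{-3}\) so later cells are unaffected) prints the \(k\) largest magnitudes of the solution for each \(\gamma\):

for g in [1e-1, 1e-2, 1e-3]:
    gamma.value = g
    prob2.solve(solver='ECOS')
    print(g, np.round(np.sort(np.abs(x.value))[-k:], 2))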
import matplotlib.pyplot as plt
%matplotlib inline

# Sparsity pattern of the canonicalized constraint matrix (home-brewed).
A1 = prob1.get_problem_data(solver='SCS')[0]['A']
plt.spy(A1, aspect='auto')
plt.show()

# Sparsity pattern for the version using the built-in atom.
A2 = prob2.get_problem_data(solver='SCS')[0]['A']
plt.spy(A2, aspect='auto')
plt.show()
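
Beyond the spy plots, the two canonicalizations can be compared quantitatively; a small sketch (for SCS, `get_problem_data` returns the constraint matrix as a SciPy sparse matrix):

for name, M in [('home-brewed', A1), ('cvxpy atom', A2)]:
    print(f'{name}: shape {M.shape}, nnz {M.nnz}')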