import marimo __generated_with = "0.7.0" app = marimo.App() @app.cell(hide_code=True) def __(mo): mo.md("# Regularization and Sparsity") return @app.cell(hide_code=True) def __(mo): mo.md( r""" This notebook shows how the choice of regularization in a least squares regression problem can affect the sparsity of solutions. We will use CVXPY to solve the problem \[ \begin{equation*} \begin{array}{ll} \text{minimize} & \|A x - b\|_2^2 + \lambda \|x \|_p \\ \end{array} \end{equation*} \] where $A \in \mathbf{R}^{m \times n}$ and $b \in \mathbf{R}^{m}$ are problem data, $x \in \mathbf{R}^n$ is the optimization variable, and $\lambda > 0$ is a scalar that controls the strength of the regularization. Let's experiment how solutions to this problem differ for $p=1$, \[ \|x\|_1 = |x_1| + |x_2| + \cdots + |x_n|, \] and $p=2$, \[ \|x\|_2 = \sqrt{x_1^2 + x_2^2 + \cdots + x_n^2}. \] """ ) return @app.cell def __(np): # Problem data m = 100 n = 20 np.random.seed(0) A = np.random.randn(m, n) b = np.random.randn(m) return A, b, m, n @app.cell def __(mo): mo.md("**TODO**: Implement the `create_problem` function below.") return @app.cell def __(cp): def create_problem(A, b, p): # create the optimization variable x x = ... # lambd is the regularization hyperparameter lambda lambd = cp.Parameter(nonneg=True) # create the objective: minimize the sum squares of the residuals plus the # regularization objective = cp.Minimize( ... ) # don't change the next line return cp.Problem(objective), x, lambd return create_problem, @app.cell(hide_code=True) def __(): def solver(problem, x, lambd): def solve(v): lambd.value = v problem.solve() return x.value return solve return solver, @app.cell(hide_code=True) def __(A, b, create_problem, solver): l2_solver = solver(*create_problem(A, b, p=2)) l1_solver = solver(*create_problem(A, b, p=1)) return l1_solver, l2_solver @app.cell(hide_code=True) def __(functools, l1_solver, l2_solver, np, sparsity_parameter): x_min_max= [np.inf, -np.inf] @functools.cache def solve(lambd): return l2_solver(lambd), l1_solver(lambd) x_l2, x_l1 = solve(sparsity_parameter.value) return solve, x_l1, x_l2, x_min_max @app.cell def __(mo): mo.md("## Parameter selection") return @app.cell(hide_code=True) def __(mo): sparsity_parameter = mo.ui.slider(0, 10, step=0.1) mo.md( f""" Choose the regularization strength $\lambda$: {sparsity_parameter} """ ) return sparsity_parameter, @app.cell(hide_code=True) def __(mo, n, number_of_zeros, sparsity_parameter, x_l1, x_l2): ( mo.md( """ **$\lambda$ = 0.** No regularization is applied. The solutions are the same. """ ) if sparsity_parameter.value == 0 else mo.md( f""" **$\lambda$ = {sparsity_parameter.value}.** Watch how the fraction of entries of $x$ near $0$ changes as $\lambda$ increases. **$p=1$**: {number_of_zeros(x_l1) / n * 100:.02f}% of the entries of $x$ are extremely close to $0$. **$p=2$**: {number_of_zeros(x_l2) / n * 100:.02f}% of the entries of $x$ are extremely close to $0$. """ ) ) return @app.cell(hide_code=True) def __(cdf, plt, x_l1, x_l2): cdf_figure, cdf_axs = plt.subplots(2, 1, sharex=True) cdf(x_l1, cdf_axs[0]).set_title("$p=1$") cdf(x_l2, cdf_axs[1]).set_title("$p=2$") plt.tight_layout() cdf_figure return cdf_axs, cdf_figure @app.cell(hide_code=True) def __(mo): mo.md( """ ## Sparsity The $\ell_1$ norm, when used as a regularizer, encourages solutions to be _sparse_: to have many zeros and only a few nonzeros. A sparse regressor (if it is a good model) encodes which featuers are important for making predictions, and which are not: If a component of $x$ is $0$, then the corresponding feature or measurement must not be important in making predictions. """ ) return @app.cell(hide_code=True) def __(np): def number_of_zeros(x): return np.isclose(x, 0).sum() return number_of_zeros, @app.cell(hide_code=True) def __(np, x_min_max): def cdf(x, ax): heights = np.arange(1, x.size+1) / x.size xs = np.sort(x) if np.min(xs) < x_min_max[0]: x_min_max[0] = np.min(xs) if np.max(xs) > x_min_max[1]: x_min_max[1] = np.max(xs) ax.set_xlim(-0.2, 0.2) ax.step(xs, heights) ax.set_xlabel('$x_i$') ax.set_ylabel('fraction of components') return ax return cdf, @app.cell def __(): import cvxpy as cp import matplotlib.pyplot as plt import numpy as np return cp, np, plt @app.cell def __(): import functools return functools, @app.cell def __(): import marimo as mo return mo, if __name__ == "__main__": app.run()