Coverage for src/cvxcla/first.py: 100%
43 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-27 06:45 +0000
« prev ^ index » next coverage.py v7.14.3, created at 2026-06-27 06:45 +0000
1"""First turning point computation for the Critical Line Algorithm.
3This module provides functions to compute the first turning point on the efficient frontier,
4which is the portfolio with the highest expected return that satisfies the constraints.
5Two implementations are provided: a direct algorithm and a linear programming approach.
6"""
8from __future__ import annotations
10import numpy as np
11from numpy.typing import NDArray
12from scipy.optimize import linprog # type: ignore[import-untyped]
14from .types import TurningPoint
17#
18def init_algo(
19 mean: NDArray[np.float64],
20 lower_bounds: NDArray[np.float64],
21 upper_bounds: NDArray[np.float64],
22 total: float = 1.0,
23) -> TurningPoint:
24 """Compute the first turning point for a single all-ones budget constraint.
26 The key insight behind Markowitz's CLA is to find first the
27 turning point associated with the highest expected return, and then
28 compute the sequence of turning points, each with a lower expected
29 return than the previous. That first turning point consists in the
30 smallest subset of assets with highest return such that the sum of
31 their upper boundaries equals or exceeds the budget ``total``.
33 We sort the expected returns in descending order.
34 This gives us a sequence for searching for the
35 first free asset. All weights are initially set to their lower bounds,
36 and following the sequence from the previous step, we move those
37 weights from the lower to the upper bound until the sum of weights
38 reaches ``total``. The last iterated weight is then reduced
39 to comply with the constraint that the sum of weights equals ``total``.
40 This last weight is the first free asset,
41 and the resulting vector of weights the first turning point.
43 Args:
44 mean: Vector of expected returns.
45 lower_bounds: Lower box bounds.
46 upper_bounds: Upper box bounds.
47 total: Target sum of weights (the right-hand side ``b`` of the all-ones
48 budget constraint ``sum(w) = total``; ``1`` for fully-invested,
49 ``0`` for dollar-neutral, ``> 1`` for a leveraged total).
50 """
51 if np.any(lower_bounds > upper_bounds):
52 msg = "Lower bounds must be less than or equal to upper bounds"
53 raise ValueError(msg)
55 # Initialize weights to lower bounds
56 weights = np.copy(lower_bounds).astype(np.float64)
57 free = np.full_like(mean, False, dtype=np.bool_)
59 # Move weights from lower to upper bound until the sum reaches ``total``. The
60 # check needs a tolerance: the increment ``total - sum(weights)`` can bring the
61 # sum to ``total`` only up to floating-point error, and without the slack the
62 # loop would move on and mark the NEXT asset (sitting on its bound) as free
63 # while the genuinely interior asset stays blocked.
64 for index in np.argsort(-mean):
65 weights[index] += np.min([upper_bounds[index] - lower_bounds[index], total - np.sum(weights)])
66 if np.sum(weights) >= total - 1e-12:
67 free[index] = True
68 break
70 if not np.any(free):
71 # # We have not reached the target sum of weights...
72 msg = "Could not construct a fully invested portfolio"
73 raise ValueError(msg)
75 # Return first turning point, the point with the highest expected return.
76 return TurningPoint(free=free, weights=weights)
79def first_vertex_lp(
80 mean: NDArray[np.float64],
81 lower_bounds: NDArray[np.float64],
82 upper_bounds: NDArray[np.float64],
83 a: NDArray[np.float64],
84 b: NDArray[np.float64],
85 tol: float,
86 g: NDArray[np.float64] | None = None,
87 h: NDArray[np.float64] | None = None,
88) -> TurningPoint:
89 """Compute the first turning point for a general ``A w = b``, ``G w <= h`` system.
91 The maximum-return vertex of the feasible polytope
92 ``{w : A w = b, G w <= h, lower <= w <= upper}`` is a linear program,
93 ``maximize mean @ w``. The greedy fill of :func:`init_algo` only solves the
94 single all-ones budget with no inequality rows; for a general (weighted, or
95 multi-row) ``A`` or any ``G`` we solve the LP directly with HiGHS (via
96 :func:`scipy.optimize.linprog`), which returns a vertex. The free set is read
97 off the solution (assets strictly inside their box bounds) and the initial
98 active inequality set off the tight rows (``g_i w`` at ``h_i`` to tolerance).
100 Args:
101 mean: Vector of expected returns.
102 lower_bounds: Lower box bounds.
103 upper_bounds: Upper box bounds.
104 a: Equality-constraint matrix (``m x n``).
105 b: Equality-constraint right-hand side (length ``m``).
106 tol: Tolerance for classifying an asset as free (strictly interior) and a
107 row as active (tight).
108 g: Inequality-constraint matrix (``p x n``); ``None`` means no rows.
109 h: Inequality-constraint right-hand side (length ``p``).
111 Returns:
112 The maximum-return vertex as a :class:`TurningPoint`, carrying the active
113 inequality rows in ``active_ineq``.
115 Raises:
116 ValueError: If the linear program is infeasible or unbounded (the
117 constraints admit no maximum-return vertex), or if that vertex is
118 degenerate: the free set does not span the equality rows together with
119 the active inequality rows, so the reduced KKT system would be
120 singular. That case is declined here rather than left to surface as an
121 opaque singular-matrix error later in the trace.
122 """
123 g = np.zeros((0, mean.shape[0])) if g is None else np.asarray(g, dtype=np.float64)
124 h = np.zeros(0) if h is None else np.asarray(h, dtype=np.float64)
126 # maximize mean @ w == minimize -mean @ w, subject to A w = b, G w <= h, box.
127 result = linprog(
128 c=-np.asarray(mean, dtype=np.float64),
129 A_eq=np.asarray(a, dtype=np.float64),
130 b_eq=np.asarray(b, dtype=np.float64),
131 A_ub=g if g.shape[0] else None,
132 b_ub=h if g.shape[0] else None,
133 bounds=list(zip(lower_bounds, upper_bounds, strict=True)),
134 method="highs",
135 )
136 if not result.success:
137 msg = f"Could not find a maximum-return vertex (linear program: {result.message})"
138 raise ValueError(msg)
140 weights = np.asarray(result.x, dtype=np.float64)
141 free = (weights > lower_bounds + tol) & (weights < upper_bounds - tol)
142 active_ineq = (g @ weights >= h - tol) if g.shape[0] else np.zeros(0, dtype=bool)
144 # The free set must span the equality rows together with the active inequality
145 # rows: C = [A ; G_active] restricted to the free assets must have full row
146 # rank, or the reduced KKT solve is singular. A degenerate maximum-return
147 # vertex (a basic asset pinned on a bound) violates this; decline it with an
148 # actionable diagnosis instead of letting it surface as an opaque "Singular
149 # matrix" error downstream.
150 c = np.vstack([a, g[active_ineq]])
151 mc = c.shape[0]
152 n_free = int(np.count_nonzero(free))
153 # rank(C[:, free]) <= min(mc, n_free), so fewer free assets than active rows
154 # is degenerate by itself. Testing this first also keeps matrix_rank off a
155 # zero-column block, whose empty singular-value reduction raises on numpy 2.0.
156 if n_free < mc or np.linalg.matrix_rank(c[:, free]) < mc:
157 msg = (
158 f"The maximum-return vertex is degenerate (free-set size {n_free}, "
159 f"active constraints {mc}): a basic asset sits exactly on a box bound, so the free set "
160 "does not span the active equality and inequality rows and the reduced KKT system is "
161 "singular. Tracing a frontier from a degenerate first vertex is not yet supported; perturb "
162 "the bounds or the constraints so the maximum-return vertex is non-degenerate."
163 )
164 raise ValueError(msg)
166 return TurningPoint(free=free, weights=weights, active_ineq=active_ineq)
169def _free(
170 w: NDArray[np.float64], lower_bounds: NDArray[np.float64], upper_bounds: NDArray[np.float64]
171) -> NDArray[np.bool_]:
172 """Determine which asset should be free in the turning point.
174 This helper function identifies the asset that should be marked as free
175 in the turning point. It selects the asset that is furthest from its bounds,
176 which helps ensure numerical stability in the algorithm.
178 Args:
179 w: Vector of portfolio weights.
180 lower_bounds: Vector of lower bounds for asset weights.
181 upper_bounds: Vector of upper bounds for asset weights.
183 Returns:
184 A boolean vector indicating which asset is free (True) and which are blocked (False).
186 """
187 # Calculate the distance from each weight to its nearest bound
188 distance = np.min(np.array([np.abs(w - lower_bounds), np.abs(upper_bounds - w)]), axis=0)
190 # Find the index of the asset furthest from its bounds
191 index = np.argmax(distance)
193 # Create a boolean vector with only that asset marked as free
194 free = np.full_like(w, False, dtype=np.bool_)
195 free[index] = True
196 return free