Coverage for src/cvxcla/first.py: 100%

1"""First turning point computation for the Critical Line Algorithm.

3This module provides functions to compute the first turning point on the efficient frontier,

4which is the portfolio with the highest expected return that satisfies the constraints.

5Two implementations are provided: a direct algorithm and a linear programming approach.

6"""

8from __future__ import annotations

10import numpy as np

11from numpy.typing import NDArray

12from scipy.optimize import linprog # type: ignore[import-untyped]

14from .types import TurningPoint

17#

18def init_algo(

19 mean: NDArray[np.float64],

20 lower_bounds: NDArray[np.float64],

21 upper_bounds: NDArray[np.float64],

22 total: float = 1.0,

23) -> TurningPoint:

24 """Compute the first turning point for a single all-ones budget constraint.

26 The key insight behind Markowitz's CLA is to find first the

27 turning point associated with the highest expected return, and then

28 compute the sequence of turning points, each with a lower expected

29 return than the previous. That first turning point consists in the

30 smallest subset of assets with highest return such that the sum of

31 their upper boundaries equals or exceeds the budget ``total``.

33 We sort the expected returns in descending order.

34 This gives us a sequence for searching for the

35 first free asset. All weights are initially set to their lower bounds,

36 and following the sequence from the previous step, we move those

37 weights from the lower to the upper bound until the sum of weights

38 reaches ``total``. The last iterated weight is then reduced

39 to comply with the constraint that the sum of weights equals ``total``.

40 This last weight is the first free asset,

41 and the resulting vector of weights the first turning point.

43 Args:

44 mean: Vector of expected returns.

45 lower_bounds: Lower box bounds.

46 upper_bounds: Upper box bounds.

47 total: Target sum of weights (the right-hand side ``b`` of the all-ones

48 budget constraint ``sum(w) = total``; ``1`` for fully-invested,

49 ``0`` for dollar-neutral, ``> 1`` for a leveraged total).

50 """

51 if np.any(lower_bounds > upper_bounds):

52 msg = "Lower bounds must be less than or equal to upper bounds"

53 raise ValueError(msg)

55 # Initialize weights to lower bounds

56 weights = np.copy(lower_bounds).astype(np.float64)

57 free = np.full_like(mean, False, dtype=np.bool_)

59 # Move weights from lower to upper bound until the sum reaches ``total``. The

60 # check needs a tolerance: the increment ``total - sum(weights)`` can bring the

61 # sum to ``total`` only up to floating-point error, and without the slack the

62 # loop would move on and mark the NEXT asset (sitting on its bound) as free

63 # while the genuinely interior asset stays blocked.

64 for index in np.argsort(-mean):

65 weights[index] += np.min([upper_bounds[index] - lower_bounds[index], total - np.sum(weights)])

66 if np.sum(weights) >= total - 1e-12:

67 free[index] = True

68 break

70 if not np.any(free):

71 # # We have not reached the target sum of weights...

72 msg = "Could not construct a fully invested portfolio"

73 raise ValueError(msg)

75 # Return first turning point, the point with the highest expected return.

76 return TurningPoint(free=free, weights=weights)

79def first_vertex_lp(

80 mean: NDArray[np.float64],

81 lower_bounds: NDArray[np.float64],

82 upper_bounds: NDArray[np.float64],

83 a: NDArray[np.float64],

84 b: NDArray[np.float64],

85 tol: float,

86 g: NDArray[np.float64] | None = None,

87 h: NDArray[np.float64] | None = None,

88) -> TurningPoint:

89 """Compute the first turning point for a general ``A w = b``, ``G w <= h`` system.

91 The maximum-return vertex of the feasible polytope

92 ``{w : A w = b, G w <= h, lower <= w <= upper}`` is a linear program,

93 ``maximize mean @ w``. The greedy fill of :func:`init_algo` only solves the

94 single all-ones budget with no inequality rows; for a general (weighted, or

95 multi-row) ``A`` or any ``G`` we solve the LP directly with HiGHS (via

96 :func:`scipy.optimize.linprog`), which returns a vertex. The free set is read

97 off the solution (assets strictly inside their box bounds) and the initial

98 active inequality set off the tight rows (``g_i w`` at ``h_i`` to tolerance).

100 Args:

101 mean: Vector of expected returns.

102 lower_bounds: Lower box bounds.

103 upper_bounds: Upper box bounds.

104 a: Equality-constraint matrix (``m x n``).

105 b: Equality-constraint right-hand side (length ``m``).

106 tol: Tolerance for classifying an asset as free (strictly interior) and a

107 row as active (tight).

108 g: Inequality-constraint matrix (``p x n``); ``None`` means no rows.

109 h: Inequality-constraint right-hand side (length ``p``).

110

111 Returns:

112 The maximum-return vertex as a :class:`TurningPoint`, carrying the active

113 inequality rows in ``active_ineq``.

114

115 Raises:

116 ValueError: If the linear program is infeasible or unbounded (the

117 constraints admit no maximum-return vertex), or if that vertex is

118 degenerate: the free set does not span the equality rows together with

119 the active inequality rows, so the reduced KKT system would be

120 singular. That case is declined here rather than left to surface as an

121 opaque singular-matrix error later in the trace.

122 """

123 g = np.zeros((0, mean.shape[0])) if g is None else np.asarray(g, dtype=np.float64)

124 h = np.zeros(0) if h is None else np.asarray(h, dtype=np.float64)

125

126 # maximize mean @ w == minimize -mean @ w, subject to A w = b, G w <= h, box.

127 result = linprog(

128 c=-np.asarray(mean, dtype=np.float64),

129 A_eq=np.asarray(a, dtype=np.float64),

130 b_eq=np.asarray(b, dtype=np.float64),

131 A_ub=g if g.shape[0] else None,

132 b_ub=h if g.shape[0] else None,

133 bounds=list(zip(lower_bounds, upper_bounds, strict=True)),

134 method="highs",

135 )

136 if not result.success:

137 msg = f"Could not find a maximum-return vertex (linear program: {result.message})"

138 raise ValueError(msg)

139

140 weights = np.asarray(result.x, dtype=np.float64)

141 free = (weights > lower_bounds + tol) & (weights < upper_bounds - tol)

142 active_ineq = (g @ weights >= h - tol) if g.shape[0] else np.zeros(0, dtype=bool)

143

144 # The free set must span the equality rows together with the active inequality

145 # rows: C = [A ; G_active] restricted to the free assets must have full row

146 # rank, or the reduced KKT solve is singular. A degenerate maximum-return

147 # vertex (a basic asset pinned on a bound) violates this; decline it with an

148 # actionable diagnosis instead of letting it surface as an opaque "Singular

149 # matrix" error downstream.

150 c = np.vstack([a, g[active_ineq]])

151 mc = c.shape[0]

152 n_free = int(np.count_nonzero(free))

153 # rank(C[:, free]) <= min(mc, n_free), so fewer free assets than active rows

154 # is degenerate by itself. Testing this first also keeps matrix_rank off a

155 # zero-column block, whose empty singular-value reduction raises on numpy 2.0.

156 if n_free < mc or np.linalg.matrix_rank(c[:, free]) < mc:

157 msg = (

158 f"The maximum-return vertex is degenerate (free-set size {n_free}, "

159 f"active constraints {mc}): a basic asset sits exactly on a box bound, so the free set "

160 "does not span the active equality and inequality rows and the reduced KKT system is "

161 "singular. Tracing a frontier from a degenerate first vertex is not yet supported; perturb "

162 "the bounds or the constraints so the maximum-return vertex is non-degenerate."

163 )

164 raise ValueError(msg)

165

166 return TurningPoint(free=free, weights=weights, active_ineq=active_ineq)

167

168

169def _free(

170 w: NDArray[np.float64], lower_bounds: NDArray[np.float64], upper_bounds: NDArray[np.float64]

171) -> NDArray[np.bool_]:

172 """Determine which asset should be free in the turning point.

173

174 This helper function identifies the asset that should be marked as free

175 in the turning point. It selects the asset that is furthest from its bounds,

176 which helps ensure numerical stability in the algorithm.

177

178 Args:

179 w: Vector of portfolio weights.

180 lower_bounds: Vector of lower bounds for asset weights.

181 upper_bounds: Vector of upper bounds for asset weights.

182

183 Returns:

184 A boolean vector indicating which asset is free (True) and which are blocked (False).

185

186 """

187 # Calculate the distance from each weight to its nearest bound

188 distance = np.min(np.array([np.abs(w - lower_bounds), np.abs(upper_bounds - w)]), axis=0)

189

190 # Find the index of the asset furthest from its bounds

191 index = np.argmax(distance)

192

193 # Create a boolean vector with only that asset marked as free

194 free = np.full_like(w, False, dtype=np.bool_)

195 free[index] = True

196 return free