Coverage for src/cvxcla/cla.py: 100%

1"""Markowitz implementation of the Critical Line Algorithm.

3This module provides the CLA class, which implements the Critical Line Algorithm

4as described by Harry Markowitz and colleagues. The algorithm computes the entire

5efficient frontier by finding all turning points, which are the points where the

6set of assets at their bounds changes.

7"""

9import logging

10from dataclasses import dataclass, field

11from functools import cached_property

12from typing import TYPE_CHECKING, NamedTuple

14import numpy as np

15from numpy.typing import NDArray

17if TYPE_CHECKING:

18 from .builder import ProblemBuilder

20from .first import first_vertex_lp, init_algo

21from .operators import DenseCovariance, QuadraticForm, bordered_solve

22from .pathtracer import InequalityConstrained, trace

23from .types import Frontier, FrontierPoint, TurningPoint

25# A genuinely rank-deficient free block has a reciprocal condition number at

26# round-off level (~1e-16); a well-posed or merely near-degenerate block sits

27# many orders above it (>= ~1e-4 across the degeneracy sweep in

28# experiments/degeneracy_boundary.py). The 1e-12 cut sits in the wide gap between

29# the two and is the conventional numerical-singularity scale.

30_RCOND_FLOOR = 1e-12 # pragma: no mutate

33class _Segment(NamedTuple):

34 """The affine critical-line segment valid at one turning point.

36 Bundles the affine path ``w(lam) = r_alpha + lam * r_beta``, the multiplier

37 gradients ``gamma``/``delta`` that drive the leave-a-bound events, and the

38 active-set masks the event scan needs. This is what ``CLA.segment`` returns

39 to the generic path tracer.

41 For general inequality constraints ``G w <= h`` the segment also carries the

42 affine inequality multipliers ``eta_alpha + lam * eta_beta`` (one entry per

43 inequality row; meaningful for *active* rows, which release when the

44 multiplier crosses zero) and the active-row mask ``active_ineq``. The slacks

45 that drive an *inactive* row becoming active are recomputed from

46 ``r_alpha``/``r_beta`` directly in ``_ineq_event_ratios``.

47 """

49 r_alpha: NDArray[np.float64]

50 r_beta: NDArray[np.float64]

51 gamma: NDArray[np.float64]

52 delta: NDArray[np.float64]

53 at_upper: NDArray[np.bool_]

54 at_lower: NDArray[np.bool_]

55 free_in: NDArray[np.bool_]

56 active_ineq: NDArray[np.bool_]

57 eta_alpha: NDArray[np.float64]

58 eta_beta: NDArray[np.float64]

61@dataclass(frozen=True)

62class CLA(InequalityConstrained):

63 """Critical Line Algorithm implementation based on Markowitz's approach.

65 This class implements the Critical Line Algorithm as described by Harry Markowitz

66 and colleagues. It computes the entire efficient frontier by finding all turning

67 points, which are the points where the set of assets at their bounds changes.

69 The algorithm starts with the first turning point (the portfolio with the highest

70 expected return) and then iteratively computes the next turning point with a lower

71 expected return until it reaches the minimum variance portfolio.

73 Attributes:

74 mean: Vector of expected returns for each asset.

75 covariance: Covariance matrix of asset returns, either as a plain

76 ``numpy`` array or as a ``CovarianceOperator`` backend

77 (see ``cvxcla.operators``).

78 lower_bounds: Vector of lower bounds for asset weights.

79 upper_bounds: Vector of upper bounds for asset weights.

80 a: Equality-constraint matrix ``A`` of ``A w = b`` (``m x n``). The

81 canonical case is the single all-ones budget row (``sum(w) = b``),

82 but an arbitrary equality system is supported: weighted single rows

83 and ``m > 1`` rows (e.g. budget plus sector- or factor-neutrality).

84 The all-ones budget (any right-hand side, including ``0`` for

85 dollar-neutral) uses the greedy first vertex of

86 :func:`cvxcla.first.init_algo`; a general ``A`` uses the

87 linear-programming first vertex of

88 :func:`cvxcla.first.first_vertex_lp`.

89 b: Equality-constraint right-hand side ``b`` (length ``m``); ``[1]`` for

90 the fully-invested budget, ``[0]`` for dollar-neutral, and so on.

91 g: Optional inequality-constraint matrix ``G`` of ``G w <= h``

92 (``p x n``), e.g. a group- or sector-exposure cap. ``None`` (the

93 default) means no inequality rows, recovering the equality-only

94 problem exactly. A ``>=`` constraint is expressed by negating both

95 ``g`` and ``h``. Each *active* row (held at equality ``g_i w = h_i``)

96 enters the reduced KKT system as an extra equality row, so the

97 covariance is still touched only through the ``QuadraticForm``

98 interface; box bounds remain a separate per-variable active set.

99 h: Optional inequality-constraint right-hand side ``h`` (length ``p``).

100 turning_points: List of turning points on the efficient frontier.

101 tol: Tolerance for numerical calculations.

102 logger: Logger instance for logging information and errors.

103

104 """

105

106 mean: NDArray[np.float64]

107 covariance: NDArray[np.float64] | QuadraticForm

108 lower_bounds: NDArray[np.float64]

109 upper_bounds: NDArray[np.float64]

110 a: NDArray[np.float64]

111 b: NDArray[np.float64]

112 g: NDArray[np.float64] | None = None

113 h: NDArray[np.float64] | None = None

114 turning_points: list[TurningPoint] = field(default_factory=list)

115 tol: float = 1e-5 # pragma: no mutate

116 logger: logging.Logger = field(default_factory=lambda: logging.getLogger(__name__))

117

118 @classmethod

119 def problem(cls, mean: NDArray[np.float64], covariance: NDArray[np.float64] | QuadraticForm) -> "ProblemBuilder":

120 """Start a fluent :class:`cvxcla.builder.ProblemBuilder` for this problem.

121

122 A readability convenience over the explicit constructor: chain

123 ``.long_only()``/``.budget()``/``.equality()``/``.inequality()`` and finish

124 with ``.trace()``. The builder maps one-to-one onto the constructor

125 arguments and adds no modelling power.

126

127 Args:

128 mean: Vector of expected returns of length ``n``.

129 covariance: Covariance matrix or ``QuadraticForm`` backend.

130

131 Returns:

132 A ``ProblemBuilder`` ready to accept constraints.

133 """

134 from .builder import ProblemBuilder

135

136 return ProblemBuilder(mean, covariance)

137

138 @cached_property

139 def covariance_operator(self) -> QuadraticForm:

140 """Return the covariance as a ``QuadraticForm`` backend.

141

142 A plain ``numpy`` covariance matrix is wrapped in ``DenseCovariance``;

143 an object already implementing the protocol is passed through. This is

144 the single point where the input form is normalised.

145 """

146 if isinstance(self.covariance, QuadraticForm):

147 return self.covariance

148 return DenseCovariance(self.covariance)

149

150 @property

151 def dimension(self) -> int:

152 """Number of assets ``n`` (the problem dimension for the path tracer)."""

153 return len(self.mean)

154

155 @cached_property

156 def _free_blocks_well_conditioned(self) -> bool:

157 """Whether every free-block solve along the trace is numerically safe.

158

159 Decided once, up front. By Cauchy's interlacing theorem every principal

160 submatrix of the symmetric PSD covariance is at least as well conditioned

161 as the whole matrix -- deleting rows/columns cannot decrease the smallest

162 eigenvalue nor increase the largest -- so the reciprocal condition number

163 of any free block is ``>=`` that of the full covariance. Hence if the full

164 covariance clears the singularity floor, no free block encountered along

165 the trace can be singular, and the per-turning-point conditioning guard in

166 :meth:`_emit` is provably never triggered. We then skip it, paying one

167 conditioning test here instead of one at every turning point (the latter

168 is a full eigendecomposition of the free block, as costly as the KKT solve,

169 so it otherwise dominates the trace). The up-front test itself goes through

170 :meth:`~cvxcla.operators.QuadraticForm.rcond_floor_cleared`, which the dense

171 backend settles with a Cholesky factorisation plus a condition estimate

172 rather than its own full eigendecomposition.

173

174 When the full covariance is itself near-singular (for example a sample

175 covariance from fewer observations than assets) this is ``False`` and the

176 per-step guard in :meth:`_emit` runs unchanged, preserving the degeneracy

177 diagnosis exactly.

178 """

179 # ``rcond_floor_cleared`` is an optional fast-path hook (the dense backend

180 # answers it with a Cholesky factorisation plus a condition estimate). A

181 # backend that does not provide it falls back to the exact full-mask rcond.

182 cleared = getattr(self.covariance_operator, "rcond_floor_cleared", None)

183 if cleared is not None:

184 return bool(cleared(_RCOND_FLOOR))

185 full = np.ones(self.dimension, dtype=bool)

186 return self.covariance_operator.rcond_free(full) >= _RCOND_FLOOR

187

188 def __post_init__(self) -> None:

189 """Initialize the CLA object and compute the efficient frontier.

190

191 This method is automatically called after initialization. It computes

192 the entire efficient frontier by finding all turning points, starting

193 from the first turning point (highest expected return) and iteratively

194 computing the next turning point with a lower expected return until

195 it reaches the minimum variance portfolio.

196

197 The actual walk is driven by the generic ``cvxcla.pathtracer.trace``

198 loop; this class supplies the portfolio-specific hooks (``begin``,

199 ``segment``, ``event_matrix``, ``step``, ``finish``) it calls.

200

201 The reduced KKT system at each turning point is solved by block

202 elimination: a single multi-RHS solve against the free covariance block

203 (via the covariance backend), covering the constraint columns and the

204 alpha and beta systems together so ``Sigma_FF`` is factorised once, then a

205 small Schur-complement solve ``A_F @ Sigma_FF^{-1} @ A_F.T`` over the

206 equality (and active inequality) rows. The covariance only enters through

207 the ``QuadraticForm`` interface, so structured backends (e.g.

208 ``FactorCovariance``) never materialise an n x n matrix.

209

210 Raises:

211 RuntimeError: If all variables are blocked, which would make the

212 system of equations singular.

213 ValueError: If the inequality matrix ``g`` and vector ``h`` have

214 mismatched or wrong shapes.

215

216 """

217 if self.g_matrix.shape[1] != self.dimension:

218 msg = f"g must have {self.dimension} columns, got shape {self.g_matrix.shape}"

219 raise ValueError(msg)

220 if self.h_vector.shape[0] != self.g_matrix.shape[0]:

221 msg = f"h must have {self.g_matrix.shape[0]} entries, got {self.h_vector.shape[0]}"

222 raise ValueError(msg)

223 trace(self)

224

225 def begin(self) -> tuple[float, TurningPoint]:

226 """Record the first turning point and start the trace at ``lambda = inf``.

227

228 Returns:

229 ``(inf, first_turning_point)``: the starting lambda bound and the

230 initial state for the path tracer.

231 """

232 first = self._first_turning_point()

233 self._append(first)

234 return np.inf, first

235

236 def segment(self, state: TurningPoint) -> _Segment:

237 """Solve the reduced KKT system for the critical-line segment at ``state``."""

238 at_upper, at_lower, free_in, fixed_weights = self._active_set(state)

239 r_alpha, r_beta, gamma, delta, eta_alpha, eta_beta = self._solve_kkt(free_in, fixed_weights, state.active_ineq)

240 return _Segment(

241 r_alpha, r_beta, gamma, delta, at_upper, at_lower, free_in, state.active_ineq, eta_alpha, eta_beta

242 )

243

244 def event_matrix(self, state: TurningPoint, segment: _Segment) -> NDArray[np.float64]: # noqa: ARG002

245 """Return the ``(n + p, 4)`` matrix of candidate critical lambdas for ``segment``.

246

247 The first ``n`` rows are the box events (a free weight reaching a bound, a

248 blocked multiplier changing sign); the trailing ``p`` rows are the

249 inequality-row events (an inactive row's slack reaching zero, an active

250 row's multiplier changing sign). The generic tracer treats the two blocks

251 uniformly; ``step`` decodes a row index ``>= n`` as a row event.

252

253 ``state`` is part of the uniform ``ParametricProblem`` signature; the CLA

254 does not need it here because ``segment`` already bundles the active-set

255 masks derived from it.

256 """

257 box = self._event_ratios(

258 segment.r_alpha,

259 segment.r_beta,

260 segment.gamma,

261 segment.delta,

262 segment.free_in,

263 segment.at_upper,

264 segment.at_lower,

265 )

266 ineq = self._ineq_event_ratios(segment)

267 return np.vstack([box, ineq])

268

269 def step(self, state: TurningPoint, segment: _Segment, sec: int, direction: int, lam: float) -> TurningPoint:

270 """Emit the turning point at ``lam`` after flipping the activity at ``sec``.

271

272 ``sec < n`` is a box event on asset ``sec``: a "leaves a bound" event

273 (``direction`` in {2, 3}) makes it free, a "moves to a bound" event

274 (``direction`` in {0, 1}) blocks it. ``sec >= n`` is an inequality-row

275 event on row ``sec - n``: ``direction == 0`` activates the row (its slack

276 reached zero), ``direction == 1`` releases it (its multiplier reached

277 zero). The weight vector is continuous across either event.

278 """

279 n = self.dimension

280 free = state.free

281 active_ineq = state.active_ineq

282 if sec < n:

283 free = free.copy()

284 free[sec] = direction >= 2

285 else:

286 active_ineq = active_ineq.copy()

287 active_ineq[sec - n] = direction == 0

288 self._emit(lam, segment.r_alpha + lam * segment.r_beta, free, active_ineq)

289 return self.turning_points[-1]

290

291 def finish(self, state: TurningPoint, segment: _Segment) -> None:

292 """Emit the minimum-variance endpoint at ``lambda = 0``."""

293 self._emit(0.0, segment.r_alpha, state.free, state.active_ineq)

294

295 def _active_set(

296 self, last: TurningPoint

297 ) -> tuple[NDArray[np.bool_], NDArray[np.bool_], NDArray[np.bool_], NDArray[np.float64]]:

298 """Identify the active set at ``last`` and the weights pinned to bounds.

299

300 A blocked asset sitting (to tolerance) on a bound is held fixed there and

301 excluded from the reduced KKT solve; every other asset is *in*. Returns

302 the upper-bound mask, the lower-bound mask, the in-set mask, and the

303 full-length vector of weights fixed at their bounds.

304

305 Args:

306 last: The most recent turning point, whose free set and weights define

307 the active set.

308

309 Returns:

310 ``(at_upper, at_lower, free_in, fixed_weights)``.

311

312 Raises:

313 RuntimeError: If every asset is blocked, which makes the reduced

314 system singular.

315 """

316 blocked = ~last.free

317 if np.all(blocked):

318 msg = "All variables cannot be blocked"

319 raise RuntimeError(msg)

320

321 at_upper = blocked & (np.abs(last.weights - self.upper_bounds) <= self.tol) # pragma: no mutate

322 at_lower = blocked & (np.abs(last.weights - self.lower_bounds) <= self.tol) # pragma: no mutate

323 free_in = ~(at_upper | at_lower)

324

325 fixed_weights = np.zeros(len(self.mean))

326 fixed_weights[at_upper] = self.upper_bounds[at_upper]

327 fixed_weights[at_lower] = self.lower_bounds[at_lower]

328 return at_upper, at_lower, free_in, fixed_weights

329

330 def _solve_kkt(

331 self,

332 free_in: NDArray[np.bool_],

333 fixed_weights: NDArray[np.float64],

334 active_ineq: NDArray[np.bool_] | None = None,

335 ) -> tuple[

336 NDArray[np.float64],

337 NDArray[np.float64],

338 NDArray[np.float64],

339 NDArray[np.float64],

340 NDArray[np.float64],

341 NDArray[np.float64],

342 ]:

343 """Solve the reduced KKT system for the current critical-line segment.

344

345 Block elimination over the *stacked* constraint matrix ``C = [A ; G_S]``,

346 where ``G_S`` are the currently-active inequality rows held at equality.

347 Because an active inequality row enters the stationarity/feasibility system

348 exactly as an equality row does, the same elimination handles both: a

349 multi-right-hand-side solve against the free covariance block ``Sigma_FF``

350 (via the backend, so structured covariances never materialise an

351 ``n x n`` matrix) feeds an ``(m + |S|) x (m + |S|)`` Schur complement

352 ``C_F Sigma_FF^{-1} C_F.T``. With no active inequality rows this is the

353 plain equality solve.

354

355 Args:

356 free_in: Boolean mask of the assets in the reduced solve.

357 fixed_weights: Full-length weights of the assets held at their bounds.

358 active_ineq: Boolean mask (length ``p``) of the active inequality rows;

359 ``None`` means no inequality rows.

360

361 Returns:

362 ``(r_alpha, r_beta, gamma, delta, eta_alpha, eta_beta)``: the affine

363 segment ``w(lam) = r_alpha + lam * r_beta``, the box-multiplier

364 gradients ``gamma``/``delta`` that drive the leave-a-bound events, and

365 the affine inequality multipliers ``eta_alpha + lam * eta_beta``

366 (length ``p``, non-zero only on active rows) that drive the

367 release-a-row events.

368 """

369 if active_ineq is None:

370 active_ineq = np.zeros(self.g_matrix.shape[0], dtype=bool)

371 m = self.a.shape[0]

372 ns = len(self.mean)

373 cov = self.covariance_operator

374 out = ~free_in

375 # Stack the active inequality rows beneath the equality rows; the active

376 # rows are held at equality (g_i w = h_i), so C/d is the equality system

377 # of the reduced QP at this vertex.

378 c = np.vstack([self.a, self.g_matrix[active_ineq]])

379 d = np.concatenate([self.b, self.h_vector[active_ineq]])

380 c_free = c[:, free_in]

381

382 # The reduced KKT system is the shared bordered solve: the constant system

383 # carries the blocked-weight shift -Sigma_FB w_B and the reduced constraint

384 # right-hand side d - C_B w_B; the slope system carries the mean with a zero

385 # constraint right-hand side (A r_beta = 0).

386 x_alpha, x_beta, nu_alpha, nu_beta = bordered_solve(

387 cov,

388 free_in,

389 c_free,

390 -cov.cross(free_in, fixed_weights),

391 self.mean[free_in],

392 d - c[:, out] @ fixed_weights[out],

393 np.zeros(c.shape[0]),

394 )

395

396 r_alpha = fixed_weights.copy()

397 r_alpha[free_in] = x_alpha

398 r_beta = np.zeros(ns)

399 r_beta[free_in] = x_beta

400

401 gamma = cov.matvec(r_alpha) + c.T @ nu_alpha

402 delta = cov.matvec(r_beta) + c.T @ nu_beta - self.mean

403

404 # The tail of the stacked multiplier is the inequality multiplier eta(lam)

405 # = eta_alpha + lam eta_beta, scattered back to full length p (zero on the

406 # inactive rows, which have no release event).

407 eta_alpha = np.zeros(self.g_matrix.shape[0])

408 eta_beta = np.zeros(self.g_matrix.shape[0])

409 eta_alpha[active_ineq] = nu_alpha[m:]

410 eta_beta[active_ineq] = nu_beta[m:]

411 return r_alpha, r_beta, gamma, delta, eta_alpha, eta_beta

412

413 def _event_ratios(

414 self,

415 r_alpha: NDArray[np.float64],

416 r_beta: NDArray[np.float64],

417 gamma: NDArray[np.float64],

418 delta: NDArray[np.float64],

419 free_in: NDArray[np.bool_],

420 at_upper: NDArray[np.bool_],

421 at_lower: NDArray[np.bool_],

422 ) -> NDArray[np.float64]:

423 """Critical lambda for every candidate event, as an ``(n, 4)`` matrix.

424

425 Along the segment ``w(lam) = r_alpha + lam * r_beta`` a free weight can

426 reach a box bound (columns 0/1, "moves to a bound") and a blocked weight's

427 multiplier can change sign so it re-enters the free set (columns 2/3,

428 "leaves a bound"). Entries with no event are ``-inf``.

429

430 A free weight moves with even a tiny slope, so given a long enough lam

431 range it still crosses a bound; filtering slopes at ``self.tol`` would

432 miss such crossings and let weights drift out of bounds. Only slopes at

433 floating-point noise level are excluded: below ``sqrt(machine epsilon)`` a

434 slope is indistinguishable from solve noise, and the huge ratios it would

435 produce only amplify rounding errors.

436

437 Args:

438 r_alpha: Segment intercept ``w(0)``.

439 r_beta: Segment slope ``dw/dlam``.

440 gamma: Multiplier gradient for the alpha system.

441 delta: Multiplier gradient for the beta system.

442 free_in: Mask of assets in the reduced solve.

443 at_upper: Mask of assets blocked at their upper bound.

444 at_lower: Mask of assets blocked at their lower bound.

445

446 Returns:

447 The ``(n, 4)`` matrix of critical lambdas.

448 """

449 ns = len(self.mean)

450 eps = np.sqrt(np.finfo(np.float64).eps)

451 # 4 columns = the 4 event types; extra unused columns are harmless.

452 l_mat = np.full((ns, 4), -np.inf) # pragma: no mutate

453

454 # Precompute each event mask exactly once. The <,> vs <=,>= choice at

455 # the eps boundary is numerically irrelevant — a slope/derivative

456 # landing exactly on +/-sqrt(machine-eps) never occurs with real

457 # data — so those boundary comparisons are marked no-mutate.

458 beta_down = free_in & (r_beta < -eps) # pragma: no mutate

459 beta_up = free_in & (r_beta > +eps) # pragma: no mutate

460 delta_down = at_upper & (delta < -eps) # pragma: no mutate

461 delta_up = at_lower & (delta > +eps) # pragma: no mutate

462

463 # Columns 0,1 are "moves to a bound" (free->blocked) and 2,3 are

464 # "leaves a bound" (blocked->free); the next-free update only tests

465 # dirchg >= 2, so swapping a column *within* a group (0<->1 or 2<->3) is

466 # behaviourally identical and marked no-mutate. Crossing the 1<->2 group

467 # boundary IS exercised by the frontier tests.

468 l_mat[beta_down, 0] = ( # pragma: no mutate

469 self.upper_bounds[beta_down] - r_alpha[beta_down]

470 ) / r_beta[beta_down]

471 l_mat[beta_up, 1] = (self.lower_bounds[beta_up] - r_alpha[beta_up]) / r_beta[beta_up]

472 l_mat[delta_down, 2] = -gamma[delta_down] / delta[delta_down] # pragma: no mutate

473 l_mat[delta_up, 3] = -gamma[delta_up] / delta[delta_up]

474 return l_mat

475

476 def _ineq_event_ratios(self, segment: _Segment) -> NDArray[np.float64]:

477 """Critical lambda for every inequality-row event, as a ``(p, 4)`` matrix.

478

479 The row analogue of :meth:`_event_ratios`. Along the segment an *inactive*

480 row ``i`` becomes active when its slack ``s_i(lam) = g_i w(lam) - h_i``

481 rises to zero from the feasible (negative) side (column 0); an *active*

482 row releases when its multiplier ``eta_i(lam)`` falls to zero (column 1).

483 Both are affine in ``lam``, so the critical lambda is the same

484 ``-intercept / slope`` ratio used for the box events, with the same

485 ``sqrt(machine eps)`` slope floor: a slope below noise level is

486 indistinguishable from solve round-off and would only produce a huge,

487 rounding-dominated ratio. Entries with no event are ``-inf``. Columns 2

488 and 3 are unused (kept so the block stacks onto the ``(n, 4)`` box block).

489

490 Args:

491 segment: The current segment, carrying the affine weights, the

492 inequality multipliers, and the active-row mask.

493

494 Returns:

495 The ``(p, 4)`` matrix of critical lambdas.

496 """

497 p = self.g_matrix.shape[0]

498 l_mat = np.full((p, 4), -np.inf) # pragma: no mutate

499 if p == 0:

500 return l_mat

501

502 eps = np.sqrt(np.finfo(np.float64).eps)

503 active = segment.active_ineq

504 inactive = ~active

505

506 # Enter: an inactive row's slack rises to zero. The slope/intercept split

507 # comes straight from the affine weights; the slope sign mirrors the box

508 # "moves to a bound" event (decreasing lam must raise the slack).

509 s_alpha = self.g_matrix @ segment.r_alpha - self.h_vector

510 s_beta = self.g_matrix @ segment.r_beta

511 enter = inactive & (s_beta < -eps) # pragma: no mutate

512 l_mat[enter, 0] = -s_alpha[enter] / s_beta[enter]

513

514 # Release: an active row's non-negative multiplier falls back to zero,

515 # the row analogue of a blocked multiplier changing sign.

516 release = active & (segment.eta_beta > +eps) # pragma: no mutate

517 l_mat[release, 1] = -segment.eta_alpha[release] / segment.eta_beta[release]

518 return l_mat

519

520 def __len__(self) -> int:

521 """Get the number of turning points in the efficient frontier.

522

523 Returns:

524 The number of turning points currently stored in the object.

525

526 """

527 return len(self.turning_points)

528

529 def _first_turning_point(self) -> TurningPoint:

530 """Calculate the first turning point on the efficient frontier.

531

532 The first turning point is the maximum-return vertex of the feasible

533 polytope. For the all-ones budget constraint with no inequality rows it is

534 found by the greedy fill of ``init_algo``; for a general equality system

535 ``A w = b`` or any ``G w <= h`` it is found by solving the linear program

536 in ``first_vertex_lp``, which also reports the initially-active rows.

537

538 Returns:

539 A TurningPoint object representing the first point on the efficient frontier.

540

541 """

542 if self.g_matrix.shape[0] == 0 and self.a.shape[0] == 1 and np.allclose(self.a, 1.0):

543 return init_algo(

544 mean=self.mean,

545 lower_bounds=self.lower_bounds,

546 upper_bounds=self.upper_bounds,

547 total=float(self.b[0]),

548 )

549 return first_vertex_lp(

550 mean=self.mean,

551 lower_bounds=self.lower_bounds,

552 upper_bounds=self.upper_bounds,

553 a=self.a,

554 b=self.b,

555 tol=self.tol,

556 g=self.g_matrix,

557 h=self.h_vector,

558 )

559

560 def _append(self, tp: TurningPoint, tol: float | None = None) -> None:

561 """Append a turning point to the list of turning points.

562

563 This method validates that the turning point satisfies the constraints

564 before adding it to the list.

565

566 Args:

567 tp: The turning point to append.

568 tol: Tolerance for constraint validation. If None, uses the class's

569 tol attribute. Pass 0 for exact validation.

570

571 Raises:

572 ValueError: If the turning point violates any constraints.

573

574 """

575 tol = self.tol if tol is None else tol

576

577 if not np.all(tp.weights >= (self.lower_bounds - tol)): # pragma: no mutate

578 msg = "Weights below lower bounds"

579 raise ValueError(msg)

580 if not np.all(tp.weights <= (self.upper_bounds + tol)): # pragma: no mutate

581 msg = "Weights above upper bounds"

582 raise ValueError(msg)

583 if not np.allclose(self.a @ tp.weights, self.b, atol=1e-7):

584 msg = "Weights violate the equality constraint A w = b"

585 raise ValueError(msg)

586 if self.g_matrix.shape[0] and not np.all(self.g_matrix @ tp.weights <= self.h_vector + tol):

587 msg = "Weights violate the inequality constraint G w <= h"

588 raise ValueError(msg)

589

590 self.turning_points.append(tp)

591

592 def _emit(

593 self,

594 lamb: float,

595 weights: NDArray[np.float64],

596 free: NDArray[np.bool_],

597 active_ineq: NDArray[np.bool_],

598 ) -> None:

599 """Build and store a turning point, projecting away sub-tolerance round-off.

600

601 Orchestrates the three steps taken at every turning point: refuse the point

602 if the free-asset block is numerically singular (see

603 :meth:`_guard_degeneracy`); project the candidate back onto the feasible

604 set to clear sub-tolerance round-off (see :meth:`_project_feasible`); then

605 validate and store it (see :meth:`_append`).

606

607 On tie-heavy or near-degenerate problems (a short, near-rank-deficient

608 sample covariance, duplicated assets, or many coincident events) accumulated

609 floating-point round-off over the many turning points of a large trace can

610 place a free weight a hair outside its box. The covariance there has

611 near-flat directions (its small eigenvalues) and the round-off lies in

612 exactly those directions, so the candidate is optimal to solver precision

613 but not exactly feasible; the projection clears it and is a strict no-op for

614 the well-posed turning points that are already feasible.

615 """

616 self._guard_degeneracy(lamb, free)

617 weights = self._project_feasible(weights, active_ineq)

618 self._append(TurningPoint(lamb=lamb, weights=weights, free=free, active_ineq=active_ineq))

619

620 def _guard_degeneracy(self, lamb: float, free: NDArray[np.bool_]) -> None:

621 """Refuse the turning point when the free-asset block is numerically singular.

622

623 We distinguish two regimes by the conditioning of the free-asset block.

624 While that block stays numerically full rank its solve is reliable and any

625 box violation is round-off, which :meth:`_project_feasible` clears. Once the

626 free set grows past the covariance rank the block is numerically singular

627 and its solve is unreliable; whatever weights it produces (feasible or not)

628 cannot be trusted, so we refuse and raise an actionable diagnosis instead of

629 silently returning a possibly-suboptimal frontier.

630

631 The discriminator is the free block's reciprocal condition number, read

632 from its symmetric eigenvalues. Unlike the magnitude of the box violation,

633 which is the residual of a singular solve and therefore varies with the

634 BLAS/LAPACK build, the conditioning is deterministic and portable, so the

635 completed-vs-declined boundary is the same on every platform.

636

637 The per-turning-point conditioning check is skipped entirely when the full

638 covariance is well conditioned: by interlacing no free block can then be

639 singular, so the check is provably redundant (see

640 :attr:`_free_blocks_well_conditioned`). It runs only when the full

641 covariance is itself near-singular, which is exactly the regime that can

642 produce an untrustworthy free-block solve.

643

644 Args:

645 lamb: Lambda value of the candidate turning point, used in the message.

646 free: Boolean mask of the free assets at the candidate.

647

648 Raises:

649 ValueError: With a degeneracy-specific message when the free-asset

650 block is numerically singular (an unreliable solve); otherwise

651 returns without effect.

652 """

653 # When the full covariance clears the floor, interlacing guarantees every

654 # free block does too, so the per-step guard can never fire -- skip it and

655 # the costly per-step rcond. Only a near-singular full covariance needs the

656 # check, and there it runs exactly as before.

657 if not self._free_blocks_well_conditioned:

658 rcond = self.covariance_operator.rcond_free(free)

659 if rcond < _RCOND_FLOOR:

660 n_free = int(np.count_nonzero(free))

661 msg = (

662 f"Critical Line Algorithm hit a degeneracy at lambda={lamb:.4g} "

663 f"(free-set size {n_free}): the free-asset covariance block is "

664 f"numerically singular (reciprocal condition number {rcond:.2g}), "

665 "so its solve is unreliable and the turning point cannot be "

666 "trusted. The trace was stopped rather than risk silently "

667 "returning a suboptimal frontier. This happens when the free set "

668 "grows past the covariance rank (for example a sample covariance "

669 "from far fewer days than assets). Use a well-conditioned, "

670 "full-rank estimate (ample history), or a FactorCovariance backend "

671 "(diagonal-plus-low-rank), which is positive definite by construction."

672 )

673 raise ValueError(msg)

674

675 def _project_feasible(

676 self, weights: NDArray[np.float64], active_ineq: NDArray[np.bool_] | None = None

677 ) -> NDArray[np.float64]:

678 """Project ``weights`` onto the feasible region, clearing round-off.

679

680 On tie-heavy or near-degenerate problems accumulated round-off can place a

681 candidate a hair outside its box at a well-conditioned vertex. Projecting it

682 back onto ``{w : lower <= w <= upper, C w = d}`` clears that round-off and

683 lets the trace continue. The projection is a strict no-op for well-posed,

684 already-feasible turning points (the common case), so it does not perturb

685 the exact frontier.

686

687 Dispatches to the closed-form capped-simplex projection for the canonical

688 all-ones budget with no active inequality row (see

689 :meth:`_project_capped_simplex`), and to the general alternating projection

690 otherwise (see :meth:`_project_alternating`).

691

692 Args:

693 weights: The candidate weight vector to project.

694 active_ineq: Boolean mask (length ``p``) of the active inequality rows;

695 ``None`` means no inequality rows.

696

697 Returns:

698 The projected weight vector, feasible to the box and the constraints.

699 """

700 if active_ineq is None:

701 active_ineq = np.zeros(self.g_matrix.shape[0], dtype=bool)

702 lower, upper = self.lower_bounds, self.upper_bounds

703 # Well-posed turning points are already strictly feasible: return them

704 # unchanged so the projection never perturbs the exact frontier.

705 if np.all(weights >= lower) and np.all(weights <= upper):

706 return weights

707

708 if not active_ineq.any() and self.a.shape[0] == 1 and np.allclose(self.a, 1.0):

709 return self._project_capped_simplex(weights)

710 return self._project_alternating(weights, active_ineq)

711

712 def _project_capped_simplex(self, weights: NDArray[np.float64]) -> NDArray[np.float64]:

713 """Euclidean projection onto the capped simplex for the all-ones budget.

714

715 Computed by water-filling a single shift ``theta`` so that

716 ``sum(clip(w - theta, lower, upper)) = b``. A plain clip-then-rescale is

717 deliberately *not* used: rescaling the clipped weights to restore the budget

718 can push capped weights back over their bound when many assets are capped at

719 once (heavy ties under a tight cap), re-introducing the very infeasibility

720 the projection is meant to clear.

721

722 Args:

723 weights: The candidate weight vector, known to violate its box.

724

725 Returns:

726 The projected weight vector, on the budget and inside the box.

727 """

728 lower, upper = self.lower_bounds, self.upper_bounds

729 # sum(clip(w - theta, lower, upper)) is non-increasing in theta; bisect

730 # for the theta that hits the budget. The bracket clips to all-upper

731 # (sum at least the budget) at theta_lo and all-lower (at most the

732 # budget) at theta_hi, so a root is guaranteed for any feasible problem.

733 budget = float(self.b[0])

734 theta_lo = float((weights - upper).min()) - 1.0

735 theta_hi = float((weights - lower).max()) + 1.0

736 for _ in range(100):

737 theta = 0.5 * (theta_lo + theta_hi)

738 if float(np.clip(weights - theta, lower, upper).sum()) > budget:

739 theta_lo = theta

740 else:

741 theta_hi = theta

742 return np.clip(weights - 0.5 * (theta_lo + theta_hi), lower, upper)

743

744 def _project_alternating(self, weights: NDArray[np.float64], active_ineq: NDArray[np.bool_]) -> NDArray[np.float64]:

745 """Alternating projection onto the box and the affine set ``{C w = d}``.

746

747 ``C`` stacks the equality rows ``A`` and the active inequality rows ``G_S``

748 (held at equality ``g_i w = h_i``). The candidate already satisfies

749 ``C w = d`` (the reduced KKT solve enforces it) and the inactive inequality

750 rows keep a margin, so a few iterations alternating a box clip with the

751 affine projection converge to a point feasible to the box, the equalities,

752 and every inequality.

753

754 Args:

755 weights: The candidate weight vector, known to violate its box.

756 active_ineq: Boolean mask (length ``p``) of the active inequality rows.

757

758 Returns:

759 The projected weight vector, feasible to the box and the constraints.

760 """

761 lower, upper = self.lower_bounds, self.upper_bounds

762 c = np.vstack([self.a, self.g_matrix[active_ineq]])

763 d = np.concatenate([self.b, self.h_vector[active_ineq]])

764 gram = c @ c.T

765 projected = weights

766 for _ in range(100):

767 projected = np.clip(projected, lower, upper)

768 projected = projected - c.T @ np.linalg.solve(gram, c @ projected - d)

769 return np.clip(projected, lower, upper)

770

771 @property

772 def frontier(self) -> Frontier:

773 """Get the efficient frontier constructed from the turning points.

774

775 This property creates a Frontier object from the list of turning points,

776 which can be used to analyze the risk-return characteristics of the

777 efficient portfolios.

778

779 Returns:

780 A Frontier object representing the efficient frontier.

781

782 """

783 return Frontier(

784 covariance=self.covariance,

785 mean=self.mean,

786 frontier=[FrontierPoint(point.weights) for point in self.turning_points],

787 )