Coverage for src / cvx / markowitz / linalg / pca.py: 100%
34 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 13:49 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 13:49 +0000
1# Copyright 2023 Stanford University Convex Optimization Group
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""PCA analysis with numpy."""
16from __future__ import annotations
18from dataclasses import dataclass
20import numpy as np
22from .types import Matrix
@dataclass
class PCA:
    """Principal component analysis computed with NumPy only.

    The columns of ``returns`` are treated as the variables (assets) and the
    rows as observations. On construction the sample covariance of the
    columns is eigen-decomposed and the leading ``n_components`` eigenvectors
    are retained.

    Attributes set by ``__post_init__`` (not constructor arguments):
        eigenvalues: All eigenvalues of the covariance matrix, sorted in
            descending order (not truncated to ``n_components``).
        factors: ``returns`` projected onto the retained eigenvectors; one
            column per retained component. The projection uses ``returns``
            as given, without subtracting the column means.
        exposure: Transpose of the retained eigenvectors; one row per
            retained component, one column per asset.
    """

    returns: Matrix
    n_components: int = 0

    def __post_init__(self) -> None:
        """Validate inputs and compute factors, exposures, and eigenvalues.

        Raises:
            ValueError: If ``n_components`` is negative or exceeds the number
                of columns (assets) in ``returns``.
        """
        # A negative count would silently turn the slices below into
        # "all but the last k" selections, so reject it up front.
        if self.n_components < 0:
            raise ValueError("The number of components cannot be negative")

        if self.n_components > self.returns.shape[1]:
            raise ValueError("The number of components cannot exceed the number of assets")

        # compute the principal components without sklearn
        # 1. compute the covariance of the columns (np.cov expects variables in rows)
        cov = np.cov(self.returns.T)
        # np.cov returns a 0-d array for a single column; eigh needs a 2-d matrix
        cov = np.atleast_2d(cov)

        # 2. compute the eigenvalues and eigenvectors (eigh returns them in ascending order)
        self.eigenvalues, eigenvectors = np.linalg.eigh(cov)

        # 3. sort the eigenvalues in descending order, keeping eigenvectors aligned
        idx = self.eigenvalues.argsort()[::-1]
        self.eigenvalues = self.eigenvalues[idx]
        eigenvectors = eigenvectors[:, idx]

        # 4. compute the factors and exposures from the leading eigenvectors
        leading = eigenvectors[:, : self.n_components]
        self.factors = self.returns @ leading
        self.exposure = np.transpose(leading)

    @property
    def explained_variance(self) -> Matrix:
        """Share of the total variance carried by each retained component.

        Each retained eigenvalue is divided by the sum of all eigenvalues.
        """
        return np.array(self.eigenvalues[: self.n_components] / np.sum(self.eigenvalues))

    @property
    def cov(self) -> Matrix:
        """Sample covariance matrix of the retained factors (always 2-d)."""
        return np.atleast_2d(np.cov(self.factors.T))

    @property
    def systematic_returns(self) -> Matrix:
        """Portion of returns explained by the PCA factors.

        Computed as ``factors @ exposure``; ``exposure`` already holds the
        transposed eigenvectors, so no further transpose is applied.
        """
        return np.array(self.factors @ self.exposure)

    @property
    def idiosyncratic_returns(self) -> Matrix:
        """Residual returns after removing the systematic (factor) component."""
        return self.returns - self.systematic_returns

    @property
    def idiosyncratic_vola(self) -> Matrix:
        """Per-asset standard deviation of idiosyncratic (residual) returns.

        Uses ``np.std`` along axis 0 with its default ``ddof=0``.
        """
        return np.array(np.std(self.idiosyncratic_returns, axis=0))