Coverage for src / cvx / markowitz / linalg / pca.py: 100%

34 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-08 13:49 +0000

1# Copyright 2023 Stanford University Convex Optimization Group 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""PCA analysis with numpy.""" 

15 

16from __future__ import annotations 

17 

18from dataclasses import dataclass 

19 

20import numpy as np 

21 

22from .types import Matrix 

23 

24 

@dataclass
class PCA:
    """Principal component analysis computed with NumPy only.

    Constructor arguments:
        returns: Matrix of returns with one row per observation and one
            column per asset.
        n_components: Number of leading principal components to retain.
            Must lie between 0 and the number of assets (inclusive).

    Attributes computed in ``__post_init__``:
        eigenvalues: All eigenvalues of the asset covariance matrix, sorted
            in descending order (not truncated to ``n_components``).
        factors: ``returns`` projected onto the retained eigenvectors, one
            column per retained component.
        exposure: Retained eigenvectors stored as rows, i.e. one row per
            component and one column per asset.
    """

    returns: Matrix
    n_components: int = 0

    def __post_init__(self) -> None:
        """Validate inputs and compute factors, exposures, and eigenvalues.

        Raises:
            ValueError: If ``n_components`` is negative or exceeds the number
                of assets (columns of ``returns``).
        """
        # A negative count would otherwise be interpreted as a negative slice
        # end below and silently drop trailing components.
        if self.n_components < 0:
            raise ValueError("The number of components cannot be negative")

        if self.n_components > self.returns.shape[1]:
            raise ValueError("The number of components cannot exceed the number of assets")

        # compute the principal components without sklearn
        # 1. compute the covariance of the assets (columns of returns);
        #    np.cov returns a 0-d array for a single asset, hence atleast_2d
        cov = np.cov(self.returns.T)
        cov = np.atleast_2d(cov)

        # 2. compute the eigenvalues and eigenvectors (symmetric solver)
        self.eigenvalues, eigenvectors = np.linalg.eigh(cov)

        # 3. sort the eigenvalues in descending order and reorder the
        #    eigenvector columns to match
        idx = self.eigenvalues.argsort()[::-1]
        self.eigenvalues = self.eigenvalues[idx]
        eigenvectors = eigenvectors[:, idx]

        # 4. compute the factors by projecting the returns onto the leading
        #    eigenvectors
        retained = eigenvectors[:, : self.n_components]
        self.factors = self.returns @ retained

        # exposures are the retained eigenvectors stored row-wise
        self.exposure = np.transpose(retained)

    @property
    def explained_variance(self) -> Matrix:
        """Share of the total variance carried by each retained component.

        Each entry is a retained eigenvalue divided by the sum of all
        eigenvalues.
        """
        return np.array(self.eigenvalues[: self.n_components] / np.sum(self.eigenvalues))

    @property
    def cov(self) -> Matrix:
        """Covariance matrix of the retained factors (always at least 2-D)."""
        return np.atleast_2d(np.cov(self.factors.T))

    @property
    def systematic_returns(self) -> Matrix:
        """Portion of returns explained by the PCA factors.

        Computed as ``factors @ exposure``; since ``exposure`` already holds
        the eigenvectors as rows, no further transpose is applied.
        """
        return np.array(self.factors @ self.exposure)

    @property
    def idiosyncratic_returns(self) -> Matrix:
        """Residual returns after removing the systematic (factor) component."""
        return self.returns - self.systematic_returns

    @property
    def idiosyncratic_vola(self) -> Matrix:
        """Per-asset standard deviation of idiosyncratic (residual) returns.

        Uses ``np.std`` along axis 0 with NumPy's default normalisation.
        """
        return np.array(np.std(self.idiosyncratic_returns, axis=0))