Coverage for src / cvx / risk / linalg / valid.py: 100%

7 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-09 03:39 +0000

1"""Matrix validation utilities for handling non-finite values. 

2 

3This module provides functions for validating and cleaning matrices that may 

4contain non-finite values (NaN or infinity). This is particularly useful when 

5working with financial data where missing values are common. 

6 

7Example: 

8 Extract the valid submatrix from a covariance matrix with missing data: 

9 

10 >>> import numpy as np 

11 >>> from cvx.risk.linalg import valid 

12 >>> # Create a covariance matrix with some NaN values on diagonal 

13 >>> cov = np.array([[np.nan, 0.5, 0.2], 

14 ... [0.5, 2.0, 0.3], 

15 ... [0.2, 0.3, np.nan]]) 

16 >>> # Get valid indicator and submatrix 

17 >>> v, submatrix = valid(cov) 

18 >>> v # Second row/column is valid 

19 array([False, True, False]) 

20 >>> submatrix 

21 array([[2.]]) 

22 

23""" 

24 

25# Copyright 2023 Stanford University Convex Optimization Group 

26# 

27# Licensed under the Apache License, Version 2.0 (the "License"); 

28# you may not use this file except in compliance with the License. 

29# You may obtain a copy of the License at 

30# 

31# http://www.apache.org/licenses/LICENSE-2.0 

32# 

33# Unless required by applicable law or agreed to in writing, software 

34# distributed under the License is distributed on an "AS IS" BASIS, 

35# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

36# See the License for the specific language governing permissions and 

37# limitations under the License. 

38from __future__ import annotations 

39 

40import numpy as np 

41 

42 

def valid(matrix: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Extract the valid subset of a matrix by removing rows/columns with non-finite values.

    This function identifies rows and columns in a square matrix that contain
    non-finite values (NaN or infinity) on the diagonal and removes them,
    returning both the indicator vector and the resulting valid submatrix.

    This is useful when working with covariance matrices where some assets
    may have missing or invalid data.

    Args:
        matrix: A square n x n matrix to be validated. Typically a covariance
            or correlation matrix.

    Returns:
        A tuple containing:
            - v: Boolean vector of shape (n,) indicating which rows/columns are
              valid (True for valid, False for invalid).
            - submatrix: The valid submatrix with invalid rows/columns removed.
              Shape is (k, k) where k is the number of True values in v.

    Raises:
        AssertionError: If the input is not a square (n x n) two-dimensional
            array.

    Example:
        Basic usage with a covariance matrix:

        >>> import numpy as np
        >>> from cvx.risk.linalg import valid
        >>> # Create a 3x3 matrix with one invalid entry
        >>> cov = np.array([[1.0, 0.5, 0.2],
        ...                 [0.5, np.nan, 0.3],
        ...                 [0.2, 0.3, 1.0]])
        >>> v, submatrix = valid(cov)
        >>> v
        array([ True, False,  True])
        >>> submatrix
        array([[1. , 0.2],
               [0.2, 1. ]])

        Handling a fully valid matrix:

        >>> cov = np.array([[1.0, 0.5], [0.5, 1.0]])
        >>> v, submatrix = valid(cov)
        >>> v
        array([ True,  True])
        >>> np.allclose(submatrix, cov)
        True

        Handling infinity values:

        >>> cov = np.array([[1.0, 0.5, 0.2],
        ...                 [0.5, np.inf, 0.3],
        ...                 [0.2, 0.3, 1.0]])
        >>> v, submatrix = valid(cov)
        >>> v
        array([ True, False,  True])
        >>> submatrix
        array([[1. , 0.2],
               [0.2, 1. ]])

        Multiple invalid entries:

        >>> cov = np.array([[np.nan, 0.1, 0.2, 0.3],
        ...                 [0.1, 2.0, 0.4, 0.5],
        ...                 [0.2, 0.4, np.nan, 0.6],
        ...                 [0.3, 0.5, 0.6, 3.0]])
        >>> v, submatrix = valid(cov)
        >>> v
        array([False,  True, False,  True])
        >>> submatrix.shape
        (2, 2)
        >>> submatrix
        array([[2. , 0.5],
               [0.5, 3. ]])

        Using with portfolio optimization (skip assets with missing data):

        >>> from cvx.risk.sample import SampleCovariance
        >>> import cvxpy as cp
        >>> # Full covariance has invalid data for asset 1
        >>> full_cov = np.array([[1.0, np.nan, 0.2],
        ...                      [np.nan, np.nan, np.nan],
        ...                      [0.2, np.nan, 1.0]])
        >>> v, valid_cov = valid(full_cov)
        >>> v
        array([ True, False,  True])
        >>> # Optimize only valid assets
        >>> model = SampleCovariance(num=2)
        >>> model.update(
        ...     cov=valid_cov,
        ...     lower_assets=np.zeros(2),
        ...     upper_assets=np.ones(2)
        ... )
        >>> weights = cp.Variable(2)
        >>> risk = model.estimate(weights)
        >>> isinstance(risk, cp.Expression)
        True

        Non-square matrix raises assertion:

        >>> try:
        ...     valid(np.array([[1, 2, 3], [4, 5, 6]]))
        ... except AssertionError:
        ...     print("Caught assertion error for non-square matrix")
        Caught assertion error for non-square matrix

    Note:
        The function checks only the diagonal elements for validity. It assumes
        that if the diagonal is finite, the entire row/column is valid. This is
        a common assumption for covariance matrices.

    """
    # Reject anything that is not a square 2-D array with the documented
    # AssertionError. Checking ndim first keeps 0-d/1-d input from failing with
    # an unrelated IndexError on shape[1], and 3-d input from reaching np.diag.
    # An explicit raise (not an `assert` statement) survives `python -O`.
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise AssertionError(f"matrix must be square (n x n), got shape {matrix.shape}")

    # Only the diagonal decides validity: True where the diagonal entry is
    # finite (neither NaN nor +/-inf).
    v = np.isfinite(np.diag(matrix))

    # np.ix_ builds an open mesh from the mask, selecting the valid rows and
    # columns in one indexing step instead of two chained copies.
    return v, matrix[np.ix_(v, v)]