Coverage for src/cvxsimulator/builder.py: 100%

77 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-10 18:45 +0000

1"""Builder class for the CVX Simulator.""" 

2 

3# Copyright 2023 Stanford University Convex Optimization Group 

4# 

5# Licensed under the Apache License, Version 2.0 (the "License"); 

6# you may not use this file except in compliance with the License. 

7# You may obtain a copy of the License at 

8# 

9# http://www.apache.org/licenses/LICENSE-2.0 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16from __future__ import annotations 

17 

18from collections.abc import Generator 

19from dataclasses import dataclass 

20 

21import numpy as np 

22import pandas as pd 

23import polars as pl 

24 

25from .portfolio import Portfolio 

26from .state import State 

27from .utils.interpolation import valid 

28 

29 

def polars2pandas(dframe: pl.DataFrame, date_col="date") -> pd.DataFrame:
    """Turn a Polars frame into a date-indexed Pandas frame of floats.

    The column named by ``date_col`` is cast to a nanosecond-resolution
    datetime, every other column is cast to ``Float64``, and the result is
    converted to Pandas with ``date_col`` as its index.

    Args:
        dframe (pl.DataFrame): The Polars DataFrame to convert.
        date_col (str): Name of the column holding the dates, defaults to "date".

    Returns:
        pd.DataFrame: The converted frame, indexed by ``date_col``.

    """
    # Everything that is not the date column is treated as numeric data.
    value_columns = [name for name in dframe.columns if name != date_col]

    # Cast the date column first, then the value columns, in two separate steps.
    with_dates = dframe.with_columns(pl.col(date_col).cast(pl.Datetime("ns")))
    as_floats = with_dates.with_columns([pl.col(name).cast(pl.Float64) for name in value_columns])

    pandas_frame = as_floats.to_pandas()
    return pandas_frame.set_index(date_col)

47 

48 

@dataclass
class Builder:
    """Auxiliary class used to build portfolios step by step.

    Iterating over a Builder walks through the timestamps of the price
    frame. In each iteration the caller can update the portfolio by setting
    the weights, the position (units) or the cash position, and can record
    the assets under management via ``aum``.

    After the iteration has been completed, a Portfolio object is created
    by calling the ``build`` method.

    Notes
    -----
    ``_state``, ``_units`` and ``_aum`` are (re)created in ``__post_init__``;
    any value passed for them to the constructor is overwritten.
    """

    prices: pd.DataFrame

    _state: State | None = None
    _units: pd.DataFrame | None = None
    _aum: pd.Series | None = None
    initial_aum: float = 1e6

    def __post_init__(self) -> None:
        """Validate the price index and set up the internal containers.

        Creates a fresh State, a NaN-filled units frame with the same index
        and columns as ``prices``, an empty float series for the AUM history
        indexed like ``prices``, and finally seeds the state's AUM with
        ``initial_aum``.

        Raises
        ------
        ValueError
            If the index of ``prices`` is not monotonically increasing or
            contains duplicate values.

        """
        if not self.prices.index.is_monotonic_increasing:
            raise ValueError("Index must be monotonically increasing")

        if not self.prices.index.is_unique:
            raise ValueError("Index must have unique values")

        self._state = State()

        # One row per timestamp, one column per asset; NaN until a position is set.
        self._units = pd.DataFrame(
            index=self.prices.index,
            columns=self.prices.columns,
            data=np.nan,
            dtype=float,
        )

        # AUM history; entries stay NaN until written through the ``aum`` setter.
        self._aum = pd.Series(index=self.prices.index, dtype=float)

        self._state.aum = self.initial_aum

    @property
    def valid(self):
        """Apply the ``valid`` helper to every price column.

        Returns
        -------
        The result of ``self.prices.apply(valid)``: one entry per asset.
        NOTE(review): the ``valid`` helper is imported from
        ``utils.interpolation`` and not visible here; presumably it flags
        whether a series has no gaps between its first and last valid
        observation - confirm against the helper.

        """
        return self.prices.apply(valid)

    @property
    def intervals(self) -> pd.DataFrame:
        """Get the first and last valid index for each asset's price series.

        Returns
        -------
        pd.DataFrame
            A DataFrame with assets as rows and two columns:
            - 'first': the first valid index (timestamp) of the asset
            - 'last': the last valid index (timestamp) of the asset

        """
        return self.prices.apply(
            lambda ts: pd.Series({"first": ts.first_valid_index(), "last": ts.last_valid_index()})
        ).transpose()

    @property
    def index(self) -> pd.DatetimeIndex:
        """The time index of the portfolio.

        Returns
        -------
        pd.DatetimeIndex
            The index of ``prices`` converted to a DatetimeIndex. A new
            index object is built on every access.

        """
        return pd.DatetimeIndex(self.prices.index)

    @property
    def current_prices(self) -> np.ndarray:
        """Get the current prices of the assets reported by the state.

        Returns
        -------
        np.ndarray
            The state's current prices restricted to ``self._state.assets``.

        Notes
        -----
        The state's prices are updated while iterating over the Builder.

        """
        return self._state.prices[self._state.assets].to_numpy()

    def __iter__(self) -> Generator[tuple[pd.DatetimeIndex, State], None, None]:
        """Iterate over the timestamps of the price frame.

        At every step the state's prices and time are updated to the current
        timestamp before the step is yielded.

        Yields
        ------
        tuple[pd.DatetimeIndex, State]
            The timestamps seen so far (up to and including the current one)
            and the current state. The same State object is yielded at every
            step; it is mutated in place.

        """
        # Build the DatetimeIndex once instead of on every step via the property.
        index = self.index

        for t in index:
            # update the current prices for the portfolio
            self._state.prices = self.prices.loc[t]

            # update the current time for the state
            self._state.time = t

            # yield the vector of times seen so far and the current state
            yield index[index <= t], self._state

    @property
    def position(self) -> pd.Series:
        """Get the units held at the state's current time.

        Returns
        -------
        pd.Series
            The row of the units frame for the current time, covering all
            columns of ``prices`` (NaN where no position has been set).

        """
        return self._units.loc[self._state.time]

    @position.setter
    def position(self, position: pd.Series) -> None:
        """Set the units held at the state's current time.

        The values are written into the units frame for the assets reported
        by the state, and forwarded to the state's ``position``.

        Parameters
        ----------
        position : pd.Series
            The new position (number of units) for the state's assets

        """
        self._units.loc[self._state.time, self._state.assets] = position
        self._state.position = position

    @property
    def cashposition(self) -> pd.Series:
        """Get the current cash value of each position in the portfolio.

        The cash value is the number of units multiplied by the current
        price of the asset.

        Returns
        -------
        pd.Series
            The cash value of each position, indexed by the state's assets

        Notes
        -----
        ``current_prices`` only covers ``self._state.assets`` whereas
        ``position`` covers every column of ``prices``. The position is
        therefore restricted to the same assets before multiplying, so both
        operands always have the same length and order.
        NOTE(review): when the state's assets equal all price columns this
        is identical to multiplying the full row - confirm against State.

        """
        units = self.position.loc[self._state.assets]
        return units * self.current_prices

    @cashposition.setter
    def cashposition(self, cashposition: pd.Series) -> None:
        """Set the current cash value of each position in the portfolio.

        The cash values are converted into units using the current prices
        and then assigned through the ``position`` setter:
        position = cashposition / prices

        Parameters
        ----------
        cashposition : pd.Series
            The new cash value for each of the state's assets

        """
        self.position = cashposition / self.current_prices

    @property
    def units(self) -> pd.DataFrame:
        """Get the complete history of portfolio holdings.

        Returns
        -------
        pd.DataFrame
            The number of units held for each asset over time, with the
            index of ``prices`` as index and the assets as columns

        Notes
        -----
        The frame is returned as is (no copy) and is what ``build`` hands
        over to the Portfolio.

        """
        return self._units

    def build(self) -> Portfolio:
        """Create a Portfolio from the data collected by the builder.

        Returns
        -------
        Portfolio
            A Portfolio constructed from the builder's ``prices``,
            ``units`` and ``aum``

        """
        return Portfolio(prices=self.prices, units=self.units, aum=self.aum)

    @property
    def weights(self) -> np.ndarray:
        """Get the current portfolio weights of the state's assets.

        Returns
        -------
        np.ndarray
            The state's weights restricted to ``self._state.assets``

        """
        return self._state.weights[self._state.assets].to_numpy()

    @weights.setter
    def weights(self, weights: np.ndarray) -> None:
        """Set the current portfolio weights.

        The weights are converted into units using the state's NAV and the
        current prices, then assigned through the ``position`` setter:
        position = NAV * weights / prices

        Parameters
        ----------
        weights : np.ndarray
            The weight for each of the state's assets

        """
        self.position = self._state.nav * weights / self.current_prices

    @property
    def aum(self) -> pd.Series:
        """Get the assets under management (AUM) history of the portfolio.

        Returns
        -------
        pd.Series
            The AUM values recorded so far, indexed like ``prices``.
            Timestamps for which no AUM has been set hold NaN.

        """
        return self._aum

    @aum.setter
    def aum(self, aum: float) -> None:
        """Set the assets under management (AUM) at the current time.

        The value is stored in the AUM history at the state's current time
        and forwarded to the state's ``aum``.

        Parameters
        ----------
        aum : float
            The new AUM value to set

        """
        self._aum[self._state.time] = aum
        self._state.aum = aum