Coverage for src/cvxsimulator/builder.py: 100%
77 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-10 18:45 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-10 18:45 +0000
1"""Builder class for the CVX Simulator."""
3# Copyright 2023 Stanford University Convex Optimization Group
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16from __future__ import annotations
18from collections.abc import Generator
19from dataclasses import dataclass
21import numpy as np
22import pandas as pd
23import polars as pl
25from .portfolio import Portfolio
26from .state import State
27from .utils.interpolation import valid
def polars2pandas(dframe: pl.DataFrame, date_col="date") -> pd.DataFrame:
    """Turn a Polars DataFrame into a date-indexed Pandas DataFrame.

    The column named by ``date_col`` is cast to a nanosecond-resolution
    datetime and every other column is cast to Float64. The frame is then
    handed to Pandas and the date column becomes its index.

    Args:
        dframe (pl.DataFrame): The Polars DataFrame to convert.
        date_col (str): Name of the column holding the dates, defaults to "date".

    Returns:
        pd.DataFrame: The converted frame, indexed by ``date_col``.

    """
    # Step 1: normalise the date column to Datetime("ns").
    with_dates = dframe.with_columns(pl.col(date_col).cast(pl.Datetime("ns")))

    # Step 2: everything that is not the date column is cast to Float64.
    value_columns = [name for name in with_dates.columns if name != date_col]
    as_floats = with_dates.with_columns([pl.col(name).cast(pl.Float64) for name in value_columns])

    # Step 3: move over to Pandas and index by the date column.
    frame = as_floats.to_pandas()
    return frame.set_index(date_col)
@dataclass
class Builder:
    """The Builder is an auxiliary class used to build portfolios.

    It overloads the __iter__ method to allow the class to iterate over
    the timestamps for which the portfolio data is available.

    In each iteration we can update the portfolio by setting either
    the weights, the position or the cash position.

    After the iteration has been completed we build a Portfolio object
    by calling the build method.
    """

    # Price history: one row per timestamp, one column per asset.
    prices: pd.DataFrame

    # Internal bookkeeping. These three fields are always (re)created in
    # __post_init__, so values passed to the constructor are overwritten.
    _state: State | None = None
    _units: pd.DataFrame | None = None
    _aum: pd.Series | None = None
    # AUM assigned to the internal state when the builder is created.
    initial_aum: float = 1e6

    def __post_init__(self) -> None:
        """Initialize the Builder instance after creation.

        This method is automatically called after the object is initialized.
        It creates a fresh State, an all-NaN units frame shaped like ``prices``,
        an empty AUM series over the same index, and hands ``initial_aum``
        to the state.

        Raises
        ------
        ValueError
            If the index of ``prices`` is not monotonically increasing,
            or if it contains duplicate values.

        """
        if not self.prices.index.is_monotonic_increasing:
            raise ValueError("Index must be monotonically increasing")

        if not self.prices.index.is_unique:
            raise ValueError("Index must have unique values")

        self._state = State()

        # Units start as NaN everywhere; rows are filled in via the setters.
        self._units = pd.DataFrame(
            index=self.prices.index,
            columns=self.prices.columns,
            data=np.nan,
            dtype=float,
        )

        self._aum = pd.Series(index=self.prices.index, dtype=float)

        self._state.aum = self.initial_aum

    @property
    def valid(self):
        """Check the validity of price data for each asset.

        Applies the imported ``valid`` helper to every column of ``prices``.

        Returns
        -------
        pd.Series or pd.DataFrame
            Whatever ``DataFrame.apply`` builds from the helper's per-column
            results.
            NOTE(review): if the helper returns a single boolean per column
            the result is a Series indexed by asset - confirm against
            ``utils.interpolation.valid``.

        Notes
        -----
        Presumably a column counts as valid when it has no missing values
        between its first and last valid data points; the helper is not
        visible here, so verify before relying on this.

        """
        return self.prices.apply(valid)

    @property
    def intervals(self) -> pd.DataFrame:
        """Get the first and last valid index for each asset's price series.

        Returns
        -------
        pd.DataFrame
            A DataFrame with assets as rows and two columns:
            - 'first': The first valid index (timestamp) for each asset
            - 'last': The last valid index (timestamp) for each asset

        Notes
        -----
        This is useful for determining the valid trading period for each asset,
        especially when different assets have different data availability periods.

        """
        return self.prices.apply(
            lambda ts: pd.Series({"first": ts.first_valid_index(), "last": ts.last_valid_index()})
        ).transpose()

    @property
    def index(self) -> pd.DatetimeIndex:
        """The index of the portfolio.

        Returns
        -------
        pd.DatetimeIndex
            The index of ``prices`` wrapped as a DatetimeIndex, i.e. the
            timestamps for which portfolio data is available.

        """
        return pd.DatetimeIndex(self.prices.index)

    @property
    def current_prices(self) -> np.ndarray:
        """Get the current prices for the assets of the internal state.

        Returns
        -------
        np.ndarray
            The state's prices restricted to ``self._state.assets``,
            in that order.

        Notes
        -----
        The state's prices are refreshed on every step of the iteration
        over the builder.

        """
        return self._state.prices[self._state.assets].to_numpy()

    def __iter__(self) -> Generator[tuple[pd.DatetimeIndex, State], None, None]:
        """Iterate over the builder in a for loop.

        Before each step the internal state receives the price row and the
        timestamp of that step.

        Yields
        ------
        tuple[pd.DatetimeIndex, State]
            The timestamps seen so far (up to and including the current one)
            and the internal state. The same state object is yielded on
            every step and is mutated in place.

        """
        # Build the DatetimeIndex once instead of twice per step.
        index = self.index

        for t in index:
            # update the current prices for the portfolio
            self._state.prices = self.prices.loc[t]

            # update the current time for the state
            self._state.time = t

            # yield the vector of times seen so far and the current state
            yield index[index <= t], self._state

    @property
    def position(self) -> pd.Series:
        """Get the units held at the current time.

        Returns
        -------
        pd.Series
            The row of the units frame at the state's current time,
            indexed by every column of ``prices``.

        """
        return self._units.loc[self._state.time]

    @position.setter
    def position(self, position: pd.Series) -> None:
        """Set the current position of the portfolio.

        Writes the units into the row for the state's current time,
        restricted to the columns in ``self._state.assets``, and passes the
        same value on to the internal state.

        Parameters
        ----------
        position : pd.Series
            The new position (number of units) for each asset

        """
        self._units.loc[self._state.time, self._state.assets] = position
        self._state.position = position

    @property
    def cashposition(self) -> pd.Series:
        """Get the current cash value of each position in the portfolio.

        The units held are multiplied by the current prices.

        Returns
        -------
        pd.Series
            The cash value of each position, indexed by the assets in
            ``self._state.assets``

        Notes
        -----
        ``current_prices`` only covers ``self._state.assets``, so the units
        are restricted to the same assets before multiplying. Multiplying
        the full units row would fail, or silently broadcast, whenever
        those assets are a strict subset of the price columns.
        NOTE(review): presumably that happens when an asset has no price
        at the current time - confirm against State.

        """
        held = self.position.loc[self._state.assets]
        return held * self.current_prices

    @property
    def units(self) -> pd.DataFrame:
        """Get the complete history of portfolio holdings.

        Returns
        -------
        pd.DataFrame
            The number of units held for each asset over time,
            with dates as index and assets as columns

        Notes
        -----
        This is the frame handed to Portfolio by the build() method.

        """
        return self._units

    @cashposition.setter
    def cashposition(self, cashposition: pd.Series) -> None:
        """Set the current cash value of each position in the portfolio.

        The cash values are converted to units with the current prices
        and stored through the position setter.

        Parameters
        ----------
        cashposition : pd.Series
            The new cash value for each position, indexed by asset

        Notes
        -----
        The conversion formula is: position = cashposition / prices

        """
        self.position = cashposition / self.current_prices

    def build(self) -> Portfolio:
        """Create a new Portfolio instance from the current builder state.

        Returns
        -------
        Portfolio
            A Portfolio constructed from the builder's prices, units and aum

        """
        return Portfolio(prices=self.prices, units=self.units, aum=self.aum)

    @property
    def weights(self) -> np.ndarray:
        """Get the current portfolio weights for each asset.

        Returns
        -------
        np.ndarray
            The state's weights restricted to ``self._state.assets``

        """
        return self._state.weights[self._state.assets].to_numpy()

    @weights.setter
    def weights(self, weights: np.ndarray) -> None:
        """Set the current portfolio weights for each asset.

        The weights are converted to units with the state's NAV and the
        current prices and stored through the position setter.

        Parameters
        ----------
        weights : np.ndarray
            One weight per asset in ``self._state.assets``

        Notes
        -----
        The conversion formula is: position = NAV * weights / prices

        """
        self.position = self._state.nav * weights / self.current_prices

    @property
    def aum(self) -> pd.Series:
        """Get the assets under management (AUM) history of the portfolio.

        Returns
        -------
        pd.Series
            The AUM values recorded so far, with dates as index. Entries
            that have never been set are NaN.

        """
        return self._aum

    @aum.setter
    def aum(self, aum: float) -> None:
        """Set the current assets under management (AUM) of the portfolio.

        Records the value at the state's current time and passes it on
        to the internal state.

        Parameters
        ----------
        aum : float
            The new AUM value to set

        """
        # Explicit label-based assignment on the AUM series.
        self._aum.loc[self._state.time] = aum
        self._state.aum = aum