
Commit a0e4030

refactor: remove pandas.concat from cycles
1 parent e84d398 commit a0e4030

8 files changed: +89 additions, -78 deletions
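The changes follow one pattern throughout: instead of calling pd.concat inside a loop, which re-copies everything accumulated so far on every iteration (quadratic work overall), rows or series are collected in a plain Python list or dict and combined in a single call after the loop. A minimal sketch of the before/after shape, with illustrative names that are not taken from the okama code:

import pandas as pd

def build_slow(rows):
    # Before: each iteration copies the whole accumulated frame again.
    df = pd.DataFrame()
    for row in rows:  # row is a dict of column -> scalar
        df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
    return df

def build_fast(rows):
    # After: collect first, build the DataFrame once.
    return pd.DataFrame.from_records(list(rows))

sample = [{"Risk": 0.12, "CAGR": 0.07}, {"Risk": 0.15, "CAGR": 0.09}]
assert build_slow(sample).equals(build_fast(sample))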

okama/asset_list.py

Lines changed: 7 additions & 6 deletions
@@ -425,7 +425,7 @@ def recovery_periods(self) -> pd.Series:
        """
        cummax = self.wealth_indexes.cummax()
        growth = cummax.pct_change()[1:]
-       max_recovery_periods = pd.Series(dtype=int)
+       recovery_data = {}  # Collect data to create Series once at the end
        for name in self.symbols:
            namespace = name.split(".", 1)[-1]
            if namespace == "INFL":
@@ -436,9 +436,9 @@ def recovery_periods(self) -> pd.Series:
            s2 = s1.groupby(s1_1).cumsum()
            # Max recovery period date should not be in the border (it's not recovered)
            max_period = s2.max() if s2.idxmax().to_timestamp() != self.last_date else np.nan
-           ser = pd.Series(max_period, index=[name])
-           max_recovery_periods = pd.concat([max_recovery_periods, ser])
-       return max_recovery_periods
+           recovery_data[name] = max_period
+       # Use Int64 (nullable integer) to support NaN values
+       return pd.Series(recovery_data, dtype="Int64")

    def get_cagr(self, period: Optional[int] = None, real: bool = False) -> pd.Series:
        """
@@ -1261,12 +1261,13 @@ def tracking_difference_annual(self) -> pd.DataFrame:
        >>> al = ok.AssetList(['SP500TR.INDX', 'VOO.US', 'SPXS.LSE'], inflation=False)
        >>> al.tracking_difference_annual.plot(kind='bar')
        """
-       result = pd.DataFrame()
+       rows_list = []  # Collect all rows to concatenate once at the end
        for x in self.assets_ror.resample("Y"):
            df = x[1]
            wealth_index = helpers.Frame.get_wealth_indexes(df)
            row = helpers.Index.tracking_difference(wealth_index).iloc[[-1]]
-           result = pd.concat([result, row], ignore_index=False)
+           rows_list.append(row)
+       result = pd.concat(rows_list, ignore_index=False)
        result.index = result.index.asfreq("Y")
        return result
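In recovery_periods the accumulator is a plain dict keyed by ticker, converted to a Series in one call; the nullable "Int64" dtype keeps whole-number periods readable while still allowing a missing value for assets that have not recovered by the last date. A small sketch of that conversion (tickers and values are made up):

import numpy as np
import pandas as pd

# months to recover; NaN marks an asset that has not recovered yet
recovery_data = {"SPY.US": 14, "GLD.US": np.nan, "AGG.US": 27}
periods = pd.Series(recovery_data, dtype="Int64")
print(periods)  # integers stay integers, the missing value shows as <NA>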

okama/common/helpers/helpers.py

Lines changed: 3 additions & 2 deletions
@@ -601,7 +601,7 @@ def rolling_fn(df: pd.DataFrame, window: int, fn: Callable, window_below_year: b
        The window should be in months.
        """
        check_rolling_window(window=window, ror=df, window_below_year=window_below_year)
-       output = pd.DataFrame()
+       results_list = []  # Collect all results to concatenate once at the end
        for start_date in df.index:
            end_date = start_date + window
            df_window = df.loc[start_date:end_date, :]
@@ -610,5 +610,6 @@ def rolling_fn(df: pd.DataFrame, window: int, fn: Callable, window_below_year: b
            if period_length.n < window:
                break
            windows_result = fn(df_window).iloc[-1, :]
-           output = pd.concat([output, windows_result.to_frame().T], copy=False)
+           results_list.append(windows_result.to_frame().T)
+       output = pd.concat(results_list, copy=False) if results_list else pd.DataFrame()
        return output
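The conditional at the end of rolling_fn matters because pd.concat raises ValueError ("No objects to concatenate") when given an empty list; falling back to an empty DataFrame preserves the old behaviour for the case where the loop never produces a full window. A tiny illustration:

import pandas as pd

results_list = []  # e.g. the data is shorter than the rolling window
output = pd.concat(results_list, copy=False) if results_list else pd.DataFrame()
print(output.empty)  # True, instead of "ValueError: No objects to concatenate"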

okama/common/make_asset_list.py

Lines changed: 27 additions & 18 deletions
@@ -150,7 +150,7 @@ def get_item(symbol):
        asset_obj_list = Parallel(n_jobs=-1, backend="threading")(delayed(get_item)(s) for s in ls)
        return {obj.symbol: obj for obj in asset_obj_list}

-   def _make_list(self, first_date, last_date) -> dict:
+   def _make_list(self, first_date: Optional[str], last_date: Optional[str]) -> dict:
        """
        Make an asset list from a list of symbols.
        """
@@ -164,54 +164,63 @@ def _make_list(self, first_date, last_date) -> dict:
        last_dates: Dict[str, pd.Timestamp] = {}
        names: Dict[str, str] = {}
        currencies: Dict[str, str] = {}
-       df = pd.DataFrame()
        input_first_date = pd.to_datetime(first_date) if first_date else None
        input_last_date = pd.to_datetime(last_date) if last_date else None
-       for i, asset_item in enumerate(self.asset_obj_dict.values()):
+
+       # Collect all rate of return series first, then concatenate once (more efficient)
+       ror_series_list: List[pd.Series] = []
+       for asset_item in self.asset_obj_dict.values():
            # get asset own first and last dates
            asset_own_first_date = asset_item.first_date
            asset_own_last_date = asset_item.last_date
-           if i == 0:  # required to use pd.concat below (df should not be empty).
-               df = self._make_ror(asset_item, base_currency_ticker)
-           else:
-               new = self._make_ror(asset_item, base_currency_ticker)
-               df = pd.concat([df, new], axis=1, join="inner", copy="false")
+
+           ror_series = self._make_ror(asset_item, base_currency_ticker)
+           ror_series_list.append(ror_series)
+
            # get asset first and last dates after adjusting to the currency
-           asset_first_date = df.index[0].to_timestamp()
-           asset_last_date = df.index[-1].to_timestamp()
+           asset_first_date = ror_series.index[0].to_timestamp()
+           asset_last_date = ror_series.index[-1].to_timestamp()
+
            # check first and last dates
-           fd = [asset_first_date, input_first_date]
-           ld = [asset_last_date, input_last_date]
-           fd_max = max(x for x in fd if x is not None)
-           ld_min = min(x for x in ld if x is not None)
+           fd_max = max(x for x in [asset_first_date, input_first_date] if x is not None)
+           ld_min = min(x for x in [asset_last_date, input_last_date] if x is not None)
            if helpers.Date.get_difference_in_months(ld_min, fd_max).n < 2:
                raise ShortPeriodLengthError(
-                   f"{asset_item.symbol} historical data period length is too short. " f"It must be at least 3 months."
+                   f"{asset_item.symbol} historical data period length is too short. It must be at least 3 months."
                )
+
            # append data to dictionaries
            currencies[asset_item.symbol] = asset_item.currency
            names[asset_item.symbol] = asset_item.name
            first_dates[asset_item.symbol] = asset_first_date
            last_dates[asset_item.symbol] = asset_last_date
            own_first_dates[asset_item.symbol] = asset_own_first_date
            own_last_dates[asset_item.symbol] = asset_own_last_date
+
+       # Concatenate all series at once (more efficient than repeated pd.concat in loop)
+       df = pd.concat(ror_series_list, axis=1, join="inner")
+
        first_dates[base_currency_ticker] = currency_first_date
        last_dates[base_currency_ticker] = currency_last_date
        own_last_dates[base_currency_ticker] = currency_last_date
        own_first_dates[base_currency_ticker] = currency_first_date
        currencies["asset list"] = base_currency_ticker
+
        # get first and last dates
        first_date_list = list(first_dates.values()) + [input_first_date]
        last_date_list = list(last_dates.values()) + [input_last_date]
        list_first_date = max(x for x in first_date_list if x is not None)
        list_last_date = min(x for x in last_date_list if x is not None)
-       # range of last and first dates not limeted by AssetList first_date & lastdate parameters
+
+       # range of last and first dates not limited by AssetList first_date & last_date parameters
        own_first_dates_sorted: list = sorted(own_first_dates.items(), key=lambda y: y[1])
        own_last_dates_sorted: list = sorted(own_last_dates.items(), key=lambda y: y[1])
+
        if isinstance(df, pd.Series):
            # required to convert Series to DataFrame for single asset list
            df = df.to_frame()
        df.columns.name = "Symbols"  # required for Plotly charts
+
        return dict(
            first_date=list_first_date,
            last_date=list_last_date,
@@ -244,7 +253,7 @@ def _adjust_ror_to_currency(cls, returns: pd.Series, asset_currency: asset.Asset
        asset_mult = returns + 1.0
        currency_mult = asset_currency.ror + 1.0
        # join dataframes to have the same Time Series Index
-       df = pd.concat([asset_mult, currency_mult], axis=1, join="inner", copy="false")
+       df = pd.concat([asset_mult, currency_mult], axis=1, join="inner")
        currency_mult = df.iloc[:, -1]
        asset_mult = df.iloc[:, 0]
        x = asset_mult * currency_mult - 1.0
@@ -272,7 +281,7 @@ def _add_inflation(self) -> pd.DataFrame:
        Add inflation column to returns DataFrame.
        """
        if hasattr(self, "inflation"):
-           return pd.concat([self._assets_ror, self.inflation_ts], axis=1, join="inner", copy="false")
+           return pd.concat([self._assets_ror, self.inflation_ts], axis=1, join="inner")
        else:
            return self._assets_ror
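Here the per-asset rate-of-return series are gathered in a list and inner-joined with one pd.concat(..., axis=1, join="inner"), so the resulting frame covers only the periods common to every asset. Note also that the removed calls passed copy="false", a non-empty string, which is truthy, so the old code never actually disabled copying; dropping the argument changes nothing. A sketch under made-up tickers and dates:

import pandas as pd

idx = pd.period_range("2020-01", periods=4, freq="M")
ror_series_list = [
    pd.Series([0.010, 0.020, -0.010, 0.030], index=idx, name="SPY.US"),
    pd.Series([0.002, 0.004, 0.001], index=idx[1:], name="AGG.US"),  # shorter history
]
# Inner join keeps only 2020-02..2020-04, the months present in both series.
df = pd.concat(ror_series_list, axis=1, join="inner")
print(df)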

okama/frontier/multi_period.py

Lines changed: 6 additions & 4 deletions
@@ -1201,10 +1201,11 @@ def mdp_points(self) -> pd.DataFrame:
        """
        if self._mdp_points.empty:
            target_cagrs = self._target_cagr_range_left
-           df = pd.DataFrame(dtype="float")
+           rows_list = []  # Collect all rows to concatenate once at the end
            for x in target_cagrs:
                row = self.get_most_diversified_portfolio(target_return=x)
-               df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
+               rows_list.append(row)
+           df = pd.DataFrame.from_records(rows_list)
            df = helpers.Frame.change_columns_order(df, ["Risk", "CAGR"])
            self._mdp_points = df
        return self._mdp_points
@@ -1275,14 +1276,15 @@ def get_monte_carlo(self, n: int = 100) -> pd.DataFrame:
            Rebalance(**args).return_ror_ts_ef,
            ror=self.assets_ror,
        )
-       random_portfolios = pd.DataFrame()
+       rows_list = []  # Collect all rows to create DataFrame once at the end
        for _, data in portfolios_ror.iterrows():
            risk_monthly = data.std()
            mean_return = data.mean()
            risk = helpers.Float.annualize_risk(risk_monthly, mean_return)
            cagr = helpers.Frame.get_cagr(data)
            row = {"Risk": risk, "CAGR": cagr}
-           random_portfolios = pd.concat([random_portfolios, pd.DataFrame(row, index=[0])], ignore_index=True)
+           rows_list.append(row)
+       random_portfolios = pd.DataFrame.from_records(rows_list)
        return random_portfolios

    def plot_pair_ef(self, tickers="tickers", figsize: Optional[tuple] = None) -> Axes:
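Both loops now append plain rows to a list and build the frame once with pd.DataFrame.from_records. For a list of dicts, from_records takes the union of the keys as columns and fills anything missing with NaN, which matches what the old pd.concat(..., ignore_index=True) produced row by row. A short sketch with placeholder values (get_most_diversified_portfolio is assumed to return a dict of scalars, as pd.DataFrame(row, index=[0]) in the old code implies):

import pandas as pd

rows_list = [
    {"SPY.US": 0.6, "AGG.US": 0.4, "Risk": 0.11, "CAGR": 0.065},
    {"SPY.US": 0.5, "AGG.US": 0.5, "Risk": 0.10, "CAGR": 0.060},
]
df = pd.DataFrame.from_records(rows_list)
# Mimic helpers.Frame.change_columns_order by putting Risk and CAGR first.
df = df[["Risk", "CAGR"] + [c for c in df.columns if c not in ("Risk", "CAGR")]]
print(df)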

okama/frontier/single_period.py

Lines changed: 12 additions & 10 deletions
@@ -777,10 +777,11 @@ def ef_points(self) -> pd.DataFrame:
        """
        if self._ef_points.empty:
            target_rs = self.mean_return_range
-           df = pd.DataFrame(dtype="float")
+           rows_list = []  # Collect all rows to concatenate once at the end
            for x in target_rs:
                row = self.minimize_risk(x, monthly_return=True)
-               df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
+               rows_list.append(row)
+           df = pd.DataFrame.from_records(rows_list)
            df = helpers.Frame.change_columns_order(df, ["Risk", "Mean return", "CAGR"])
            self._ef_points = df
        return self._ef_points
@@ -848,10 +849,11 @@ def mdp_points(self) -> pd.DataFrame:
        """
        if self._mdp_points.empty:
            target_rs = self.mean_return_range
-           df = pd.DataFrame(dtype="float")
+           rows_list = []  # Collect all rows to concatenate once at the end
            for x in target_rs:
                row = self.get_most_diversified_portfolio(target_return=x, monthly_return=True)
-               df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
+               rows_list.append(row)
+           df = pd.DataFrame.from_records(rows_list)
            df = helpers.Frame.change_columns_order(df, ["Risk", "Mean return", "CAGR"])
            self._mdp_points = df
        return self._mdp_points
@@ -918,27 +920,27 @@ def get_monte_carlo(self, n: int = 100, kind: str = "mean") -> pd.DataFrame:
        weights_series = helpers.Float.get_random_weights(n, self.assets_ror.shape[1], self.bounds)

        # Portfolio risk and return for each set of weights
-       random_portfolios = pd.DataFrame(dtype=float)
+       points_list = []  # Collect all points to create DataFrame once at the end
+       second_column = "Return" if kind == "mean" else "CAGR"
+       asset_labels = self.get_assets_tickers()
        for weights in weights_series:
            risk_monthly = helpers.Frame.get_portfolio_risk(weights, self.assets_ror)
            mean_return_monthly = helpers.Frame.get_portfolio_mean_return(weights, self.assets_ror)
            risk = helpers.Float.annualize_risk(risk_monthly, mean_return_monthly)
            mean_return = helpers.Float.annualize_return(mean_return_monthly)
-           second_column = "Return" if kind == "mean" else "CAGR"

-           asset_labels = self.get_assets_tickers()
            point = dict(zip(asset_labels, weights))
            point["Risk"] = risk
            if kind.lower() == "cagr":
                cagr = helpers.Float.approx_return_risk_adjusted(mean_return, risk)
                point["CAGR"] = cagr
-
            elif kind.lower() == "mean":
                point["Return"] = mean_return
            else:
                raise ValueError('kind should be "mean" or "cagr"')
-           random_portfolios = pd.concat([random_portfolios, pd.DataFrame(point, index=[0])], ignore_index=True)
-       random_portfolios = helpers.Frame.change_columns_order(random_portfolios, ["Risk", second_column])
+           points_list.append(point)
+       random_portfolios = pd.DataFrame.from_records(points_list)
+       random_portfolios = helpers.Frame.change_columns_order(random_portfolios, ["Risk", second_column])
        return random_portfolios

    def plot_transition_map(self, x_axe: str = "risk", figsize: Optional[tuple] = None) -> Axes:
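In get_monte_carlo the refactoring also hoists the loop invariants: second_column and asset_labels = self.get_assets_tickers() are now computed once before the loop instead of on every random draw. Each draw contributes one dict with the asset weights plus "Risk" and the return column, and from_records turns the collected dicts into one row per portfolio. A rough sketch with synthetic weights (Dirichlet draws stand in for helpers.Float.get_random_weights, and the risk/return numbers are placeholders):

import numpy as np
import pandas as pd

asset_labels = ["SPY.US", "AGG.US", "GLD.US"]  # stand-in for self.get_assets_tickers()
rng = np.random.default_rng(0)
points_list = []
for weights in rng.dirichlet(np.ones(len(asset_labels)), size=5):
    point = dict(zip(asset_labels, weights))   # one column per asset weight
    point["Risk"] = float(weights.std())       # placeholder risk figure
    point["Return"] = float(weights.mean())    # placeholder return figure
    points_list.append(point)
random_portfolios = pd.DataFrame.from_records(points_list)
print(random_portfolios.head())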

okama/macro.py

Lines changed: 15 additions & 16 deletions
@@ -141,7 +141,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        DataFrame
            Table of descriptive statistics for a list of assets.
        """
-       description = pd.DataFrame()
+       all_rows = []  # Collect all rows to concatenate once at the end
        dt0 = self.last_date
        df = self.values_monthly
        # YTD properties
@@ -160,8 +160,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        row4 = {self.symbol: min_value.iloc[0]}
        row4.update(period=min_value.index.values[0].strftime("%Y-%m"), property="min value")

-       rows_df = pd.DataFrame.from_records([row1, row2, row3, row4], index=[0, 1, 2, 3])
-       description = pd.concat([description, rows_df], ignore_index=True)
+       all_rows.extend([row1, row2, row3, row4])
        # properties for a given list of periods
        for i in years:
            dt = helpers.Date.subtract_years(dt0, i)
@@ -191,8 +190,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
            row3.update(property="max value")
            row4.update(property="min value")

-           new_rows = pd.DataFrame.from_records([row1, row2, row3, row4], index=[0, 1, 2, 3])
-           description = pd.concat([description, new_rows], ignore_index=True)
+           all_rows.extend([row1, row2, row3, row4])
        # Full period
        # Arithmetic mean
        row0 = {self.symbol: df.mean()}
@@ -211,8 +209,9 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        min_value = df.nsmallest(n=1)
        row3 = {self.symbol: min_value.iloc[0]}
        row3.update(period=min_value.index.values[0].strftime("%Y-%m"), property="min value")
-       new_rows = pd.DataFrame.from_records([row0, row1, row2, row3], index=[0, 1, 2, 3])
-       description = pd.concat([description, new_rows], ignore_index=True)
+       all_rows.extend([row0, row1, row2, row3])
+       # Concatenate all rows at once (more efficient than repeated pd.concat in loop)
+       description = pd.DataFrame.from_records(all_rows)
        return helpers.Frame.change_columns_order(description, ["property", "period"], position="first")

@@ -393,7 +392,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        16 max 12m inflation 1920-06 0.236888
        17 1000 purchasing power 109 years, 3 months 33.875745
        """
-       description = pd.DataFrame()
+       all_rows = []  # Collect all rows to concatenate once at the end
        dt0 = self.last_date
        df = self.values_monthly
        # YTD inflation properties
@@ -405,8 +404,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:

        row2 = {self.symbol: helpers.Float.get_purchasing_power(inflation)}
        row2.update(period="YTD", property="1000 purchasing power")
-       rows_df = pd.DataFrame.from_records([row1, row2], index=[0, 1])
-       description = pd.concat([description, rows_df], ignore_index=True)
+       all_rows.extend([row1, row2])

        # inflation properties for a given list of periods
        for i in years:
@@ -442,31 +440,32 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:

            row4.update(period=f"{i} years", property="1000 purchasing power")

-           df_rows = pd.DataFrame.from_records([row1, row2, row3, row4], index=[0, 1, 2, 3])
-           description = pd.concat([description, df_rows], ignore_index=True)
+           all_rows.extend([row1, row2, row3, row4])
        # Annual inflation for full period available
        ts = df
        full_inflation = helpers.Frame.get_cagr(ts)
        row = {self.symbol: full_inflation}
        row.update(period=self._pl_txt, property="annual inflation")
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
        # compound inflation
        comp_inflation = helpers.Frame.get_cumulative_return(ts)
        row = {self.symbol: comp_inflation}
        row.update(period=self._pl_txt, property="compound inflation")
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
        # max inflation for full period available
        max_inflation = self.rolling_inflation.nlargest(n=1)
        row = {self.symbol: max_inflation.iloc[0]}
        row.update(
            period=max_inflation.index.values[0].strftime("%Y-%m"),
            property="max 12m inflation",
        )
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
        # purchase power
        row = {self.symbol: helpers.Float.get_purchasing_power(comp_inflation)}
        row.update(period=self._pl_txt, property="1000 purchasing power")
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
+       # Concatenate all rows at once (more efficient than repeated pd.concat in loop)
+       description = pd.DataFrame.from_records(all_rows)
        return helpers.Frame.change_columns_order(description, ["property", "period"], position="first")
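macro.py applies the same pattern with heterogeneous rows: each statistic becomes a small dict holding the symbol's value plus "period" and "property" labels, extend() adds a batch of them, append() adds a single one, and from_records builds the table once at the end. A compact sketch with invented numbers (the real code reorders columns via helpers.Frame.change_columns_order):

import pandas as pd

all_rows = []
all_rows.extend([
    {"USD.INFL": 0.021, "period": "YTD", "property": "compound inflation"},
    {"USD.INFL": 979.4, "period": "YTD", "property": "1000 purchasing power"},
])
all_rows.append({"USD.INFL": 0.029, "period": "5 years", "property": "annual inflation"})
description = pd.DataFrame.from_records(all_rows)
# Put the label columns first, mirroring what change_columns_order does in okama.
description = description[["property", "period", "USD.INFL"]]
print(description)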