
Commit a0e4030

refactor: remove pandas.concat from cycles
1 parent e84d398 commit a0e4030

8 files changed: +89 additions, -78 deletions
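The changes follow one pattern throughout: instead of calling pd.concat inside a loop, which re-copies everything accumulated so far on every iteration (quadratic work overall), rows or series are collected in a plain Python list or dict and combined in a single call after the loop. A minimal sketch of the before/after shape, with illustrative names that are not taken from the okama code:

import pandas as pd

def build_slow(rows):
    # Before: each iteration copies the whole accumulated frame again.
    df = pd.DataFrame()
    for row in rows:  # row is a dict of column -> scalar
        df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
    return df

def build_fast(rows):
    # After: collect first, build the DataFrame once.
    return pd.DataFrame.from_records(list(rows))

sample = [{"Risk": 0.12, "CAGR": 0.07}, {"Risk": 0.15, "CAGR": 0.09}]
assert build_slow(sample).equals(build_fast(sample))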

okama/asset_list.py

Lines changed: 7 additions & 6 deletions
@@ -425,7 +425,7 @@ def recovery_periods(self) -> pd.Series:
        """
        cummax = self.wealth_indexes.cummax()
        growth = cummax.pct_change()[1:]
-       max_recovery_periods = pd.Series(dtype=int)
+       recovery_data = {}  # Collect data to create Series once at the end
        for name in self.symbols:
            namespace = name.split(".", 1)[-1]
            if namespace == "INFL":
@@ -436,9 +436,9 @@ def recovery_periods(self) -> pd.Series:
            s2 = s1.groupby(s1_1).cumsum()
            # Max recovery period date should not be in the border (it's not recovered)
            max_period = s2.max() if s2.idxmax().to_timestamp() != self.last_date else np.nan
-           ser = pd.Series(max_period, index=[name])
-           max_recovery_periods = pd.concat([max_recovery_periods, ser])
-       return max_recovery_periods
+           recovery_data[name] = max_period
+       # Use Int64 (nullable integer) to support NaN values
+       return pd.Series(recovery_data, dtype="Int64")

    def get_cagr(self, period: Optional[int] = None, real: bool = False) -> pd.Series:
        """
@@ -1261,12 +1261,13 @@ def tracking_difference_annual(self) -> pd.DataFrame:
        >>> al = ok.AssetList(['SP500TR.INDX', 'VOO.US', 'SPXS.LSE'], inflation=False)
        >>> al.tracking_difference_annual.plot(kind='bar')
        """
-       result = pd.DataFrame()
+       rows_list = []  # Collect all rows to concatenate once at the end
        for x in self.assets_ror.resample("Y"):
            df = x[1]
            wealth_index = helpers.Frame.get_wealth_indexes(df)
            row = helpers.Index.tracking_difference(wealth_index).iloc[[-1]]
-           result = pd.concat([result, row], ignore_index=False)
+           rows_list.append(row)
+       result = pd.concat(rows_list, ignore_index=False)
        result.index = result.index.asfreq("Y")
        return result
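In recovery_periods the accumulator is a plain dict keyed by ticker, converted to a Series in one call; the nullable "Int64" dtype keeps whole-number periods readable while still allowing a missing value for assets that have not recovered by the last date. A small sketch of that conversion (tickers and values are made up):

import numpy as np
import pandas as pd

# months to recover; NaN marks an asset that has not recovered yet
recovery_data = {"SPY.US": 14, "GLD.US": np.nan, "AGG.US": 27}
periods = pd.Series(recovery_data, dtype="Int64")
print(periods)  # integers stay integers, the missing value shows as <NA>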

okama/common/helpers/helpers.py

Lines changed: 3 additions & 2 deletions
@@ -601,7 +601,7 @@ def rolling_fn(df: pd.DataFrame, window: int, fn: Callable, window_below_year: b
        The window should be in months.
        """
        check_rolling_window(window=window, ror=df, window_below_year=window_below_year)
-       output = pd.DataFrame()
+       results_list = []  # Collect all results to concatenate once at the end
        for start_date in df.index:
            end_date = start_date + window
            df_window = df.loc[start_date:end_date, :]
@@ -610,5 +610,6 @@ def rolling_fn(df: pd.DataFrame, window: int, fn: Callable, window_below_year: b
            if period_length.n < window:
                break
            windows_result = fn(df_window).iloc[-1, :]
-           output = pd.concat([output, windows_result.to_frame().T], copy=False)
+           results_list.append(windows_result.to_frame().T)
+       output = pd.concat(results_list, copy=False) if results_list else pd.DataFrame()
        return output
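The conditional at the end of rolling_fn matters because pd.concat raises ValueError ("No objects to concatenate") when given an empty list; falling back to an empty DataFrame preserves the old behaviour for the case where the loop never produces a full window. A tiny illustration:

import pandas as pd

results_list = []  # e.g. the data is shorter than the rolling window
output = pd.concat(results_list, copy=False) if results_list else pd.DataFrame()
print(output.empty)  # True, instead of "ValueError: No objects to concatenate"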

okama/common/make_asset_list.py

Lines changed: 27 additions & 18 deletions
@@ -150,7 +150,7 @@ def get_item(symbol):
        asset_obj_list = Parallel(n_jobs=-1, backend="threading")(delayed(get_item)(s) for s in ls)
        return {obj.symbol: obj for obj in asset_obj_list}

-   def _make_list(self, first_date, last_date) -> dict:
+   def _make_list(self, first_date: Optional[str], last_date: Optional[str]) -> dict:
        """
        Make an asset list from a list of symbols.
        """
@@ -164,54 +164,63 @@ def _make_list(self, first_date, last_date) -> dict:
        last_dates: Dict[str, pd.Timestamp] = {}
        names: Dict[str, str] = {}
        currencies: Dict[str, str] = {}
-       df = pd.DataFrame()
        input_first_date = pd.to_datetime(first_date) if first_date else None
        input_last_date = pd.to_datetime(last_date) if last_date else None
-       for i, asset_item in enumerate(self.asset_obj_dict.values()):
+
+       # Collect all rate of return series first, then concatenate once (more efficient)
+       ror_series_list: List[pd.Series] = []
+       for asset_item in self.asset_obj_dict.values():
            # get asset own first and last dates
            asset_own_first_date = asset_item.first_date
            asset_own_last_date = asset_item.last_date
-           if i == 0:  # required to use pd.concat below (df should not be empty).
-               df = self._make_ror(asset_item, base_currency_ticker)
-           else:
-               new = self._make_ror(asset_item, base_currency_ticker)
-               df = pd.concat([df, new], axis=1, join="inner", copy="false")
+
+           ror_series = self._make_ror(asset_item, base_currency_ticker)
+           ror_series_list.append(ror_series)
+
            # get asset first and last dates after adjusting to the currency
-           asset_first_date = df.index[0].to_timestamp()
-           asset_last_date = df.index[-1].to_timestamp()
+           asset_first_date = ror_series.index[0].to_timestamp()
+           asset_last_date = ror_series.index[-1].to_timestamp()
+
            # check first and last dates
-           fd = [asset_first_date, input_first_date]
-           ld = [asset_last_date, input_last_date]
-           fd_max = max(x for x in fd if x is not None)
-           ld_min = min(x for x in ld if x is not None)
+           fd_max = max(x for x in [asset_first_date, input_first_date] if x is not None)
+           ld_min = min(x for x in [asset_last_date, input_last_date] if x is not None)
            if helpers.Date.get_difference_in_months(ld_min, fd_max).n < 2:
                raise ShortPeriodLengthError(
-                   f"{asset_item.symbol} historical data period length is too short. " f"It must be at least 3 months."
+                   f"{asset_item.symbol} historical data period length is too short. It must be at least 3 months."
                )
+
            # append data to dictionaries
            currencies[asset_item.symbol] = asset_item.currency
            names[asset_item.symbol] = asset_item.name
            first_dates[asset_item.symbol] = asset_first_date
            last_dates[asset_item.symbol] = asset_last_date
            own_first_dates[asset_item.symbol] = asset_own_first_date
            own_last_dates[asset_item.symbol] = asset_own_last_date
+
+       # Concatenate all series at once (more efficient than repeated pd.concat in loop)
+       df = pd.concat(ror_series_list, axis=1, join="inner")
+
        first_dates[base_currency_ticker] = currency_first_date
        last_dates[base_currency_ticker] = currency_last_date
        own_last_dates[base_currency_ticker] = currency_last_date
        own_first_dates[base_currency_ticker] = currency_first_date
        currencies["asset list"] = base_currency_ticker
+
        # get first and last dates
        first_date_list = list(first_dates.values()) + [input_first_date]
        last_date_list = list(last_dates.values()) + [input_last_date]
        list_first_date = max(x for x in first_date_list if x is not None)
        list_last_date = min(x for x in last_date_list if x is not None)
-       # range of last and first dates not limeted by AssetList first_date & lastdate parameters
+
+       # range of last and first dates not limited by AssetList first_date & last_date parameters
        own_first_dates_sorted: list = sorted(own_first_dates.items(), key=lambda y: y[1])
        own_last_dates_sorted: list = sorted(own_last_dates.items(), key=lambda y: y[1])
+
        if isinstance(df, pd.Series):
            # required to convert Series to DataFrame for single asset list
            df = df.to_frame()
        df.columns.name = "Symbols"  # required for Plotly charts
+
        return dict(
            first_date=list_first_date,
            last_date=list_last_date,
@@ -244,7 +253,7 @@ def _adjust_ror_to_currency(cls, returns: pd.Series, asset_currency: asset.Asset
        asset_mult = returns + 1.0
        currency_mult = asset_currency.ror + 1.0
        # join dataframes to have the same Time Series Index
-       df = pd.concat([asset_mult, currency_mult], axis=1, join="inner", copy="false")
+       df = pd.concat([asset_mult, currency_mult], axis=1, join="inner")
        currency_mult = df.iloc[:, -1]
        asset_mult = df.iloc[:, 0]
        x = asset_mult * currency_mult - 1.0
@@ -272,7 +281,7 @@ def _add_inflation(self) -> pd.DataFrame:
        Add inflation column to returns DataFrame.
        """
        if hasattr(self, "inflation"):
-           return pd.concat([self._assets_ror, self.inflation_ts], axis=1, join="inner", copy="false")
+           return pd.concat([self._assets_ror, self.inflation_ts], axis=1, join="inner")
        else:
            return self._assets_ror
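Here the per-asset rate-of-return series are gathered in a list and inner-joined with one pd.concat(..., axis=1, join="inner"), so the resulting frame covers only the periods common to every asset. Note also that the removed calls passed copy="false", a non-empty string, which is truthy, so the old code never actually disabled copying; dropping the argument changes nothing. A sketch under made-up tickers and dates:

import pandas as pd

idx = pd.period_range("2020-01", periods=4, freq="M")
ror_series_list = [
    pd.Series([0.010, 0.020, -0.010, 0.030], index=idx, name="SPY.US"),
    pd.Series([0.002, 0.004, 0.001], index=idx[1:], name="AGG.US"),  # shorter history
]
# Inner join keeps only 2020-02..2020-04, the months present in both series.
df = pd.concat(ror_series_list, axis=1, join="inner")
print(df)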

okama/frontier/multi_period.py

Lines changed: 6 additions & 4 deletions
@@ -1201,10 +1201,11 @@ def mdp_points(self) -> pd.DataFrame:
        """
        if self._mdp_points.empty:
            target_cagrs = self._target_cagr_range_left
-           df = pd.DataFrame(dtype="float")
+           rows_list = []  # Collect all rows to concatenate once at the end
            for x in target_cagrs:
                row = self.get_most_diversified_portfolio(target_return=x)
-               df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
+               rows_list.append(row)
+           df = pd.DataFrame.from_records(rows_list)
            df = helpers.Frame.change_columns_order(df, ["Risk", "CAGR"])
            self._mdp_points = df
        return self._mdp_points
@@ -1275,14 +1276,15 @@ def get_monte_carlo(self, n: int = 100) -> pd.DataFrame:
            Rebalance(**args).return_ror_ts_ef,
            ror=self.assets_ror,
        )
-       random_portfolios = pd.DataFrame()
+       rows_list = []  # Collect all rows to create DataFrame once at the end
        for _, data in portfolios_ror.iterrows():
            risk_monthly = data.std()
            mean_return = data.mean()
            risk = helpers.Float.annualize_risk(risk_monthly, mean_return)
            cagr = helpers.Frame.get_cagr(data)
            row = {"Risk": risk, "CAGR": cagr}
-           random_portfolios = pd.concat([random_portfolios, pd.DataFrame(row, index=[0])], ignore_index=True)
+           rows_list.append(row)
+       random_portfolios = pd.DataFrame.from_records(rows_list)
        return random_portfolios

    def plot_pair_ef(self, tickers="tickers", figsize: Optional[tuple] = None) -> Axes:
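Both loops now append plain rows to a list and build the frame once with pd.DataFrame.from_records. For a list of dicts, from_records takes the union of the keys as columns and fills anything missing with NaN, which matches what the old pd.concat(..., ignore_index=True) produced row by row. A short sketch with placeholder values (get_most_diversified_portfolio is assumed to return a dict of scalars, as pd.DataFrame(row, index=[0]) in the old code implies):

import pandas as pd

rows_list = [
    {"SPY.US": 0.6, "AGG.US": 0.4, "Risk": 0.11, "CAGR": 0.065},
    {"SPY.US": 0.5, "AGG.US": 0.5, "Risk": 0.10, "CAGR": 0.060},
]
df = pd.DataFrame.from_records(rows_list)
# Mimic helpers.Frame.change_columns_order by putting Risk and CAGR first.
df = df[["Risk", "CAGR"] + [c for c in df.columns if c not in ("Risk", "CAGR")]]
print(df)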

okama/frontier/single_period.py

Lines changed: 12 additions & 10 deletions
@@ -777,10 +777,11 @@ def ef_points(self) -> pd.DataFrame:
        """
        if self._ef_points.empty:
            target_rs = self.mean_return_range
-           df = pd.DataFrame(dtype="float")
+           rows_list = []  # Collect all rows to concatenate once at the end
            for x in target_rs:
                row = self.minimize_risk(x, monthly_return=True)
-               df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
+               rows_list.append(row)
+           df = pd.DataFrame.from_records(rows_list)
            df = helpers.Frame.change_columns_order(df, ["Risk", "Mean return", "CAGR"])
            self._ef_points = df
        return self._ef_points
@@ -848,10 +849,11 @@ def mdp_points(self) -> pd.DataFrame:
        """
        if self._mdp_points.empty:
            target_rs = self.mean_return_range
-           df = pd.DataFrame(dtype="float")
+           rows_list = []  # Collect all rows to concatenate once at the end
            for x in target_rs:
                row = self.get_most_diversified_portfolio(target_return=x, monthly_return=True)
-               df = pd.concat([df, pd.DataFrame(row, index=[0])], ignore_index=True)
+               rows_list.append(row)
+           df = pd.DataFrame.from_records(rows_list)
            df = helpers.Frame.change_columns_order(df, ["Risk", "Mean return", "CAGR"])
            self._mdp_points = df
        return self._mdp_points
@@ -918,27 +920,27 @@ def get_monte_carlo(self, n: int = 100, kind: str = "mean") -> pd.DataFrame:
        weights_series = helpers.Float.get_random_weights(n, self.assets_ror.shape[1], self.bounds)

        # Portfolio risk and return for each set of weights
-       random_portfolios = pd.DataFrame(dtype=float)
+       points_list = []  # Collect all points to create DataFrame once at the end
+       second_column = "Return" if kind == "mean" else "CAGR"
+       asset_labels = self.get_assets_tickers()
        for weights in weights_series:
            risk_monthly = helpers.Frame.get_portfolio_risk(weights, self.assets_ror)
            mean_return_monthly = helpers.Frame.get_portfolio_mean_return(weights, self.assets_ror)
            risk = helpers.Float.annualize_risk(risk_monthly, mean_return_monthly)
            mean_return = helpers.Float.annualize_return(mean_return_monthly)
-           second_column = "Return" if kind == "mean" else "CAGR"

-           asset_labels = self.get_assets_tickers()
            point = dict(zip(asset_labels, weights))
            point["Risk"] = risk
            if kind.lower() == "cagr":
                cagr = helpers.Float.approx_return_risk_adjusted(mean_return, risk)
                point["CAGR"] = cagr
-
            elif kind.lower() == "mean":
                point["Return"] = mean_return
            else:
                raise ValueError('kind should be "mean" or "cagr"')
-           random_portfolios = pd.concat([random_portfolios, pd.DataFrame(point, index=[0])], ignore_index=True)
-       random_portfolios = helpers.Frame.change_columns_order(random_portfolios, ["Risk", second_column])
+           points_list.append(point)
+       random_portfolios = pd.DataFrame.from_records(points_list)
+       random_portfolios = helpers.Frame.change_columns_order(random_portfolios, ["Risk", second_column])
        return random_portfolios

    def plot_transition_map(self, x_axe: str = "risk", figsize: Optional[tuple] = None) -> Axes:
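In get_monte_carlo the refactoring also hoists the loop invariants: second_column and asset_labels = self.get_assets_tickers() are now computed once before the loop instead of on every random draw. Each draw contributes one dict with the asset weights plus "Risk" and the return column, and from_records turns the collected dicts into one row per portfolio. A rough sketch with synthetic weights (Dirichlet draws stand in for helpers.Float.get_random_weights, and the risk/return numbers are placeholders):

import numpy as np
import pandas as pd

asset_labels = ["SPY.US", "AGG.US", "GLD.US"]  # stand-in for self.get_assets_tickers()
rng = np.random.default_rng(0)
points_list = []
for weights in rng.dirichlet(np.ones(len(asset_labels)), size=5):
    point = dict(zip(asset_labels, weights))   # one column per asset weight
    point["Risk"] = float(weights.std())       # placeholder risk figure
    point["Return"] = float(weights.mean())    # placeholder return figure
    points_list.append(point)
random_portfolios = pd.DataFrame.from_records(points_list)
print(random_portfolios.head())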

okama/macro.py

Lines changed: 15 additions & 16 deletions
@@ -141,7 +141,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        DataFrame
            Table of descriptive statistics for a list of assets.
        """
-       description = pd.DataFrame()
+       all_rows = []  # Collect all rows to concatenate once at the end
        dt0 = self.last_date
        df = self.values_monthly
        # YTD properties
@@ -160,8 +160,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        row4 = {self.symbol: min_value.iloc[0]}
        row4.update(period=min_value.index.values[0].strftime("%Y-%m"), property="min value")

-       rows_df = pd.DataFrame.from_records([row1, row2, row3, row4], index=[0, 1, 2, 3])
-       description = pd.concat([description, rows_df], ignore_index=True)
+       all_rows.extend([row1, row2, row3, row4])
        # properties for a given list of periods
        for i in years:
            dt = helpers.Date.subtract_years(dt0, i)
@@ -191,8 +190,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
            row3.update(property="max value")
            row4.update(property="min value")

-           new_rows = pd.DataFrame.from_records([row1, row2, row3, row4], index=[0, 1, 2, 3])
-           description = pd.concat([description, new_rows], ignore_index=True)
+           all_rows.extend([row1, row2, row3, row4])
        # Full period
        # Arithmetic mean
        row0 = {self.symbol: df.mean()}
@@ -211,8 +209,9 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        min_value = df.nsmallest(n=1)
        row3 = {self.symbol: min_value.iloc[0]}
        row3.update(period=min_value.index.values[0].strftime("%Y-%m"), property="min value")
-       new_rows = pd.DataFrame.from_records([row0, row1, row2, row3], index=[0, 1, 2, 3])
-       description = pd.concat([description, new_rows], ignore_index=True)
+       all_rows.extend([row0, row1, row2, row3])
+       # Concatenate all rows at once (more efficient than repeated pd.concat in loop)
+       description = pd.DataFrame.from_records(all_rows)
        return helpers.Frame.change_columns_order(description, ["property", "period"], position="first")

@@ -393,7 +392,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:
        16 max 12m inflation 1920-06 0.236888
        17 1000 purchasing power 109 years, 3 months 33.875745
        """
-       description = pd.DataFrame()
+       all_rows = []  # Collect all rows to concatenate once at the end
        dt0 = self.last_date
        df = self.values_monthly
        # YTD inflation properties
@@ -405,8 +404,7 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:

        row2 = {self.symbol: helpers.Float.get_purchasing_power(inflation)}
        row2.update(period="YTD", property="1000 purchasing power")
-       rows_df = pd.DataFrame.from_records([row1, row2], index=[0, 1])
-       description = pd.concat([description, rows_df], ignore_index=True)
+       all_rows.extend([row1, row2])

        # inflation properties for a given list of periods
        for i in years:
@@ -442,31 +440,32 @@ def describe(self, years: Tuple[int, ...] = (1, 5, 10)) -> pd.DataFrame:

            row4.update(period=f"{i} years", property="1000 purchasing power")

-           df_rows = pd.DataFrame.from_records([row1, row2, row3, row4], index=[0, 1, 2, 3])
-           description = pd.concat([description, df_rows], ignore_index=True)
+           all_rows.extend([row1, row2, row3, row4])
        # Annual inflation for full period available
        ts = df
        full_inflation = helpers.Frame.get_cagr(ts)
        row = {self.symbol: full_inflation}
        row.update(period=self._pl_txt, property="annual inflation")
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
        # compound inflation
        comp_inflation = helpers.Frame.get_cumulative_return(ts)
        row = {self.symbol: comp_inflation}
        row.update(period=self._pl_txt, property="compound inflation")
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
        # max inflation for full period available
        max_inflation = self.rolling_inflation.nlargest(n=1)
        row = {self.symbol: max_inflation.iloc[0]}
        row.update(
            period=max_inflation.index.values[0].strftime("%Y-%m"),
            property="max 12m inflation",
        )
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
        # purchase power
        row = {self.symbol: helpers.Float.get_purchasing_power(comp_inflation)}
        row.update(period=self._pl_txt, property="1000 purchasing power")
-       description = pd.concat([description, pd.DataFrame(row, index=[0])], ignore_index=True)
+       all_rows.append(row)
+       # Concatenate all rows at once (more efficient than repeated pd.concat in loop)
+       description = pd.DataFrame.from_records(all_rows)
        return helpers.Frame.change_columns_order(description, ["property", "period"], position="first")
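macro.py applies the same pattern with heterogeneous rows: each statistic becomes a small dict holding the symbol's value plus "period" and "property" labels, extend() adds a batch of them, append() adds a single one, and from_records builds the table once at the end. A compact sketch with invented numbers (the real code reorders columns via helpers.Frame.change_columns_order):

import pandas as pd

all_rows = []
all_rows.extend([
    {"USD.INFL": 0.021, "period": "YTD", "property": "compound inflation"},
    {"USD.INFL": 979.4, "period": "YTD", "property": "1000 purchasing power"},
])
all_rows.append({"USD.INFL": 0.029, "period": "5 years", "property": "annual inflation"})
description = pd.DataFrame.from_records(all_rows)
# Put the label columns first, mirroring what change_columns_order does in okama.
description = description[["property", "period", "USD.INFL"]]
print(description)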