pysal · rongboxu · Jun 14, 2023 · Jun 20, 2023 · Aug 7, 2023 · Aug 7, 2023
diff --git a/spopt/locate/p_median.py b/spopt/locate/p_median.py
@@ -592,7 +592,7 @@ class KNearestPMedian(PMedian):
     capacities : np.array or None
         An array of facility capacities. None if capacity constraints are
         not considered.
-    k_list : np.array
+    k_array : np.array
         An array of k values representing the number of nearest facilities
         for each client.
     distance_metric : str
@@ -628,7 +628,7 @@ def __init__(
         clients: np.array,
         facilities: np.array,
         weights: np.array,
-        k_list: np.array,
+        k_array: np.array,
         p_facilities: int,
         capacities: np.array = None,
         distance_metric: str = "euclidean",
@@ -638,7 +638,7 @@ def __init__(
         self.clients = clients
         self.facilities = facilities
         self.weights = weights
-        self.k_list = k_list
+        self.k_array = k_array
         self.p_facilities = p_facilities
         self.capacities = capacities
         self.distance_metric = distance_metric
@@ -713,12 +713,11 @@ def _create_sparse_matrix(self) -> None:
         column_shape = len(self.facilities)
 
         # check the k value with the total number of facilities
-        for k in self.k_list:
-            if k > column_shape:
-                raise ValueError(
-                    f"The value of k should be no more than the number of total"
-                    f"facilities ({column_shape})."
-                )
+        if not (self.k_array <= column_shape).all():
+            raise ValueError(
+                f"The value of k should be no more than the number of total "
+                f"facilities ({column_shape})."
+            )
 
         # Initialize empty lists to store the data for the sparse matrix
         data = []
@@ -728,7 +727,7 @@ def _create_sparse_matrix(self) -> None:
         # create the suitable Tree
         tree = build_best_tree(self.facilities, self.distance_metric)
 
-        for i, k in enumerate(self.k_list):
+        for i, k in enumerate(self.k_array):
             # Query the Tree to find the k nearest facilities for each client
             distances, k_nearest_facilities_indices = tree.query([self.clients[i]], k=k)
 
@@ -746,26 +745,26 @@ def _create_sparse_matrix(self) -> None:
             (data, (row_index, col_index)), shape=(row_shape, column_shape)
         )
 
-    def _update_k_list(self) -> None:
+    def _update_k_array(self) -> None:
         """
-        Increase the k value for clients with any g_i > 0 and update the k list.
+        Increase the k value for clients with any g_i > 0 and update the k array.
 
         This method is used to adjust the k values for clients based on their
         placeholder variable g_i. For clients with g_i greater than 0, the
-        corresponding k value is increased by 1 in the new k list.
+        corresponding k value is increased by 1 in the new k array.
 
         Returns
         -------
 
         None
         """
 
-        new_k_list = self.k_list.copy()
+        new_k_array = self.k_array.copy()
         placeholder_vars = getattr(self, "placeholder_vars")
         for i in range(len(placeholder_vars)):
             if placeholder_vars[i].value() > 0:
-                new_k_list[i] = new_k_list[i] + 1
-        self.k_list = new_k_list
+                new_k_array[i] = new_k_array[i] + 1
+        self.k_array = new_k_array
 
     def _from_sparse_matrix(self) -> None:
         """
@@ -849,7 +848,7 @@ def from_geodataframe(
         weights_cols: str,
         p_facilities: int,
         facility_capacity_col: str = None,
-        k_list: np.array = None,
+        k_array: np.array = None,
         distance_metric: str = "euclidean",
         name: str = "k-nearest-p-median",
     ):
@@ -873,8 +872,8 @@ def from_geodataframe(
         facility_capacity_col : str, optional
             The column name in gdf_fac representing the capacity of each facility,
             by default None.
-        k_list : np.array, optional
-            An array of integers representing the list of k values for each client.
+        k_array : np.array, optional
+            An array of integers representing the k values for each client.
             If not provided, a default value of 5 or the number of facilities,
             whichever is smaller, will be used.
         distance_metric : str, optional
@@ -895,7 +894,7 @@ def from_geodataframe(
 
         Create the input data and attributes.
 
-        >>> k_list = np.array([1, 1])
+        >>> k = np.array([1, 1])
         >>> demand_data = {
         ...    'ID': [1, 2],
         ...    'geometry': [Point(0.5, 1), Point(1.5, 1)],
@@ -911,7 +910,7 @@ def from_geodataframe(
 
         >>> k_nearest_pmedian = KNearestPMedian.from_geodataframe(
         ...     gdf_demand, gdf_fac,'geometry','geometry', weights_cols='demand',
-        ...     2, facility_capacity_col='capacity', k_list = k_list)
+        ...     2, facility_capacity_col='capacity', k_array = k)
         >>> k_nearest_pmedian = k_nearest_pmedian.solve(pulp.PULP_CBC_CMD(msg=False))
 
         Get the facility-client associations.
@@ -929,8 +928,8 @@ def from_geodataframe(
         >>> round(k_nearest_pmedian.mean_dist, 3)
         0.809
 
-        Get the k list for the last iteration.
-        >>> print(k_nearest_pmedian.k_list)
+        Get the k values for the last iteration.
+        >>> print(k_nearest_pmedian.k_array)
         [2, 1]
 
         """
@@ -952,15 +951,16 @@ def from_geodataframe(
         dem_data = np.array([dem.x.to_numpy(), dem.y.to_numpy()]).T
         fac_data = np.array([fac.x.to_numpy(), fac.y.to_numpy()]).T
 
-        # check the values of k_list
-        if k_list is None:
-            k_list = np.full(len(dem_data), np.minimum(len(fac_data), 5))
-        else:
-            if not (k_list <= len(fac_data)).all():
-                raise ValueError(
-                    f"The value of k should be no more than the number of total"
-                    f"facilities ({len(fac_data)})."
-                )
+        # check the values of k_array
+        if k_array is None:
+            k_array = np.full(len(dem_data), np.minimum(len(fac_data), 5))
+        elif not isinstance(k_array, np.ndarray):
+            raise TypeError("k_array should be a numpy array.")
+        elif not (k_array <= len(fac_data)).all():
+            raise ValueError(
+                f"The value of k should be no more than the number of total "
+                f"facilities, which is {len(fac_data)}."
+            )
 
         # demand and capacity
         service_load = gdf_demand[weights_cols].to_numpy()
@@ -974,7 +974,7 @@ def from_geodataframe(
             dem_data,
             fac_data,
             service_load,
-            k_list,
+            k_array,
             p_facilities,
             facility_capacities,
             distance_metric,
@@ -1052,7 +1052,7 @@ def solve(self, solver: pulp.LpSolver, results: bool = True):
                 if placeholder_vars[i].value() > 0
             )
             if sum_gi > 0:
-                self._update_k_list()
+                self._update_k_array()
 
         if results:
             self.facility_client_array()

diff --git a/spopt/tests/test_knearest_p_median.py b/spopt/tests/test_knearest_p_median.py
@@ -15,19 +15,19 @@
 class TestKNearestPMedian:
     def setup_method(self) -> None:
         # Create the test data
-        k_list = numpy.array([1, 1])
-        demand_data = {
+        k = numpy.array([1, 1])
+        self.demand_data = {
             "ID": [1, 2],
             "geometry": [Point(0.5, 1), Point(1.5, 1)],
             "demand": [1, 1],
         }
-        facility_data = {
+        self.facility_data = {
             "ID": [101, 102, 103],
             "geometry": [Point(1, 1), Point(0, 2), Point(2, 0)],
             "capacity": [1, 1, 1],
         }
-        gdf_demand = geopandas.GeoDataFrame(demand_data, crs="EPSG:4326")
-        gdf_fac = geopandas.GeoDataFrame(facility_data, crs="EPSG:4326")
+        gdf_demand = geopandas.GeoDataFrame(self.demand_data, crs="EPSG:4326")
+        gdf_fac = geopandas.GeoDataFrame(self.facility_data, crs="EPSG:4326")
         self.k_nearest_pmedian = KNearestPMedian.from_geodataframe(
             gdf_demand,
             gdf_fac,
@@ -36,7 +36,7 @@ def setup_method(self) -> None:
             "demand",
             p_facilities=2,
             facility_capacity_col="capacity",
-            k_list=k_list,
+            k_array=k,
         )
 
     def test_knearest_p_median_from_geodataframe(self):
@@ -67,3 +67,55 @@ def test_solve(self):
         assert self.k_nearest_pmedian.fac2cli == fac2cli_known
         assert self.k_nearest_pmedian.cli2fac == cli2fac_known
         assert self.k_nearest_pmedian.mean_dist == mean_dist_known
+
+    def test_error_k_array_non_numpy_array(self):
+        """A plain list passed as ``k_array`` must be rejected with TypeError."""
+        gdf_demand = geopandas.GeoDataFrame(self.demand_data, crs="EPSG:4326")
+        gdf_fac = geopandas.GeoDataFrame(self.facility_data, crs="EPSG:4326")
+        with pytest.raises(TypeError, match="k_array should be a numpy array"):
+            KNearestPMedian.from_geodataframe(
+                gdf_demand,
+                gdf_fac,
+                "geometry",
+                "geometry",
+                "demand",
+                p_facilities=2,
+                facility_capacity_col="capacity",
+                k_array=[1, 1],
+            )
+
+    def test_error_k_array_invalid_value(self):
+        """A k larger than the number of facilities must raise ValueError."""
+        gdf_demand = geopandas.GeoDataFrame(self.demand_data, crs="EPSG:4326")
+        gdf_fac = geopandas.GeoDataFrame(self.facility_data, crs="EPSG:4326")
+        k = numpy.array([1, 4])  # 4 exceeds the 3 facilities in facility_data
+        with pytest.raises(ValueError, match="should be no more than"):
+            KNearestPMedian.from_geodataframe(
+                gdf_demand,
+                gdf_fac,
+                "geometry",
+                "geometry",
+                "demand",
+                p_facilities=2,
+                facility_capacity_col="capacity",
+                k_array=k,
+            )
+
+    def test_error_geodataframe_crs_mismatch(self):
+        """Expect ValueError when the two GeoDataFrames use different CRSs."""
+        # The frames deliberately disagree: EPSG:4326 versus EPSG:3857.
+        clients = geopandas.GeoDataFrame(self.demand_data, crs="EPSG:4326")
+        sites = geopandas.GeoDataFrame(self.facility_data, crs="EPSG:3857")
+        geom_col = "geometry"
+
+        with pytest.raises(ValueError):
+            KNearestPMedian.from_geodataframe(
+                clients,
+                sites,
+                geom_col,
+                geom_col,
+                "demand",
+                p_facilities=2,
+                facility_capacity_col="capacity",
+                k_array=numpy.array([1, 1]),
+            )