1111from itertools import chain
1212from numbers import Real , Integral
1313from threading import Lock
14- from typing import List , TYPE_CHECKING
14+ from typing import List , TYPE_CHECKING , Union
1515
1616import bottleneck as bn
1717import numpy as np
@@ -208,8 +208,19 @@ def __init__(self, domain):
208208 setattr (self , v .name .replace (" " , "_" ), v )
209209
210210
211- class _ArrayConversion :
211+ def _compute_column (func , * args , ** kwargs ):
212+ col = func (* args , ** kwargs )
213+ if isinstance (col , np .ndarray ) and col .ndim != 1 :
214+ err = f"{ type (col )} must return a column, not { col .ndim } d array"
215+ if col .ndim == 2 :
216+ warnings .warn (err )
217+ col = col .reshape (- 1 )
218+ else :
219+ raise ValueError (err )
220+ return col
212221
222+
223+ class _ArrayConversion :
213224 def __init__ (self , target , src_cols , variables , is_sparse , source_domain ):
214225 self .target = target
215226 self .src_cols = src_cols
@@ -300,9 +311,9 @@ def get_columns(self, source, row_indices, n_rows, out=None, target_indices=None
300311 shared = col .compute_shared (sourceri )
301312 _idcache_save (shared_cache , (col .compute_shared , source ), shared )
302313 col_array = match_density (
303- col ( sourceri , shared_data = shared ))
314+ _compute_column ( col , sourceri , shared_data = shared ))
304315 else :
305- col_array = match_density (col ( sourceri ))
316+ col_array = match_density (_compute_column ( col , sourceri ))
306317 elif col < 0 :
307318 col_array = match_density (
308319 source .metas [row_indices , - 1 - col ]
@@ -1425,7 +1436,7 @@ def add_column(self, variable, data, to_metas=None):
14251436 domain = Domain (attrs , classes , metavars )
14261437 new_table = self .transform (domain )
14271438 with new_table .unlocked (new_table .metas if to_metas else new_table .X ):
1428- new_table .get_column_view (variable )[ 0 ][:] = data
1439+ new_table .set_column (variable , data )
14291440 return new_table
14301441
14311442 def is_sparse (self ):
@@ -1554,43 +1565,114 @@ def shuffle(self):
15541565 self .W = self .W [ind ]
15551566 self .ids = self .ids [ind ]
15561567
@deprecated("Table.get_column (or Table.set_column if you must)")
def get_column_view(self, index: Union[Integral, str, Variable]) -> tuple:
    """
    An obsolete function that was supposed to return a view with a column
    of the table, and a bool flag telling whether this column is sparse.

    The function *sometimes* returns a copy. This happens if the column is
    stored in a sparse matrix (it is converted to a dense array) or if
    values of a discrete attribute need to be remapped due to different
    encoding; a remapped column is marked as read-only. A warning is issued
    in both cases.

    Note that vertical slicing of sparse matrices is inefficient.

    :param index: the index of the column
    :type index: int, str or Orange.data.Variable
    :return: (one-dimensional numpy array, sparse)
    """
    if isinstance(index, Integral):
        col_index = index
    else:
        col_index = self.domain.index(index)
    col = self._get_column_view(col_index)

    sparse = sp.issparse(col)
    if sparse:
        # `index` below can be integer or a Variable
        warnings.warn("get_column_view is returning a dense copy column "
                      f"{index}")
        col = np.asarray(col.todense())[:, 0]

    if isinstance(index, DiscreteVariable) \
            and index.values != self.domain[col_index].values:
        # The caller's variable encodes values differently than the
        # variable in the domain, so the stored values are remapped.
        col = index.get_mapper_from(self.domain[col_index])(col)
        col.flags.writeable = False
        warnings.warn("get_column_view is returning a mapped copy of "
                      f"column {index.name}")
    return col, sparse
1604+
1605+ def _get_column_view (self , index : Integral ) -> np .ndarray :
1606+ if index >= 0 :
1607+ if index < self .X .shape [1 ]:
1608+ return self .X [:, index ]
1609+ elif self ._Y .ndim == 1 and index == self ._X .shape [1 ]:
1610+ return self ._Y
1611+ else :
1612+ return self ._Y [:, index - self .X .shape [1 ]]
1613+ else :
1614+ return self .metas [:, - 1 - index ]
1615+
def get_column(self, index, copy=False):
    """
    Return a column with values of `index`.

    If `index` is an instance of variable that does not exist in the domain
    but has `compute_value`, `get_column` calls `compute_value` and returns
    its result. Otherwise, it returns a view into the table unless `copy`
    is set to `True` or the data has to be converted: sparse columns are
    converted to dense arrays, columns of primitive variables stored as
    objects are converted to floats, and values of discrete variables with
    a different encoding are remapped.

    Args:
        index (int or str or Variable): attribute
        copy (bool): if set to True, ensure the result is a copy, not a view

    Returns:
        column (np.array): data column

    Raises:
        ValueError: if `index` is a variable that is not in the domain and
            has no `compute_value`
    """
    if isinstance(index, Variable) and index not in self.domain:
        if index.compute_value is None:
            raise ValueError(f"variable {index.name} is not in domain")
        return _compute_column(index.compute_value, self)

    mapper = None
    if not isinstance(index, Integral):
        if isinstance(index, DiscreteVariable) \
                and index.values != self.domain[index].values:
            mapper = index.get_mapper_from(self.domain[index])
        index = self.domain.index(index)

    stored = self._get_column_view(index)
    col = stored
    if sp.issparse(col):
        col = col.toarray().reshape(-1)
    if col.dtype == object and self.domain[index].is_primitive():
        col = col.astype(np.float64)
    if mapper is not None:
        col = mapper(col)
    # `_get_column_view` may hand out the table's own array (a 1d `_Y`),
    # whose `base` can be None; checking `base` alone would then return the
    # table's storage itself although a copy was requested.
    if copy and (col is stored or col.base is not None):
        col = col.copy()
    return col
15931653
def set_column(self, index: Union[int, str, Variable], data):
    """
    Set the values in the given column to `data`.

    This function may be useful, but try avoiding it.

    The table (or the corresponding part) must be unlocked. If the variable
    is discrete, its encoding must match the variable in the domain.

    NOTE(review): the values are written through the object returned by
    `_get_column_view`; if that is a slice of a sparse matrix rather than
    a numpy view, the assignment presumably does not reach the table --
    confirm before using this on sparse data.

    Args:
        index (int, str, Variable): index of a column
        data (object): a single value or 1d array of length len(self)

    Raises:
        ValueError: if `index` is a discrete variable whose values differ
            from those of the corresponding variable in the domain
    """
    if not isinstance(index, Integral):
        if isinstance(index, DiscreteVariable):
            in_domain = self.domain[index]
            if in_domain.values != index.values:
                raise ValueError(f"cannot set data for variable {index.name} "
                                 "with different encoding")
        index = self.domain.index(index)
    target = self._get_column_view(index)
    target[:] = data
1675+
15941676 def _filter_is_defined (self , columns = None , negate = False ):
15951677 # structure of function is obvious; pylint: disable=too-many-branches
15961678 def _sp_anynan (a ):
@@ -1620,10 +1702,8 @@ def _sp_anynan(a):
16201702 else :
16211703 remove = np .zeros (len (self ), dtype = bool )
16221704 for column in columns :
1623- col , sparse = self .get_column_view (column )
1624- if sparse :
1625- remove += col == 0
1626- elif self .domain [column ].is_primitive ():
1705+ col = self .get_column (column )
1706+ if self .domain [column ].is_primitive ():
16271707 remove += bn .anynan ([col .astype (float )], axis = 0 )
16281708 else :
16291709 remove += col .astype (bool )
@@ -1650,7 +1730,7 @@ def _filter_has_class(self, negate=False):
16501730 def _filter_same_value (self , column , value , negate = False ):
16511731 if not isinstance (value , Real ):
16521732 value = self .domain [column ].to_val (value )
1653- sel = self .get_column_view (column )[ 0 ] == value
1733+ sel = self .get_column (column ) == value
16541734 if negate :
16551735 sel = np .logical_not (sel )
16561736 return self .from_table_rows (self , sel )
@@ -1736,7 +1816,7 @@ def get_col_indices():
17361816 raise TypeError ("Invalid filter" )
17371817
17381818 def col_filter (col_idx ):
1739- col = self .get_column_view (col_idx )[ 0 ]
1819+ col = self .get_column (col_idx )
17401820 if isinstance (filter , IsDefined ):
17411821 if self .domain [col_idx ].is_primitive ():
17421822 return ~ np .isnan (col .astype (float ))
0 commit comments