Skip to content

Commit ae3326d

Browse files
committed
tests and documentation for API set/reset values
1 parent 8b28348 commit ae3326d

File tree

3 files changed

+87
-20
lines changed

3 files changed

+87
-20
lines changed

gbd_core/api.py

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -66,22 +66,42 @@ def query(self, gbd_query=None, hashes=[], resolve=[], collapse="group_concat",
6666
return pd.DataFrame(result, columns=[ group_by ] + (resolve or []))
6767

6868

69-
def set_values(self, feature, value, hashes):
70-
if not self.feature_exists(feature):
71-
raise GBDException("Feature '{}' does not exist".format(feature))
72-
try:
73-
self.database.set_values(feature, value, hashes)
74-
except Exception as err:
75-
raise GBDException(str(err))
69+
def set_values(self, name, value, hashes, target_db=None):
70+
""" Set feature value for given hashes
71+
72+
Args:
73+
name (str): feature name
74+
value (str): value to be set
75+
hashes (list): list of hashes (=benchmark ids)
76+
target_db (str, optional): name of target database
77+
if None, default database (first in list) is used
78+
79+
Raises:
80+
GBDException, if feature does not exist
81+
"""
82+
if not self.feature_exists(name, target_db):
83+
raise GBDException("Feature '{}' does not exist".format(name))
84+
self.database.set_values(name, value, hashes, target_db)
7685

7786

78-
# Remove the attribute value for the given hashes
79-
def reset_values(self, feature, values=[], hashes=[]):
80-
if not self.feature_exists(feature):
87+
def reset_values(self, feature, values=[], hashes=[], target_db=None):
88+
""" Reset feature value for given hashes
89+
90+
Args:
91+
feature (str): feature name
92+
values (list, optional): list of values to be reset
93+
hashes (list, optional): list of hashes (=benchmark ids) to be reset
94+
target_db (str, optional): name of target database
95+
if None, default database (first in list) is used
96+
97+
Raises:
98+
GBDException, if feature does not exist
99+
"""
100+
if not self.feature_exists(feature, target_db):
81101
raise GBDException("Feature '{}' does not exist".format(feature))
82-
for values_slice in util.slice_iterator(values, 1000):
83-
for hashes_slice in util.slice_iterator(hashes, 1000):
84-
self.database.delete_values(feature, values_slice, hashes_slice)
102+
for values_slice in util.slice_iterator(values, 10):
103+
for hashes_slice in util.slice_iterator(hashes, 10):
104+
self.database.delete(feature, values_slice, hashes_slice, target_db)
85105

86106

87107
def get_databases(self):

gbd_core/database.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def create_feature(self, name, default_value=None, target_db=None, permissive=Fa
175175

176176

177177
def set_values(self, fname, value, hashes, target_db=None):
178-
finfo = self.finfo(fname)
178+
finfo = self.finfo(fname, target_db)
179179
self.schemas[finfo.database].set_values(fname, value, hashes)
180180

181181

@@ -214,15 +214,15 @@ def delete_feature(self, fname, target_db=None):
214214

215215
def delete(self, fname, values=[], hashes=[], target_db=None):
216216
finfo = self.finfo(fname, target_db)
217-
w1 = "{col} IN ('{v}')".format(col=finfo.column, v="', '".join(values)) if len(values) else "1=1"
218-
w2 = "hash IN ('{h}')".format(h="', '".join(hashes)) if len(hashes) else "1=1"
219-
where = "{} AND {}".format(w1, w2)
217+
w1 = "{cl} IN ('{v}')".format(cl=finfo.column, v="', '".join(values))
218+
w2 = "hash IN ('{h}')".format(h="', '".join(hashes))
219+
where = "{} AND {}".format(w1 if len(values) else "1=1", w2 if len(hashes) else "1=1")
220220
db = finfo.database
221221
if finfo.default is None:
222222
hashlist = [ r[0] for r in self.query("SELECT DISTINCT(hash) FROM {d}.{tab} WHERE {w}".format(d=db, tab=fname, w=where)) ]
223223
self.execute("DELETE FROM {d}.{tab} WHERE {w}".format(d=db, tab=fname, w=where))
224224
remaining = [ r[0] for r in self.query("SELECT DISTINCT(hash) FROM {d}.{tab} WHERE hash in ('{h}')".format(d=db, tab=fname, h="', '".join(hashlist))) ]
225225
setnone = [ h for h in hashlist if not h in remaining ]
226-
self.execute("UPDATE {d}.{tab} SET {col} = 'None' WHERE hash IN ('{h}')".format(d=db, tab="features", col=fname, h="', '".join(setnone)))
226+
self.execute("UPDATE {d}.features SET {col} = 'None' WHERE hash IN ('{h}')".format(d=db, col=fname, h="', '".join(setnone)))
227227
else:
228-
self.execute("UPDATE {d}.{tab} SET {col} = '{default}' WHERE {w}".format(d=db, tab="features", col=fname, default=finfo.default, w=where))
228+
self.execute("UPDATE {d}.features SET {col} = '{default}' WHERE {w}".format(d=db, col=fname, default=finfo.default, w=where))

tests/test_api.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,51 @@ def test_rename_feature(self):
6060
self.assertTrue(self.api.feature_exists("A", self.name1))
6161
self.assertTrue(self.api.feature_exists("B", self.name1))
6262
with self.assertRaises(GBDException):
63-
self.api.rename_feature("A", "B", self.name1)
63+
self.api.rename_feature("A", "B", self.name1)
64+
65+
def test_set_values(self):
66+
self.api.create_feature("A", None, self.name1) # feature is multi-valued
67+
self.api.create_feature("B", "empty", self.name1) # feature has default value
68+
self.api.create_feature("A", "empty", self.name2) # shadowed feature
69+
# value1 (set values, default values emerge)
70+
self.api.set_values("A", "value1", [ str(i) for i in range(100) ], self.name1)
71+
df = self.api.query("A = value1", resolve=["A", "B"])
72+
self.assertCountEqual(df['hash'].tolist(), [ str(i) for i in range(100) ])
73+
self.assertCountEqual(df['A'].tolist(), [ "value1" for _ in range(100) ])
74+
self.assertCountEqual(df['B'].tolist(), [ "empty" for _ in range(100) ])
75+
# value2 (set values, feature is multi-valued)
76+
self.api.set_values("A", "value2", [ str(i) for i in range(50) ], self.name1)
77+
df = self.api.query("A = value1 or A = value2", resolve=["A"], collapse=None)
78+
self.assertCountEqual(df['A'].tolist(), [ "value2" for _ in range(50) ] + [ "value1" for _ in range(100) ])
79+
# value3 (set values of shadowed feature by specifying target-database)
80+
self.api.set_values("A", "value3", [ str(i) for i in range(50) ], self.name2)
81+
df = self.api.query("A = value1 or A = value2", resolve=["A"], collapse=None)
82+
self.assertCountEqual(df['A'].tolist(), [ "value2" for _ in range(50) ] + [ "value1" for _ in range(100) ])
83+
self.api.database.commit()
84+
api2 = GBD([self.file2])
85+
df = api2.query("A = value3", resolve=["A"])
86+
self.assertCountEqual(df["A"].tolist(), [ "value3" for _ in range(50) ])
87+
88+
def test_reset_values(self):
89+
self.api.create_feature("A", None, self.name1)
90+
self.api.create_feature("B", "empty", self.name1)
91+
self.api.create_feature("A", "empty", self.name2)
92+
self.api.set_values("A", "value1", [ str(i) for i in range(100) ], self.name1)
93+
self.api.set_values("A", "value2", [ str(i) for i in range(100) ], self.name1)
94+
self.api.set_values("B", "value3", [ str(i) for i in range(100) ], self.name1)
95+
self.api.set_values("A", "value1", [ str(i) for i in range(100) ], self.name2)
96+
# reset values in A
97+
self.api.reset_values("A", [ "value1" ], [ str(i) for i in range(50) ], self.name1)
98+
df = self.api.query(None, hashes=[ str(i) for i in range(100) ], resolve=["A"], collapse=None)
99+
self.assertCountEqual(df['A'].tolist(), [ "value1" for _ in range(50) ] + [ "value2" for _ in range(100) ])
100+
# reset values in B
101+
self.api.reset_values("B", [ "value3" ], [ str(i) for i in range(50) ], self.name1)
102+
df = self.api.query(None, hashes=[ str(i) for i in range(100) ], resolve=["B"])
103+
self.assertCountEqual(df['B'].tolist(), [ "value3" for _ in range(50) ] + [ "empty" for _ in range(50) ])
104+
# reset values in shadowed A
105+
self.api.database.verbose = True
106+
self.api.reset_values("A", [ "value1" ], [ str(i) for i in range(50) ], self.name2)
107+
self.api.database.commit()
108+
api2 = GBD([self.file2])
109+
df = api2.query("A = value1", resolve=["A"])
110+
self.assertCountEqual(df["A"].tolist(), [ "value1" for _ in range(50) ])

0 commit comments

Comments
 (0)