Skip to content
This repository was archived by the owner on Sep 9, 2020. It is now read-only.

Commit a5f2379

Browse files
dolekspprett
authored and committed
Adding prediction explanations support to batch scoring. (#124)
* Adding reason codes support to batch scoring. * Renaming reason codes to prediction explanations * Add documentation and update rate limiting. * Renaming the output fields to be explanation. * Removing sampling change and fixing the linter. * Realigning the argument table. * Renaming reason codes to prediction explanations * Realigning the argument table * Old api and prediction explanations incompatible 1. Moved argument to dataset group and updated helpstring 2. Added possibility to put argument into config file 3. Added check for api version and prediction explanations incompatibility 4. Tests * Update prediction explanation doc Both CHANGES and Readme * Bump version to 1.15.0
1 parent ba73d7a commit a5f2379

16 files changed

+794
-62
lines changed

CHANGES.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
1.14.3 (Unreleased)
1+
1.15.0 (Unreleased)
22
===================
33

4+
Enhancements
5+
------------
6+
* Added new argument ``--max_prediction_explanations`` that enables batch scoring with prediction explanations and adds ``explanation_N_feature`` and ``explanation_N_strength`` columns to each row of the output document (where ``N ∈ [1, max_prediction_explanations]``)
7+
48
1.14.2 (2018 Nov 14)
59
=======================
610

README.rst

Lines changed: 42 additions & 40 deletions
Large diffs are not rendered by default.

datarobot_batch_scoring/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.14.2'
1+
__version__ = '1.15.0'

datarobot_batch_scoring/api_response_handlers/pred_api_v10.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import operator
22
import json
3+
from six.moves import zip
34

45
from datarobot_batch_scoring.exceptions import UnexpectedKeptColumnCount
56

@@ -70,6 +71,22 @@ def format_data(result, batch, **opts):
7071
written_fields = out_fields[1:]
7172
comb = [row[1:] for row in pred]
7273

74+
if 'predictionExplanations' in single_row:
75+
num_reason_codes = len(single_row['predictionExplanations'])
76+
for num in range(1, num_reason_codes + 1):
77+
written_fields += [
78+
'explanation_{0}_feature'.format(num),
79+
'explanation_{0}_strength'.format(num)
80+
]
81+
for in_row, out_row in zip(result, comb):
82+
reason_codes = []
83+
for raw_reason_code in in_row['predictionExplanations']:
84+
reason_codes += [
85+
raw_reason_code['feature'],
86+
raw_reason_code['strength']
87+
]
88+
out_row.extend(reason_codes)
89+
7390
return written_fields, comb
7491

7592

datarobot_batch_scoring/batch_scoring.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def run_batch_predictions(base_url, base_headers, user, pwd,
7575
max_batch_size=None, compression=None,
7676
field_size_limit=None,
7777
verify_ssl=True,
78-
deployment_id=None):
78+
deployment_id=None,
79+
max_prediction_explanations=0):
7980

8081
if field_size_limit is not None:
8182
csv.field_size_limit(field_size_limit)
@@ -122,13 +123,23 @@ def run_batch_predictions(base_url, base_headers, user, pwd,
122123
base_headers['content-type'] = 'text/csv; charset=utf8'
123124
if compression:
124125
base_headers['Content-Encoding'] = 'gzip'
126+
125127
if import_id:
126-
endpoint = base_url + '/'.join((import_id, 'predict'))
128+
endpoint = base_url + import_id
127129
elif deployment_id is not None:
128130
endpoint = base_url + '/'.join(
129-
('deployments', deployment_id, 'predictions'))
131+
('deployments', deployment_id))
132+
else:
133+
endpoint = base_url + '/'.join((pid, lid))
134+
135+
if max_prediction_explanations:
136+
endpoint += '/predictionExplanations?maxCodes=' + \
137+
str(max_prediction_explanations)
130138
else:
131-
endpoint = base_url + '/'.join((pid, lid, 'predict'))
139+
if deployment_id is not None:
140+
endpoint += '/predictions'
141+
else:
142+
endpoint += '/predict'
132143

133144
encoding = investigate_encoding_and_dialect(
134145
dataset=dataset,

datarobot_batch_scoring/main.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from datarobot_batch_scoring import __version__
1010
from datarobot_batch_scoring.api_response_handlers import (
11-
RESPONSE_HANDLERS, PRED_API_V10)
11+
RESPONSE_HANDLERS, PRED_API_V10, API_V1)
1212
from datarobot_batch_scoring.batch_scoring import (run_batch_predictions)
1313
from datarobot_batch_scoring.exceptions import ShelveError
1414
from datarobot_batch_scoring.utils import (UI, get_config_file,
@@ -60,6 +60,7 @@ def parse_args(argv, standalone=False, deployment_aware=False):
6060
'stdout': False,
6161
'auto_sample': False,
6262
'api_version': PRED_API_V10,
63+
'max_prediction_explanations': 0
6364
}
6465
parser = argparse.ArgumentParser(
6566
description=DESCRIPTION, epilog=EPILOG,
@@ -131,6 +132,13 @@ def parse_args(argv, standalone=False, deployment_aware=False):
131132
dataset_gr.add_argument('dataset', type=str,
132133
help='Specifies the .csv input file that '
133134
'the script scores.')
135+
dataset_gr.add_argument('--max_prediction_explanations',
136+
type=int,
137+
default=defaults['max_prediction_explanations'],
138+
help='The maximum number of prediction '
139+
'explanations that will be generate for '
140+
'each prediction.'
141+
'Not compatible with api version `api/v1`')
134142

135143
conn_gr = parser.add_argument_group('Connection control')
136144
conn_gr.add_argument('--timeout', type=int,
@@ -324,6 +332,12 @@ def parse_generic_options(parsed_args):
324332
dataset = parsed_args['dataset']
325333
if not os.path.exists(dataset):
326334
ui.fatal('file {} does not exist.'.format(dataset))
335+
api_version = parsed_args['api_version']
336+
max_prediction_explanations = parsed_args['max_prediction_explanations']
337+
if api_version == API_V1 and max_prediction_explanations > 0:
338+
ui.fatal('Prediction explanation is not available for '
339+
'api_version `api/v1` please use the '
340+
'`predApi/v1.0` or deployments endpoint')
327341

328342
ui.debug('batch_scoring v{}'.format(__version__))
329343

@@ -348,6 +362,8 @@ def parse_generic_options(parsed_args):
348362
'skip_row_id': skip_row_id,
349363
'timeout': timeout,
350364
'verify_ssl': parsed_args['verify_ssl'],
365+
'max_prediction_explanations':
366+
parsed_args['max_prediction_explanations'],
351367
}
352368

353369

datarobot_batch_scoring/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def verify_objectid(value):
6060
OptKey('field_size_limit'): t.Int,
6161
OptKey('ca_bundle'): t.String,
6262
OptKey('no_verify_ssl'): t.StrBool,
63+
OptKey('max_prediction_explanations'): t.Int,
6364
}).allow_extra('*')
6465

6566

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
# batch scoring script may be installed in pretty outdated envs. So let's do it
2525
# old-fashioned way by adding condition here.
2626
#
27-
# [1] https://github.com/agronholm/pythonfutures/commit/d0393ad626d25622927bb0ed47d35ddb2f6cd321
27+
# [1] https://github.com/agronholm/pythonfutures/commit/d0393ad626d25622927bb0ed47d35ddb2f6cd321 # noqa: E501
2828
# [2] https://www.python.org/dev/peps/pep-0508/#environment-markers
2929
if sys.version_info[0] > 2:
3030
install_requires = [req

tests/fixtures/10kDiabetes.csv

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
readmitted,rowID,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,diag_1,diag_2,diag_3,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,examide,citoglipton,insulin,glyburide.metformin,glipizide.metformin,glimepiride.pioglitazone,metformin.rosiglitazone,metformin.pioglitazone,change,diabetesMed,diag_1_desc,diag_2_desc,diag_3_desc
2+
FALSE,1,Caucasian,Female,[50-60),?,Elective,Discharged to home,Physician Referral,1,CP,Surgery-Neuro,35,4,21,0,0,0,723,723,719,9,None,None,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Spinal stenosis in cervical region,Spinal stenosis in cervical region,"Effusion of joint, site unspecified"
3+
FALSE,2,Caucasian,Female,[20-30),[50-75),Urgent,Discharged to home,Physician Referral,2,UN,?,8,5,5,0,0,0,664,648,285,6,None,None,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,"First-degree perineal laceration, unspecified as to episode of care or not applicable","Diabetes mellitus of mother, complicating pregnancy, childbirth, or the puerperium, unspecified as to episode of care or not applicable",Sideroblastic anemia
4+
TRUE,3,Caucasian,Male,[80-90),?,Not Available,Discharged/transferred to home with home health service,,7,MC,Family/GeneralPractice,12,0,21,0,0,1,481,428,276,9,>200,None,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,Pneumococcal pneumonia [Streptococcus pneumoniae pneumonia],"Congestive heart failure, unspecified",Hyperosmolality and/or hypernatremia
5+
FALSE,4,AfricanAmerican,Female,[50-60),?,Emergency,Discharged to home,Transfer from another health care facility,4,UN,?,33,1,5,0,0,0,682,41,250,3,None,None,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,Cellulitis and abscess of face,"Streptococcus infection in conditions classified elsewhere and of unspecified site, streptococcus, unspecified","Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled"
6+
FALSE,5,AfricanAmerican,Female,[50-60),?,Emergency,Discharged to home,Emergency Room,5,?,Psychiatry,31,0,13,0,0,0,296,250.01,298,7,None,None,Steady,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,Ch,Yes,"Bipolar I disorder, single manic episode, unspecified","Diabetes mellitus without mention of complication, type I [juvenile type], not stated as uncontrolled",Depressive type psychosis
7+
FALSE,6,Caucasian,Male,[70-80),?,Elective,Discharged to home,Physician Referral,4,?,Cardiology,29,0,10,0,0,0,428,427,414,8,None,None,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,"Congestive heart failure, unspecified",Paroxysmal supraventricular tachycardia,"Coronary atherosclerosis of unspecified type of vessel, native or graft"
8+
FALSE,7,Caucasian,Female,[60-70),?,Elective,Expired,Physician Referral,6,MC,InternalMedicine,46,1,20,0,0,0,434,345,584,8,None,None,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,No,Yes,Cerebral thrombosis without mention of cerebral infarction,"Generalized nonconvulsive epilepsy, without mention of intractable epilepsy",Acute kidney failure
9+
FALSE,8,Caucasian,Female,[50-60),?,Emergency,Discharged to home,Emergency Room,2,?,?,49,1,17,2,1,1,558,562,455,9,None,Norm,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,Other and unspecified noninfectious gastroenteritis and colitis,Diverticulosis of small intestine (without mention of hemorrhage),Internal hemorrhoids without mention of complication
10+
FALSE,9,Caucasian,Male,[50-60),?,,Discharged to home,,3,?,Family/GeneralPractice,54,0,10,0,0,1,428,425,70,9,None,None,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,"Congestive heart failure, unspecified",Endomyocardial fibrosis,Viral hepatitis A with hepatic coma
11+
TRUE,10,Caucasian,Male,[60-70),?,Elective,Discharged to home,Physician Referral,5,?,Surgery-Cardiovascular/Thoracic,47,2,12,0,0,0,440,998,998,5,None,None,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,Atherosclerosis of aorta,"Postoperative shock, unspecified","Postoperative shock, unspecified"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
row_id,0,1,explanation_1_feature,explanation_1_strength,explanation_2_feature,explanation_2_strength,explanation_3_feature,explanation_3_strength,explanation_4_feature,explanation_4_strength,explanation_5_feature,explanation_5_strength
2+
0,0.7374983461,0.2625016539,medical_specialty,-0.2230822974,number_diagnoses,0.2010719684,diag_1,-0.1882141621,number_inpatient,-0.1584939956,diag_3,-0.1538334979
3+
1,0.7534670614,0.2465329386,weight,0.4616938769,diag_2,-0.2817732676,payer_code,-0.1999593278,age,-0.1700208122,num_lab_procedures,-0.1699786261
4+
2,0.673941752,0.326058248,discharge_disposition_id,0.3047681596,number_inpatient,0.2266360201,medical_specialty,-0.2049645033,admission_source_id,-0.1710960504,num_lab_procedures,-0.1579545292
5+
3,0.884682017,0.115317983,number_diagnoses,-0.4503520826,diag_2,-0.2808908801,admission_source_id,-0.2398130772,payer_code,-0.2143588931,number_inpatient,-0.1707111743
6+
4,0.7116849878,0.2883150122,medical_specialty,-0.1814823805,race,-0.1575075199,number_inpatient,-0.1568230769,admission_source_id,0.1354087199,diag_3,-0.13507482
7+
5,0.5909996414,0.4090003586,medical_specialty,-0.1884195522,diag_2,-0.1807624347,number_inpatient,-0.142271611,admission_type_id,0.1354998853,number_diagnoses,0.1320297742
8+
6,0.8647944819,0.1352055181,discharge_disposition_id,-1.1295014348,medical_specialty,-0.2076687977,number_inpatient,-0.1461341051,payer_code,-0.1242433687,admission_type_id,0.0949499145
9+
7,0.5144554583,0.4855445417,number_inpatient,0.2826505121,diag_1,-0.2274929185,number_emergency,0.2071603004,number_diagnoses,0.1620381118,discharge_disposition_id,0.1560600053
10+
8,0.3746314486,0.6253685514,admission_type_id,0.5009928124,number_inpatient,0.2401329203,medical_specialty,-0.1651539909,admission_source_id,-0.1571784835,number_diagnoses,0.1377384773
11+
9,0.7117755608,0.2882244392,medical_specialty,-0.3821848966,number_diagnoses,-0.273826215,diag_3,0.229366976,number_inpatient,-0.1604879998,admission_type_id,0.1231549458

0 commit comments

Comments
 (0)