Skip to content

Commit e5d8943

Browse files
committed
move vtreat parameters out of pipeline parameters
1 parent b5903de commit e5d8943

19 files changed

+108
-310
lines changed

Examples/Pipeline/Pipeline_Example.ipynb

Lines changed: 7 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,8 @@
8888
"name": "stderr",
8989
"output_type": "stream",
9090
"text": [
91-
"/Users/johnmount/opt/anaconda3/envs/ai_academy_3_7/lib/python3.7/site-packages/vtreat/vtreat_api.py:369: UserWarning: called transform on same data used to fit (this causes over-fit, please use fit_transform() instead)\n",
92-
" \"called transform on same data used to fit (this causes over-fit, please use fit_transform() instead)\")\n"
91+
"/Users/johnmount/opt/anaconda3/envs/ai_academy_3_7/lib/python3.7/site-packages/vtreat/vtreat_api.py:348: UserWarning: possibly called transform on same data used to fit (this causes over-fit, please use fit_transform() instead)\n",
92+
" \"possibly called transform on same data used to fit (this causes over-fit, please use fit_transform() instead)\")\n"
9393
]
9494
},
9595
{
@@ -134,22 +134,7 @@
134134
"text": [
135135
"Pipeline(memory=None,\n",
136136
" steps=[('preprocessor',\n",
137-
" vtreat.vtreat_api.BinomialOutcomeTreatment(outcome_target=True,\n",
138-
"params={'coders': {'clean_copy',\n",
139-
" 'deviation_code',\n",
140-
" 'impact_code',\n",
141-
" 'indicator_code',\n",
142-
" 'logit_code',\n",
143-
" 'missing_indicator',\n",
144-
" 'prevalence_code'},\n",
145-
" 'cross_validation_k': 5,\n",
146-
" 'cross_validation_plan': <vtreat.cross_plan.KWayCrossPlanYStratified object at 0x10fa81b50>,\n",
147-
" '...\n",
148-
" 'missingness_imputation': <function mean at 0x11093bb90>,\n",
149-
" 'sparse_indicators': True,\n",
150-
" 'use_hierarchical_estimate': True,\n",
151-
" 'user_transforms': []},\n",
152-
")),\n",
137+
" vtreat.vtreat_api.BinomialOutcomeTreatment(outcome_target=True, )),\n",
153138
" ('classifier',\n",
154139
" LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
155140
" fit_intercept=True, intercept_scaling=1,\n",
@@ -210,7 +195,7 @@
210195
"name": "stdout",
211196
"output_type": "stream",
212197
"text": [
213-
"{'use_hierarchical_estimate': True, 'coders': {'prevalence_code', 'logit_code', 'indicator_code', 'deviation_code', 'impact_code', 'missing_indicator', 'clean_copy'}, 'filter_to_recommended': True, 'indicator_min_fraction': 0.1, 'cross_validation_plan': <vtreat.cross_plan.KWayCrossPlanYStratified object at 0x10fa81b50>, 'cross_validation_k': 5, 'user_transforms': [], 'sparse_indicators': True, 'missingness_imputation': <function mean at 0x11093bb90>, 'outcome_target': True}\n"
198+
"{}\n"
214199
]
215200
}
216201
],
@@ -236,47 +221,15 @@
236221
"name": "stdout",
237222
"output_type": "stream",
238223
"text": [
239-
"{'memory': None, 'steps': [('preprocessor', vtreat.vtreat_api.BinomialOutcomeTreatment(outcome_target=True,\n",
240-
"params={'coders': {'clean_copy',\n",
241-
" 'deviation_code',\n",
242-
" 'impact_code',\n",
243-
" 'indicator_code',\n",
244-
" 'logit_code',\n",
245-
" 'missing_indicator',\n",
246-
" 'prevalence_code'},\n",
247-
" 'cross_validation_k': 5,\n",
248-
" 'cross_validation_plan': <vtreat.cross_plan.KWayCrossPlanYStratified object at 0x10fa81b50>,\n",
249-
" 'filter_to_recommended': True,\n",
250-
" 'indicator_min_fraction': 0.1,\n",
251-
" 'missingness_imputation': <function mean at 0x11093bb90>,\n",
252-
" 'sparse_indicators': True,\n",
253-
" 'use_hierarchical_estimate': True,\n",
254-
" 'user_transforms': []},\n",
255-
")), ('classifier', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
224+
"{'memory': None, 'steps': [('preprocessor', vtreat.vtreat_api.BinomialOutcomeTreatment(outcome_target=True, )), ('classifier', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
256225
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
257226
" multi_class='warn', n_jobs=None, penalty='l2',\n",
258227
" random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
259-
" warm_start=False))], 'verbose': False, 'preprocessor': vtreat.vtreat_api.BinomialOutcomeTreatment(outcome_target=True,\n",
260-
"params={'coders': {'clean_copy',\n",
261-
" 'deviation_code',\n",
262-
" 'impact_code',\n",
263-
" 'indicator_code',\n",
264-
" 'logit_code',\n",
265-
" 'missing_indicator',\n",
266-
" 'prevalence_code'},\n",
267-
" 'cross_validation_k': 5,\n",
268-
" 'cross_validation_plan': <vtreat.cross_plan.KWayCrossPlanYStratified object at 0x10fa81b50>,\n",
269-
" 'filter_to_recommended': True,\n",
270-
" 'indicator_min_fraction': 0.1,\n",
271-
" 'missingness_imputation': <function mean at 0x11093bb90>,\n",
272-
" 'sparse_indicators': True,\n",
273-
" 'use_hierarchical_estimate': True,\n",
274-
" 'user_transforms': []},\n",
275-
"), 'classifier': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
228+
" warm_start=False))], 'verbose': False, 'preprocessor': vtreat.vtreat_api.BinomialOutcomeTreatment(outcome_target=True, ), 'classifier': LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
276229
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
277230
" multi_class='warn', n_jobs=None, penalty='l2',\n",
278231
" random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
279-
" warm_start=False), 'preprocessor__use_hierarchical_estimate': True, 'preprocessor__coders': {'prevalence_code', 'logit_code', 'indicator_code', 'deviation_code', 'impact_code', 'missing_indicator', 'clean_copy'}, 'preprocessor__filter_to_recommended': True, 'preprocessor__indicator_min_fraction': 0.1, 'preprocessor__cross_validation_plan': <vtreat.cross_plan.KWayCrossPlanYStratified object at 0x10fa81b50>, 'preprocessor__cross_validation_k': 5, 'preprocessor__user_transforms': [], 'preprocessor__sparse_indicators': True, 'preprocessor__missingness_imputation': <function mean at 0x11093bb90>, 'preprocessor__outcome_target': True, 'classifier__C': 1.0, 'classifier__class_weight': None, 'classifier__dual': False, 'classifier__fit_intercept': True, 'classifier__intercept_scaling': 1, 'classifier__l1_ratio': None, 'classifier__max_iter': 100, 'classifier__multi_class': 'warn', 'classifier__n_jobs': None, 'classifier__penalty': 'l2', 'classifier__random_state': None, 'classifier__solver': 'lbfgs', 'classifier__tol': 0.0001, 'classifier__verbose': 0, 'classifier__warm_start': False}\n"
232+
" warm_start=False), 'classifier__C': 1.0, 'classifier__class_weight': None, 'classifier__dual': False, 'classifier__fit_intercept': True, 'classifier__intercept_scaling': 1, 'classifier__l1_ratio': None, 'classifier__max_iter': 100, 'classifier__multi_class': 'warn', 'classifier__n_jobs': None, 'classifier__penalty': 'l2', 'classifier__random_state': None, 'classifier__solver': 'lbfgs', 'classifier__tol': 0.0001, 'classifier__verbose': 0, 'classifier__warm_start': False}\n"
280233
]
281234
}
282235
],

Examples/Pipeline/Pipeline_Example.md

Lines changed: 0 additions & 165 deletions
This file was deleted.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ in a statistically sound manner.
1212
Install `vtreat` with either of:
1313

1414
* `pip install vtreat`
15-
* `pip install https://github.com/WinVector/pyvtreat/raw/master/pkg/dist/vtreat-0.3.5.tar.gz`
15+
* `pip install https://github.com/WinVector/pyvtreat/raw/master/pkg/dist/vtreat-0.3.6.tar.gz`
1616

1717
# Details
1818

coverage.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ pkg/tests/test_util.py . [100%]
2020
Name Stmts Miss Cover
2121
-----------------------------------------------
2222
pkg/vtreat/__init__.py 6 0 100%
23-
pkg/vtreat/cross_plan.py 94 52 45%
24-
pkg/vtreat/transform.py 13 8 38%
23+
pkg/vtreat/cross_plan.py 104 57 45%
24+
pkg/vtreat/transform.py 17 10 41%
2525
pkg/vtreat/util.py 161 26 84%
26-
pkg/vtreat/vtreat_api.py 327 133 59%
26+
pkg/vtreat/vtreat_api.py 295 101 66%
2727
pkg/vtreat/vtreat_impl.py 481 79 84%
2828
-----------------------------------------------
29-
TOTAL 1082 298 72%
29+
TOTAL 1064 273 74%
3030

3131

32-
============================== 13 passed in 7.16s ==============================
32+
============================== 13 passed in 6.62s ==============================

pkg/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ in a statistically sound manner.
1212
Install `vtreat` with either of:
1313

1414
* `pip install vtreat`
15-
* `pip install https://github.com/WinVector/pyvtreat/raw/master/pkg/dist/vtreat-0.3.4.tar.gz`
15+
* `pip install https://github.com/WinVector/pyvtreat/raw/master/pkg/dist/vtreat-0.3.6.tar.gz`
1616

1717
# Details
1818

pkg/build/lib/vtreat/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from vtreat.vtreat_api import *
88

99
__docformat__ = "restructuredtext"
10-
__version__ = "0.3.5"
10+
__version__ = "0.3.6"
1111

1212
__doc__ = """
1313
This<https://github.com/WinVector/pyvtreat> is the Python version of the vtreat data preparation system

pkg/build/lib/vtreat/cross_plan.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,17 @@ class CrossValidationPlan:
1515
"""Data splitting plan"""
1616

1717
def __init__(self):
18-
self.verbose_ = False
18+
pass
1919

2020
def split_plan(self, *, n_rows=None, k_folds=None, data=None, y=None):
2121
raise NotImplementedError("base class called")
2222

23+
def __repr__(self):
24+
return "vtreat.cross_plan.CrossValidationPlan()"
25+
26+
def __str__(self):
27+
return self.__repr__()
28+
2329

2430
def k_way_cross_plan(n_rows, k_folds):
2531
"""randomly split range(n_rows) into k_folds disjoint groups"""
@@ -59,6 +65,9 @@ def split_plan(self, *, n_rows=None, k_folds=None, data=None, y=None):
5965
raise ValueError("k_folds must not be None")
6066
return k_way_cross_plan(n_rows=n_rows, k_folds=k_folds)
6167

68+
def __repr__(self):
69+
return "vtreat.cross_plan.KWayCrossPlan()"
70+
6271

6372
def k_way_cross_plan_y_stratified(n_rows, k_folds, y):
6473
"""randomly split range(n_rows) into k_folds disjoint groups, attempting an even y-distribution"""
@@ -116,6 +125,9 @@ def split_plan(self, *, n_rows=None, k_folds=None, data=None, y=None):
116125
raise ValueError("y must not be None")
117126
return k_way_cross_plan_y_stratified(n_rows=n_rows, k_folds=k_folds, y=y)
118127

128+
def __repr__(self):
129+
return "vtreat.cross_plan.KWayCrossPlanYStratified()"
130+
119131

120132
def order_cross_plan(k_folds, order_vector):
121133
"""Build a k_folds cross validation plan based on the ordered series"""
@@ -170,3 +182,6 @@ def split_plan(self, *, n_rows=None, k_folds=None, data=None, y=None):
170182
raise ValueError("k_folds must not be None")
171183
order_vector = data[self.order_column_name_]
172184
return order_cross_plan(k_folds=k_folds, order_vector=order_vector)
185+
186+
def __repr__(self):
187+
return "vtreat.cross_plan.OrderedCrossPlan()"

pkg/build/lib/vtreat/transform.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,15 @@ def transform(self, X):
1919
def fit_transform(self, X, y):
2020
self.fit(X, y)
2121
return self.transform(X)
22+
23+
def __repr__(self):
24+
return ("vtreat.transform.UserTransform("
25+
+ "treatment=" + self.treatment_.__repr__()
26+
+ ") {"
27+
+ "'y_aware_': " + str(self.y_aware_)
28+
+ ", " + "'treatment_': " + str(self.treatment_)
29+
+ ", " + "'incoming_vars_': " + str(self.incoming_vars_)
30+
+ "}")
31+
32+
def __str__(self):
33+
return self.__repr__()

0 commit comments

Comments
 (0)