diff --git a/coverage.txt b/coverage.txt index df3342c..d53ebcc 100644 --- a/coverage.txt +++ b/coverage.txt @@ -102,4 +102,4 @@ pkg/vtreat/vtreat_impl.py 711 61 91% ------------------------------------------------------------- TOTAL 1593 126 92% -================= 45 passed, 15 warnings in 137.81s (0:02:17) ================== +================== 45 passed, 15 warnings in 81.34s (0:01:21) ================== diff --git a/docs/vtreat.html b/docs/vtreat.html index 7e50c8c..4d96c0b 100644 --- a/docs/vtreat.html +++ b/docs/vtreat.html @@ -114,57 +114,58 @@

8# noinspection PyUnresolvedReferences 9import numpy 10 -11from vtreat.vtreat_api import * +11from vtreat.vtreat_api import unsupervised_parameters, vtreat_parameters, BinomialOutcomeTreatment, MultinomialOutcomeTreatment, NumericOutcomeTreatment, UnsupervisedTreatment 12 -13__docformat__ = "restructuredtext" -14__version__ = "1.3.0" -15 -16__doc__ = """ -17This<https://github.com/WinVector/pyvtreat> is the Python version of the vtreat data preparation system -18(also available as an R package<https://winvector.github.io/vtreat/>. -19 -20vtreat is a DataFrame processor/conditioner that prepares -21real-world data for supervised machine learning or predictive modeling -22in a statistically sound manner. -23 -24vtreat takes an input DataFrame -25that has a specified column called "the outcome variable" (or "y") -26that is the quantity to be predicted (and must not have missing -27values). Other input columns are possible explanatory variables -28(typically numeric or categorical/string-valued, these columns may -29have missing values) that the user later wants to use to predict "y". -30In practice such an input DataFrame may not be immediately suitable -31for machine learning procedures that often expect only numeric -32explanatory variables, and may not tolerate missing values. -33 -34To solve this, vtreat builds a transformed DataFrame where all -35explanatory variable columns have been transformed into a number of -36numeric explanatory variable columns, without missing values. The -37vtreat implementation produces derived numeric columns that capture -38most of the information relating the explanatory columns to the -39specified "y" or dependent/outcome column through a number of numeric -40transforms (indicator variables, impact codes, prevalence codes, and -41more). This transformed DataFrame is suitable for a wide range of -42supervised learning methods from linear regression, through gradient -43boosted machines. -44 -45The idea is: you can take a DataFrame of messy real world data and -46easily, faithfully, reliably, and repeatably prepare it for machine -47learning using documented methods using vtreat. Incorporating -48vtreat into your machine learning workflow lets you quickly work -49with very diverse structured data. -50 -51Worked examples can be found `here`<https://github.com/WinVector/pyvtreat/tree/master/Examples>. -52 -53For more detail please see here: `arXiv:1611.09477 -54stat.AP`<https://arxiv.org/abs/1611.09477> (the documentation describes the R version, -55however all of the examples can be found worked in Python -56`here`<https://github.com/WinVector/pyvtreat/tree/master/Examples/vtreat_paper1>). -57 -58vtreat is available -59as a `Python/Pandas package`<https://github.com/WinVector/vtreat>, -60and also as an `R package`<https://github.com/WinVector/vtreat>. -61""" +13 +14__docformat__ = "restructuredtext" +15__version__ = "1.3.0" +16 +17__doc__ = """ +18This<https://github.com/WinVector/pyvtreat> is the Python version of the vtreat data preparation system +19(also available as an R package<https://winvector.github.io/vtreat/>. +20 +21vtreat is a DataFrame processor/conditioner that prepares +22real-world data for supervised machine learning or predictive modeling +23in a statistically sound manner. +24 +25vtreat takes an input DataFrame +26that has a specified column called "the outcome variable" (or "y") +27that is the quantity to be predicted (and must not have missing +28values). Other input columns are possible explanatory variables +29(typically numeric or categorical/string-valued, these columns may +30have missing values) that the user later wants to use to predict "y". +31In practice such an input DataFrame may not be immediately suitable +32for machine learning procedures that often expect only numeric +33explanatory variables, and may not tolerate missing values. +34 +35To solve this, vtreat builds a transformed DataFrame where all +36explanatory variable columns have been transformed into a number of +37numeric explanatory variable columns, without missing values. The +38vtreat implementation produces derived numeric columns that capture +39most of the information relating the explanatory columns to the +40specified "y" or dependent/outcome column through a number of numeric +41transforms (indicator variables, impact codes, prevalence codes, and +42more). This transformed DataFrame is suitable for a wide range of +43supervised learning methods from linear regression, through gradient +44boosted machines. +45 +46The idea is: you can take a DataFrame of messy real world data and +47easily, faithfully, reliably, and repeatably prepare it for machine +48learning using documented methods using vtreat. Incorporating +49vtreat into your machine learning workflow lets you quickly work +50with very diverse structured data. +51 +52Worked examples can be found `here`<https://github.com/WinVector/pyvtreat/tree/master/Examples>. +53 +54For more detail please see here: `arXiv:1611.09477 +55stat.AP`<https://arxiv.org/abs/1611.09477> (the documentation describes the R version, +56however all of the examples can be found worked in Python +57`here`<https://github.com/WinVector/pyvtreat/tree/master/Examples/vtreat_paper1>). +58 +59vtreat is available +60as a `Python/Pandas package`<https://github.com/WinVector/vtreat>, +61and also as an `R package`<https://github.com/WinVector/vtreat>. +62""" diff --git a/pkg/build/lib/vtreat/__init__.py b/pkg/build/lib/vtreat/__init__.py index 7460420..5905ae9 100644 --- a/pkg/build/lib/vtreat/__init__.py +++ b/pkg/build/lib/vtreat/__init__.py @@ -8,7 +8,8 @@ # noinspection PyUnresolvedReferences import numpy -from vtreat.vtreat_api import * +from vtreat.vtreat_api import unsupervised_parameters, vtreat_parameters, BinomialOutcomeTreatment, MultinomialOutcomeTreatment, NumericOutcomeTreatment, UnsupervisedTreatment + __docformat__ = "restructuredtext" __version__ = "1.3.0" diff --git a/pkg/dist/vtreat-1.3.0-py3-none-any.whl b/pkg/dist/vtreat-1.3.0-py3-none-any.whl index 63f9a10..2acd68f 100644 Binary files a/pkg/dist/vtreat-1.3.0-py3-none-any.whl and b/pkg/dist/vtreat-1.3.0-py3-none-any.whl differ diff --git a/pkg/dist/vtreat-1.3.0.tar.gz b/pkg/dist/vtreat-1.3.0.tar.gz index 09b14d1..968157f 100644 Binary files a/pkg/dist/vtreat-1.3.0.tar.gz and b/pkg/dist/vtreat-1.3.0.tar.gz differ