update & test documentation #65
hbaniecki committed Apr 11, 2020
1 parent 3c8da6b commit 58eabcc
Showing 16 changed files with 485 additions and 429 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -18,3 +18,4 @@ modelStudio_my_test.R
^LICENSE$
^CONTRIBUTING.md
^\.github$
^pickle$
4 changes: 1 addition & 3 deletions .github/workflows/R-CMD-check.yaml
@@ -27,9 +27,9 @@ jobs:
config:
- {os: windows-latest, r: '3.6'}
- {os: macOS-latest, r: '3.6'}
- {os: macOS-latest, r: 'devel'}
- {os: ubuntu-16.04, r: '3.5', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.6', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
#- {os: macOS-latest, r: 'devel'} this is bugged

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -71,8 +71,6 @@ jobs:
- name: Install dependencies
run: |
remotes::install_deps(dependencies = TRUE)
remotes::install_github("https://github.com/r-lib/xml2")
remotes::install_cran("processx")
remotes::install_cran("rcmdcheck")
shell: Rscript {0}

2 changes: 2 additions & 0 deletions .gitignore
@@ -45,3 +45,5 @@ modelStudio.Rproj
#*.html

docs/*

*.pickle
51 changes: 32 additions & 19 deletions R/modelStudio.R
@@ -38,15 +38,15 @@
#' @references
#'
#' \itemize{
#' \item Wrapper for the function is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item The input object is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item Feature Importance, Ceteris Paribus, Partial Dependence and Accumulated Dependence plots
#' are implemented in \href{https://modeloriented.github.io/ingredients/}{\bold{ingredients}}
#' \item Break Down and Shapley Values plots are implemented in \href{https://modeloriented.github.io/iBreakDown/}{\bold{iBreakDown}}
#' }
#'
#' @seealso
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/vignette_examples.html}{\bold{modelStudio - R & python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html}{\bold{modelStudio - perks and features}}
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html}{\bold{modelStudio - R & Python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html}{\bold{modelStudio - perks and features}}
#'
#' @examples
#' library("DALEX")
@@ -55,9 +55,7 @@
#' #:# ex1 classification on 'titanic_imputed' dataset
#'
#' # fit a model
#' model_titanic <- glm(survived ~.,
#' data = titanic_imputed,
#' family = "binomial")
#' model_titanic <- glm(survived ~., data = titanic_imputed, family = "binomial")
#'
#' # create an explainer for the model
#' explainer_titanic <- explain(model_titanic,
@@ -70,8 +68,9 @@
#' rownames(new_observations) <- c("Lucas","James")
#'
#' # make a studio for the model
#' modelStudio(explainer_titanic, new_observations,
#' N = 100, B = 10)
#' modelStudio(explainer_titanic,
#' new_observations,
#' N = 100, B = 10) # faster example
#'
#' \donttest{
#'
@@ -88,32 +87,46 @@
#' rownames(new_apartments) <- c("ap1","ap2")
#'
#' # change dashboard dimensions and animation length
#' modelStudio(explainer_apartments, new_apartments,
#' facet_dim = c(2, 3), time = 800)
#' modelStudio(explainer_apartments,
#' new_apartments,
#' facet_dim = c(2, 3),
#' time = 800)
#'
#' # add information about true labels
#' modelStudio(explainer_apartments, new_apartments,
#' new_observation_y = apartments[1:2, 1])
#' modelStudio(explainer_apartments,
#' new_apartments,
#' new_observation_y = new_apartments$m2.price)
#'
#' # don't compute EDA plots
#' modelStudio(explainer_apartments, eda = FALSE)
#' modelStudio(explainer_apartments,
#' eda = FALSE)
#'
#'
#' #:# ex3 xgboost model on 'HR' dataset
#' library("xgboost")
#'
#' model_matrix <- model.matrix(status == "fired" ~ . -1, HR)
#' data <- xgb.DMatrix(model_matrix, label = HR$status == "fired")
#' # fit a model
#' HR_matrix <- model.matrix(status == "fired" ~ . -1, HR)
#'
#' xgb_matrix <- xgb.DMatrix(HR_matrix, label = HR$status == "fired")
#'
#' params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")
#'
#' model_HR <- xgb.train(params, data, nrounds = 300)
#' model_HR <- xgb.train(params, xgb_matrix, nrounds = 300)
#'
#' # create an explainer for the model
#' explainer_HR <- explain(model_HR,
#' data = model_matrix,
#' y = HR$status == "fired")
#' data = HR_matrix,
#' y = HR$status == "fired",
#' label = "xgboost")
#'
#' modelStudio(explainer_HR)
#' # pick observations
#' new_observation <- HR_matrix[1:2, , drop=FALSE]
#' rownames(new_observation) <- c("id1", "id2")
#'
#' # make a studio for the model
#' modelStudio(explainer_HR,
#' new_observation)
#'
#' }
#'
24 changes: 13 additions & 11 deletions R/modelStudioOptions.R
@@ -48,35 +48,35 @@
#' @references
#'
#' \itemize{
#' \item Wrapper for the function is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item The input object is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item Feature Importance, Ceteris Paribus, Partial Dependence and Accumulated Dependence plots
#' are implemented in \href{https://modeloriented.github.io/ingredients/}{\bold{ingredients}}
#' \item Break Down and Shapley Values plots are implemented in \href{https://modeloriented.github.io/iBreakDown/}{\bold{iBreakDown}}
#' }
#'
#' @seealso
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/vignette_examples.html}{\bold{modelStudio - R & python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html}{\bold{modelStudio - perks and features}}
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html}{\bold{modelStudio - R & Python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html}{\bold{modelStudio - perks and features}}
#'
#' @examples
#' library("DALEX")
#' library("modelStudio")
#'
#' # fit a model
#' model_apartments <- glm(m2.price ~. ,
#' data = apartments)
#' model_apartments <- glm(m2.price ~. , data = apartments)
#'
#' # create an explainer for the model
#' explainer_apartments <- DALEX::explain(model_apartments,
#' data = apartments,
#' y = apartments$m2.price)
#' y = apartments$m2.price,
#' label = "glm")
#'
#' # pick observations
#' new_apartments <- apartments[1:2,]
#' rownames(new_apartments) <- c("ap1","ap2")
#' new_observation <- apartments[1:2,]
#' rownames(new_observation) <- c("ap1","ap2")
#'
#' # modify default options
#' op <- modelStudioOptions(
#' new_options <- modelStudioOptions(
#' show_subtitle = TRUE,
#' bd_subtitle = "Hello World",
#' line_size = 5,
@@ -88,8 +88,10 @@
#' )
#'
#' # make a studio for the model
#' modelStudio(explainer_apartments, new_apartments,
#' N = 100, B = 10, options = op)
#' modelStudio(explainer_apartments,
#' new_observation,
#' options = new_options,
#' N = 100, B = 10) # faster example
#'
#' @export
#' @rdname modelStudioOptions
77 changes: 40 additions & 37 deletions README.md
@@ -10,13 +10,13 @@

The `modelStudio` package **automates the Explanatory Analysis of Machine Learning predictive models**. Generate advanced interactive and animated model explanations in the form of a **serverless HTML site** with only one line of code. This tool is model agnostic, therefore compatible with most of the black box predictive models and frameworks (e.g.&nbsp;`mlr/mlr3`, `xgboost`, `caret`, `h2o`, `scikit-learn`, `lightGBM`, `keras/tensorflow`).

The main `modelStudio()` function computes various (instance and dataset level) model explanations and produces an&nbsp**interactive,&nbsp;customisable dashboard made with D3.js**. It consists of multiple panels for plots with their short descriptions. Easily&nbsp;**save&nbsp;and&nbsp;share** the dashboard with others. Tools for model exploration unite with tools for EDA (Exploratory Data Analysis) to give a broad overview of the model behavior.
The main `modelStudio()` function computes various (instance and dataset level) model explanations and produces an&nbsp;**interactive,&nbsp;customisable dashboard made with D3.js**. It consists of multiple panels for plots with their short descriptions. Easily&nbsp;**save&nbsp;and&nbsp;share** the dashboard with others. Tools for model exploration unite with tools for EDA (Exploratory Data Analysis) to give a broad overview of the model behavior.
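
For orientation, a minimal hedged sketch of that one-line workflow (it assumes a `DALEX` explainer named `explainer` has already been created, as in the Getting Started example below):

```r
library(modelStudio)

# compute the explanations and render the interactive dashboard
modelStudio(explainer)
```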

<!--- [explain FIFA19](https://pbiecek.github.io/explainFIFA19/) &emsp; --->
<!--- [explain Lung Cancer](https://github.com/hbaniecki/transparent_xai/) &emsp; --->
&emsp; &emsp; &emsp; &emsp; &emsp; &emsp;
[**explain FIFA20**](https://pbiecek.github.io/explainFIFA20/) &emsp;
[**R & Python examples**](http://modelstudio.drwhy.ai/articles/vignette_examples.html) &emsp;
[**R & Python examples**](http://modelstudio.drwhy.ai/articles/ms-r-python-examples.html) &emsp;
[**More Resources**](http://modelstudio.drwhy.ai/#more-resources) &emsp;
[**FAQ & Troubleshooting**](https://github.com/ModelOriented/modelStudio/issues/54)

@@ -41,9 +41,7 @@ library("DALEX")
library("modelStudio")

# fit a model
model <- glm(survived ~.,
data = titanic_imputed,
family = "binomial")
model <- glm(survived ~., data = titanic_imputed, family = "binomial")

# create an explainer for the model
explainer <- explain(model,
@@ -59,18 +57,18 @@ modelStudio(explainer)

![](man/figures/long.gif)

## R & Python Examples [more](http://modelstudio.drwhy.ai/articles/vignette_examples.html)
## R & Python Examples [more](http://modelstudio.drwhy.ai/articles/ms-r-python-examples.html)

The `modelStudio()` function uses `DALEX` explainers created with `DALEX::explain()` or `DALEXtra::explain_*()`.

```r
# update main dependencies
install.packages("ingredients")
install.packages("iBreakDown")

# packages for explainer objects
install.packages("DALEX")
install.packages("DALEXtra")

# update main dependencies
install.packages("ingredients")
install.packages("iBreakDown")
```

### mlr [dashboard](https://modeloriented.github.io/modelStudio/mlr.html)
@@ -87,19 +85,16 @@ data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index, ]
test <- data[-index, ]
train <- data[index,]
test <- data[-index,]

# mlr ClassifTask takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- makeClassifTask(id = "titanic",
data = train,
target = "survived")
task <- makeClassifTask(id = "titanic", data = train, target = "survived")

learner <- makeLearner("classif.ranger",
predict.type = "prob")
learner <- makeLearner("classif.ranger", predict.type = "prob")

model <- train(learner, task)

@@ -110,7 +105,7 @@ explainer <- explain_mlr(model,
label = "mlr")

# pick observations
new_observation <- test[1:2, ]
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
@@ -132,17 +127,18 @@ data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index, ]
test <- data[-index, ]
train <- data[index,]
test <- data[-index,]

train_matrix <- model.matrix(survived ~.-1, train)
test_matrix <- model.matrix(survived ~.-1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(eta = 0.01, subsample = 0.6, max_depth = 7, min_child_weight = 3,
objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 1000)

params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")

model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
Expand All @@ -151,7 +147,7 @@ explainer <- explain(model,
label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2,,drop=FALSE]
new_observation <- test_matrix[1:2, , drop=FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
@@ -170,6 +166,11 @@ pip3 install dalex --force

Use `pickle` Python module and `reticulate` R package to easily make a studio for a model.

```{r eval = FALSE}
# package for pickle load
install.packages("reticulate")
```

In this example we will fit a `Pipeline MLPClassifier` model on `titanic` data.

First, use `dalex` in Python:
@@ -193,45 +194,47 @@ y = data.survived
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numeric_features = ['age', 'fare', 'sibsp', 'parch']
numeric_transformer = Pipeline(
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
steps=[
('imputer', SimpleImputer(strategy='median')),
('scaler', StandardScaler())
]
]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
steps=[
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
('onehot', OneHotEncoder(handle_unknown='ignore'))
]
]
)

preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('num', numerical_transformer, numerical_features),
('cat', categorical_transformer, categorical_features)
]
]
)

classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500)

model = Pipeline(
steps=[
('preprocessor', preprocessor),
('classifier', MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500))
]
('classifier', classifier)
]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, X_test, y_test, label = 'scikit-learn')
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

#! remove residual_function before dump !
explainer.residual_function = None

# pack the explainer into a pickle file
import pickle
pickle_out = open("explainer_scikitlearn.pickle","wb")
pickle_out = open('explainer_scikitlearn.pickle', 'wb')
pickle.dump(explainer, pickle_out)
pickle_out.close()
```
@@ -241,7 +244,7 @@ Then, use `modelStudio` in R:
```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object('explainer_scikitlearn.pickle', pickle = "pickle")
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
@@ -261,9 +264,9 @@ or with [`r2d3::save_d3_html()`](https://rstudio.github.io/r2d3/articles/publish
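
A hedged sketch of that saving step (the output file name is illustrative; `modelStudio()` returns an `r2d3` object, which `r2d3::save_d3_html()` can write to a standalone HTML file):

```r
library(modelStudio)
library(r2d3)

# assuming an explainer created as in the examples above
ms <- modelStudio(explainer)

# save the dashboard as a self-contained HTML file (file name is illustrative)
save_d3_html(ms, file = "modelStudio_dashboard.html")
```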

- Theoretical introduction to the plots: [Explanatory Model Analysis. Explore, Explain and Examine Predictive Models.](https://pbiecek.github.io/ema)

- Vignette: [modelStudio - R & python examples](https://modeloriented.github.io/modelStudio/articles/vignette_examples.html)
- Vignette: [modelStudio - R & Python examples](https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html)

- Vignette: [modelStudio - perks and features](https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html)
- Vignette: [modelStudio - perks and features](https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html)

- Conference poster: [MLinPL2019](misc/MLinPL2019_modelStudio_poster.pdf)
