update & test documentation #65
hbaniecki committed Apr 11, 2020
1 parent 3c8da6b commit 58eabcc
Showing 16 changed files with 485 additions and 429 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
@@ -18,3 +18,4 @@ modelStudio_my_test.R
^LICENSE$
^CONTRIBUTING.md
^\.github$
^pickle$
4 changes: 1 addition & 3 deletions .github/workflows/R-CMD-check.yaml
@@ -27,9 +27,9 @@ jobs:
config:
- {os: windows-latest, r: '3.6'}
- {os: macOS-latest, r: '3.6'}
- {os: macOS-latest, r: 'devel'}
- {os: ubuntu-16.04, r: '3.5', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
- {os: ubuntu-16.04, r: '3.6', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"}
#- {os: macOS-latest, r: 'devel'} this is bugged

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
@@ -71,8 +71,6 @@ jobs:
- name: Install dependencies
run: |
remotes::install_deps(dependencies = TRUE)
remotes::install_github("https://github.com/r-lib/xml2")
remotes::install_cran("processx")
remotes::install_cran("rcmdcheck")
shell: Rscript {0}

2 changes: 2 additions & 0 deletions .gitignore
@@ -45,3 +45,5 @@ modelStudio.Rproj
#*.html

docs/*

*.pickle
51 changes: 32 additions & 19 deletions R/modelStudio.R
@@ -38,15 +38,15 @@
#' @references
#'
#' \itemize{
#' \item Wrapper for the function is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item The input object is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item Feature Importance, Ceteris Paribus, Partial Dependence and Accumulated Dependence plots
#' are implemented in \href{https://modeloriented.github.io/ingredients/}{\bold{ingredients}}
#' \item Break Down and Shapley Values plots are implemented in \href{https://modeloriented.github.io/iBreakDown/}{\bold{iBreakDown}}
#' }
#'
#' @seealso
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/vignette_examples.html}{\bold{modelStudio - R & python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html}{\bold{modelStudio - perks and features}}
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html}{\bold{modelStudio - R & Python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html}{\bold{modelStudio - perks and features}}
#'
#' @examples
#' library("DALEX")
@@ -55,9 +55,7 @@
#' #:# ex1 classification on 'titanic_imputed' dataset
#'
#' # fit a model
#' model_titanic <- glm(survived ~.,
#' data = titanic_imputed,
#' family = "binomial")
#' model_titanic <- glm(survived ~., data = titanic_imputed, family = "binomial")
#'
#' # create an explainer for the model
#' explainer_titanic <- explain(model_titanic,
@@ -70,8 +68,9 @@
#' rownames(new_observations) <- c("Lucas","James")
#'
#' # make a studio for the model
#' modelStudio(explainer_titanic, new_observations,
#' N = 100, B = 10)
#' modelStudio(explainer_titanic,
#' new_observations,
#' N = 100, B = 10) # faster example
#'
#' \donttest{
#'
@@ -88,32 +87,46 @@
#' rownames(new_apartments) <- c("ap1","ap2")
#'
#' # change dashboard dimensions and animation length
#' modelStudio(explainer_apartments, new_apartments,
#' facet_dim = c(2, 3), time = 800)
#' modelStudio(explainer_apartments,
#' new_apartments,
#' facet_dim = c(2, 3),
#' time = 800)
#'
#' # add information about true labels
#' modelStudio(explainer_apartments, new_apartments,
#' new_observation_y = apartments[1:2, 1])
#' modelStudio(explainer_apartments,
#' new_apartments,
#' new_observation_y = new_apartments$m2.price)
#'
#' # don't compute EDA plots
#' modelStudio(explainer_apartments, eda = FALSE)
#' modelStudio(explainer_apartments,
#' eda = FALSE)
#'
#'
#' #:# ex3 xgboost model on 'HR' dataset
#' library("xgboost")
#'
#' model_matrix <- model.matrix(status == "fired" ~ . -1, HR)
#' data <- xgb.DMatrix(model_matrix, label = HR$status == "fired")
#' # fit a model
#' HR_matrix <- model.matrix(status == "fired" ~ . -1, HR)
#'
#' xgb_matrix <- xgb.DMatrix(HR_matrix, label = HR$status == "fired")
#'
#' params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")
#'
#' model_HR <- xgb.train(params, data, nrounds = 300)
#' model_HR <- xgb.train(params, xgb_matrix, nrounds = 300)
#'
#' # create an explainer for the model
#' explainer_HR <- explain(model_HR,
#' data = model_matrix,
#' y = HR$status == "fired")
#' data = HR_matrix,
#' y = HR$status == "fired",
#' label = "xgboost")
#'
#' modelStudio(explainer_HR)
#' # pick observations
#' new_observation <- HR_matrix[1:2, , drop=FALSE]
#' rownames(new_observation) <- c("id1", "id2")
#'
#' # make a studio for the model
#' modelStudio(explainer_HR,
#' new_observation)
#'
#' }
#'
24 changes: 13 additions & 11 deletions R/modelStudioOptions.R
@@ -48,35 +48,35 @@
#' @references
#'
#' \itemize{
#' \item Wrapper for the function is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item The input object is implemented in \href{https://modeloriented.github.io/DALEX/}{\bold{DALEX}}
#' \item Feature Importance, Ceteris Paribus, Partial Dependence and Accumulated Dependence plots
#' are implemented in \href{https://modeloriented.github.io/ingredients/}{\bold{ingredients}}
#' \item Break Down and Shapley Values plots are implemented in \href{https://modeloriented.github.io/iBreakDown/}{\bold{iBreakDown}}
#' }
#'
#' @seealso
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/vignette_examples.html}{\bold{modelStudio - R & python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html}{\bold{modelStudio - perks and features}}
#' Vignettes: \href{https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html}{\bold{modelStudio - R & Python examples}}
#' and \href{https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html}{\bold{modelStudio - perks and features}}
#'
#' @examples
#' library("DALEX")
#' library("modelStudio")
#'
#' # fit a model
#' model_apartments <- glm(m2.price ~. ,
#' data = apartments)
#' model_apartments <- glm(m2.price ~. , data = apartments)
#'
#' # create an explainer for the model
#' explainer_apartments <- DALEX::explain(model_apartments,
#' data = apartments,
#' y = apartments$m2.price)
#' y = apartments$m2.price,
#' label = "glm")
#'
#' # pick observations
#' new_apartments <- apartments[1:2,]
#' rownames(new_apartments) <- c("ap1","ap2")
#' new_observation <- apartments[1:2,]
#' rownames(new_observation) <- c("ap1","ap2")
#'
#' # modify default options
#' op <- modelStudioOptions(
#' new_options <- modelStudioOptions(
#' show_subtitle = TRUE,
#' bd_subtitle = "Hello World",
#' line_size = 5,
@@ -88,8 +88,10 @@
#' )
#'
#' # make a studio for the model
#' modelStudio(explainer_apartments, new_apartments,
#' N = 100, B = 10, options = op)
#' modelStudio(explainer_apartments,
#' new_observation,
#' options = new_options,
#' N = 100, B = 10) # faster example
#'
#' @export
#' @rdname modelStudioOptions
77 changes: 40 additions & 37 deletions README.md
@@ -10,13 +10,13 @@

The `modelStudio` package **automates the Explanatory Analysis of Machine Learning predictive models**. Generate advanced interactive and animated model explanations in the form of a **serverless HTML site** with only one line of code. This tool is model agnostic, therefore compatible with most of the black box predictive models and frameworks (e.g.&nbsp;`mlr/mlr3`, `xgboost`, `caret`, `h2o`, `scikit-learn`, `lightGBM`, `keras/tensorflow`).

The main `modelStudio()` function computes various (instance and dataset level) model explanations and produces an&nbsp**interactive,&nbsp;customisable dashboard made with D3.js**. It consists of multiple panels for plots with their short descriptions. Easily&nbsp;**save&nbsp;and&nbsp;share** the dashboard with others. Tools for model exploration unite with tools for EDA (Exploratory Data Analysis) to give a broad overview of the model behavior.
The main `modelStudio()` function computes various (instance and dataset level) model explanations and produces an&nbsp;**interactive,&nbsp;customisable dashboard made with D3.js**. It consists of multiple panels for plots with their short descriptions. Easily&nbsp;**save&nbsp;and&nbsp;share** the dashboard with others. Tools for model exploration unite with tools for EDA (Exploratory Data Analysis) to give a broad overview of the model behavior.
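
For orientation, a minimal hedged sketch of that one-line workflow (it assumes a `DALEX` explainer named `explainer` has already been created, as in the Getting Started example below):

```r
library(modelStudio)

# compute the explanations and render the interactive dashboard
modelStudio(explainer)
```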

<!--- [explain FIFA19](https://pbiecek.github.io/explainFIFA19/) &emsp; --->
<!--- [explain Lung Cancer](https://github.com/hbaniecki/transparent_xai/) &emsp; --->
&emsp; &emsp; &emsp; &emsp; &emsp; &emsp;
[**explain FIFA20**](https://pbiecek.github.io/explainFIFA20/) &emsp;
[**R & Python examples**](http://modelstudio.drwhy.ai/articles/vignette_examples.html) &emsp;
[**R & Python examples**](http://modelstudio.drwhy.ai/articles/ms-r-python-examples.html) &emsp;
[**More Resources**](http://modelstudio.drwhy.ai/#more-resources) &emsp;
[**FAQ & Troubleshooting**](https://github.com/ModelOriented/modelStudio/issues/54)

@@ -41,9 +41,7 @@ library("DALEX")
library("modelStudio")

# fit a model
model <- glm(survived ~.,
data = titanic_imputed,
family = "binomial")
model <- glm(survived ~., data = titanic_imputed, family = "binomial")

# create an explainer for the model
explainer <- explain(model,
@@ -59,18 +57,18 @@ modelStudio(explainer)

![](man/figures/long.gif)

## R & Python Examples [more](http://modelstudio.drwhy.ai/articles/vignette_examples.html)
## R & Python Examples [more](http://modelstudio.drwhy.ai/articles/ms-r-python-examples.html)

The `modelStudio()` function uses `DALEX` explainers created with `DALEX::explain()` or `DALEXtra::explain_*()`.

```r
# update main dependencies
install.packages("ingredients")
install.packages("iBreakDown")

# packages for explainer objects
install.packages("DALEX")
install.packages("DALEXtra")

# update main dependencies
install.packages("ingredients")
install.packages("iBreakDown")
```

### mlr [dashboard](https://modeloriented.github.io/modelStudio/mlr.html)
@@ -87,19 +85,16 @@ data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index, ]
test <- data[-index, ]
train <- data[index,]
test <- data[-index,]

# mlr ClassifTask takes target as factor
train$survived <- as.factor(train$survived)

# fit a model
task <- makeClassifTask(id = "titanic",
data = train,
target = "survived")
task <- makeClassifTask(id = "titanic", data = train, target = "survived")

learner <- makeLearner("classif.ranger",
predict.type = "prob")
learner <- makeLearner("classif.ranger", predict.type = "prob")

model <- train(learner, task)

@@ -110,7 +105,7 @@ explainer <- explain_mlr(model,
label = "mlr")

# pick observations
new_observation <- test[1:2, ]
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
@@ -132,17 +127,18 @@ data <- DALEX::titanic_imputed

# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index, ]
test <- data[-index, ]
train <- data[index,]
test <- data[-index,]

train_matrix <- model.matrix(survived ~.-1, train)
test_matrix <- model.matrix(survived ~.-1, test)

# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(eta = 0.01, subsample = 0.6, max_depth = 7, min_child_weight = 3,
objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 1000)

params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")

model <- xgb.train(params, xgb_matrix, nrounds = 500)

# create an explainer for the model
explainer <- explain(model,
Expand All @@ -151,7 +147,7 @@ explainer <- explain(model,
label = "xgboost")

# pick observations
new_observation <- test_matrix[1:2,,drop=FALSE]
new_observation <- test_matrix[1:2, , drop=FALSE]
rownames(new_observation) <- c("id1", "id2")

# make a studio for the model
@@ -170,6 +166,11 @@ pip3 install dalex --force

Use `pickle` Python module and `reticulate` R package to easily make a studio for a model.

```{r eval = FALSE}
# package for pickle load
install.packages("reticulate")
```

In this example we will fit a `Pipeline MLPClassifier` model on `titanic` data.

First, use `dalex` in Python:
@@ -193,45 +194,47 @@ y = data.survived
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit a pipeline model
numeric_features = ['age', 'fare', 'sibsp', 'parch']
numeric_transformer = Pipeline(
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
steps=[
('imputer', SimpleImputer(strategy='median')),
('scaler', StandardScaler())
]
]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
steps=[
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
('onehot', OneHotEncoder(handle_unknown='ignore'))
]
]
)

preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('num', numerical_transformer, numerical_features),
('cat', categorical_transformer, categorical_features)
]
]
)

classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500)

model = Pipeline(
steps=[
('preprocessor', preprocessor),
('classifier', MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500))
]
('classifier', classifier)
]
)
model.fit(X_train, y_train)

# create an explainer for the model
explainer = dx.Explainer(model, X_test, y_test, label = 'scikit-learn')
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')

#! remove residual_function before dump !
explainer.residual_function = None

# pack the explainer into a pickle file
import pickle
pickle_out = open("explainer_scikitlearn.pickle","wb")
pickle_out = open('explainer_scikitlearn.pickle', 'wb')
pickle.dump(explainer, pickle_out)
pickle_out.close()
```
@@ -241,7 +244,7 @@ Then, use `modelStudio` in R:
```r
# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object('explainer_scikitlearn.pickle', pickle = "pickle")
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")

# make a studio for the model
library(modelStudio)
@@ -261,9 +264,9 @@ or with [`r2d3::save_d3_html()`](https://rstudio.github.io/r2d3/articles/publish
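
A hedged sketch of that saving step (the output file name is illustrative; `modelStudio()` returns an `r2d3` object, which `r2d3::save_d3_html()` can write to a standalone HTML file):

```r
library(modelStudio)
library(r2d3)

# assuming an explainer created as in the examples above
ms <- modelStudio(explainer)

# save the dashboard as a self-contained HTML file (file name is illustrative)
save_d3_html(ms, file = "modelStudio_dashboard.html")
```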

- Theoretical introduction to the plots: [Explanatory Model Analysis. Explore, Explain and Examine Predictive Models.](https://pbiecek.github.io/ema)

- Vignette: [modelStudio - R & python examples](https://modeloriented.github.io/modelStudio/articles/vignette_examples.html)
- Vignette: [modelStudio - R & Python examples](https://modeloriented.github.io/modelStudio/articles/ms-r-python-examples.html)

- Vignette: [modelStudio - perks and features](https://modeloriented.github.io/modelStudio/articles/vignette_modelStudio.html)
- Vignette: [modelStudio - perks and features](https://modeloriented.github.io/modelStudio/articles/ms-perks-features.html)

- Conference poster: [MLinPL2019](misc/MLinPL2019_modelStudio_poster.pdf)
