tidymodels · topepo · Jul 2, 2020 · Jun 19, 2020 · Jun 22, 2020 · Jun 22, 2020
diff --git a/R/descriptors.R b/R/descriptors.R
@@ -26,28 +26,28 @@
 #'   column, `..y`.
 #'   }
 #'
-#' For example, if you use the model formula `Sepal.Width ~ .` with the `iris`
-#'  data, the values would be
+#' For example, if you use the model formula `circumference ~ .` with the
+#' built-in `Orange` data, the values would be
 #' \preformatted{
-#'  .preds() =   4          (the 4 columns in `iris`)
-#'  .cols()  =   5          (3 numeric columns + 2 from Species dummy variables)
-#'  .obs()   = 150
+#'  .preds() =   2          (the 2 remaining columns in `Orange`)
+#'  .cols()  =   5          (1 numeric column + 4 from Tree dummy variables)
+#'  .obs()   =  35
 #'  .lvls()  =  NA          (no factor outcome)
-#'  .facts() =   1          (the Species predictor)
-#'  .y()     = <vector>     (Sepal.Width as a vector)
-#'  .x()     = <data.frame> (The other 4 columns as a data frame)
+#'  .facts() =   1          (the Tree predictor)
+#'  .y()     = <vector>     (circumference as a vector)
+#'  .x()     = <data.frame> (The other 2 columns as a data frame)
 #'  .dat()   = <data.frame> (The full data set)
 #' }
 #'
-#' If the formula `Species ~ .` where used:
+#' If the formula `Tree ~ .` were used:
 #' \preformatted{
-#'  .preds() =   4          (the 4 numeric columns in `iris`)
-#'  .cols()  =   4          (same)
-#'  .obs()   = 150
-#'  .lvls()  =  c(setosa = 50, versicolor = 50, virginica = 50)
+#'  .preds() =   2          (the 2 numeric columns in `Orange`)
+#'  .cols()  =   2          (same)
+#'  .obs()   =  35
+#'  .lvls()  =  c("3" = 7, "1" = 7, "5" = 7, "2" = 7, "4" = 7)
 #'  .facts() =   0
-#'  .y()     = <vector>     (Species as a vector)
-#'  .x()     = <data.frame> (The other 4 columns as a data frame)
+#'  .y()     = <vector>     (Tree as a vector)
+#'  .x()     = <data.frame> (The other 2 columns as a data frame)
 #'  .dat()   = <data.frame> (The full data set)
 #' }
 #'

diff --git a/R/model_object_docs.R b/R/model_object_docs.R
@@ -53,18 +53,18 @@
 #' `parsnip` model functions do not do this. For example, using
 #'
 #'\preformatted{
-#'  rand_forest(mtry = ncol(iris) - 1)
+#'  rand_forest(mtry = ncol(mtcars) - 1)
 #' }
 #'
-#' **does not** execute `ncol(iris) - 1` when creating the specification.
+#' **does not** execute `ncol(mtcars) - 1` when creating the specification.
 #' This can be seen in the output:
 #'
 #'\preformatted{
-#'  > rand_forest(mtry = ncol(iris) - 1)
+#'  > rand_forest(mtry = ncol(mtcars) - 1)
 #'  Random Forest Model Specification (unknown)
 #'
 #'  Main Arguments:
-#'    mtry = ncol(iris) - 1
+#'    mtry = ncol(mtcars) - 1
 #'}
 #'
 #' The model functions save the argument _expressions_ and their
@@ -102,14 +102,14 @@
 #'  object is small. For example, using
 #'
 #'\preformatted{
-#'  rand_forest(mtry = ncol(!!iris) - 1)
+#'  rand_forest(mtry = ncol(!!mtcars) - 1)
 #' }
 #'
 #' would work (and be reproducible between sessions) but embeds
-#' the entire iris data set into the `mtry` expression:
+#' the entire `mtcars` data set into the `mtry` expression:
 #'
 #'\preformatted{
-#'  > rand_forest(mtry = ncol(!!iris) - 1)
+#'  > rand_forest(mtry = ncol(!!mtcars) - 1)
 #'  Random Forest Model Specification (unknown)
 #'
 #'  Main Arguments:
@@ -120,14 +120,14 @@
 #'  it, this wouldn't be too bad:
 #'
 #'\preformatted{
-#'  > mtry_val <- ncol(iris) - 1
+#'  > mtry_val <- ncol(mtcars) - 1
 #'  > mtry_val
-#'  [1] 4
+#'  [1] 10
 #'  > rand_forest(mtry = !!mtry_val)
 #'  Random Forest Model Specification (unknown)
 #'
 #'  Main Arguments:
-#'    mtry = 4
+#'    mtry = 10
 #'}
 #'
 #' More information on quosures and quasiquotation can be found at

diff --git a/R/nullmodel.R b/R/nullmodel.R
@@ -182,7 +182,6 @@ null_model <-
 #' @return A tibble with column `value`.
 #' @export
 #' @examples
-#' nullmodel(iris[,-5], iris$Species) %>% tidy()
 #'
 #' nullmodel(mtcars[,-1], mtcars$mpg) %>% tidy()
 

diff --git a/man/descriptors.Rd b/man/descriptors.Rd
diff --git a/man/model_spec.Rd b/man/model_spec.Rd
diff --git a/man/tidy.nullmodel.Rd b/man/tidy.nullmodel.Rd
diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R
@@ -2,6 +2,7 @@ library(modeldata)
 
 data("wa_churn")
 data("lending_club")
+data("hpc_data")
 
 # ------------------------------------------------------------------------------
 

diff --git a/tests/testthat/test_adds.R b/tests/testthat/test_adds.R
@@ -4,14 +4,16 @@ library(dplyr)
 
 context("adding functions")
 source("helpers.R")
+source(test_path("helper-objects.R"))
+hpc <- hpc_data[1:150, c(2:5, 8)]
 
 # ------------------------------------------------------------------------------
 
 test_that('adding row indicies', {
-  iris_2 <- iris %>% add_rowindex()
-  expect_true(nrow(iris_2) == 150)
-  expect_true(sum(names(iris_2) == ".row") == 1)
-  expect_true(is.integer(iris_2$.row))
+  hpc_2 <- hpc %>% add_rowindex()
+  expect_true(nrow(hpc_2) == 150)
+  expect_true(sum(names(hpc_2) == ".row") == 1)
+  expect_true(is.integer(hpc_2$.row))
 
   mtcar_2 <- dplyr::as_tibble(mtcars) %>% dplyr::slice(0) %>% add_rowindex()
   expect_true(nrow(mtcar_2) == 0)

diff --git a/tests/testthat/test_boost_tree.R b/tests/testthat/test_boost_tree.R
@@ -6,6 +6,8 @@ library(rlang)
 
 context("boosted trees")
 source("helpers.R")
+source(test_path("helper-objects.R"))
+hpc <- hpc_data[1:150, c(2:5, 8)]
 
 # ------------------------------------------------------------------------------
 
@@ -134,11 +136,11 @@ test_that('bad input', {
   expect_error(boost_tree(mode = "bogus"))
   expect_error({
     bt <- boost_tree(trees = -1) %>% set_engine("xgboost")
-    fit(bt, Species ~ ., iris)
+    fit(bt, class ~ ., hpc)
   })
   expect_error({
     bt <- boost_tree(min_n = -10)  %>% set_engine("xgboost")
-    fit(bt, Species ~ ., iris)
+    fit(bt, class ~ ., hpc)
   })
   expect_message(translate(boost_tree(mode = "classification"), engine = NULL))
   expect_error(translate(boost_tree(formula = y ~ x)))

diff --git a/tests/testthat/test_boost_tree_spark.R b/tests/testthat/test_boost_tree_spark.R
@@ -6,21 +6,21 @@ library(dplyr)
 
 context("boosted tree execution with spark")
 source(test_path("helper-objects.R"))
+hpc <- hpc_data[1:150, c(2:5, 8)]
 
 # ------------------------------------------------------------------------------
 
 test_that('spark execution', {
 
   skip_if_not_installed("sparklyr")
-
   library(sparklyr)
 
   sc <- try(spark_connect(master = "local"), silent = TRUE)
 
   skip_if(inherits(sc, "try-error"))
 
-  iris_bt_tr <- copy_to(sc, iris[-(1:4),   ], "iris_bt_tr", overwrite = TRUE)
-  iris_bt_te <- copy_to(sc, iris[  1:4 , -1], "iris_bt_te", overwrite = TRUE)
+  hpc_bt_tr <- copy_to(sc, hpc[-(1:4),   ], "hpc_bt_tr", overwrite = TRUE)
+  hpc_bt_te <- copy_to(sc, hpc[  1:4 , -1], "hpc_bt_te", overwrite = TRUE)
 
   # ----------------------------------------------------------------------------
 
@@ -30,8 +30,8 @@ test_that('spark execution', {
         boost_tree(trees = 5, mode = "regression") %>%
           set_engine("spark", seed = 12),
         control = ctrl,
-        Sepal_Length ~ .,
-        data = iris_bt_tr
+        compounds ~ .,
+        data = hpc_bt_tr
       ),
     regexp = NA
   )
@@ -43,29 +43,29 @@ test_that('spark execution', {
         boost_tree(trees = 5, mode = "regression") %>%
           set_engine("spark", seed = 12),
         control = ctrl,
-        Sepal_Length ~ .,
-        data = iris_bt_tr
+        compounds ~ .,
+        data = hpc_bt_tr
       ),
     regexp = NA
   )
 
   expect_error(
-    spark_reg_pred <- predict(spark_reg_fit, iris_bt_te),
+    spark_reg_pred <- predict(spark_reg_fit, hpc_bt_te),
     regexp = NA
   )
 
   expect_error(
-    spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, iris_bt_te),
+    spark_reg_pred_num <- parsnip:::predict_numeric.model_fit(spark_reg_fit, hpc_bt_te),
     regexp = NA
   )
 
   expect_error(
-    spark_reg_dup <- predict(spark_reg_fit_dup, iris_bt_te),
+    spark_reg_dup <- predict(spark_reg_fit_dup, hpc_bt_te),
     regexp = NA
   )
 
   expect_error(
-    spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, iris_bt_te),
+    spark_reg_num_dup <- parsnip:::predict_numeric.model_fit(spark_reg_fit_dup, hpc_bt_te),
     regexp = NA
   )