Merge pull request #342 from tidymodels/0-1-2-rc

topepo · web-flow · commit 58a74b4e320e · 2020-07-03T14:27:41.000-04:00
0.1.2 release candidate
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: parsnip
-Version: 0.1.1.9000
+Version: 0.1.2
 Title: A Common API to Modeling and Analysis Functions
 Description: A common interface is provided to allow users to specify a model without having to remember the different argument names across different functions or computational engines (e.g. 'R', 'Spark', 'Stan', etc). 
 Authors@R: c(
diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,4 @@
-# parsnip (development version)
+# parsnip 0.1.2
 
 ## Breaking Changes
 
diff --git a/R/boost_tree.R b/R/boost_tree.R
@@ -476,7 +476,7 @@ xgb_by_tree <- function(tree, object, new_data, type, ...) {
 #' @param weights An optional numeric vector of case weights. Note
 #'  that the data used for the case weights will not be used as a
 #'  splitting variable in the model (see
-#'  \url{http://www.rulequest.com/see5-win.html#CASEWEIGHT} for
+#'  \url{http://www.rulequest.com/see5-win.html} for
 #'  Quinlan's notes on case weights).
 #' @param minCases An integer for the smallest number of samples
 #'  that must be put in at least two of the splits.
diff --git a/README.md b/README.md
@@ -144,7 +144,7 @@ rand_forest(mtry = 10, trees = 2000) %>%
 #> Ranger result
 #> 
 #> Call:
-#>  ranger::ranger(formula = formula, data = data, mtry = ~10, num.trees = ~2000,      importance = ~"impurity", num.threads = 1, verbose = FALSE,      seed = sample.int(10^5, 1)) 
+#>  ranger::ranger(formula = mpg ~ ., data = data, mtry = ~10, num.trees = ~2000,      importance = ~"impurity", num.threads = 1, verbose = FALSE,      seed = sample.int(10^5, 1)) 
 #> 
 #> Type:                             Regression 
 #> Number of trees:                  2000 
@@ -154,8 +154,8 @@ rand_forest(mtry = 10, trees = 2000) %>%
 #> Target node size:                 5 
 #> Variable importance mode:         impurity 
 #> Splitrule:                        variance 
-#> OOB prediction error (MSE):       5.911312 
-#> R squared (OOB):                  0.837262
+#> OOB prediction error (MSE):       5.699772 
+#> R squared (OOB):                  0.8430857
 ```
 
 A list of all `parsnip` models across different CRAN packages can be
diff --git a/man/C5.0_train.Rd b/man/C5.0_train.Rd
diff --git a/man/contr_one_hot.Rd b/man/contr_one_hot.Rd
diff --git a/tests/testthat/test_linear_reg_glmnet.R b/tests/testthat/test_linear_reg_glmnet.R
@@ -68,8 +68,11 @@ test_that('glmnet prediction, single lambda', {
     y = hpc$input_fields
   )
 
-  uni_pred <- c(5.05125589060219, 4.86977761622526, 4.90912345599309, 4.93931874108359,
-                5.08755154547758)
+  # glmn_mod <- glmnet::glmnet(x = as.matrix(hpc[, num_pred]), y = hpc$input_fields,
+  #                            alpha = .3, nlambda = 15)
+
+  uni_pred <- c(640.599944271351, 196.646976529848, 186.279646400216, 194.673852228774,
+                198.126819755653)
 
   expect_equal(uni_pred, predict(res_xy, hpc[1:5, num_pred])$.pred, tolerance = 0.0001)
 
@@ -80,8 +83,8 @@ test_that('glmnet prediction, single lambda', {
     control = ctrl
   )
 
-  form_pred <- c(5.23960117346944, 5.08769210344022, 5.15129212608077, 5.12000510716518,
-                 5.26736239856889)
+  form_pred <- c(570.504089227118, 162.413061474088, 167.022896537861, 157.609071878082,
+                 165.887783741483)
 
   expect_equal(form_pred, predict(res_form, hpc[1:5,])$.pred, tolerance = 0.0001)
 })
@@ -118,16 +121,16 @@ test_that('glmnet prediction, multiple lambda', {
   mult_pred <-
     tibble::tribble(
       ~penalty,           ~.pred,
-      0.01, 5.01352459498158,
-      0.1, 5.05124049139868,
-      0.01, 4.71767499960808,
-      0.1, 4.87103404621362,
-      0.01,  4.7791916685127,
-      0.1, 4.91028250633598,
-      0.01, 4.83366808792755,
-      0.1,  4.9399094532023,
-      0.01, 5.07269451405628,
-      0.1, 5.08728178043569
+      0.01, 639.672880668187,
+      0.1, 639.672880668187,
+      0.01, 197.744613311359,
+      0.1, 197.744613311359,
+      0.01, 187.737940787615,
+      0.1, 187.737940787615,
+      0.01, 195.780487678662,
+      0.1, 195.780487678662,
+      0.01, 199.217707535882,
+      0.1, 199.217707535882
     )
 
   expect_equal(
@@ -163,16 +166,16 @@ test_that('glmnet prediction, multiple lambda', {
   form_pred <-
     tibble::tribble(
       ~penalty,           ~.pred,
-      0.01, 5.09237402805557,
-      0.1, 5.24228948237804,
-      0.01, 4.75071416991856,
-      0.1, 5.09448280355765,
-      0.01, 4.89375747015535,
-      0.1, 5.15636527125752,
-      0.01, 4.82338959520112,
-      0.1, 5.12592317615935,
-      0.01, 5.15481201301174,
-      0.1, 5.26930099973607
+      0.01, 570.474473760044,
+      0.1, 570.474473760044,
+      0.01, 164.040104978709,
+      0.1, 164.040104978709,
+      0.01, 168.709676954287,
+      0.1, 168.709676954287,
+      0.01, 159.173862504055,
+      0.1, 159.173862504055,
+      0.01, 167.559854709074,
+      0.1, 167.559854709074
     )
 
   expect_equal(
@@ -190,7 +193,7 @@ test_that('glmnet prediction, all lambda', {
   skip_if(run_glmnet)
 
   hpc_all <- linear_reg(mixture = .3) %>%
-    set_engine("glmnet")
+    set_engine("glmnet", nlambda = 7)
 
   res_xy <- fit_xy(
     hpc_all,
@@ -202,7 +205,7 @@ test_that('glmnet prediction, all lambda', {
   all_pred <- predict(res_xy$fit, newx = as.matrix(hpc[1:5, num_pred]))
   all_pred <- stack(as.data.frame(all_pred))
   all_pred$penalty <- rep(res_xy$fit$lambda, each = 5)
-  all_pred$rows <- rep(1:5, 2)
+  all_pred$rows <- rep(1:5, length(res_xy$fit$lambda))
   all_pred <- all_pred[order(all_pred$rows, all_pred$penalty), ]
   all_pred <- all_pred[, c("penalty", "values")]
   names(all_pred) <- c("penalty", ".pred")
@@ -223,7 +226,7 @@ test_that('glmnet prediction, all lambda', {
   form_pred <- predict(res_form$fit, newx = form_mat)
   form_pred <- stack(as.data.frame(form_pred))
   form_pred$penalty <- rep(res_form$fit$lambda, each = 5)
-  form_pred$rows <- rep(1:5, 2)
+  form_pred$rows <- rep(1:5, length(res_form$fit$lambda))
   form_pred <- form_pred[order(form_pred$rows, form_pred$penalty), ]
   form_pred <- form_pred[, c("penalty", "values")]
   names(form_pred) <- c("penalty", ".pred")
diff --git a/tests/testthat/test_linear_reg_stan.R b/tests/testthat/test_linear_reg_stan.R
@@ -3,7 +3,7 @@ library(parsnip)
 library(rlang)
 
 source(test_path("helper-objects.R"))
-hpc <- hpc_data[1:150, c(2:5, 8)]
+hpc <- hpc_data[, c(2:5, 8)]
 
 # ------------------------------------------------------------------------------
 
@@ -62,10 +62,10 @@ test_that('stan prediction', {
   skip_if_not_installed("rstanarm")
   skip_on_cran()
 
-  uni_pred <- c(5.01531691055198, 4.6896592504705, 4.74907435900005, 4.82563873798984,
-                5.08044844256827)
-  inl_pred <- c(3.47062722437493, 3.38380776677489, 3.29336980560884, 3.24669710332179,
-                3.42765162180813)
+  uni_pred <- c(1691.46306020449, 1494.27323520418, 1522.36011539284, 1493.39683598195,
+                1494.93053462084)
+  inl_pred <- c(429.164145548939, 256.32488428038, 254.949927688403, 255.007333947447,
+                255.336665165556)
 
   res_xy <- fit_xy(
     linear_reg() %>%
@@ -99,27 +99,29 @@ test_that('stan intervals', {
     control = quiet_ctrl
   )
 
+  set.seed(1231)
   confidence_parsnip <-
     predict(res_xy,
             new_data = hpc[1:5,],
             type = "conf_int",
             level = 0.93)
 
+  set.seed(1231)
   prediction_parsnip <-
     predict(res_xy,
             new_data = hpc[1:5,],
             type = "pred_int",
             level = 0.93)
 
-  ci_lower <- c(4.93164991101342, 4.60197941230393, 4.6671442757811, 4.74402724639963,
-               4.99248110476701)
-  ci_upper <- c(5.1002837047058, 4.77617561853506, 4.83183673602725, 4.90844811805409,
-                5.16979395659009)
+  ci_lower <- c(1577.25718753727, 1382.58210286254, 1399.96490471468, 1381.56774986889,
+                1383.25519963864)
+  ci_upper <- c(1809.28331613624, 1609.11912475981, 1646.44852457781, 1608.3327281785,
+                1609.4796390366)
 
-  pi_lower <- c(4.43202758985944, 4.09957733046886, 4.17664779714598, 4.24948546338885,
-                4.50058914781073)
-  pi_upper <- c(5.59783267637042, 5.25976504318669, 5.33296516452929, 5.41050668003565,
-                5.66355828140989)
+  pi_lower <- c(-4960.33135373564, -5123.82860109357, -5063.60881734505, -5341.21637448872,
+                -5184.63627366821)
+  pi_upper <- c(8345.56815544477, 7954.98392035813, 7890.10036321417, 7970.64062851536,
+                8247.10241974192)
 
   expect_equivalent(confidence_parsnip$.pred_lower, ci_lower, tolerance = 1e-2)
   expect_equivalent(confidence_parsnip$.pred_upper, ci_upper, tolerance = 1e-2)
diff --git a/tests/testthat/test_multinom_reg_glmnet.R b/tests/testthat/test_multinom_reg_glmnet.R
@@ -2,12 +2,13 @@ library(testthat)
 library(parsnip)
 library(rlang)
 library(tibble)
+library(dplyr)
 
 # ------------------------------------------------------------------------------
 
 context("multinom regression execution with glmnet")
 source(test_path("helper-objects.R"))
-hpc <- hpc_data[1:150, c(2:5, 8)]
+hpc <- hpc_data[, c(2:5, 8)]
 
 rows <- c(1, 51, 101)
 
@@ -117,10 +118,14 @@ test_that('glmnet probabilities, mulitiple lambda', {
   names(mult_pred) <- NULL
   mult_pred <- tibble(.pred = mult_pred)
 
-  expect_equal(
-    mult_pred$.pred,
-    multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams, type = "prob")$.pred
-  )
+  multi_pred_res <- multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams, type = "prob")
+
+  for (i in seq_along(multi_pred_res$.pred)) {
+    expect_equal(
+      mult_pred      %>% dplyr::slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred")),
+      multi_pred_res %>% dplyr::slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred"))
+    )
+  }
 
   mult_class <- factor(names(mult_probs)[apply(mult_probs, 1, which.max)],
                        levels = xy_fit$lvl)
@@ -134,10 +139,14 @@ test_that('glmnet probabilities, mulitiple lambda', {
   names(mult_class) <- NULL
   mult_class <- tibble(.pred = mult_class)
 
-  expect_equal(
-    mult_class$.pred,
-    multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams)$.pred
-  )
+  mult_class_res <- multi_predict(xy_fit, hpc[rows, 1:4], penalty = lams)
+
+  for (i in seq_along(mult_class_res$.pred)) {
+    expect_equal(
+      mult_class     %>% slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred")),
+      mult_class_res %>% slice(i) %>% pull(.pred) %>% purrr::pluck(1) %>% dplyr::select(starts_with(".pred"))
+    )
+  }
 
   expect_error(
     multi_predict(xy_fit, newdata = hpc[rows, 1:4], penalty = lams),
@@ -157,7 +166,7 @@ test_that("class predictions are factors with all levels", {
   skip_if(run_glmnet)
 
   basic <- multinom_reg() %>% set_engine("glmnet") %>% fit(class ~ ., data = hpc)
-  nd <- hpc[hpc$class == "setosa", ]
+  nd <- hpc[hpc$class == "VF", ]
   yhat <- predict(basic, new_data = nd, penalty = .1)
   yhat_multi <- multi_predict(basic, new_data =  nd, penalty = .1)$.pred
   expect_is(yhat_multi[[1]]$.pred_class, "factor")

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# parsnip (development version)`
	`1`	`+# parsnip 0.1.2`
`2`	`2`
`3`	`3`	`## Breaking Changes`
`4`	`4`