Skip to content

Very experimental use of vctrs #4342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Imports:
scales (>= 0.5.0),
stats,
tibble,
vctrs,
withr (>= 2.0.0)
Suggests:
covr,
Expand Down
94 changes: 3 additions & 91 deletions R/compat-plyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ count <- function(df, vars = NULL, wt_var = NULL) {
# Create a shared unique id across two data frames such that common variable
# combinations in the two data frames get the same id
join_keys <- function(x, y, by) {
joint <- rbind_dfs(list(x[by], y[by]))
joint <- vctrs::vec_rbind(x[by], y[by])
keys <- id(joint, drop = TRUE)
n_x <- nrow(x)
n_y <- nrow(y)
Expand Down Expand Up @@ -249,95 +249,7 @@ round_any <- function(x, accuracy, f = round) {
if (!is.numeric(x)) abort("`x` must be numeric")
f(x/accuracy) * accuracy
}
#' Stack a list of data frames, matching columns by name
#'
#' Comparable to `plyr::rbind.fill`, `dplyr::bind_rows`, and
#' `data.table::rbindlist`. The data frames in `dfs` are placed one below the
#' other; the result has one column for every column name seen in any input,
#' and rows coming from a data frame that lacks a column hold `NA` there.
#'
#' Each output column is typed from the first data frame (in list order) that
#' contains it. Factor columns are gathered as character and turned back into
#' an ordered factor (when every data frame that has the column holds an
#' ordered factor) or a plain factor (when every one holds a factor), using
#' the pooled levels in order of first appearance; otherwise they stay
#' character. Values written into a `Date`/`POSIXct` column from a column of
#' another class are converted with the origins stored in `ggplot_global`.
#'
#' @param dfs A list of data frames
#'
#' @return A data.frame containing the union of the columns of the data frames
#'   in `dfs`, with as many rows as all of them combined
#'
#' @keywords internal
#' @noRd
#'
rbind_dfs <- function(dfs) {
  all_names <- unique(unlist(lapply(dfs, names)))
  row_counts <- vapply(dfs, .row_names_info, integer(1), type = 2L)
  n_total <- sum(row_counts)
  if (length(all_names) == 0) {
    return(new_data_frame(list(), n_total))
  }

  # TRUE when every data frame either lacks `col` or stores a value in it that
  # satisfies `is_kind`
  every_frame <- function(col, is_kind) {
    all(vapply(dfs, function(d) {
      v <- .subset2(d, col)
      is.null(v) || is_kind(v)
    }, logical(1)))
  }
  # Levels of `col` pooled over all data frames, in order of first appearance
  pooled_levels <- function(col) {
    unique(unlist(lapply(dfs, function(d) levels(.subset2(d, col)))))
  }

  # Pass 1: allocate an all-NA vector of length `n_total` per column, typed
  # after the first data frame in which that column occurs
  out <- list()
  is_allocated <- rep(FALSE, length(all_names))
  names(is_allocated) <- all_names
  ord_levels <- list()
  fct_levels <- list()
  for (frame in dfs) {
    fresh <- intersect(names(frame), all_names[!is_allocated])
    for (col in fresh) {
      if (is.factor(frame[[col]])) {
        if (every_frame(col, is.ordered)) {
          ord_levels[[col]] <- pooled_levels(col)
        } else if (every_frame(col, is.factor)) {
          fct_levels[[col]] <- pooled_levels(col)
        }
        # Factors are collected as strings; levels are re-applied at the end
        out[[col]] <- rep(NA_character_, n_total)
      } else {
        # `[1][NA]` yields a single NA that keeps the column's type and class
        na_template <- .subset2(frame, col)[1][NA]
        out[[col]] <- rep(na_template, n_total)
      }
    }
    is_allocated[fresh] <- TRUE
    if (all(is_allocated)) break
  }

  # Pass 2: copy each data frame's values into its slice of rows
  out_is_date <- vapply(out, inherits, logical(1), what = "Date")
  out_is_time <- vapply(out, inherits, logical(1), what = "POSIXct")
  first_row <- c(cumsum(row_counts) - row_counts + 1)
  for (i in seq_along(dfs)) {
    frame <- dfs[[i]]
    rows <- seq(first_row[i], length.out = row_counts[i])
    for (col in names(frame)) {
      src <- frame[[col]]
      src_is_date <- inherits(src, "Date")
      src_is_time <- inherits(src, "POSIXct")
      if (out_is_date[[col]] && !src_is_date) {
        piece <- as.Date(unclass(src), origin = ggplot_global$date_origin)
      } else if (out_is_time[[col]] && !src_is_time) {
        piece <- as.POSIXct(unclass(src), origin = ggplot_global$time_origin)
      } else if (src_is_date || src_is_time || inherits(src, "factor")) {
        piece <- as.character(src)
      } else {
        piece <- src
      }
      out[[col]][rows] <- piece
    }
  }

  # Pass 3: restore factor classes for the columns recorded in pass 1
  for (col in names(ord_levels)) {
    out[[col]] <- ordered(out[[col]], levels = ord_levels[[col]])
  }
  for (col in names(fct_levels)) {
    out[[col]] <- factor(out[[col]], levels = fct_levels[[col]])
  }

  attributes(out) <- list(
    class = "data.frame",
    names = names(out),
    row.names = .set_row_names(n_total)
  )
  out
}

#' Apply function to unique subsets of a data.frame
#'
#' This function is akin to `plyr::ddply`. It takes a single data.frame,
Expand Down Expand Up @@ -379,7 +291,7 @@ dapply <- function(df, by, fun, ..., drop = TRUE) {

ids <- id(grouping_cols, drop = drop)
group_rows <- split_with_index(seq_len(nrow(df)), ids)
rbind_dfs(lapply(seq_along(group_rows), function(i) {
vctrs::vec_rbind(!!!lapply(seq_along(group_rows), function(i) {
cur_data <- df_rows(df, group_rows[[i]])
apply_fun(cur_data)
}))
Expand Down
2 changes: 1 addition & 1 deletion R/facet-.r
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ combine_vars <- function(data, env = emptyenv(), vars = NULL, drop = TRUE) {
))
}

base <- unique(rbind_dfs(values[has_all]))
base <- unique(vctrs::vec_rbind(!!!values[has_all]))
if (!drop) {
base <- unique_combs(base)
}
Expand Down
8 changes: 4 additions & 4 deletions R/facet-grid-.r
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ FacetGrid <- ggproto("FacetGrid", Facet,

base_rows <- combine_vars(data, params$plot_env, rows, drop = params$drop)
if (!params$as.table) {
rev_order <- function(x) factor(x, levels = rev(ulevels(x)))
base_rows[] <- lapply(base_rows, rev_order)
# rev_order <- function(x) factor(x, levels = rev(ulevels(x)))
# base_rows[] <- lapply(base_rows, rev_order)
}
base_cols <- combine_vars(data, params$plot_env, cols, drop = params$drop)
base <- df.grid(base_rows, base_cols)
Expand Down Expand Up @@ -274,8 +274,8 @@ FacetGrid <- ggproto("FacetGrid", Facet,
# Special case of no faceting
data$PANEL <- NO_PANEL
} else {
facet_vals[] <- lapply(facet_vals[], as.factor)
facet_vals[] <- lapply(facet_vals[], addNA, ifany = TRUE)
# facet_vals[] <- lapply(facet_vals[], as.factor)
# facet_vals[] <- lapply(facet_vals[], addNA, ifany = TRUE)

keys <- join_keys(facet_vals, layout, by = vars)

Expand Down
2 changes: 1 addition & 1 deletion R/facet-wrap.r
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ FacetWrap <- ggproto("FacetWrap", Facet,
}

facet_vals <- eval_facets(vars, data, params$.possible_columns)
facet_vals[] <- lapply(facet_vals[], as.factor)
# facet_vals[] <- lapply(facet_vals[], as.factor)

missing_facets <- setdiff(names(vars), names(facet_vals))
if (length(missing_facets) > 0) {
Expand Down
10 changes: 5 additions & 5 deletions R/fortify-spatial.r
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fortify.SpatialPolygonsDataFrame <- function(model, data, region = NULL, ...) {
attr <- as.data.frame(model)
# If not specified, split into regions based on polygons
if (is.null(region)) {
coords <- rbind_dfs(lapply(model@polygons,fortify))
coords <- vctrs::vec_rbind(!!!lapply(model@polygons,fortify))
message("Regions defined for each Polygons")
} else {
cp <- sp::polygons(model)
Expand All @@ -42,15 +42,15 @@ fortify.SpatialPolygonsDataFrame <- function(model, data, region = NULL, ...) {
#' @export
#' @method fortify SpatialPolygons
fortify.SpatialPolygons <- function(model, data, ...) {
rbind_dfs(lapply(model@polygons, fortify))
vctrs::vec_rbind(!!!lapply(model@polygons, fortify))
}

#' @rdname fortify.sp
#' @export
#' @method fortify Polygons
fortify.Polygons <- function(model, data, ...) {
subpolys <- model@Polygons
pieces <- rbind_dfs(lapply(seq_along(subpolys), function(i) {
pieces <- vctrs::vec_rbind(!!!lapply(seq_along(subpolys), function(i) {
df <- fortify(subpolys[[model@plotOrder[i]]])
df$piece <- i
df
Expand Down Expand Up @@ -78,15 +78,15 @@ fortify.Polygon <- function(model, data, ...) {
#' @export
#' @method fortify SpatialLinesDataFrame
fortify.SpatialLinesDataFrame <- function(model, data, ...) {
rbind_dfs(lapply(model@lines, fortify))
vctrs::vec_rbind(!!!lapply(model@lines, fortify))
}

#' @rdname fortify.sp
#' @export
#' @method fortify Lines
fortify.Lines <- function(model, data, ...) {
lines <- model@Lines
pieces <- rbind_dfs(lapply(seq_along(lines), function(i) {
pieces <- vctrs::vec_rbind(!!!lapply(seq_along(lines), function(i) {
df <- fortify(lines[[i]])
df$piece <- i
df
Expand Down
2 changes: 1 addition & 1 deletion R/stat-.r
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ Stat <- ggproto("Stat",
)
}, stats, groups, SIMPLIFY = FALSE)

rbind_dfs(stats)
vctrs::vec_rbind(!!!stats)
},

compute_group = function(self, data, scales) {
Expand Down
5 changes: 3 additions & 2 deletions R/stat-quantile.r
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,9 @@ StatQuantile <- ggproto("StatQuantile", Stat,
method <- match.fun(method) # allow users to supply their own methods
}

rbind_dfs(lapply(quantiles, quant_pred, data = data, method = method,
formula = formula, weight = weight, grid = grid, method.args = method.args))
pieces <- lapply(quantiles, quant_pred, data = data, method = method,
formula = formula, weight = weight, grid = grid, method.args = method.args)
vctrs::vec_rbind(!!!pieces)
}
)

Expand Down
6 changes: 0 additions & 6 deletions R/zzz.r
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ pathGrob <- NULL
ggplot_global$element_tree <- NULL # the current element tree for themes
reset_theme_settings() # sets the preceding three global variables to their actual defaults

# Used by rbind_dfs
date <- Sys.Date()
ggplot_global$date_origin <- date - unclass(date)
time <- Sys.time()
ggplot_global$time_origin <- time - unclass(time)

# To avoid namespace clash with dplyr.
# It seems surprising that this hack works
if (requireNamespace("dplyr", quietly = TRUE)) {
Expand Down
14 changes: 0 additions & 14 deletions tests/testthat/test-rbind-dfs.R

This file was deleted.