diff --git a/DESCRIPTION b/DESCRIPTION index b38880889..694078eb8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,7 @@ Imports: lifecycle (>= 1.0.2), rlang (>= 1.0.6) Suggests: - bit64, + bit64 (>= 4.0.5), covr, crayon, dplyr (>= 0.8.5), diff --git a/NEWS.md b/NEWS.md index 0f789df29..aaf8b5f42 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,12 @@ # vctrs (development version) +* Internal code around `vec_slice()` fallbacks to `[` has been cleaned up + (#1707). + +* We have removed some internal hacks required for very old versions of bit64 + regarding `NA_integer_` indices and slicing. If you notice any issues related + to this after updating vctrs, you likely need a newer version of bit64. + * `validate_list_of()` has been removed. It hasn't proven to be practically useful, and isn't used by any packages on CRAN (#1697). diff --git a/R/slice.R b/R/slice.R index e5ceb33bc..16a0a25b1 100644 --- a/R/slice.R +++ b/R/slice.R @@ -110,64 +110,13 @@ vec_slice <- function(x, i) { } # Called when `x` has dimensions -vec_slice_fallback <- function(x, i) { - out <- unclass(vec_proxy(x)) - vec_assert(out) - - d <- vec_dim_n(out) - if (d == 2) { - out <- out[i, , drop = FALSE] - } else { - miss_args <- rep(list(missing_arg()), d - 1) - out <- eval_bare(expr(out[i, !!!miss_args, drop = FALSE])) - } - - vec_restore(out, x) -} - -vec_slice_fallback_integer64 <- function(x, i) { +bracket_shaped_dispatch <- function(x, i) { d <- vec_dim_n(x) - - if (d == 2) { - out <- x[i, , drop = FALSE] - } else { - miss_args <- rep(list(missing_arg()), d - 1) - out <- eval_bare(expr(x[i, !!!miss_args, drop = FALSE])) - } - - is_na <- is.na(i) - - if (!any(is_na)) { - return(out) - } - - if (d == 2) { - out[is_na,] <- bit64::NA_integer64_ - } else { - eval_bare(expr(out[is_na, !!!miss_args] <- bit64::NA_integer64_)) - } - - out -} - -# bit64::integer64() objects do not have support for `NA_integer_` -# slicing. This manually replaces the garbage values that are created -# any time a slice with `NA_integer_` is made. -vec_slice_dispatch_integer64 <- function(x, i) { - out <- x[i] - - is_na <- is.na(i) - - if (!any(is_na)) { - return(out) - } - - out[is_na] <- bit64::NA_integer64_ - - out + miss_args <- rep(list(missing_arg()), d - 1) + x <- eval_bare(expr(x[i, !!!miss_args, drop = FALSE])) + x } - #' @rdname vec_slice #' @export `vec_slice<-` <- function(x, i, value) { diff --git a/src/globals.h b/src/globals.h index 0e879fa7a..6ce524719 100644 --- a/src/globals.h +++ b/src/globals.h @@ -17,9 +17,7 @@ struct syms { r_obj* to_arg; r_obj* value_arg; r_obj* vec_default_cast; - r_obj* vec_slice_dispatch_integer64; - r_obj* vec_slice_fallback; - r_obj* vec_slice_fallback_integer64; + r_obj* bracket_shaped_dispatch; r_obj* x_arg; r_obj* y_arg; }; @@ -36,9 +34,7 @@ struct chrs { }; struct fns { - r_obj* vec_slice_dispatch_integer64; - r_obj* vec_slice_fallback; - r_obj* vec_slice_fallback_integer64; + r_obj* bracket_shaped_dispatch; }; struct vec_args { diff --git a/src/slice-chop.c b/src/slice-chop.c index 19fe51414..ab752beb3 100644 --- a/src/slice-chop.c +++ b/src/slice-chop.c @@ -296,6 +296,10 @@ static SEXP chop_shaped(SEXP x, SEXP indices, struct vctrs_chop_info info) { } static SEXP chop_fallback(SEXP x, SEXP indices, struct vctrs_chop_info info) { + // TODO: Do we really care about micro performance that much here? Can we just + // merge with `chop_shaped_fallback()` now that `vec_slice_fallback()` + // handles shaped an unshaped vectors? + // Evaluate in a child of the global environment to allow dispatch // to custom functions. We define `[` to point to its base // definition to ensure consistent look-up. This is the same logic @@ -306,16 +310,8 @@ static SEXP chop_fallback(SEXP x, SEXP indices, struct vctrs_chop_info info) { Rf_defineVar(syms_i, info.index, env); // Construct call with symbols, not values, for performance. - // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 - // objects to ensure correct slicing with `NA_integer_`. - SEXP call; - if (is_integer64(x)) { - call = PROTECT(Rf_lang3(syms.vec_slice_dispatch_integer64, syms_x, syms_i)); - Rf_defineVar(syms.vec_slice_dispatch_integer64, fns.vec_slice_dispatch_integer64, env); - } else { - call = PROTECT(Rf_lang3(syms_bracket, syms_x, syms_i)); - Rf_defineVar(syms_bracket, fns_bracket, env); - } + SEXP call = PROTECT(Rf_lang3(syms_bracket, syms_x, syms_i)); + Rf_defineVar(syms_bracket, fns_bracket, env); for (R_len_t i = 0; i < info.out_size; ++i) { if (info.has_indices) { @@ -329,6 +325,7 @@ static SEXP chop_fallback(SEXP x, SEXP indices, struct vctrs_chop_info info) { SEXP elt = PROTECT(Rf_eval(call, env)); + // Same logic as `vec_slice_fallback()` if (!vec_is_restored(elt, x)) { elt = vec_restore(elt, x, vec_owned(elt)); } @@ -349,7 +346,6 @@ static r_obj* chop_fallback_shaped(r_obj* x, r_obj* indices, struct vctrs_chop_i ++(*info.p_index); } - // `vec_slice_fallback()` will also `vec_restore()` for us r_obj* elt = PROTECT(vec_slice_fallback(x, info.index)); SET_VECTOR_ELT(info.out, i, elt); diff --git a/src/slice.c b/src/slice.c index 26a67d838..1ad51fab1 100644 --- a/src/slice.c +++ b/src/slice.c @@ -207,34 +207,39 @@ r_obj* df_slice(r_obj* x, r_obj* subscript) { return out; } +static +r_obj* bracket_dispatch(r_obj* x, r_obj* subscript) { + return vctrs_dispatch2( + syms_bracket, fns_bracket, + syms_x, x, + syms_i, subscript + ); +} +static +r_obj* bracket_shaped_dispatch(r_obj* x, r_obj* subscript) { + return vctrs_dispatch2( + syms.bracket_shaped_dispatch, fns.bracket_shaped_dispatch, + syms_x, x, + syms_i, subscript + ); +} r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript) { - // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 - // objects to ensure correct slicing with `NA_integer_`. - if (is_integer64(x)) { - return vctrs_dispatch2(syms.vec_slice_fallback_integer64, fns.vec_slice_fallback_integer64, - syms_x, x, - syms_i, subscript); - } + r_obj* out = r_null; - return vctrs_dispatch2(syms.vec_slice_fallback, fns.vec_slice_fallback, - syms_x, x, - syms_i, subscript); -} + if (has_dim(x)) { + out = KEEP(bracket_shaped_dispatch(x, subscript)); + } else { + out = KEEP(bracket_dispatch(x, subscript)); + } -static -r_obj* vec_slice_dispatch(r_obj* x, r_obj* subscript) { - // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 - // objects to ensure correct slicing with `NA_integer_`. - if (is_integer64(x)) { - return vctrs_dispatch2(syms.vec_slice_dispatch_integer64, fns.vec_slice_dispatch_integer64, - syms_x, x, - syms_i, subscript); + // Take over attribute restoration only if there is no `[` method + if (!vec_is_restored(out, x)) { + out = vec_restore(out, x, vec_owned(out)); } - return vctrs_dispatch2(syms_bracket, fns_bracket, - syms_x, x, - syms_i, subscript); + FREE(1); + return out; } bool vec_requires_fallback(r_obj* x, struct vctrs_proxy_info info) { @@ -300,18 +305,7 @@ r_obj* vec_slice_unsafe(r_obj* x, r_obj* subscript) { subscript = KEEP_N(compact_materialize(subscript), &nprot); } - r_obj* out; - - if (has_dim(x)) { - out = KEEP_N(vec_slice_fallback(x, subscript), &nprot); - } else { - out = KEEP_N(vec_slice_dispatch(x, subscript), &nprot); - } - - // Take over attribute restoration only if there is no `[` method - if (!vec_is_restored(out, x)) { - out = vec_restore(out, x, vec_owned(out)); - } + r_obj* out = vec_slice_fallback(x, subscript); FREE(nprot); return out; @@ -381,11 +375,15 @@ bool vec_is_restored(r_obj* x, r_obj* to) { return false; } - // Class is restored if it contains any other attributes than names. - // We might want to add support for data frames later on. + // Class is restored if it contains any other attributes than names, dim, or + // dimnames. We might want to add support for data frames later on. r_obj* node = attrib; while (node != r_null) { - if (r_node_tag(node) == r_syms.names) { + r_obj* tag = r_node_tag(node); + + if (tag == r_syms.names || + tag == r_syms.dim || + tag == r_syms.dim_names) { node = r_node_cdr(node); continue; } @@ -482,11 +480,6 @@ r_obj* ffi_slice_rep(r_obj* x, r_obj* ffi_i, r_obj* ffi_n) { void vctrs_init_slice(r_obj* ns) { - syms.vec_slice_dispatch_integer64 = r_sym("vec_slice_dispatch_integer64"); - syms.vec_slice_fallback = r_sym("vec_slice_fallback"); - syms.vec_slice_fallback_integer64 = r_sym("vec_slice_fallback_integer64"); - - fns.vec_slice_dispatch_integer64 = r_eval(syms.vec_slice_dispatch_integer64, ns); - fns.vec_slice_fallback = r_eval(syms.vec_slice_fallback, ns); - fns.vec_slice_fallback_integer64 = r_eval(syms.vec_slice_fallback_integer64, ns); + syms.bracket_shaped_dispatch = r_sym("bracket_shaped_dispatch"); + fns.bracket_shaped_dispatch = r_eval(syms.bracket_shaped_dispatch, ns); } diff --git a/src/utils.c b/src/utils.c index 687c43c4a..410c1728c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -965,11 +965,6 @@ r_obj* colnames2(r_obj* x) { } } -// [[ include("utils.h") ]] -bool is_integer64(SEXP x) { - return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); -} - // [[ include("utils.h") ]] bool lgl_any_na(SEXP x) { R_xlen_t size = Rf_xlength(x); diff --git a/src/utils.h b/src/utils.h index fafc6b40c..69802868b 100644 --- a/src/utils.h +++ b/src/utils.h @@ -188,8 +188,6 @@ bool is_compact(SEXP x); SEXP compact_materialize(SEXP x); R_len_t vec_subscript_size(SEXP x); -bool is_integer64(SEXP x); - bool lgl_any_na(SEXP x); SEXP colnames(SEXP x); diff --git a/tests/testthat/test-slice.R b/tests/testthat/test-slice.R index 02939d748..71fddbdf1 100644 --- a/tests/testthat/test-slice.R +++ b/tests/testthat/test-slice.R @@ -272,6 +272,18 @@ test_that("vec_slice() unclasses input before calling `vec_restore()`", { }) test_that("can call `vec_slice()` from `[` methods with shaped objects without infloop", { + skip("Until infloop is talked about") + # TODO: I don't think it is our job to prevent this. + # You can already make this infloop today with non-shaped objects, i.e + # `[.vctrs_foobar` = function(x, i, ...) vec_slice(x, i) + # x <- structure(1:4, class = "vctrs_foobar") + # x[1] + # So we shouldn't be special casing shaped objects. + # I believe that if your `[` method is going to call `vec_slice()` (i.e. you + # are requesting native vctrs slicing), then you need to declare a + # `vec_proxy()` method as well to tell vctrs what it needs to be natively + # slicing. + local_methods( `[.vctrs_foobar` = function(x, i, ...) vec_slice(x, i) ) @@ -284,6 +296,28 @@ test_that("can call `vec_slice()` from `[` methods with shaped objects without i expect_identical(x[1], exp) }) +test_that("slicing shaped S3 objects that don't have a proxy method actually calls the `[` method (#1707)", { + # In particular, this is needed for `bit64::integer64()` to allow their `[` + # method to handle `NA_integer_` correctly. + called <- NULL + + local_methods( + `[.vctrs_foobar` = function(x, ...) { + called <<- TRUE + foobar(NextMethod()) + } + ) + + x <- foobar(1:6) + dim(x) <- c(3, 2) + + expect <- foobar(c(1L, 4L)) + dim(expect) <- c(1, 2) + + expect_identical(vec_slice(x, 1), expect) + expect_true(called) +}) + test_that("vec_slice() restores attributes on shaped S3 objects correctly", { x <- factor(c("a", "b", "c", "d", "e", "f")) dim(x) <- c(3, 2) @@ -351,6 +385,7 @@ test_that("vec_restore() is called after slicing data frames", { test_that("additional subscripts are forwarded to `[`", { local_methods( + vec_proxy.vctrs_foobar = function(x, ...) x, `[.vctrs_foobar` = function(x, i, ...) vec_index(x, i, ...) )