Skip to content

Commit f620726

Browse files
jeroenDavisVaughan
andauthored
Remove usage of ATTRIB() (#481)
* Backport CRAN change from cran/cpp11@6cc2e8b * Just use `Rf_getAttrib(x, R_RowNamesSymbol)` Since as of R 3.5, this is pretty efficient. It won't ever "fully expand" compact row names. The worst case is converting `c(NA, -n)` to an ALTREP compact intrange, which is still cheap. --------- Co-authored-by: Davis Vaughan <davis@posit.co>
1 parent b631b74 commit f620726

File tree

3 files changed

+129
-50
lines changed

3 files changed

+129
-50
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# cpp11 (development version)
22

3+
* Removed non-API usage of `ATTRIB()` (#481).
4+
35
* Improved hygiene around using C++ specific C compatibility headers (i.e. by using `<cstring>` rather than `<string.h>` and `<cstddef>` rather than `<stddef.h>`) (#454, @MichaelChirico).
46

57
* Fixed an rchk issue related to `std::initializer_list<named_arg>` (#457, @pachadotdev).

cpp11test/src/test-data_frame.cpp

Lines changed: 100 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,42 +26,123 @@ context("data_frame-C++") {
2626
}
2727

2828
test_that("data_frame::nrow works with 0x0 dfs") {
29+
// From bare list
2930
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
30-
31-
cpp11::data_frame df(x);
32-
expect_true(df.nrow() == 0);
33-
31+
cpp11::data_frame x_df(x);
32+
expect_true(x_df.nrow() == 0);
3433
UNPROTECT(1);
35-
}
36-
37-
test_that("data_frame::nrow works with 10x0 dfs") {
38-
using namespace cpp11::literals;
39-
cpp11::writable::list x(0_xl);
40-
x.attr(R_RowNamesSymbol) = {NA_INTEGER, -10};
4134

42-
cpp11::data_frame df(x);
43-
expect_true(df.nrow() == 10);
35+
// From bare list with `R_RowNamesSymbol`
36+
SEXP y = PROTECT(Rf_allocVector(VECSXP, 0));
37+
SEXP y_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
38+
SET_INTEGER_ELT(y_row_names, 0, NA_INTEGER);
39+
SET_INTEGER_ELT(y_row_names, 1, 0);
40+
Rf_setAttrib(y, R_RowNamesSymbol, y_row_names);
41+
cpp11::data_frame y_df(y);
42+
expect_true(y_df.nrow() == 0);
43+
UNPROTECT(2);
44+
45+
// From classed data frame with `R_RowNamesSymbol`
46+
SEXP z = PROTECT(Rf_allocVector(VECSXP, 0));
47+
SEXP z_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
48+
SET_INTEGER_ELT(z_row_names, 0, NA_INTEGER);
49+
SET_INTEGER_ELT(z_row_names, 1, 0);
50+
Rf_setAttrib(z, R_RowNamesSymbol, z_row_names);
51+
SEXP z_class = PROTECT(Rf_allocVector(STRSXP, 1));
52+
SET_STRING_ELT(z_class, 0, Rf_mkChar("data.frame"));
53+
Rf_setAttrib(z, R_ClassSymbol, z_class);
54+
cpp11::data_frame z_df(z);
55+
expect_true(z_df.nrow() == 0);
56+
UNPROTECT(3);
4457
}
4558

4659
test_that("writable::data_frame::nrow works with 0x0 dfs") {
60+
using namespace cpp11::literals;
61+
62+
// From bare list
4763
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
64+
cpp11::writable::data_frame x_df(x);
65+
expect_true(x_df.nrow() == 0);
66+
UNPROTECT(1);
4867

49-
cpp11::writable::data_frame df(x);
50-
expect_true(df.nrow() == 0);
68+
// From bare list with `R_RowNamesSymbol`
69+
cpp11::writable::list y(0_xl);
70+
y.attr(R_RowNamesSymbol) = {NA_INTEGER, 0};
71+
cpp11::writable::data_frame y_df(y);
72+
expect_true(y_df.nrow() == 0);
73+
74+
// From classed data frame with `R_RowNamesSymbol`
75+
cpp11::writable::list z(0_xl);
76+
z.attr(R_RowNamesSymbol) = {NA_INTEGER, 0};
77+
z.attr(R_ClassSymbol) = "data.frame";
78+
cpp11::writable::data_frame z_df(z);
79+
expect_true(z_df.nrow() == 0);
80+
}
5181

52-
UNPROTECT(1);
82+
test_that("data_frame::nrow works with 10x0 dfs") {
83+
// From bare list with `R_RowNamesSymbol`
84+
SEXP y = PROTECT(Rf_allocVector(VECSXP, 0));
85+
SEXP y_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
86+
SET_INTEGER_ELT(y_row_names, 0, NA_INTEGER);
87+
SET_INTEGER_ELT(y_row_names, 1, 10);
88+
Rf_setAttrib(y, R_RowNamesSymbol, y_row_names);
89+
cpp11::data_frame y_df(y);
90+
expect_true(y_df.nrow() == 10);
91+
UNPROTECT(2);
92+
93+
// From classed data frame with `R_RowNamesSymbol`
94+
SEXP z = PROTECT(Rf_allocVector(VECSXP, 0));
95+
SEXP z_row_names = PROTECT(Rf_allocVector(INTSXP, 2));
96+
SET_INTEGER_ELT(z_row_names, 0, NA_INTEGER);
97+
SET_INTEGER_ELT(z_row_names, 1, 10);
98+
Rf_setAttrib(z, R_RowNamesSymbol, z_row_names);
99+
SEXP z_class = PROTECT(Rf_allocVector(STRSXP, 1));
100+
SET_STRING_ELT(z_class, 0, Rf_mkChar("data.frame"));
101+
Rf_setAttrib(z, R_ClassSymbol, z_class);
102+
cpp11::data_frame z_df(z);
103+
expect_true(z_df.nrow() == 10);
104+
UNPROTECT(3);
53105
}
54106

55107
test_that("writable::data_frame::nrow works with 10x0 dfs (#272)") {
56-
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
108+
using namespace cpp11::literals;
57109

58-
bool is_altrep = false;
59-
R_xlen_t nrow = 10;
110+
// From bare list with `R_RowNamesSymbol`
111+
cpp11::writable::list y(0_xl);
112+
y.attr(R_RowNamesSymbol) = {NA_INTEGER, 10};
113+
cpp11::writable::data_frame y_df(y);
114+
expect_true(y_df.nrow() == 10);
115+
116+
// From classed data frame with `R_RowNamesSymbol`
117+
cpp11::writable::list z(0_xl);
118+
z.attr(R_RowNamesSymbol) = {NA_INTEGER, 10};
119+
z.attr(R_ClassSymbol) = "data.frame";
120+
cpp11::writable::data_frame z_df(z);
121+
expect_true(z_df.nrow() == 10);
60122

61123
// Manually specify `nrow` using special constructor
62-
cpp11::writable::data_frame df(x, is_altrep, nrow);
124+
bool is_altrep = false;
125+
SEXP x = PROTECT(Rf_allocVector(VECSXP, 0));
126+
cpp11::writable::data_frame df(x, is_altrep, 10);
63127
expect_true(df.nrow() == 10);
128+
UNPROTECT(1);
129+
}
130+
131+
test_that("data_frame::nrow works with 0x1 dfs") {
132+
// From bare list
133+
SEXP x = PROTECT(Rf_allocVector(VECSXP, 1));
134+
SET_VECTOR_ELT(x, 0, Rf_allocVector(INTSXP, 0));
135+
cpp11::data_frame x_df(x);
136+
expect_true(x_df.nrow() == 0);
137+
UNPROTECT(1);
138+
}
64139

140+
test_that("writable::data_frame::nrow works with 0x1 dfs") {
141+
// From bare list
142+
SEXP x = PROTECT(Rf_allocVector(VECSXP, 1));
143+
SET_VECTOR_ELT(x, 0, Rf_allocVector(INTSXP, 0));
144+
cpp11::writable::data_frame x_df(x);
145+
expect_true(x_df.nrow() == 0);
65146
UNPROTECT(1);
66147
}
67148

inst/include/cpp11/data_frame.hpp

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#pragma once
22

3-
#include <cstdlib> // for abs
4-
#include <cstdlib>
3+
#include <cstdlib> // for abs
54
#include <initializer_list> // for initializer_list
65
#include <string> // for string, basic_string
76
#include <utility> // for move
@@ -24,53 +23,50 @@ class data_frame : public list {
2423

2524
friend class writable::data_frame;
2625

27-
/* we cannot use Rf_getAttrib because it has a special case for c(NA, -n) and creates
28-
* the full vector */
29-
static SEXP get_attrib0(SEXP x, SEXP sym) {
30-
for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) {
31-
if (TAG(attr) == sym) {
32-
return CAR(attr);
33-
}
34-
}
35-
36-
return R_NilValue;
37-
}
38-
39-
static R_xlen_t calc_nrow(SEXP x) {
40-
auto nms = get_attrib0(x, R_RowNamesSymbol);
41-
bool has_short_rownames =
42-
(Rf_isInteger(nms) && Rf_xlength(nms) == 2 && INTEGER(nms)[0] == NA_INTEGER);
43-
if (has_short_rownames) {
44-
return static_cast<R_xlen_t>(abs(INTEGER(nms)[1]));
45-
}
46-
47-
if (!Rf_isNull(nms)) {
48-
return Rf_xlength(nms);
26+
static R_xlen_t calculate_nrow(SEXP x) {
27+
// If there is a `R_RowNamesSymbol`, we take the number of rows from there
28+
// (regardless of whether or not there is a `"data.frame"` class yet!).
29+
//
30+
// As of R >=3.5, `Rf_getAttrib(R_RowNamesSymbol)` returns one of the following:
31+
// - A character vector
32+
// - An integer vector
33+
// - An ALTREP integer compact intrange (converted cheaply from `c(NA, -n)`)
34+
//
35+
// We can take the `Rf_xlength()` of all of these cheaply.
36+
//
37+
// We used to worry about `Rf_getAttrib()` fully expanding `c(NA, -n)`, but with
38+
// ALTREP integer compact intranges that is no longer the case.
39+
SEXP row_names = Rf_getAttrib(x, R_RowNamesSymbol);
40+
if (row_names != R_NilValue) {
41+
return Rf_xlength(row_names);
4942
}
5043

44+
// Otherwise it's a bare list, and we infer the number of rows from the first element
45+
// (i.e. first column). Calling `Rf_xlength()` on the first column isn't 100% right
46+
// (it doesn't dispatch to `length()`, nor does it correctly handle df-cols or
47+
// matrix-cols), but it is close enough and people can use the data_frame constructor
48+
// that allows you to specify `nrow` directly as needed.
5149
if (Rf_xlength(x) == 0) {
5250
return 0;
51+
} else {
52+
return Rf_xlength(VECTOR_ELT(x, 0));
5353
}
54-
55-
return Rf_xlength(VECTOR_ELT(x, 0));
5654
}
5755

5856
public:
59-
/* Adapted from
60-
* https://github.com/wch/r-source/blob/f2a0dfab3e26fb42b8b296fcba40cbdbdbec767d/src/main/attrib.c#L198-L207
61-
*/
62-
R_xlen_t nrow() const { return calc_nrow(*this); }
57+
R_xlen_t nrow() const { return calculate_nrow(*this); }
6358
R_xlen_t ncol() const { return size(); }
6459
};
6560

6661
namespace writable {
6762
class data_frame : public cpp11::data_frame {
6863
private:
6964
writable::list set_data_frame_attributes(writable::list&& x) {
70-
return set_data_frame_attributes(std::move(x), calc_nrow(x));
65+
return set_data_frame_attributes(std::move(x), calculate_nrow(x));
7166
}
7267

7368
writable::list set_data_frame_attributes(writable::list&& x, R_xlen_t nrow) {
69+
// `Rf_setAttrib(R_RowNamesSymbol)` will keep `c(NA, -n)` in compact form
7470
x.attr(R_RowNamesSymbol) = {NA_INTEGER, -static_cast<int>(nrow)};
7571
x.attr(R_ClassSymbol) = "data.frame";
7672
return std::move(x);

0 commit comments

Comments
 (0)