Skip to content

Commit 2810927

Browse files
committed
Mostly comments, one small refactor
Some actual arrow_build_table tests; a CI job; R 4.4 to 4.5; more imports; more tests; more tests; attempt to use PPPM for binaries; more minimal CI job? Also need working directory for renv? Do actually need system deps; update renv; call it cran? Is PPM actually the magic?
1 parent 326ac66 commit 2810927

File tree

8 files changed

+656
-285
lines changed

8 files changed

+656
-285
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
name: Crossbow nightly report R tests
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
paths:
7+
- 'crossbow-nightly-report/**'
8+
pull_request:
9+
branches: [ main ]
10+
paths:
11+
- 'crossbow-nightly-report/**'
12+
13+
jobs:
14+
test:
15+
runs-on: ubuntu-latest
16+
defaults:
17+
run:
18+
working-directory: ./crossbow-nightly-report
19+
20+
steps:
21+
- uses: actions/checkout@v3
22+
23+
24+
- name: Install system dependencies
25+
run: |
26+
sudo apt-get update
27+
sudo apt-get install -y libcurl4-openssl-dev libssl-dev libxml2-dev
28+
29+
- name: Set up R
30+
uses: r-lib/actions/setup-r@v2
31+
with:
32+
r-version: '4.5.0'
33+
use-public-rspm: true
34+
35+
- name: Restore packages using renv
36+
uses: r-lib/actions/setup-renv@v2
37+
with:
38+
working-directory: ./crossbow-nightly-report
39+
40+
- name: Install test dependencies
41+
run: install.packages('testthat')
42+
shell: Rscript {0}
43+
44+
- name: Run tests
45+
run: |
46+
library(testthat)
47+
test_results <- test_dir("tests", reporter = "summary", stop_on_failure = FALSE)
48+
test_result_df <- as.data.frame(test_results)
49+
# Exit with error code if any tests failed
50+
if (length(test_results) > 0 && any(c(test_result_df$failed, test_result_df$error))) {
51+
quit(status = 1)
52+
}
53+
shell: Rscript {0}

.github/workflows/nightly_dashboard.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050

5151
- uses: r-lib/actions/setup-r@v2
5252
with:
53-
r-version: '4.4.0'
53+
r-version: '4.5.0'
5454
use-public-rspm: true
5555

5656
# Needed due to https://github.com/r-lib/actions/issues/618

.github/workflows/performance-release-report.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
- name: Setup R
5858
uses: r-lib/actions/setup-r@v2
5959
with:
60-
r-version: '4.4.0'
60+
r-version: '4.5.0'
6161
use-public-rspm: true
6262

6363
# Needed due to https://github.com/r-lib/actions/issues/618

crossbow-nightly-report/R/functions.R

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
library(tibble)
2+
library(dplyr)
3+
library(lubridate)
4+
library(glue)
5+
library(tidyr)
6+
17
is_dev <- function() {
28
Sys.getenv("GITHUB_ACTIONS") != "true"
39
}
@@ -40,14 +46,24 @@ make_nice_names <- function(x) {
4046
toTitleCase(gsub("_", " ", names(x)))
4147
}
4248

43-
arrow_build_table <- function(nightly_data, type, task) {
49+
get_commit <- function(df, label) {
50+
df$arrow_commit[df$fail_label == label]
51+
}
52+
53+
arrow_build_table <- function(nightly_data, type, task, to_day = today()) {
54+
# Filter data for a specific build type and task
4455
type_task_data <- nightly_data %>%
4556
filter(build_type == type) %>%
4657
filter(task_name == task)
4758

48-
## filter for when the most recent run is a failure
49-
day_window <- today() - 2
59+
# Look at yesterday's date to determine recent failures
60+
# This is used as a window for identifying tasks that failed recently
61+
day_window <- to_day - 1
62+
63+
# Get records where the task failed recently, order by date (newest first)
64+
# and standardize task status values to "pass" and "fail"
5065
ordered_only_recent_fails <- type_task_data %>%
66+
# Only keep records where the task name appears in yesterday's failures
5167
filter(
5268
task_name %in%
5369
task_name[nightly_date == day_window & task_status != "success"]
@@ -61,15 +77,22 @@ arrow_build_table <- function(nightly_data, type, task) {
6177
)
6278
)
6379

80+
# If there are no recent failures, return a success summary or a null summary if the task is not active
6481
if (nrow(ordered_only_recent_fails) == 0) {
65-
## if there are no failures, return a version of the table that reflects that
82+
# Calculate days since the last run (regardless of status)
6683
days <- as.numeric(
6784
difftime(
68-
ymd(Sys.Date(), tz = "UTC"),
85+
ymd(to_day, tz = "UTC"),
6986
max(type_task_data$nightly_date)
7087
)
7188
)
89+
# Create a summary with success information
7290
success_df <- type_task_data %>%
91+
# Remove stale data by filtering out everything but the last ~2 days of runs
92+
# this makes it so that jobs that have been deleted (but are still in the 120 day look back)
93+
# don't continue to show up.
94+
filter(nightly_date >= to_day - 2) %>%
95+
# Then, take the most recent run since that's all we care about if there are no failures.
7396
slice_max(order_by = nightly_date) %>%
7497
mutate(
7598
since_last_successful_build = days,
@@ -87,33 +110,39 @@ arrow_build_table <- function(nightly_data, type, task) {
87110
last_successful_build,
88111
build_type
89112
)
113+
90114
return(success_df)
91115
}
92116

93-
## find first failure index
117+
# Find the length of the most recent consecutive failure streak
118+
# This uses run length encoding to identify the first sequence of failures
94119
idx_recent_fail <- rle(ordered_only_recent_fails$task_status)$lengths[1]
95120

96-
## expand failure index and give it some names
121+
# Create labels for the failure streak timeline
122+
# This builds a dataframe with positions and labels for the recent failure sequence
97123
failure_df <- tibble(fails_plus_one = seq(1, idx_recent_fail + 1)) %>%
98124
mutate(
99125
fail_label = case_when(
100-
fails_plus_one == idx_recent_fail ~ "first_failure",
101-
fails_plus_one == 1 ~ "most_recent_failure",
102-
fails_plus_one == idx_recent_fail + 1 ~ "last_successful_build",
103-
TRUE ~ paste0(fails_plus_one, " days ago")
126+
fails_plus_one == idx_recent_fail ~ "first_failure", # Where the failures began
127+
fails_plus_one == 1 ~ "most_recent_failure", # The most recent failure
128+
fails_plus_one == idx_recent_fail + 1 ~ "last_successful_build", # Last successful build before failures
129+
TRUE ~ paste0(fails_plus_one, " days ago") # General failure timeline
104130
)
105131
) %>%
132+
# Only keep the most recent 9 days of failures or specific labeled events
106133
filter(fails_plus_one <= 9 | grepl("failure|build", fail_label))
107134

108-
## inner_join to ordered data
135+
# Join the failure timeline labels with the actual build data
109136
df <- ordered_only_recent_fails %>%
110137
rowid_to_column() %>%
111138
inner_join(failure_df, by = c("rowid" = "fails_plus_one"))
112139

140+
# Calculate days since last successful build
113141
if (all(type_task_data$task_status %in% "failure")) {
114142
days <- NA_real_
115143
} else {
116-
## days since last successful build (need to add one)
144+
# Calculate days between most recent failure and last successful build
145+
# Adding 1 to include the day of the failure
117146
days <- sum(
118147
as.numeric(
119148
difftime(
@@ -125,10 +154,7 @@ arrow_build_table <- function(nightly_data, type, task) {
125154
)
126155
}
127156

128-
get_commit <- function(label) {
129-
df$arrow_commit[df$fail_label == label]
130-
}
131-
157+
# Format the final result as a table with build status information (one row per task)
132158
df %>%
133159
arrange(desc(fail_label)) %>%
134160
mutate(
@@ -137,12 +163,14 @@ arrow_build_table <- function(nightly_data, type, task) {
137163
)
138164
) %>%
139165
select(task_name, build_type, build_links, fail_label) %>%
166+
# Reshape data to have one column for each failure stage
140167
pivot_wider(names_from = fail_label, values_from = build_links) %>%
168+
# Add additional context columns
141169
mutate(
142170
since_last_successful_build = days,
143171
last_successful_commit = arrow_compare_links(
144-
get_commit("last_successful_build"),
145-
get_commit("first_failure")
172+
get_commit(df, "last_successful_build"),
173+
get_commit(df, "first_failure")
146174
),
147175
most_recent_status = "failing",
148176
.after = build_type

crossbow-nightly-report/air.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[format]
2+
line-width = 120

0 commit comments

Comments
 (0)