Skip to content

Commit 8053203

Browse files
authored
Merge pull request #184 from uclahs-cds/danknight-fuzz
Fuzz testing tree randomization
2 parents 521b449 + 7e40920 commit 8053203

File tree

3 files changed

+451
-0
lines changed

3 files changed

+451
-0
lines changed

R/fuzz.R

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
randomize.tree <- function(
2+
tree.df,
3+
randomize.angles = TRUE,
4+
randomize.node.color = TRUE,
5+
randomize.border.color = TRUE,
6+
randomize.border.width = TRUE,
7+
randomize.border.type = TRUE,
8+
randomize.edge.col = TRUE,
9+
randomize.edge.width = TRUE,
10+
randomize.edge.type = TRUE,
11+
randomize.edge.length = TRUE,
12+
randomize.plotting.direction = TRUE,
13+
plotting.direction = NULL,
14+
...
15+
) {
16+
node.ids <- c(get.root.node(tree.df));
17+
18+
default.line.type <- 'solid';
19+
line.types <- c(default.line.type, 'dotted', 'dashed');
20+
21+
if (check.randomization.value(randomize.angles, randomization.name = 'randomize.angles')) {
22+
spread.randomization.sd <- if (is.numeric(randomize.angles)) {
23+
if (randomize.angles <= 0) {
24+
stop('"randomize.angles" standard deviation value must be positive.');
25+
}
26+
randomize.angles;
27+
} else {
28+
0.5;
29+
}
30+
default.spread <- 1;
31+
if (!('spread' %in% colnames(tree.df))) {
32+
tree.df$spread <- default.spread;
33+
} else {
34+
tree.df[is.na(tree.df$spread), 'spread'] <- 1;
35+
}
36+
tree.df$spread <- tree.df$spread + rnorm(
37+
mean = 0,
38+
sd = spread.randomization.sd,
39+
n = nrow(tree.df)
40+
);
41+
tree.df[tree.df$spread < 0, 'spread'] <- 0;
42+
}
43+
44+
if (check.randomization.value(randomize.plotting.direction, randomization.name = 'plotting.direction')) {
45+
if (is.null(plotting.direction)) {
46+
plotting.direction <- sample(c('down', 'right', 'left', 'up'), size = 1);
47+
}
48+
plotting.direction <- radians.to.degrees(
49+
prep.plotting.direction(plotting.direction, radians = FALSE)
50+
);
51+
angle.randomization.sd <- if (is.numeric(randomize.plotting.direction)) {
52+
if (randomize.plotting.direction <= 0) {
53+
stop('"randomize.plotting.direction" standard deviation value must be positive.');
54+
}
55+
randomize.plotting.direction;
56+
} else {
57+
30;
58+
};
59+
plotting.direction <- plotting.direction + rnorm(sd = angle.randomization.sd, n = 1);
60+
}
61+
62+
if (check.randomization.value(randomize.node.color, randomization.name = 'randomize.node.color')) {
63+
node.color.randomization.prob <- if (is.numeric(randomize.node.color)) {
64+
if (randomize.node.color < 0 || randomize.node.color > 1) {
65+
stop('"randomize.node.color" probability must be between 0 and 1.')
66+
}
67+
randomize.node.color;
68+
} else {
69+
0.5;
70+
}
71+
node.color.scheme <- if (runif(1) <= node.color.randomization.prob) {
72+
generate.random.color();
73+
} else {
74+
NA;
75+
}
76+
77+
if (!('node.col' %in% colnames(tree.df))) {
78+
tree.df$node.col <- node.color.scheme;
79+
} else {
80+
tree.df[is.na(tree.df$node.col), 'node.col'] <- node.color.scheme;
81+
}
82+
override.node.col.i <- sapply(
83+
1:nrow(tree.df),
84+
function(i) runif(1) <= node.color.randomization.prob
85+
);
86+
tree.df[override.node.col.i, 'node.col'] <- sapply(
87+
1:sum(override.node.col.i),
88+
function(i) generate.random.color()
89+
);
90+
}
91+
92+
if (check.randomization.value(randomize.border.color, randomization.name = 'randomize.border.color')) {
93+
border.color.randomization.prob <- if (is.numeric(randomize.border.color)) {
94+
if (randomize.border.color < 0 || randomize.border.color > 1) {
95+
stop('"randomize.border.color" probability must be between 0 and 1.')
96+
}
97+
randomize.border.color;
98+
} else {
99+
0.3;
100+
}
101+
border.color.scheme <- if (runif(1) <= border.color.randomization.prob) {
102+
generate.random.color();
103+
} else {
104+
NA;
105+
}
106+
107+
if (!('border.col' %in% colnames(tree.df))) {
108+
tree.df$border.col <- border.color.scheme;
109+
} else {
110+
tree.df[is.na(tree.df$border.col), 'border.col'] <- node.color.scheme;
111+
}
112+
override.border.col.i <- sapply(
113+
1:nrow(tree.df),
114+
function(i) runif(1) <= border.color.randomization.prob
115+
);
116+
tree.df[override.border.col.i, 'border.col'] <- sapply(
117+
1:sum(override.border.col.i),
118+
function(i) generate.random.color()
119+
);
120+
}
121+
122+
if (check.randomization.value(randomize.border.width, randomization.name = 'randomize.border.width')) {
123+
border.width.randomization.sd <- if (is.numeric(randomize.border.width)) {
124+
if (randomize.border.width <= 0) {
125+
stop('"randomize.border.width" standard deviation value must be positive.');
126+
}
127+
randomize.border.width;
128+
} else {
129+
1;
130+
};
131+
default.border.width <- 1;
132+
133+
if (!('border.width' %in% colnames(tree.df))) {
134+
tree.df$border.width <- default.border.width;
135+
} else {
136+
tree.df[is.na(tree.df$border.width), 'border.width'] <- default.border.width;
137+
}
138+
tree.df[, 'border.width'] <- tree.df$border.width + rnorm(
139+
mean = 0,
140+
sd = border.width.randomization.sd,
141+
n = nrow(tree.df)
142+
);
143+
tree.df[tree.df$border.width <= 0, 'border.width'] <- 0;
144+
}
145+
146+
if (check.randomization.value(randomize.border.type, randomization.name = 'randomize.border.type')) {
147+
default.border.type <- sample(line.types, size = 1);
148+
149+
border.type.randomization.prob <- if (is.numeric(randomize.border.type)) {
150+
if (randomize.border.type < 0 || randomize.border.type > 1) {
151+
stop('"randomize.border.type" probability must be between 0 and 1.')
152+
}
153+
randomize.border.type;
154+
} else {
155+
0.3;
156+
}
157+
158+
if (!('border.type' %in% colnames(tree.df))) {
159+
tree.df$border.type <- default.border.type;
160+
} else {
161+
tree.df[is.na(tree.df$border.type), 'border.type'] <- default.border.type;
162+
}
163+
override.border.type.i <- runif(nrow(tree.df)) <= border.type.randomization.prob;
164+
tree.df[override.border.type.i, 'border.type'] <- sample(
165+
line.types,
166+
size = sum(override.border.type.i),
167+
replace = TRUE
168+
);
169+
}
170+
171+
edge.names <- sort(get.branch.names(tree.df));
172+
if (length(edge.names) < 1) {
173+
edge.names <- 1;
174+
}
175+
176+
edge.color.randomization.prob <- 0;
177+
if (check.randomization.value(randomize.edge.col, randomization.name = 'randomize.edge.col')) {
178+
edge.color.randomization.prob <- if (is.numeric(randomize.edge.col)) {
179+
if (randomize.edge.col < 0 || randomize.edge.col > 1) {
180+
stop('"randomize.edge.col" probability must be between 0 and 1.')
181+
}
182+
randomize.edge.col;
183+
} else {
184+
0.3;
185+
}
186+
}
187+
188+
if (check.randomization.value(randomize.edge.width, randomization.name = 'randomize.edge.width')) {
189+
edge.width.randomization.sd <- if (is.numeric(randomize.edge.width)) {
190+
if (randomize.edge.width <= 0) {
191+
stop('"randomize.edge.width" standard deviation value must be positive.');
192+
}
193+
randomize.edge.width;
194+
} else {
195+
1;
196+
};
197+
}
198+
199+
edge.type.randomization.prob <- 0;
200+
if (check.randomization.value(randomize.edge.type, randomization.name = 'randomize.edge.type')) {
201+
edge.type.randomization.prob <- if (is.numeric(randomize.edge.type)) {
202+
if (randomize.edge.type < 0 || randomize.edge.type > 1) {
203+
stop('"randomize.edge.type" probability must be between 0 and 1.')
204+
}
205+
randomize.edge.type;
206+
} else {
207+
0.3;
208+
}
209+
}
210+
211+
for (edge.name in edge.names) {
212+
if (check.randomization.value(randomize.edge.col)) {
213+
edge.color.scheme <- generate.random.color();
214+
215+
edge.col.column.name <- paste('edge.col', edge.name, sep = '.');
216+
if (!(edge.col.column.name %in% colnames(tree.df))) {
217+
tree.df[, edge.col.column.name] <- edge.color.scheme;
218+
} else {
219+
tree.df[is.na(tree.df[, edge.col.column.name]), edge.col.column.name] <- edge.color.scheme;
220+
}
221+
override.edge.col.i <- runif(n = nrow(tree.df), max = 1) <= edge.color.randomization.prob;
222+
tree.df[override.edge.col.i, edge.col.column.name] <- sapply(
223+
1:sum(override.edge.col.i),
224+
function(i) generate.random.color()
225+
);
226+
}
227+
228+
if (check.randomization.value(randomize.edge.width)) {
229+
base.edge.width.randomization.prob <- 0.5;
230+
default.edge.width <- if (runif(1) <= base.edge.width.randomization.prob) {
231+
max(0, rnorm(1, mean = 3));
232+
} else {
233+
3;
234+
}
235+
236+
edge.width.column.name <- paste('edge.width', edge.name, sep = '.');
237+
if (!(edge.width.column.name %in% colnames(tree.df))) {
238+
tree.df[, edge.width.column.name] <- default.edge.width;
239+
} else {
240+
tree.df[is.na(tree.df[, edge.width.column.name]), edge.col.column.name] <- default.edge.width;
241+
}
242+
tree.df[, edge.width.column.name] <- tree.df[, edge.width.column.name] + rnorm(
243+
sd = edge.width.randomization.sd,
244+
n = nrow(tree.df)
245+
);
246+
tree.df[, edge.width.column.name] <- sapply(
247+
tree.df[, edge.width.column.name],
248+
function(x) max(0, x)
249+
);
250+
}
251+
252+
if (check.randomization.value(randomize.edge.type)) {
253+
default.edge.type <- sample(line.types, size = 1);
254+
255+
edge.type.column.name <- paste('edge.type', edge.name, sep = '.');
256+
if (!(edge.type.column.name %in% colnames(tree.df))) {
257+
tree.df[, edge.type.column.name] <- default.edge.type;
258+
} else {
259+
tree.df[is.na(tree.df[, edge.type.column.name]), edge.col.column.name] <- default.edge.type;
260+
}
261+
override.edge.type.i <- runif(n = nrow(tree.df), max = 1) <= edge.type.randomization.prob;
262+
tree.df[override.edge.type.i, edge.type.column.name] <- sample(
263+
line.types,
264+
size = sum(override.edge.type.i),
265+
replace = TRUE
266+
);
267+
}
268+
269+
if (check.randomization.value(randomize.edge.length, randomization.name = 'randomize.edge.length')) {
270+
edge.length.column.name <- paste('length', edge.name, sep = '.');
271+
base.edge.length <- 10 ** runif(n = 1, min = 0, max = 6);
272+
273+
edge.length.randomization.proportion <- if (is.numeric(randomize.edge.length)) {
274+
if (randomize.edge.length <= 0) {
275+
stop('"randomize.edge.length" proportion must be positive.');
276+
}
277+
randomize.edge.length;
278+
} else {
279+
0.2;
280+
}
281+
282+
if (!(edge.length.column.name %in% colnames(tree.df))) {
283+
tree.df[, edge.length.column.name] <- base.edge.length;
284+
} else {
285+
tree.df[is.na(tree.df[, edge.length.column.name]), edge.length.column.name] <- base.edge.length;
286+
}
287+
edge.length.randomization.sd <- median(tree.df[, edge.length.column.name]) * edge.length.randomization.proportion;
288+
tree.df[, edge.length.column.name] <- tree.df[, edge.length.column.name] + rnorm(
289+
sd = edge.length.randomization.sd,
290+
n = nrow(tree.df)
291+
);
292+
tree.df[tree.df[, edge.length.column.name] < 0, edge.length.column.name]
293+
}
294+
}
295+
296+
result <- create.phylogenetic.tree(
297+
tree.df,
298+
plotting.direction = plotting.direction,
299+
...
300+
);
301+
return(result);
302+
}
303+
304+
check.randomization.value <- function(
305+
randomization,
306+
randomization.name = NULL
307+
) {
308+
if (is.null(randomization.name)) {
309+
randomization.name <- 'Randomization value';
310+
}
311+
if (length(randomization) != 1) {
312+
stop(paste(randomization.name, 'must be length 1.'));
313+
}
314+
315+
randomize.result <- FALSE;
316+
if (is.numeric(randomization)) {
317+
randomize.result <- TRUE;
318+
} else if (is.logical(randomization)) {
319+
randomize.result <- randomization;
320+
} else {
321+
stop(paste(randomization.name, 'must be numeric or TRUE/FALSE.'));
322+
}
323+
return(randomize.result);
324+
}

R/utility.R

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ degrees.to.radians <- function(degrees) {
3434
return(degrees * pi / 180);
3535
}
3636

37+
radians.to.degrees <- function(radians) {
38+
return(radians * 180 / pi);
39+
}
40+
3741
get.encoded.distance <- function(points) {
3842
if (!is.data.frame(points)) {
3943
stop(paste(
@@ -63,6 +67,9 @@ get.encoded.distance <- function(points) {
6367
return(encoded.distances);
6468
}
6569

70+
generate.random.color <- function() {
71+
rgb(runif(1), runif(1), runif(1));
72+
}
6673

6774
oxford.comma.vector.concat <- function(vec, empty.value = '', flatten.empty.value = TRUE) {
6875
if (length(vec) == 0) {

0 commit comments

Comments
 (0)