Skip to content

Commit 9b95a6d

Browse files
committed
arguments written out
1 parent c9d677c commit 9b95a6d

File tree

1 file changed

+24
-13
lines changed

1 file changed

+24
-13
lines changed

R/combine-vcf-with-pgs.R

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -105,19 +105,30 @@ combine.vcf.with.pgs <- function(vcf.data, pgs.weight.data) {
105105
missing.snp.pgs.weight.data <- subset(missing.snp.merged.data, select = colnames(pgs.weight.data));
106106
rm(missing.snp.merged.data);
107107

108-
# Split VCF$ID column into separate rows for each rsID (multiple rsIDs separated by ;)
109-
if (any(grepl(';', vcf.data$ID))) {
110-
split.rows <- strsplit(
111-
as.character(vcf.data$ID),
112-
';',
113-
fixed = TRUE
114-
);
115-
expanded.vcf <- vcf.data[rep(seq_len(nrow(vcf.data)), lengths(split.rows)), ]
116-
expanded.vcf$ID <- unlist(split.rows)
117-
split.rsid.vcf.data <- expanded.vcf
118-
} else {
119-
split.rsid.vcf.data <- vcf.data
120-
}
108+
# Expand the VCF$ID column to a row-per-rsID format.
109+
# Some variants have multiple rsIDs in the ID column separated by semicolons.
110+
# We detect such cases using grepl, split them, and expand the data so that each rsID has its own row.
111+
# We create a new data frame with the expanded rsID data.
112+
if (any(grepl(';', vcf.data$ID))) {
113+
split.rows <- strsplit(
114+
x = as.character(vcf.data$ID),
115+
split = ';',
116+
fixed = TRUE
117+
);
118+
119+
row.indices <- rep(
120+
x = seq_len(nrow(vcf.data)),
121+
times = lengths(split.rows)
122+
);
123+
124+
expanded.vcf <- vcf.data[row.indices, ];
125+
126+
expanded.vcf$ID <- unlist(split.rows);
127+
128+
split.rsid.vcf.data <- expanded.vcf;
129+
} else {
130+
split.rsid.vcf.data <- vcf.data;
131+
}
121132

122133
# merge missing SNP data on split rsID
123134
merged.vcf.with.missing.pgs.data <- merge(

0 commit comments

Comments
 (0)