Skip to content

Commit 7e701a2

Browse files
committed
save original vcf rsID for output
1 parent 9b95a6d commit 7e701a2

File tree

1 file changed

+26
-24
lines changed

1 file changed

+26
-24
lines changed

R/combine-vcf-with-pgs.R

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -105,30 +105,31 @@ combine.vcf.with.pgs <- function(vcf.data, pgs.weight.data) {
105105
missing.snp.pgs.weight.data <- subset(missing.snp.merged.data, select = colnames(pgs.weight.data));
106106
rm(missing.snp.merged.data);
107107

108-
# Expand the VCF$ID column to a row-per-rsID format.
109-
# Some variants have multiple rsIDs in the ID column separated by semicolons.
110-
# We detect such cases using grepl, split them, and expand the data so that each rsID has its own row.
111-
# we create a new data frame with the expanded rsID data
112-
if (any(grepl(';', vcf.data$ID))) {
113-
split.rows <- strsplit(
114-
x = as.character(vcf.data$ID),
115-
split = ';',
116-
fixed = TRUE
117-
);
118-
119-
row.indices <- rep(
120-
x = seq_len(nrow(vcf.data)),
121-
times = lengths(split.rows)
122-
);
123-
124-
expanded.vcf <- vcf.data[row.indices, ];
108+
# Expand the VCF$ID column to a row-per-rsID format.
109+
# Some variants have multiple rsIDs in the ID column separated by semicolons.
110+
# We detect such cases using grepl, split them, and expand the data so that each rsID has its own row.
111+
# We create a new data frame with the expanded rsID data.
112+
if (any(grepl(';', vcf.data$ID))) {
113+
split.rows <- strsplit(
114+
x = as.character(vcf.data$ID),
115+
split = ';',
116+
fixed = TRUE
117+
);
118+
119+
row.indices <- rep(
120+
x = seq_len(nrow(vcf.data)),
121+
times = lengths(split.rows)
122+
);
123+
124+
split.rsid.vcf.data <- vcf.data[row.indices, ];
125+
126+
split.rsid.vcf.data$ID.vcf.unsplit <- split.rsid.vcf.data$ID; # save original rsID names for final output
127+
split.rsid.vcf.data$ID <- unlist(split.rows);
125128

126-
expanded.vcf$ID <- unlist(split.rows);
127-
128-
split.rsid.vcf.data <- expanded.vcf;
129-
} else {
130-
split.rsid.vcf.data <- vcf.data;
131-
}
129+
} else {
130+
vcf.data$ID.vcf.unsplit <- vcf.data$ID; # save an ID.vcf.unsplit column for consistency
131+
split.rsid.vcf.data <- vcf.data;
132+
}
132133

133134
# merge missing SNP data on split rsID
134135
merged.vcf.with.missing.pgs.data <- merge(
@@ -162,7 +163,8 @@ combine.vcf.with.pgs <- function(vcf.data, pgs.weight.data) {
162163

163164
# add columns to match original merge
164165
merged.vcf.with.missing.pgs.data$ID.pgs <- merged.vcf.with.missing.pgs.data$ID;
165-
merged.vcf.with.missing.pgs.data$ID.vcf <- merged.vcf.with.missing.pgs.data$ID;
166+
merged.vcf.with.missing.pgs.data$ID.vcf <- merged.vcf.with.missing.pgs.data$ID.vcf.unsplit;
167+
merged.vcf.with.missing.pgs.data$ID.vcf.unsplit <- NULL;
166168
merged.vcf.with.missing.pgs.data$merge.strategy <- 'rsID';
167169

168170
# subset columns to match original merge

0 commit comments

Comments
 (0)