Skip to content

Commit 0fd71b7

Browse files
committed
add tests for semicolon-separated ID merge
1 parent 7e701a2 commit 0fd71b7

File tree

1 file changed

+122
-2
lines changed

1 file changed

+122
-2
lines changed

tests/testthat/test-vcf-pgs-merge.R

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,13 +284,22 @@ test_that(
284284
);
285285
test.vcf.data.missing.locus.matching.rsid <- data.frame(
286286
CHROM = c('chr1', 'chr3', 'chr2', 'chr4'),
287-
POS = c(1, 3, 3, 6),
287+
POS = c(1, 3, 3, 6), # rs2 (chr2:3) does not match the PGS weights by POS but does match by rsID; rs5 (chr4:6) matches neither by rsID nor by POS
288288
ID = c('rs1', 'rs3', 'rs2', 'rs5'),
289289
REF = c('A', 'T', 'C', 'G'),
290290
ALT = c('T', 'A', 'G', 'C'),
291291
Indiv = c('sample1', 'sample2', 'sample3', 'sample4'),
292292
gt_GT_alleles = c('A/T', 'T/A', 'C/G', 'G/C')
293293
);
294+
test.vcf.data.missing.locus.matching.rsid.with.semicolons <- data.frame(
295+
CHROM = c('chr1', 'chr3', 'chr2', 'chr4'),
296+
POS = c(1, 3, 3, 6),
297+
ID = c('rs1;rsA', 'rs3', 'rs2;rsB', 'rs5;rsC'),
298+
REF = c('A', 'T', 'C', 'G'),
299+
ALT = c('T', 'A', 'G', 'C'),
300+
Indiv = c('sample1', 'sample2', 'sample3', 'sample4'),
301+
gt_GT_alleles = c('A/T', 'T/A', 'C/G', 'G/C')
302+
);
294303
test.pgs.weight.data <- data.frame(
295304
CHROM = c('chr1', 'chr3', 'chr2', 'chr4'),
296305
POS = c(1, 3, 2, 4),
@@ -316,6 +325,14 @@ test_that(
316325
'PGS is missing 1 SNPs from VCF'
317326
);
318327

328+
expect_warning(
329+
combine.vcf.with.pgs(
330+
vcf.data = test.vcf.data.missing.locus.matching.rsid.with.semicolons,
331+
pgs.weight.data = test.pgs.weight.data
332+
),
333+
'PGS is missing 1 SNPs from VCF'
334+
);
335+
319336
test.combine.vcf.with.pgs.no.missing <- combine.vcf.with.pgs(
320337
vcf.data = test.vcf.data.no.missing,
321338
pgs.weight.data = test.pgs.weight.data
@@ -331,6 +348,11 @@ test_that(
331348
pgs.weight.data = test.pgs.weight.data
332349
);
333350

351+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons <- combine.vcf.with.pgs(
352+
vcf.data = test.vcf.data.missing.locus.matching.rsid.with.semicolons,
353+
pgs.weight.data = test.pgs.weight.data
354+
);
355+
334356
# check that combine.vcf.with.pgs returns the correct number of rows
335357
expect_equal(
336358
nrow(test.combine.vcf.with.pgs.no.missing$merged.vcf.with.pgs.data),
@@ -344,6 +366,10 @@ test_that(
344366
nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data),
345367
4
346368
);
369+
expect_equal(
370+
nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data),
371+
4
372+
);
347373

348374
expect_equal(
349375
nrow(test.combine.vcf.with.pgs.missing$missing.snp.data),
@@ -357,6 +383,10 @@ test_that(
357383
nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid$missing.snp.data),
358384
1
359385
);
386+
expect_equal(
387+
nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data),
388+
1
389+
);
360390

361391
# check that combine.vcf.with.pgs returns the correct number of columns
362392
expect_equal(
@@ -371,6 +401,10 @@ test_that(
371401
ncol(test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data),
372402
11
373403
);
404+
expect_equal(
405+
ncol(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data),
406+
11
407+
);
374408

375409
# check that combine.vcf.with.pgs returns the correct columns
376410
expect_true(
@@ -382,6 +416,9 @@ test_that(
382416
expect_true(
383417
all(c('CHROM', 'POS', 'REF', 'ALT', 'Indiv', 'gt_GT_alleles', 'effect_allele', 'beta', 'ID.pgs', 'ID.vcf', 'merge.strategy') %in% colnames(test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data))
384418
);
419+
expect_true(
420+
all(c('CHROM', 'POS', 'REF', 'ALT', 'Indiv', 'gt_GT_alleles', 'effect_allele', 'beta', 'ID.pgs', 'ID.vcf', 'merge.strategy') %in% colnames(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data))
421+
);
385422

386423
# check that combine.vcf.with.pgs returns the correct values
387424
expect_equal(
@@ -396,7 +433,10 @@ test_that(
396433
test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data$CHROM,
397434
c('chr1', 'chr3', 'chr2', 'chr4')
398435
);
399-
436+
expect_equal(
437+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data$CHROM,
438+
c('chr1', 'chr3', 'chr2', 'chr4')
439+
);
400440

401441
expect_equal(
402442
test.combine.vcf.with.pgs.no.missing$merged.vcf.with.pgs.data$POS,
@@ -410,6 +450,10 @@ test_that(
410450
test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data$POS,
411451
c(1, 3, 3, 4)
412452
);
453+
expect_equal(
454+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data$POS,
455+
c(1, 3, 3, 4)
456+
);
413457

414458
expect_equal(
415459
test.combine.vcf.with.pgs.no.missing$merged.vcf.with.pgs.data$ID.pgs,
@@ -423,7 +467,44 @@ test_that(
423467
test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data$ID.pgs,
424468
c('rs1', 'rs3', 'rs2', 'rs4')
425469
);
470+
expect_equal(
471+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data$ID.pgs,
472+
c('rs1', 'rs3', 'rs2', 'rs4')
473+
);
426474

475+
expect_equal(
476+
test.combine.vcf.with.pgs.no.missing$merged.vcf.with.pgs.data$ID.pgs,
477+
test.combine.vcf.with.pgs.no.missing$merged.vcf.with.pgs.data$ID.vcf
478+
);
479+
expect_equal(
480+
test.combine.vcf.with.pgs.missing$merged.vcf.with.pgs.data$ID.vcf,
481+
c('rs1', 'rs2', 'rs3', NA)
482+
);
483+
expect_equal(
484+
test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data$ID.vcf,
485+
c('rs1', 'rs3', 'rs2', NA)
486+
);
487+
expect_equal(
488+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data$ID.vcf,
489+
c('rs1;rsA', 'rs3', 'rs2;rsB', NA)
490+
);
491+
492+
expect_equal(
493+
test.combine.vcf.with.pgs.no.missing$merged.vcf.with.pgs.data$merge.strategy,
494+
rep('genomic coordinate', 4)
495+
);
496+
expect_equal(
497+
test.combine.vcf.with.pgs.missing$merged.vcf.with.pgs.data$merge.strategy,
498+
c(rep('genomic coordinate', 3), 'rsID')
499+
);
500+
expect_equal(
501+
test.combine.vcf.with.pgs.missing.locus.matching.rsid$merged.vcf.with.pgs.data$merge.strategy,
502+
c('genomic coordinate', 'genomic coordinate', 'rsID','rsID')
503+
);
504+
expect_equal(
505+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$merged.vcf.with.pgs.data$merge.strategy,
506+
c('genomic coordinate', 'genomic coordinate', 'rsID','rsID')
507+
);
427508

428509
# check that combine.vcf.with.pgs returns the correct values for missing SNPs
429510
expect_equal(
@@ -512,6 +593,45 @@ test_that(
512593
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid$missing.snp.data$gt_GT_alleles)
513594
);
514595

596+
expect_equal(
597+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$CHROM.pgs,
598+
'chr4'
599+
);
600+
expect_true(
601+
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$CHROM.vcf)
602+
);
603+
expect_equal(
604+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$POS.pgs,
605+
4
606+
);
607+
expect_true(
608+
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$POS.vcf)
609+
);
610+
expect_equal(
611+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$ID,
612+
'rs4'
613+
);
614+
expect_equal(
615+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$effect_allele,
616+
'G'
617+
);
618+
expect_equal(
619+
test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$beta,
620+
4
621+
);
622+
expect_true(
623+
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$REF)
624+
);
625+
expect_true(
626+
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$ALT)
627+
);
628+
expect_true(
629+
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$Indiv)
630+
);
631+
expect_true(
632+
is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons$missing.snp.data$gt_GT_alleles)
633+
);
634+
515635
}
516636
);
517637

0 commit comments

Comments
 (0)