@@ -284,13 +284,22 @@ test_that(
284284 );
285285 test.vcf.data.missing.locus.matching.rsid <- data.frame (
286286 CHROM = c(' chr1' , ' chr3' , ' chr2' , ' chr4' ),
287- POS = c(1 , 3 , 3 , 6 ),
287+ POS = c(1 , 3 , 3 , 6 ), # rs2/chr2:3 is missing by POS but present by rsID, #rs5/chr4:6 is missing by rsID AND by POS
288288 ID = c(' rs1' , ' rs3' , ' rs2' , ' rs5' ),
289289 REF = c(' A' , ' T' , ' C' , ' G' ),
290290 ALT = c(' T' , ' A' , ' G' , ' C' ),
291291 Indiv = c(' sample1' , ' sample2' , ' sample3' , ' sample4' ),
292292 gt_GT_alleles = c(' A/T' , ' T/A' , ' C/G' , ' G/C' )
293293 );
294+ test.vcf.data.missing.locus.matching.rsid.with.semicolons <- data.frame (
295+ CHROM = c(' chr1' , ' chr3' , ' chr2' , ' chr4' ),
296+ POS = c(1 , 3 , 3 , 6 ),
297+ ID = c(' rs1;rs1' , ' rs3' , ' rs2;rsB' , ' rs5;rsC' ),
298+ REF = c(' A' , ' T' , ' C' , ' G' ),
299+ ALT = c(' T' , ' A' , ' G' , ' C' ),
300+ Indiv = c(' sample1' , ' sample2' , ' sample3' , ' sample4' ),
301+ gt_GT_alleles = c(' A/T' , ' T/A' , ' C/G' , ' G/C' )
302+ );
294303 test.pgs.weight.data <- data.frame (
295304 CHROM = c(' chr1' , ' chr3' , ' chr2' , ' chr4' ),
296305 POS = c(1 , 3 , 2 , 4 ),
@@ -316,6 +325,14 @@ test_that(
316325 ' PGS is missing 1 SNPs from VCF'
317326 );
318327
328+ expect_warning(
329+ combine.vcf.with.pgs(
330+ vcf.data = test.vcf.data.missing.locus.matching.rsid.with.semicolons ,
331+ pgs.weight.data = test.pgs.weight.data
332+ ),
333+ ' PGS is missing 1 SNPs from VCF'
334+ );
335+
319336 test.combine.vcf.with.pgs.no.missing <- combine.vcf.with.pgs(
320337 vcf.data = test.vcf.data.no.missing ,
321338 pgs.weight.data = test.pgs.weight.data
@@ -331,6 +348,11 @@ test_that(
331348 pgs.weight.data = test.pgs.weight.data
332349 );
333350
351+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons <- combine.vcf.with.pgs(
352+ vcf.data = test.vcf.data.missing.locus.matching.rsid.with.semicolons ,
353+ pgs.weight.data = test.pgs.weight.data
354+ );
355+
334356 # check that combine.vcf.with.pgs returns the correct number of rows
335357 expect_equal(
336358 nrow(test.combine.vcf.with.pgs.no.missing $ merged.vcf.with.pgs.data ),
@@ -344,6 +366,10 @@ test_that(
344366 nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data ),
345367 4
346368 );
369+ expect_equal(
370+ nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data ),
371+ 4
372+ );
347373
348374 expect_equal(
349375 nrow(test.combine.vcf.with.pgs.missing $ missing.snp.data ),
@@ -357,6 +383,10 @@ test_that(
357383 nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid $ missing.snp.data ),
358384 1
359385 );
386+ expect_equal(
387+ nrow(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data ),
388+ 1
389+ );
360390
361391 # check that combine.vcf.with.pgs returns the correct number of columns
362392 expect_equal(
@@ -371,6 +401,10 @@ test_that(
371401 ncol(test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data ),
372402 11
373403 );
404+ expect_equal(
405+ ncol(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data ),
406+ 11
407+ );
374408
375409 # check that combine.vcf.with.pgs returns the correct columns
376410 expect_true(
@@ -382,6 +416,9 @@ test_that(
382416 expect_true(
383417 all(c(' CHROM' , ' POS' , ' REF' , ' ALT' , ' Indiv' , ' gt_GT_alleles' , ' effect_allele' , ' beta' , ' ID.pgs' , ' ID.vcf' , ' merge.strategy' ) %in% colnames(test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data ))
384418 );
419+ expect_true(
420+ all(c(' CHROM' , ' POS' , ' REF' , ' ALT' , ' Indiv' , ' gt_GT_alleles' , ' effect_allele' , ' beta' , ' ID.pgs' , ' ID.vcf' , ' merge.strategy' ) %in% colnames(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data ))
421+ );
385422
386423 # check that combine.vcf.with.pgs returns the correct values
387424 expect_equal(
@@ -396,7 +433,10 @@ test_that(
396433 test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data $ CHROM ,
397434 c(' chr1' , ' chr3' , ' chr2' , ' chr4' )
398435 );
399-
436+ expect_equal(
437+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data $ CHROM ,
438+ c(' chr1' , ' chr3' , ' chr2' , ' chr4' )
439+ );
400440
401441 expect_equal(
402442 test.combine.vcf.with.pgs.no.missing $ merged.vcf.with.pgs.data $ POS ,
@@ -410,6 +450,10 @@ test_that(
410450 test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data $ POS ,
411451 c(1 , 3 , 3 , 4 )
412452 );
453+ expect_equal(
454+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data $ POS ,
455+ c(1 , 3 , 3 , 4 )
456+ );
413457
414458 expect_equal(
415459 test.combine.vcf.with.pgs.no.missing $ merged.vcf.with.pgs.data $ ID.pgs ,
@@ -423,7 +467,44 @@ test_that(
423467 test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data $ ID.pgs ,
424468 c(' rs1' , ' rs3' , ' rs2' , ' rs4' )
425469 );
470+ expect_equal(
471+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data $ ID.pgs ,
472+ c(' rs1' , ' rs3' , ' rs2' , ' rs4' )
473+ );
426474
475+ expect_equal(
476+ test.combine.vcf.with.pgs.no.missing $ merged.vcf.with.pgs.data $ ID.pgs ,
477+ test.combine.vcf.with.pgs.no.missing $ merged.vcf.with.pgs.data $ ID.vcf
478+ );
479+ expect_equal(
480+ test.combine.vcf.with.pgs.missing $ merged.vcf.with.pgs.data $ ID.vcf ,
481+ c(' rs1' , ' rs2' , ' rs3' , NA )
482+ );
483+ expect_equal(
484+ test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data $ ID.vcf ,
485+ c(' rs1' , ' rs3' , ' rs2' , NA )
486+ );
487+ expect_equal(
488+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data $ ID.vcf ,
489+ c(' rs1;rs1' , ' rs3' , ' rs2;rsB' , NA )
490+ );
491+
492+ expect_equal(
493+ test.combine.vcf.with.pgs.no.missing $ merged.vcf.with.pgs.data $ merge.strategy ,
494+ rep(' genomic coordinate' , 4 )
495+ );
496+ expect_equal(
497+ test.combine.vcf.with.pgs.missing $ merged.vcf.with.pgs.data $ merge.strategy ,
498+ c(rep(' genomic coordinate' , 3 ), ' rsID' )
499+ );
500+ expect_equal(
501+ test.combine.vcf.with.pgs.missing.locus.matching.rsid $ merged.vcf.with.pgs.data $ merge.strategy ,
502+ c(' genomic coordinate' , ' genomic coordinate' , ' rsID' ,' rsID' )
503+ );
504+ expect_equal(
505+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ merged.vcf.with.pgs.data $ merge.strategy ,
506+ c(' genomic coordinate' , ' genomic coordinate' , ' rsID' ,' rsID' )
507+ );
427508
428509 # check that combine.vcf.with.pgs returns the correct values for missing SNPs
429510 expect_equal(
@@ -512,6 +593,45 @@ test_that(
512593 is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid $ missing.snp.data $ gt_GT_alleles )
513594 );
514595
596+ expect_equal(
597+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ CHROM.pgs ,
598+ ' chr4'
599+ );
600+ expect_true(
601+ is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ CHROM.vcf )
602+ );
603+ expect_equal(
604+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ POS.pgs ,
605+ 4
606+ );
607+ expect_true(
608+ is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ POS.vcf )
609+ );
610+ expect_equal(
611+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ ID ,
612+ ' rs4'
613+ );
614+ expect_equal(
615+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ effect_allele ,
616+ ' G'
617+ );
618+ expect_equal(
619+ test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ beta ,
620+ 4
621+ );
622+ expect_true(
623+ is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ REF )
624+ );
625+ expect_true(
626+ is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ ALT )
627+ );
628+ expect_true(
629+ is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ Indiv )
630+ );
631+ expect_true(
632+ is.na(test.combine.vcf.with.pgs.missing.locus.matching.rsid.with.semicolons $ missing.snp.data $ gt_GT_alleles )
633+ );
634+
515635 }
516636 );
517637
0 commit comments