From ed50814ccba7fe72f296c466e0abf64d7923c51d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Nov 2012 15:57:05 -0500 Subject: [PATCH 01/10] Finally found a case where user errors were being masked behind other errors and could debug. It turns out that the checkForMaskedUserErrors() method needs to run recursively over all levels (calling exception.getCause()) to check for the original cause. --- .../sting/gatk/CommandLineGATK.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 0daad2c2b..4f9031329 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -118,17 +118,24 @@ public class CommandLineGATK extends CommandLineExecutable { public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded"; private static void checkForMaskedUserErrors(final Throwable t) { + // masked out of memory error + if ( t instanceof OutOfMemoryError ) + exitSystemWithUserError(new UserException.NotEnoughMemory()); + // masked user error + if ( t instanceof UserException || t instanceof TribbleException ) + exitSystemWithUserError(new UserException(t.getMessage())); + + // no message means no masked error final String message = t.getMessage(); if ( message == null ) return; - // we know what to do about the common "Too many open files" error + // too many open files error if ( message.contains("Too many open files") ) exitSystemWithUserError(new UserException.TooManyOpenFiles()); // malformed BAM looks like a SAM file - if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || - message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) ) + if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) ) exitSystemWithSamError(t); // can't close tribble index when writing @@ -138,12 +145,10 @@ public class CommandLineGATK extends CommandLineExecutable { // disk is full if ( message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR) ) exitSystemWithUserError(new UserException.NoSpaceOnDevice()); - if ( t.getCause() != null && (t.getCause().getMessage().contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || t.getCause().getMessage().contains(DISK_QUOTA_EXCEEDED_ERROR)) ) - exitSystemWithUserError(new UserException.NoSpaceOnDevice()); - // masked out of memory error - if ( t.getCause() != null && t.getCause() instanceof OutOfMemoryError ) - exitSystemWithUserError(new UserException.NotEnoughMemory()); + // masked error wrapped in another one + if ( t.getCause() != null ) + checkForMaskedUserErrors(t.getCause()); } /** From 4f2229d399948200576695574ef46111f3ed542d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Nov 2012 16:01:26 -0500 Subject: [PATCH 02/10] As per the TODO message, I removed a check that was no longer necessary. Now ID is an allowable INFO field key. --- .../sting/utils/variantcontext/VariantContext.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 27a5b0c24..12f9cb20c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -184,9 +184,6 @@ public class VariantContext implements Feature { // to enable tribble integratio protected CommonInfo commonInfo = null; public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; - @Deprecated // ID is no longer stored in the attributes map - private final static String ID_KEY = "ID"; - public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); /** The location of this VariantContext */ @@ -287,10 +284,6 @@ public class VariantContext implements Feature { // to enable tribble integratio this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); - // todo -- remove me when this check is no longer necessary - if ( this.commonInfo.hasAttribute(ID_KEY) ) - throw new IllegalArgumentException("Trying to create a VariantContext with a ID key. Please use provided constructor argument ID"); - if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); } // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles From c08b78274395c916db872417d4f17ebdacd39906 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Mon, 19 Nov 2012 14:39:06 -0500 Subject: [PATCH 03/10] Count isActive calls directly --- .../sting/gatk/traversals/TraverseActiveRegions.java | 5 +---- .../sting/utils/activeregion/ActivityProfile.java | 5 ----- .../gatk/traversals/TraverseActiveRegionsTest.java | 11 ++++++++--- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 4fe83f331..a2c37944a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -34,9 +34,6 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); private final LinkedHashSet myReads = new LinkedHashSet(); - // package access for unit testing - ActivityProfile profile; - @Override public String getTraversalUnits() { return "active regions"; @@ -56,7 +53,7 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = new LinkedList(); - profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 38cfbb38d..e96eb843d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -103,11 +103,6 @@ public class ActivityProfile { isActiveList.add(result); } - // for unit testing - public List getActiveList() { - return isActiveList; - } - public int size() { return isActiveList.size(); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index 8740a8b68..edc818aca 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -41,6 +41,7 @@ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; + public List isActiveCalls = new ArrayList(); public DummyActiveRegionWalker() { this.prob = 1.0; @@ -48,6 +49,7 @@ public class TraverseActiveRegionsTest extends BaseTest { @Override public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + isActiveCalls.add(ref.getLocus()); return new ActivityProfileResult(ref.getLocus(), prob); } @@ -71,7 +73,7 @@ public class TraverseActiveRegionsTest extends BaseTest { private IndexedFastaSequenceFile reference; private GenomeLocParser genomeLocParser; - private ActiveRegionWalker walker; + private DummyActiveRegionWalker walker; @BeforeClass private void init() throws FileNotFoundException { @@ -83,18 +85,21 @@ public class TraverseActiveRegionsTest extends BaseTest { @Test public void testAllIntervalsSeen() throws Exception { List intervals = new ArrayList(); + List activeIntervals = new ArrayList(); + GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); intervals.add(interval); LocusShardDataProvider dataProvider = createDataProvider(intervals); t.traverse(walker, dataProvider, 0); + activeIntervals.addAll(walker.isActiveCalls); boolean allGenomeLocsSeen = true; for (GenomeLoc loc : intervals) { boolean thisGenomeLocSeen = false; - for (ActivityProfileResult active : t.profile.getActiveList()) { - if (loc.equals(active.getLoc())) { + for (GenomeLoc activeLoc : activeIntervals) { + if (loc.equals(activeLoc)) { thisGenomeLocSeen = true; break; } From e8defcb20dfcc50cec565fbb7fd2e93479162ae5 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Mon, 19 Nov 2012 14:44:00 -0500 Subject: [PATCH 04/10] Test multiple bases and intervals --- .../traversals/TraverseActiveRegionsTest.java | 65 +++++++++++++------ 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index edc818aca..d61da5a83 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -83,49 +83,74 @@ public class TraverseActiveRegionsTest extends BaseTest { } @Test - public void testAllIntervalsSeen() throws Exception { + public void testAllBasesSeenSuite() { List intervals = new ArrayList(); List activeIntervals = new ArrayList(); GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); intervals.add(interval); + testAllBasesSeen(intervals); - LocusShardDataProvider dataProvider = createDataProvider(intervals); + interval = genomeLocParser.createGenomeLoc("1", 10, 20); + intervals.add(interval); + testAllBasesSeen(intervals); + } - t.traverse(walker, dataProvider, 0); - activeIntervals.addAll(walker.isActiveCalls); + public void testAllBasesSeen(List intervals) { + List activeIntervals = new ArrayList(); + for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) { + t.traverse(walker, dataProvider, 0); + activeIntervals.addAll(walker.isActiveCalls); + } - boolean allGenomeLocsSeen = true; - for (GenomeLoc loc : intervals) { - boolean thisGenomeLocSeen = false; + boolean allBasesSeen = true; + for (GenomeLoc base : toBases(intervals)) { + boolean thisBaseSeen = false; for (GenomeLoc activeLoc : activeIntervals) { - if (loc.equals(activeLoc)) { - thisGenomeLocSeen = true; + if (base.equals(activeLoc)) { + thisBaseSeen = true; break; } } - if (!thisGenomeLocSeen) { - allGenomeLocsSeen = false; + if (!thisBaseSeen) { + allBasesSeen = false; break; } } - Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile"); + Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile"); } - private LocusShardDataProvider createDataProvider(List intervals) { + private List toBases(List intervals) { + List bases = new ArrayList(); + for (GenomeLoc interval : intervals) { + if (interval.size() == 1) + bases.add(interval); + else { + for (int location = interval.getStart(); location <= interval.getStop(); location++) { + bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); + } + } + } + return bases; + } + + private List createDataProviders(List intervals) { walker = new DummyActiveRegionWalker(); - StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList()); - Shard shard = new MockLocusShard(genomeLocParser, intervals); - WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs()); - WindowMaker.WindowMakerIterator window = windowMaker.next(); - GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); - //engine.setReferenceDataSource(reference); engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); - return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList()); + Shard shard = new MockLocusShard(genomeLocParser, intervals); + + List providers = new ArrayList(); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs()); + for (WindowMaker.WindowMakerIterator window : windowMaker) { + providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + } + + return providers; } } From 3fa3b00f4abd3159b67a8013505606bf96db1a38 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Mon, 19 Nov 2012 14:45:19 -0500 Subject: [PATCH 05/10] Add ActiveRegion tests and refactor --- .../traversals/TraverseActiveRegionsTest.java | 156 +++++++++++++----- 1 file changed, 115 insertions(+), 41 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index d61da5a83..ce4d400b4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -28,20 +28,23 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * Created with IntelliJ IDEA. * User: thibault * Date: 11/13/12 * Time: 2:47 PM + * + * Test the Active Region Traversal Contract + * http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract */ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; public List isActiveCalls = new ArrayList(); + public List mappedActiveRegions = new ArrayList(); public DummyActiveRegionWalker() { this.prob = 1.0; @@ -55,6 +58,7 @@ public class TraverseActiveRegionsTest extends BaseTest { @Override public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + mappedActiveRegions.add(activeRegion); return 0; } @@ -73,7 +77,6 @@ public class TraverseActiveRegionsTest extends BaseTest { private IndexedFastaSequenceFile reference; private GenomeLocParser genomeLocParser; - private DummyActiveRegionWalker walker; @BeforeClass private void init() throws FileNotFoundException { @@ -83,61 +86,133 @@ public class TraverseActiveRegionsTest extends BaseTest { } @Test - public void testAllBasesSeenSuite() { + public void testAllBasesSeen() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); List intervals = new ArrayList(); - List activeIntervals = new ArrayList(); - GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); - intervals.add(interval); - testAllBasesSeen(intervals); + intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1)); + List activeIntervals = getIsActiveIntervals(walker, intervals); + // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call + verifyEqualIntervals(intervals, activeIntervals); - interval = genomeLocParser.createGenomeLoc("1", 10, 20); - intervals.add(interval); - testAllBasesSeen(intervals); + intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); + activeIntervals = getIsActiveIntervals(walker, intervals); + // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call + verifyEqualIntervals(intervals, activeIntervals); + + // TODO: more tests and edge cases } - public void testAllBasesSeen(List intervals) { + private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { List activeIntervals = new ArrayList(); for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) { t.traverse(walker, dataProvider, 0); activeIntervals.addAll(walker.isActiveCalls); } - boolean allBasesSeen = true; - for (GenomeLoc base : toBases(intervals)) { - boolean thisBaseSeen = false; - for (GenomeLoc activeLoc : activeIntervals) { - if (base.equals(activeLoc)) { - thisBaseSeen = true; - break; - } - } - if (!thisBaseSeen) { - allBasesSeen = false; - break; - } - } - - Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile"); + return activeIntervals; } - private List toBases(List intervals) { - List bases = new ArrayList(); + @Test + public void testActiveRegionCoverage() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + List intervals = new ArrayList(); + + intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + + List activeRegions = getActiveRegions(walker, intervals); + verifyActiveRegionCoverage(intervals, activeRegions); + + // TODO: more tests and edge cases + } + + private void verifyActiveRegionCoverage(List intervals, List activeRegions) { + List intervalStarts = new ArrayList(); + List intervalStops = new ArrayList(); + for (GenomeLoc interval : intervals) { - if (interval.size() == 1) - bases.add(interval); - else { - for (int location = interval.getStart(); location <= interval.getStop(); location++) { - bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); - } - } + intervalStarts.add(interval.getStartLocation()); + intervalStops.add(interval.getStopLocation()); } + + Map baseRegionMap = new HashMap(); + + for (ActiveRegion activeRegion : activeRegions) { + for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) { + // Contract: Regions do not overlap + Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region"); + baseRegionMap.put(activeLoc, activeRegion); + } + + GenomeLoc start = activeRegion.getLocation().getStartLocation(); + if (intervalStarts.contains(start)) + intervalStarts.remove(start); + + GenomeLoc stop = activeRegion.getLocation().getStopLocation(); + if (intervalStops.contains(stop)) + intervalStops.remove(stop); + } + + for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) { + // Contract: Each location in the interval(s) is in exactly one region + // Contract: The total set of regions exactly matches the analysis interval(s) + Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region"); + baseRegionMap.remove(baseLoc); + } + + // Contract: The total set of regions exactly matches the analysis interval(s) + Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals"); + + // Contract: All explicit interval boundaries must also be region boundaries + Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location"); + Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location"); + } + + private List getActiveRegions(DummyActiveRegionWalker walker, List intervals) { + for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) + t.traverse(walker, dataProvider, 0); + + return walker.mappedActiveRegions; + } + + private Collection toSingleBaseLocs(GenomeLoc interval) { + List bases = new ArrayList(); + if (interval.size() == 1) + bases.add(interval); + else { + for (int location = interval.getStart(); location <= interval.getStop(); location++) + bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); + } + return bases; } - private List createDataProviders(List intervals) { - walker = new DummyActiveRegionWalker(); + private Collection toSingleBaseLocs(List intervals) { + Set bases = new TreeSet(); // for sorting and uniqueness + for (GenomeLoc interval : intervals) + bases.addAll(toSingleBaseLocs(interval)); + return bases; + } + + private void verifyEqualIntervals(List aIntervals, List bIntervals) { + Collection aBases = toSingleBaseLocs(aIntervals); + Collection bBases = toSingleBaseLocs(bIntervals); + + Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size()); + + Iterator aIter = aBases.iterator(); + Iterator bIter = bBases.iterator(); + while (aIter.hasNext() && bIter.hasNext()) { + GenomeLoc aLoc = aIter.next(); + GenomeLoc bLoc = bIter.next(); + Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc); + } + } + + private List createDataProviders(List intervals) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); @@ -146,8 +221,7 @@ public class TraverseActiveRegionsTest extends BaseTest { Shard shard = new MockLocusShard(genomeLocParser, intervals); List providers = new ArrayList(); - WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs()); - for (WindowMaker.WindowMakerIterator window : windowMaker) { + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) { providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList())); } From 3ad9128800922ab40aeb5504ff5b0e5a6402e591 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Tue, 20 Nov 2012 13:11:24 -0500 Subject: [PATCH 06/10] Add some reads - Move intervals and reads to init - Update intervals and reads --- .../traversals/TraverseActiveRegionsTest.java | 73 ++++++++++++++----- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index ce4d400b4..2780b7421 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.gatk.traversals; -import org.testng.Assert; +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -22,6 +23,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -43,8 +45,8 @@ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; - public List isActiveCalls = new ArrayList(); - public List mappedActiveRegions = new ArrayList(); + protected List isActiveCalls = new ArrayList(); + protected List mappedActiveRegions = new ArrayList(); public DummyActiveRegionWalker() { this.prob = 1.0; @@ -76,30 +78,46 @@ public class TraverseActiveRegionsTest extends BaseTest { private final TraverseActiveRegions t = new TraverseActiveRegions(); private IndexedFastaSequenceFile reference; + private SAMSequenceDictionary dictionary; private GenomeLocParser genomeLocParser; + private List intervals; + private List reads; + @BeforeClass private void init() throws FileNotFoundException { reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference)); - SAMSequenceDictionary dictionary = reference.getSequenceDictionary(); + dictionary = reference.getSequenceDictionary(); genomeLocParser = new GenomeLocParser(dictionary); + + intervals = new ArrayList(); + intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); + intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000)); + // TODO: this fails! + //intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000)); + intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); + + reads = new ArrayList(); + reads.add(buildSAMRecord("overlap_overlapped_equal", "1", 10, 20)); + reads.add(buildSAMRecord("overlap_overlapped_unequal", "1", 10, 21)); + reads.add(buildSAMRecord("overlap_boundary_equal", "1", 1990, 2009)); + reads.add(buildSAMRecord("overlap_boundary_unequal", "1", 1995, 2050)); + reads.add(buildSAMRecord("extended_only", "1", 3000, 3100)); + reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); + reads.add(buildSAMRecord("simple", "20", 1000100, 1000150)); } @Test public void testAllBasesSeen() { DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); - List intervals = new ArrayList(); - intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1)); List activeIntervals = getIsActiveIntervals(walker, intervals); // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call verifyEqualIntervals(intervals, activeIntervals); - intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); - activeIntervals = getIsActiveIntervals(walker, intervals); - // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call - verifyEqualIntervals(intervals, activeIntervals); - // TODO: more tests and edge cases } @@ -116,11 +134,6 @@ public class TraverseActiveRegionsTest extends BaseTest { @Test public void testActiveRegionCoverage() { DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); - List intervals = new ArrayList(); - - intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999)); - intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); - intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); List activeRegions = getActiveRegions(walker, intervals); verifyActiveRegionCoverage(intervals, activeRegions); @@ -212,17 +225,37 @@ public class TraverseActiveRegionsTest extends BaseTest { } } + // copied from LocusViewTemplate + protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { + SAMFileHeader header = new SAMFileHeader(); + header.setSequenceDictionary(dictionary); + GATKSAMRecord record = new GATKSAMRecord(header); + + record.setReadName(readName); + record.setReferenceIndex(dictionary.getSequenceIndex(contig)); + record.setAlignmentStart(alignmentStart); + + Cigar cigar = new Cigar(); + int len = alignmentEnd - alignmentStart + 1; + cigar.add(new CigarElement(len, CigarOperator.M)); + record.setCigar(cigar); + record.setReadBases(new byte[len]); + record.setBaseQualities(new byte[len]); + + return record; + } + private List createDataProviders(List intervals) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); - StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList()); + StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads); Shard shard = new MockLocusShard(genomeLocParser, intervals); List providers = new ArrayList(); for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) { - providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); } return providers; From c68bc95db6635ccbe90462d18aa5d3a4ae7ace38 Mon Sep 17 00:00:00 2001 From: Joel Thibault Date: Wed, 21 Nov 2012 16:22:57 -0500 Subject: [PATCH 07/10] Initial read mapping tests - Failing tests are commented out --- .../traversals/TraverseActiveRegionsTest.java | 115 ++++++++++++++++-- 1 file changed, 105 insertions(+), 10 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index 2780b7421..e4c7b2db0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -46,7 +46,7 @@ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; protected List isActiveCalls = new ArrayList(); - protected List mappedActiveRegions = new ArrayList(); + protected Map mappedActiveRegions = new HashMap(); public DummyActiveRegionWalker() { this.prob = 1.0; @@ -60,7 +60,7 @@ public class TraverseActiveRegionsTest extends BaseTest { @Override public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { - mappedActiveRegions.add(activeRegion); + mappedActiveRegions.put(activeRegion.getLocation(), activeRegion); return 0; } @@ -101,13 +101,16 @@ public class TraverseActiveRegionsTest extends BaseTest { intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); reads = new ArrayList(); - reads.add(buildSAMRecord("overlap_overlapped_equal", "1", 10, 20)); - reads.add(buildSAMRecord("overlap_overlapped_unequal", "1", 10, 21)); - reads.add(buildSAMRecord("overlap_boundary_equal", "1", 1990, 2009)); - reads.add(buildSAMRecord("overlap_boundary_unequal", "1", 1995, 2050)); + reads.add(buildSAMRecord("simple", "1", 100, 200)); + reads.add(buildSAMRecord("overlap_equal", "1", 10, 20)); + reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21)); + reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009)); + reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050)); reads.add(buildSAMRecord("extended_only", "1", 3000, 3100)); reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); - reads.add(buildSAMRecord("simple", "20", 1000100, 1000150)); + reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); + // TODO + //reads.add(buildSAMRecord("simple20", "20", 10100, 10150)); } @Test @@ -135,13 +138,13 @@ public class TraverseActiveRegionsTest extends BaseTest { public void testActiveRegionCoverage() { DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); - List activeRegions = getActiveRegions(walker, intervals); + Collection activeRegions = getActiveRegions(walker, intervals).values(); verifyActiveRegionCoverage(intervals, activeRegions); // TODO: more tests and edge cases } - private void verifyActiveRegionCoverage(List intervals, List activeRegions) { + private void verifyActiveRegionCoverage(List intervals, Collection activeRegions) { List intervalStarts = new ArrayList(); List intervalStops = new ArrayList(); @@ -183,7 +186,99 @@ public class TraverseActiveRegionsTest extends BaseTest { Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location"); } - private List getActiveRegions(DummyActiveRegionWalker walker, List intervals) { + @Test + public void testReadMapping() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + + // Contract: Each read has the Primary state in a single region (or none) + // This is the region of maximum overlap for the read (earlier if tied) + + // Contract: Each read has the Non-Primary state in all other regions it overlaps + // Contract: Each read has the Extended state in regions where it only overlaps if the region is extended + + // simple: Primary in 1:1-999 + // overlap_equal: Primary in 1:1-999 + // overlap_unequal: Primary in 1:1-999 + // boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 + // extended_only: Extended in 1:2000-2999 + // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 + // outside_intervals: none + + // TODO + // simple20: Primary in 20:10000-20000 + + Map activeRegions = getActiveRegions(walker, intervals); + ActiveRegion region; + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); + + verifyReadPrimary(region, "simple"); + verifyReadPrimary(region, "overlap_equal"); + verifyReadPrimary(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_only"); + // TODO: fail verifyReadNonPrimary(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + // TODO: fail verifyReadPrimary(region, "boundary_equal"); + // TODO: fail verifyReadNonPrimary(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_only"); + // TODO: fail verifyReadPrimary(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + // TODO: fail verifyReadNonPrimary(region, "boundary_equal"); + verifyReadPrimary(region, "boundary_unequal"); + // TODO: fail verifyReadExtended(region, "extended_only"); + // TODO: fail verifyReadExtended(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + + // TODO: more tests and edge cases + } + + private void verifyReadPrimary(ActiveRegion region, String readName) { + SAMRecord read = getRead(region, readName); + Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region); + } + + private void verifyReadNonPrimary(ActiveRegion region, String readName) { + SAMRecord read = getRead(region, readName); + Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region); + } + + private void verifyReadExtended(ActiveRegion region, String readName) { + Assert.fail("The Extended read state has not been implemented"); + } + + private void verifyReadNotPlaced(ActiveRegion region, String readName) { + for (SAMRecord read : region.getReads()) { + if (read.getReadName().equals(readName)) + Assert.fail("Read " + readName + " found in active region " + region); + } + } + + private SAMRecord getRead(ActiveRegion region, String readName) { + for (SAMRecord read : region.getReads()) { + if (read.getReadName().equals(readName)) + return read; + } + + Assert.fail("Read " + readName + " not found in active region " + region); + return null; + } + + private Map getActiveRegions(DummyActiveRegionWalker walker, List intervals) { for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) t.traverse(walker, dataProvider, 0); From 48f271c5bd825dc0f57249dc6164ec4bc3c35541 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Nov 2012 17:23:41 -0500 Subject: [PATCH 10/10] Adding 80% support for multi-allelic variants -- Multi-allelic variants are split into their bi-allelic version, trimmed, and we attempt to provide a meaningful genotype for NA12878 here. It's not perfect and needs some discussion on how to handle het/alt variants -- Adding splitInBiallelic funtion to VariantContextUtils as well as extensive unit tests that also indirectly test reverseTrimAlleles (which worked perfectly FYI) --- .../variantcontext/VariantContextUtils.java | 34 +++++ .../VariantContextTestProvider.java | 2 +- .../VariantContextUtilsUnitTest.java | 119 +++++++++++++++++- 3 files changed, 150 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 81959c998..1f1867f75 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -979,6 +979,40 @@ public class VariantContextUtils { private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + /** + * Split variant context into its biallelic components if there are more than 2 alleles + * + * For VC has A/B/C alleles, returns A/B and A/C contexts. + * Genotypes are all no-calls now (it's not possible to fix them easily) + * Alleles are right trimmed to satisfy VCF conventions + * + * If vc is biallelic or non-variant it is just returned + * + * Chromosome counts are updated (but they are by definition 0) + * + * @param vc a potentially multi-allelic variant context + * @return a list of bi-allelic (or monomorphic) variant context + */ + public static List splitVariantContextToBiallelics(final VariantContext vc) { + if ( ! vc.isVariant() || vc.isBiallelic() ) + // non variant or biallelics already satisfy the contract + return Collections.singletonList(vc); + else { + final List biallelics = new LinkedList(); + + for ( final Allele alt : vc.getAlternateAlleles() ) { + VariantContextBuilder builder = new VariantContextBuilder(vc); + final List alleles = Arrays.asList(vc.getReference(), alt); + builder.alleles(alleles); + builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); + calculateChromosomeCounts(builder, true); + biallelics.add(reverseTrimAlleles(builder.make())); + } + + return biallelics; + } + } + /** * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) * diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 6785fa816..c57b2a44d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -782,7 +782,7 @@ public class VariantContextTestProvider { Assert.assertEquals(actual.getStart(), expected.getStart(), "start"); Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end"); Assert.assertEquals(actual.getID(), expected.getID(), "id"); - Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles"); + Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual); assertAttributesEquals(actual.getAttributes(), expected.getAttributes()); Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied"); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 114104d42..f3daa9e4c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; @@ -39,7 +39,7 @@ import java.io.FileNotFoundException; import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, C, Cref, ATC, ATCATC; + Allele Aref, T, C, G, Cref, ATC, ATCATC; private GenomeLocParser genomeLocParser; @BeforeSuite @@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { Cref = Allele.create("C", true); T = Allele.create("T"); C = Allele.create("C"); + G = Allele.create("G"); ATC = Allele.create("ATC"); ATCATC = Allele.create("ATCATC"); } @@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest { return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); } - @Test(dataProvider = "ReverseClippingPositionTestProvider") public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); Assert.assertEquals(result, cfg.expectedClip); } -} + + // -------------------------------------------------------------------------------- + // + // test splitting into bi-allelics + // + // -------------------------------------------------------------------------------- + + @DataProvider(name = "SplitBiallelics") + public Object[][] makeSplitBiallelics() throws CloneNotSupportedException { + List tests = new ArrayList(); + + final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C)); + + // biallelic -> biallelic + tests.add(new Object[]{root.make(), Arrays.asList(root.make())}); + + // monos -> monos + root.alleles(Arrays.asList(Aref)); + tests.add(new Object[]{root.make(), Arrays.asList(root.make())}); + + root.alleles(Arrays.asList(Aref, C, T)); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Aref, C)).make(), + root.alleles(Arrays.asList(Aref, T)).make())}); + + root.alleles(Arrays.asList(Aref, C, T, G)); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Aref, C)).make(), + root.alleles(Arrays.asList(Aref, T)).make(), + root.alleles(Arrays.asList(Aref, G)).make())}); + + final Allele C = Allele.create("C"); + final Allele CA = Allele.create("CA"); + final Allele CAA = Allele.create("CAA"); + final Allele CAAAA = Allele.create("CAAAA"); + final Allele CAAAAA = Allele.create("CAAAAA"); + final Allele Cref = Allele.create("C", true); + final Allele CAref = Allele.create("CA", true); + final Allele CAAref = Allele.create("CAA", true); + final Allele CAAAref = Allele.create("CAAA", true); + + root.alleles(Arrays.asList(Cref, CA, CAA)); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Cref, CA)).make(), + root.alleles(Arrays.asList(Cref, CAA)).make())}); + + root.alleles(Arrays.asList(CAAref, C, CA)).stop(12); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(CAAref, C)).make(), + root.alleles(Arrays.asList(CAref, C)).stop(11).make())}); + + root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(CAAAref, C)).make(), + root.alleles(Arrays.asList(CAAref, C)).stop(12).make(), + root.alleles(Arrays.asList(CAref, C)).stop(11).make())}); + + root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(), + root.alleles(Arrays.asList(Cref, CA)).stop(10).make(), + root.alleles(Arrays.asList(CAref, C)).stop(11).make(), + root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "SplitBiallelics") + public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List expectedBiallelics) { + final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc); + Assert.assertEquals(biallelics.size(), expectedBiallelics.size()); + for ( int i = 0; i < biallelics.size(); i++ ) { + final VariantContext actual = biallelics.get(i); + final VariantContext expected = expectedBiallelics.get(i); + VariantContextTestProvider.assertEquals(actual, expected); + } + } + + @Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes") + public void testSplitBiallelicsGenotypes(final VariantContext vc, final List expectedBiallelics) { + final List genotypes = new ArrayList(); + + int sampleI = 0; + for ( final List alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) { + genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles)); + } + genotypes.add(GenotypeBuilder.createMissing("missing", 2)); + + final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make(); + + final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); + for ( int i = 0; i < biallelics.size(); i++ ) { + final VariantContext actual = biallelics.get(i); + Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples + + for ( final Genotype inputGenotype : genotypes ) { + final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName()); + Assert.assertNotNull(actualGenotype); + if ( ! vc.isVariant() || vc.isBiallelic() ) + Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName())); + else + Assert.assertTrue(actualGenotype.isNoCall()); + } + } + } +} \ No newline at end of file