diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 0daad2c2b..4f9031329 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -118,17 +118,24 @@ public class CommandLineGATK extends CommandLineExecutable { public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded"; private static void checkForMaskedUserErrors(final Throwable t) { + // masked out of memory error + if ( t instanceof OutOfMemoryError ) + exitSystemWithUserError(new UserException.NotEnoughMemory()); + // masked user error + if ( t instanceof UserException || t instanceof TribbleException ) + exitSystemWithUserError(new UserException(t.getMessage())); + + // no message means no masked error final String message = t.getMessage(); if ( message == null ) return; - // we know what to do about the common "Too many open files" error + // too many open files error if ( message.contains("Too many open files") ) exitSystemWithUserError(new UserException.TooManyOpenFiles()); // malformed BAM looks like a SAM file - if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || - message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) ) + if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) ) exitSystemWithSamError(t); // can't close tribble index when writing @@ -138,12 +145,10 @@ public class CommandLineGATK extends CommandLineExecutable { // disk is full if ( message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR) ) exitSystemWithUserError(new UserException.NoSpaceOnDevice()); - if ( t.getCause() != null && (t.getCause().getMessage().contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || t.getCause().getMessage().contains(DISK_QUOTA_EXCEEDED_ERROR)) ) - exitSystemWithUserError(new UserException.NoSpaceOnDevice()); - // masked out of memory error - if ( t.getCause() != null && t.getCause() instanceof OutOfMemoryError ) - exitSystemWithUserError(new UserException.NotEnoughMemory()); + // masked error wrapped in another one + if ( t.getCause() != null ) + checkForMaskedUserErrors(t.getCause()); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 4fe83f331..a2c37944a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -34,9 +34,6 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); private final LinkedHashSet myReads = new LinkedHashSet(); - // package access for unit testing - ActivityProfile profile; - @Override public String getTraversalUnits() { return "active regions"; @@ -56,7 +53,7 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = new LinkedList(); - profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 38cfbb38d..e96eb843d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -103,11 +103,6 @@ public class ActivityProfile { isActiveList.add(result); } - // for unit testing - public List getActiveList() { - return isActiveList; - } - public int size() { return isActiveList.size(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 27a5b0c24..12f9cb20c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -184,9 +184,6 @@ public class VariantContext implements Feature { // to enable tribble integratio protected CommonInfo commonInfo = null; public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; - @Deprecated // ID is no longer stored in the attributes map - private final static String ID_KEY = "ID"; - public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); /** The location of this VariantContext */ @@ -287,10 +284,6 @@ public class VariantContext implements Feature { // to enable tribble integratio this.commonInfo = new CommonInfo(source, log10PError, filters, attributes); - // todo -- remove me when this check is no longer necessary - if ( this.commonInfo.hasAttribute(ID_KEY) ) - throw new IllegalArgumentException("Trying to create a VariantContext with a ID key. Please use provided constructor argument ID"); - if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); } // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 81959c998..1f1867f75 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -979,6 +979,40 @@ public class VariantContextUtils { private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + /** + * Split variant context into its biallelic components if there are more than 2 alleles + * + * For VC has A/B/C alleles, returns A/B and A/C contexts. + * Genotypes are all no-calls now (it's not possible to fix them easily) + * Alleles are right trimmed to satisfy VCF conventions + * + * If vc is biallelic or non-variant it is just returned + * + * Chromosome counts are updated (but they are by definition 0) + * + * @param vc a potentially multi-allelic variant context + * @return a list of bi-allelic (or monomorphic) variant context + */ + public static List splitVariantContextToBiallelics(final VariantContext vc) { + if ( ! vc.isVariant() || vc.isBiallelic() ) + // non variant or biallelics already satisfy the contract + return Collections.singletonList(vc); + else { + final List biallelics = new LinkedList(); + + for ( final Allele alt : vc.getAlternateAlleles() ) { + VariantContextBuilder builder = new VariantContextBuilder(vc); + final List alleles = Arrays.asList(vc.getReference(), alt); + builder.alleles(alleles); + builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); + calculateChromosomeCounts(builder, true); + biallelics.add(reverseTrimAlleles(builder.make())); + } + + return biallelics; + } + } + /** * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) * diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index 8740a8b68..e4c7b2db0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.gatk.traversals; -import org.testng.Assert; +import net.sf.samtools.*; +import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -22,25 +23,30 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * Created with IntelliJ IDEA. * User: thibault * Date: 11/13/12 * Time: 2:47 PM + * + * Test the Active Region Traversal Contract + * http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract */ public class TraverseActiveRegionsTest extends BaseTest { private class DummyActiveRegionWalker extends ActiveRegionWalker { private final double prob; + protected List isActiveCalls = new ArrayList(); + protected Map mappedActiveRegions = new HashMap(); public DummyActiveRegionWalker() { this.prob = 1.0; @@ -48,11 +54,13 @@ public class TraverseActiveRegionsTest extends BaseTest { @Override public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + isActiveCalls.add(ref.getLocus()); return new ActivityProfileResult(ref.getLocus(), prob); } @Override public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) { + mappedActiveRegions.put(activeRegion.getLocation(), activeRegion); return 0; } @@ -70,57 +78,281 @@ public class TraverseActiveRegionsTest extends BaseTest { private final TraverseActiveRegions t = new TraverseActiveRegions(); private IndexedFastaSequenceFile reference; + private SAMSequenceDictionary dictionary; private GenomeLocParser genomeLocParser; - private ActiveRegionWalker walker; + + private List intervals; + private List reads; @BeforeClass private void init() throws FileNotFoundException { reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference)); - SAMSequenceDictionary dictionary = reference.getSequenceDictionary(); + dictionary = reference.getSequenceDictionary(); genomeLocParser = new GenomeLocParser(dictionary); + + intervals = new ArrayList(); + intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); + intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000)); + // TODO: this fails! + //intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000)); + intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); + + reads = new ArrayList(); + reads.add(buildSAMRecord("simple", "1", 100, 200)); + reads.add(buildSAMRecord("overlap_equal", "1", 10, 20)); + reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21)); + reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009)); + reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050)); + reads.add(buildSAMRecord("extended_only", "1", 3000, 3100)); + reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); + reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); + // TODO + //reads.add(buildSAMRecord("simple20", "20", 10100, 10150)); } @Test - public void testAllIntervalsSeen() throws Exception { - List intervals = new ArrayList(); - GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1); - intervals.add(interval); + public void testAllBasesSeen() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); - LocusShardDataProvider dataProvider = createDataProvider(intervals); + List activeIntervals = getIsActiveIntervals(walker, intervals); + // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call + verifyEqualIntervals(intervals, activeIntervals); - t.traverse(walker, dataProvider, 0); - - boolean allGenomeLocsSeen = true; - for (GenomeLoc loc : intervals) { - boolean thisGenomeLocSeen = false; - for (ActivityProfileResult active : t.profile.getActiveList()) { - if (loc.equals(active.getLoc())) { - thisGenomeLocSeen = true; - break; - } - } - if (!thisGenomeLocSeen) { - allGenomeLocsSeen = false; - break; - } - } - - Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile"); + // TODO: more tests and edge cases } - private LocusShardDataProvider createDataProvider(List intervals) { - walker = new DummyActiveRegionWalker(); + private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { + List activeIntervals = new ArrayList(); + for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) { + t.traverse(walker, dataProvider, 0); + activeIntervals.addAll(walker.isActiveCalls); + } - StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList()); - Shard shard = new MockLocusShard(genomeLocParser, intervals); - WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs()); - WindowMaker.WindowMakerIterator window = windowMaker.next(); + return activeIntervals; + } + @Test + public void testActiveRegionCoverage() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + + Collection activeRegions = getActiveRegions(walker, intervals).values(); + verifyActiveRegionCoverage(intervals, activeRegions); + + // TODO: more tests and edge cases + } + + private void verifyActiveRegionCoverage(List intervals, Collection activeRegions) { + List intervalStarts = new ArrayList(); + List intervalStops = new ArrayList(); + + for (GenomeLoc interval : intervals) { + intervalStarts.add(interval.getStartLocation()); + intervalStops.add(interval.getStopLocation()); + } + + Map baseRegionMap = new HashMap(); + + for (ActiveRegion activeRegion : activeRegions) { + for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) { + // Contract: Regions do not overlap + Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region"); + baseRegionMap.put(activeLoc, activeRegion); + } + + GenomeLoc start = activeRegion.getLocation().getStartLocation(); + if (intervalStarts.contains(start)) + intervalStarts.remove(start); + + GenomeLoc stop = activeRegion.getLocation().getStopLocation(); + if (intervalStops.contains(stop)) + intervalStops.remove(stop); + } + + for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) { + // Contract: Each location in the interval(s) is in exactly one region + // Contract: The total set of regions exactly matches the analysis interval(s) + Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region"); + baseRegionMap.remove(baseLoc); + } + + // Contract: The total set of regions exactly matches the analysis interval(s) + Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals"); + + // Contract: All explicit interval boundaries must also be region boundaries + Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location"); + Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location"); + } + + @Test + public void testReadMapping() { + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + + // Contract: Each read has the Primary state in a single region (or none) + // This is the region of maximum overlap for the read (earlier if tied) + + // Contract: Each read has the Non-Primary state in all other regions it overlaps + // Contract: Each read has the Extended state in regions where it only overlaps if the region is extended + + // simple: Primary in 1:1-999 + // overlap_equal: Primary in 1:1-999 + // overlap_unequal: Primary in 1:1-999 + // boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 + // extended_only: Extended in 1:2000-2999 + // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 + // outside_intervals: none + + // TODO + // simple20: Primary in 20:10000-20000 + + Map activeRegions = getActiveRegions(walker, intervals); + ActiveRegion region; + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); + + verifyReadPrimary(region, "simple"); + verifyReadPrimary(region, "overlap_equal"); + verifyReadPrimary(region, "overlap_unequal"); + verifyReadNotPlaced(region, "boundary_equal"); + verifyReadNotPlaced(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_only"); + // TODO: fail verifyReadNonPrimary(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + // TODO: fail verifyReadPrimary(region, "boundary_equal"); + // TODO: fail verifyReadNonPrimary(region, "boundary_unequal"); + verifyReadNotPlaced(region, "extended_only"); + // TODO: fail verifyReadPrimary(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); + + verifyReadNotPlaced(region, "simple"); + verifyReadNotPlaced(region, "overlap_equal"); + verifyReadNotPlaced(region, "overlap_unequal"); + // TODO: fail verifyReadNonPrimary(region, "boundary_equal"); + verifyReadPrimary(region, "boundary_unequal"); + // TODO: fail verifyReadExtended(region, "extended_only"); + // TODO: fail verifyReadExtended(region, "extended_and_np"); + verifyReadNotPlaced(region, "outside_intervals"); + + // TODO: more tests and edge cases + } + + private void verifyReadPrimary(ActiveRegion region, String readName) { + SAMRecord read = getRead(region, readName); + Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region); + } + + private void verifyReadNonPrimary(ActiveRegion region, String readName) { + SAMRecord read = getRead(region, readName); + Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region); + } + + private void verifyReadExtended(ActiveRegion region, String readName) { + Assert.fail("The Extended read state has not been implemented"); + } + + private void verifyReadNotPlaced(ActiveRegion region, String readName) { + for (SAMRecord read : region.getReads()) { + if (read.getReadName().equals(readName)) + Assert.fail("Read " + readName + " found in active region " + region); + } + } + + private SAMRecord getRead(ActiveRegion region, String readName) { + for (SAMRecord read : region.getReads()) { + if (read.getReadName().equals(readName)) + return read; + } + + Assert.fail("Read " + readName + " not found in active region " + region); + return null; + } + + private Map getActiveRegions(DummyActiveRegionWalker walker, List intervals) { + for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) + t.traverse(walker, dataProvider, 0); + + return walker.mappedActiveRegions; + } + + private Collection toSingleBaseLocs(GenomeLoc interval) { + List bases = new ArrayList(); + if (interval.size() == 1) + bases.add(interval); + else { + for (int location = interval.getStart(); location <= interval.getStop(); location++) + bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location)); + } + + return bases; + } + + private Collection toSingleBaseLocs(List intervals) { + Set bases = new TreeSet(); // for sorting and uniqueness + for (GenomeLoc interval : intervals) + bases.addAll(toSingleBaseLocs(interval)); + + return bases; + } + + private void verifyEqualIntervals(List aIntervals, List bIntervals) { + Collection aBases = toSingleBaseLocs(aIntervals); + Collection bBases = toSingleBaseLocs(bIntervals); + + Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size()); + + Iterator aIter = aBases.iterator(); + Iterator bIter = bBases.iterator(); + while (aIter.hasNext() && bIter.hasNext()) { + GenomeLoc aLoc = aIter.next(); + GenomeLoc bLoc = bIter.next(); + Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc); + } + } + + // copied from LocusViewTemplate + protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { + SAMFileHeader header = new SAMFileHeader(); + header.setSequenceDictionary(dictionary); + GATKSAMRecord record = new GATKSAMRecord(header); + + record.setReadName(readName); + record.setReferenceIndex(dictionary.getSequenceIndex(contig)); + record.setAlignmentStart(alignmentStart); + + Cigar cigar = new Cigar(); + int len = alignmentEnd - alignmentStart + 1; + cigar.add(new CigarElement(len, CigarOperator.M)); + record.setCigar(cigar); + record.setReadBases(new byte[len]); + record.setBaseQualities(new byte[len]); + + return record; + } + + private List createDataProviders(List intervals) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); - //engine.setReferenceDataSource(reference); engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); - return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList()); + StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads); + Shard shard = new MockLocusShard(genomeLocParser, intervals); + + List providers = new ArrayList(); + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) { + providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + } + + return providers; } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 6785fa816..c57b2a44d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -782,7 +782,7 @@ public class VariantContextTestProvider { Assert.assertEquals(actual.getStart(), expected.getStart(), "start"); Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end"); Assert.assertEquals(actual.getID(), expected.getID(), "id"); - Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles"); + Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual); assertAttributesEquals(actual.getAttributes(), expected.getAttributes()); Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied"); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 114104d42..f3daa9e4c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; @@ -39,7 +39,7 @@ import java.io.FileNotFoundException; import java.util.*; public class VariantContextUtilsUnitTest extends BaseTest { - Allele Aref, T, C, Cref, ATC, ATCATC; + Allele Aref, T, C, G, Cref, ATC, ATCATC; private GenomeLocParser genomeLocParser; @BeforeSuite @@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { Cref = Allele.create("C", true); T = Allele.create("T"); C = Allele.create("C"); + G = Allele.create("G"); ATC = Allele.create("ATC"); ATCATC = Allele.create("ATCATC"); } @@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest { return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); } - @Test(dataProvider = "ReverseClippingPositionTestProvider") public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); Assert.assertEquals(result, cfg.expectedClip); } -} + + // -------------------------------------------------------------------------------- + // + // test splitting into bi-allelics + // + // -------------------------------------------------------------------------------- + + @DataProvider(name = "SplitBiallelics") + public Object[][] makeSplitBiallelics() throws CloneNotSupportedException { + List tests = new ArrayList(); + + final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C)); + + // biallelic -> biallelic + tests.add(new Object[]{root.make(), Arrays.asList(root.make())}); + + // monos -> monos + root.alleles(Arrays.asList(Aref)); + tests.add(new Object[]{root.make(), Arrays.asList(root.make())}); + + root.alleles(Arrays.asList(Aref, C, T)); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Aref, C)).make(), + root.alleles(Arrays.asList(Aref, T)).make())}); + + root.alleles(Arrays.asList(Aref, C, T, G)); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Aref, C)).make(), + root.alleles(Arrays.asList(Aref, T)).make(), + root.alleles(Arrays.asList(Aref, G)).make())}); + + final Allele C = Allele.create("C"); + final Allele CA = Allele.create("CA"); + final Allele CAA = Allele.create("CAA"); + final Allele CAAAA = Allele.create("CAAAA"); + final Allele CAAAAA = Allele.create("CAAAAA"); + final Allele Cref = Allele.create("C", true); + final Allele CAref = Allele.create("CA", true); + final Allele CAAref = Allele.create("CAA", true); + final Allele CAAAref = Allele.create("CAAA", true); + + root.alleles(Arrays.asList(Cref, CA, CAA)); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Cref, CA)).make(), + root.alleles(Arrays.asList(Cref, CAA)).make())}); + + root.alleles(Arrays.asList(CAAref, C, CA)).stop(12); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(CAAref, C)).make(), + root.alleles(Arrays.asList(CAref, C)).stop(11).make())}); + + root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(CAAAref, C)).make(), + root.alleles(Arrays.asList(CAAref, C)).stop(12).make(), + root.alleles(Arrays.asList(CAref, C)).stop(11).make())}); + + root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13); + tests.add(new Object[]{root.make(), + Arrays.asList( + root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(), + root.alleles(Arrays.asList(Cref, CA)).stop(10).make(), + root.alleles(Arrays.asList(CAref, C)).stop(11).make(), + root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "SplitBiallelics") + public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List expectedBiallelics) { + final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc); + Assert.assertEquals(biallelics.size(), expectedBiallelics.size()); + for ( int i = 0; i < biallelics.size(); i++ ) { + final VariantContext actual = biallelics.get(i); + final VariantContext expected = expectedBiallelics.get(i); + VariantContextTestProvider.assertEquals(actual, expected); + } + } + + @Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes") + public void testSplitBiallelicsGenotypes(final VariantContext vc, final List expectedBiallelics) { + final List genotypes = new ArrayList(); + + int sampleI = 0; + for ( final List alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) { + genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles)); + } + genotypes.add(GenotypeBuilder.createMissing("missing", 2)); + + final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make(); + + final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); + for ( int i = 0; i < biallelics.size(); i++ ) { + final VariantContext actual = biallelics.get(i); + Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples + + for ( final Genotype inputGenotype : genotypes ) { + final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName()); + Assert.assertNotNull(actualGenotype); + if ( ! vc.isVariant() || vc.isBiallelic() ) + Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName())); + else + Assert.assertTrue(actualGenotype.isNoCall()); + } + } + } +} \ No newline at end of file