Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Menachem Fromer 2012-11-22 01:42:29 -05:00
commit d33a412b5f
8 changed files with 434 additions and 67 deletions

View File

@ -118,17 +118,24 @@ public class CommandLineGATK extends CommandLineExecutable {
public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded";
private static void checkForMaskedUserErrors(final Throwable t) {
// masked out of memory error
if ( t instanceof OutOfMemoryError )
exitSystemWithUserError(new UserException.NotEnoughMemory());
// masked user error
if ( t instanceof UserException || t instanceof TribbleException )
exitSystemWithUserError(new UserException(t.getMessage()));
// no message means no masked error
final String message = t.getMessage();
if ( message == null )
return;
// we know what to do about the common "Too many open files" error
// too many open files error
if ( message.contains("Too many open files") )
exitSystemWithUserError(new UserException.TooManyOpenFiles());
// malformed BAM looks like a SAM file
if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) ||
message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
exitSystemWithSamError(t);
// can't close tribble index when writing
@ -138,12 +145,10 @@ public class CommandLineGATK extends CommandLineExecutable {
// disk is full
if ( message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR) )
exitSystemWithUserError(new UserException.NoSpaceOnDevice());
if ( t.getCause() != null && (t.getCause().getMessage().contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || t.getCause().getMessage().contains(DISK_QUOTA_EXCEEDED_ERROR)) )
exitSystemWithUserError(new UserException.NoSpaceOnDevice());
// masked out of memory error
if ( t.getCause() != null && t.getCause() instanceof OutOfMemoryError )
exitSystemWithUserError(new UserException.NotEnoughMemory());
// masked error wrapped in another one
if ( t.getCause() != null )
checkForMaskedUserErrors(t.getCause());
}
/**

View File

@ -34,9 +34,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
// package access for unit testing
ActivityProfile profile;
@Override
public String getTraversalUnits() {
return "active regions";
@ -56,7 +53,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
int minStart = Integer.MAX_VALUE;
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);

View File

@ -103,11 +103,6 @@ public class ActivityProfile {
isActiveList.add(result);
}
// for unit testing
public List<ActivityProfileResult> getActiveList() {
return isActiveList;
}
public int size() {
return isActiveList.size();
}

View File

@ -184,9 +184,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
protected CommonInfo commonInfo = null;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
@Deprecated // ID is no longer stored in the attributes map
private final static String ID_KEY = "ID";
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
/** The location of this VariantContext */
@ -287,10 +284,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
// todo -- remove me when this check is no longer necessary
if ( this.commonInfo.hasAttribute(ID_KEY) )
throw new IllegalArgumentException("Trying to create a VariantContext with a ID key. Please use provided constructor argument ID");
if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
// we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles

View File

@ -979,6 +979,40 @@ public class VariantContextUtils {
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
/**
* Split variant context into its biallelic components if there are more than 2 alleles
*
* For VC has A/B/C alleles, returns A/B and A/C contexts.
* Genotypes are all no-calls now (it's not possible to fix them easily)
* Alleles are right trimmed to satisfy VCF conventions
*
* If vc is biallelic or non-variant it is just returned
*
* Chromosome counts are updated (but they are by definition 0)
*
* @param vc a potentially multi-allelic variant context
* @return a list of bi-allelic (or monomorphic) variant context
*/
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
if ( ! vc.isVariant() || vc.isBiallelic() )
// non variant or biallelics already satisfy the contract
return Collections.singletonList(vc);
else {
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
for ( final Allele alt : vc.getAlternateAlleles() ) {
VariantContextBuilder builder = new VariantContextBuilder(vc);
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
builder.alleles(alleles);
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
calculateChromosomeCounts(builder, true);
biallelics.add(reverseTrimAlleles(builder.make()));
}
return biallelics;
}
}
/**
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
*

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.gatk.traversals;
import org.testng.Assert;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -22,25 +23,30 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 11/13/12
* Time: 2:47 PM
*
* Test the Active Region Traversal Contract
* http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
*/
public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
private final double prob;
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
public DummyActiveRegionWalker() {
this.prob = 1.0;
@ -48,11 +54,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
@Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
isActiveCalls.add(ref.getLocus());
return new ActivityProfileResult(ref.getLocus(), prob);
}
@Override
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
mappedActiveRegions.put(activeRegion.getLocation(), activeRegion);
return 0;
}
@ -70,57 +78,281 @@ public class TraverseActiveRegionsTest extends BaseTest {
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
private IndexedFastaSequenceFile reference;
private SAMSequenceDictionary dictionary;
private GenomeLocParser genomeLocParser;
private ActiveRegionWalker<Integer, Integer> walker;
private List<GenomeLoc> intervals;
private List<SAMRecord> reads;
@BeforeClass
private void init() throws FileNotFoundException {
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
// TODO: this fails!
//intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
reads = new ArrayList<SAMRecord>();
reads.add(buildSAMRecord("simple", "1", 100, 200));
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
// TODO
//reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
}
@Test
public void testAllIntervalsSeen() throws Exception {
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
intervals.add(interval);
public void testAllBasesSeen() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
LocusShardDataProvider dataProvider = createDataProvider(intervals);
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
verifyEqualIntervals(intervals, activeIntervals);
t.traverse(walker, dataProvider, 0);
boolean allGenomeLocsSeen = true;
for (GenomeLoc loc : intervals) {
boolean thisGenomeLocSeen = false;
for (ActivityProfileResult active : t.profile.getActiveList()) {
if (loc.equals(active.getLoc())) {
thisGenomeLocSeen = true;
break;
}
}
if (!thisGenomeLocSeen) {
allGenomeLocsSeen = false;
break;
}
}
Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
// TODO: more tests and edge cases
}
private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
walker = new DummyActiveRegionWalker();
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
t.traverse(walker, dataProvider, 0);
activeIntervals.addAll(walker.isActiveCalls);
}
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
Shard shard = new MockLocusShard(genomeLocParser, intervals);
WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
return activeIntervals;
}
@Test
public void testActiveRegionCoverage() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
verifyActiveRegionCoverage(intervals, activeRegions);
// TODO: more tests and edge cases
}
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
for (GenomeLoc interval : intervals) {
intervalStarts.add(interval.getStartLocation());
intervalStops.add(interval.getStopLocation());
}
Map<GenomeLoc, ActiveRegion> baseRegionMap = new HashMap<GenomeLoc, ActiveRegion>();
for (ActiveRegion activeRegion : activeRegions) {
for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) {
// Contract: Regions do not overlap
Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region");
baseRegionMap.put(activeLoc, activeRegion);
}
GenomeLoc start = activeRegion.getLocation().getStartLocation();
if (intervalStarts.contains(start))
intervalStarts.remove(start);
GenomeLoc stop = activeRegion.getLocation().getStopLocation();
if (intervalStops.contains(stop))
intervalStops.remove(stop);
}
for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) {
// Contract: Each location in the interval(s) is in exactly one region
// Contract: The total set of regions exactly matches the analysis interval(s)
Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region");
baseRegionMap.remove(baseLoc);
}
// Contract: The total set of regions exactly matches the analysis interval(s)
Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals");
// Contract: All explicit interval boundaries must also be region boundaries
Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location");
Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
}
@Test
public void testReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_only: Extended in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// TODO
// simple20: Primary in 20:10000-20000
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
verifyReadPrimary(region, "simple");
verifyReadPrimary(region, "overlap_equal");
verifyReadPrimary(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
// TODO: fail verifyReadPrimary(region, "boundary_equal");
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_only");
// TODO: fail verifyReadPrimary(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
verifyReadPrimary(region, "boundary_unequal");
// TODO: fail verifyReadExtended(region, "extended_only");
// TODO: fail verifyReadExtended(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
// TODO: more tests and edge cases
}
private void verifyReadPrimary(ActiveRegion region, String readName) {
SAMRecord read = getRead(region, readName);
Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
}
private void verifyReadNonPrimary(ActiveRegion region, String readName) {
SAMRecord read = getRead(region, readName);
Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
}
private void verifyReadExtended(ActiveRegion region, String readName) {
Assert.fail("The Extended read state has not been implemented");
}
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
for (SAMRecord read : region.getReads()) {
if (read.getReadName().equals(readName))
Assert.fail("Read " + readName + " found in active region " + region);
}
}
private SAMRecord getRead(ActiveRegion region, String readName) {
for (SAMRecord read : region.getReads()) {
if (read.getReadName().equals(readName))
return read;
}
Assert.fail("Read " + readName + " not found in active region " + region);
return null;
}
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
t.traverse(walker, dataProvider, 0);
return walker.mappedActiveRegions;
}
private Collection<GenomeLoc> toSingleBaseLocs(GenomeLoc interval) {
List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
if (interval.size() == 1)
bases.add(interval);
else {
for (int location = interval.getStart(); location <= interval.getStop(); location++)
bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
}
return bases;
}
private Collection<GenomeLoc> toSingleBaseLocs(List<GenomeLoc> intervals) {
Set<GenomeLoc> bases = new TreeSet<GenomeLoc>(); // for sorting and uniqueness
for (GenomeLoc interval : intervals)
bases.addAll(toSingleBaseLocs(interval));
return bases;
}
private void verifyEqualIntervals(List<GenomeLoc> aIntervals, List<GenomeLoc> bIntervals) {
Collection<GenomeLoc> aBases = toSingleBaseLocs(aIntervals);
Collection<GenomeLoc> bBases = toSingleBaseLocs(bIntervals);
Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size());
Iterator<GenomeLoc> aIter = aBases.iterator();
Iterator<GenomeLoc> bIter = bBases.iterator();
while (aIter.hasNext() && bIter.hasNext()) {
GenomeLoc aLoc = aIter.next();
GenomeLoc bLoc = bIter.next();
Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc);
}
}
// copied from LocusViewTemplate
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(dictionary);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);
record.setReferenceIndex(dictionary.getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
Cigar cigar = new Cigar();
int len = alignmentEnd - alignmentStart + 1;
cigar.add(new CigarElement(len, CigarOperator.M));
record.setCigar(cigar);
record.setReadBases(new byte[len]);
record.setBaseQualities(new byte[len]);
return record;
}
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
//engine.setReferenceDataSource(reference);
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
Shard shard = new MockLocusShard(genomeLocParser, intervals);
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
return providers;
}
}

View File

@ -782,7 +782,7 @@ public class VariantContextTestProvider {
Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
Assert.assertEquals(actual.getID(), expected.getID(), "id");
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, Cref, ATC, ATCATC;
Allele Aref, T, C, G, Cref, ATC, ATCATC;
private GenomeLocParser genomeLocParser;
@BeforeSuite
@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Cref = Allele.create("C", true);
T = Allele.create("T");
C = Allele.create("C");
G = Allele.create("G");
ATC = Allele.create("ATC");
ATCATC = Allele.create("ATCATC");
}
@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest {
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
}
// --------------------------------------------------------------------------------
//
// test splitting into bi-allelics
//
// --------------------------------------------------------------------------------
@DataProvider(name = "SplitBiallelics")
public Object[][] makeSplitBiallelics() throws CloneNotSupportedException {
List<Object[]> tests = new ArrayList<Object[]>();
final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C));
// biallelic -> biallelic
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
// monos -> monos
root.alleles(Arrays.asList(Aref));
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
root.alleles(Arrays.asList(Aref, C, T));
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Aref, C)).make(),
root.alleles(Arrays.asList(Aref, T)).make())});
root.alleles(Arrays.asList(Aref, C, T, G));
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Aref, C)).make(),
root.alleles(Arrays.asList(Aref, T)).make(),
root.alleles(Arrays.asList(Aref, G)).make())});
final Allele C = Allele.create("C");
final Allele CA = Allele.create("CA");
final Allele CAA = Allele.create("CAA");
final Allele CAAAA = Allele.create("CAAAA");
final Allele CAAAAA = Allele.create("CAAAAA");
final Allele Cref = Allele.create("C", true);
final Allele CAref = Allele.create("CA", true);
final Allele CAAref = Allele.create("CAA", true);
final Allele CAAAref = Allele.create("CAAA", true);
root.alleles(Arrays.asList(Cref, CA, CAA));
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Cref, CA)).make(),
root.alleles(Arrays.asList(Cref, CAA)).make())});
root.alleles(Arrays.asList(CAAref, C, CA)).stop(12);
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(CAAref, C)).make(),
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13);
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(CAAAref, C)).make(),
root.alleles(Arrays.asList(CAAref, C)).stop(12).make(),
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13);
tests.add(new Object[]{root.make(),
Arrays.asList(
root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(),
root.alleles(Arrays.asList(Cref, CA)).stop(10).make(),
root.alleles(Arrays.asList(CAref, C)).stop(11).make(),
root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "SplitBiallelics")
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i);
final VariantContext expected = expectedBiallelics.get(i);
VariantContextTestProvider.assertEquals(actual, expected);
}
}
@Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes")
public void testSplitBiallelicsGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
final List<Genotype> genotypes = new ArrayList<Genotype>();
int sampleI = 0;
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
}
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i);
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
for ( final Genotype inputGenotype : genotypes ) {
final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName());
Assert.assertNotNull(actualGenotype);
if ( ! vc.isVariant() || vc.isBiallelic() )
Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName()));
else
Assert.assertTrue(actualGenotype.isNoCall());
}
}
}
}