Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
d33a412b5f
|
|
@ -118,17 +118,24 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded";
|
||||
|
||||
private static void checkForMaskedUserErrors(final Throwable t) {
|
||||
// masked out of memory error
|
||||
if ( t instanceof OutOfMemoryError )
|
||||
exitSystemWithUserError(new UserException.NotEnoughMemory());
|
||||
// masked user error
|
||||
if ( t instanceof UserException || t instanceof TribbleException )
|
||||
exitSystemWithUserError(new UserException(t.getMessage()));
|
||||
|
||||
// no message means no masked error
|
||||
final String message = t.getMessage();
|
||||
if ( message == null )
|
||||
return;
|
||||
|
||||
// we know what to do about the common "Too many open files" error
|
||||
// too many open files error
|
||||
if ( message.contains("Too many open files") )
|
||||
exitSystemWithUserError(new UserException.TooManyOpenFiles());
|
||||
|
||||
// malformed BAM looks like a SAM file
|
||||
if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) ||
|
||||
message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
|
||||
if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
|
||||
exitSystemWithSamError(t);
|
||||
|
||||
// can't close tribble index when writing
|
||||
|
|
@ -138,12 +145,10 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
// disk is full
|
||||
if ( message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR) )
|
||||
exitSystemWithUserError(new UserException.NoSpaceOnDevice());
|
||||
if ( t.getCause() != null && (t.getCause().getMessage().contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || t.getCause().getMessage().contains(DISK_QUOTA_EXCEEDED_ERROR)) )
|
||||
exitSystemWithUserError(new UserException.NoSpaceOnDevice());
|
||||
|
||||
// masked out of memory error
|
||||
if ( t.getCause() != null && t.getCause() instanceof OutOfMemoryError )
|
||||
exitSystemWithUserError(new UserException.NotEnoughMemory());
|
||||
// masked error wrapped in another one
|
||||
if ( t.getCause() != null )
|
||||
checkForMaskedUserErrors(t.getCause());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -34,9 +34,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
|
||||
|
||||
// package access for unit testing
|
||||
ActivityProfile profile;
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
|
|
@ -56,7 +53,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||
|
||||
|
|
|
|||
|
|
@ -103,11 +103,6 @@ public class ActivityProfile {
|
|||
isActiveList.add(result);
|
||||
}
|
||||
|
||||
// for unit testing
|
||||
public List<ActivityProfileResult> getActiveList() {
|
||||
return isActiveList;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return isActiveList.size();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -184,9 +184,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
protected CommonInfo commonInfo = null;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
|
||||
@Deprecated // ID is no longer stored in the attributes map
|
||||
private final static String ID_KEY = "ID";
|
||||
|
||||
public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
|
||||
|
||||
/** The location of this VariantContext */
|
||||
|
|
@ -287,10 +284,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
|
||||
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
|
||||
|
||||
// todo -- remove me when this check is no longer necessary
|
||||
if ( this.commonInfo.hasAttribute(ID_KEY) )
|
||||
throw new IllegalArgumentException("Trying to create a VariantContext with a ID key. Please use provided constructor argument ID");
|
||||
|
||||
if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
|
||||
|
||||
// we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles
|
||||
|
|
|
|||
|
|
@ -979,6 +979,40 @@ public class VariantContextUtils {
|
|||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.testng.Assert;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -22,25 +23,30 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 11/13/12
|
||||
* Time: 2:47 PM
|
||||
*
|
||||
* Test the Active Region Traversal Contract
|
||||
* http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
|
||||
*/
|
||||
public class TraverseActiveRegionsTest extends BaseTest {
|
||||
|
||||
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
private final double prob;
|
||||
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
||||
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
|
||||
|
||||
public DummyActiveRegionWalker() {
|
||||
this.prob = 1.0;
|
||||
|
|
@ -48,11 +54,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
@Override
|
||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
isActiveCalls.add(ref.getLocus());
|
||||
return new ActivityProfileResult(ref.getLocus(), prob);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
mappedActiveRegions.put(activeRegion.getLocation(), activeRegion);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -70,57 +78,281 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
|
||||
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private ActiveRegionWalker<Integer, Integer> walker;
|
||||
|
||||
private List<GenomeLoc> intervals;
|
||||
private List<SAMRecord> reads;
|
||||
|
||||
@BeforeClass
|
||||
private void init() throws FileNotFoundException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
||||
// TODO: this fails!
|
||||
//intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
|
||||
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
||||
|
||||
reads = new ArrayList<SAMRecord>();
|
||||
reads.add(buildSAMRecord("simple", "1", 100, 200));
|
||||
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
|
||||
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
|
||||
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
|
||||
reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
|
||||
reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
|
||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||
// TODO
|
||||
//reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllIntervalsSeen() throws Exception {
|
||||
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
|
||||
intervals.add(interval);
|
||||
public void testAllBasesSeen() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
|
||||
LocusShardDataProvider dataProvider = createDataProvider(intervals);
|
||||
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
|
||||
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
|
||||
verifyEqualIntervals(intervals, activeIntervals);
|
||||
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
boolean allGenomeLocsSeen = true;
|
||||
for (GenomeLoc loc : intervals) {
|
||||
boolean thisGenomeLocSeen = false;
|
||||
for (ActivityProfileResult active : t.profile.getActiveList()) {
|
||||
if (loc.equals(active.getLoc())) {
|
||||
thisGenomeLocSeen = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!thisGenomeLocSeen) {
|
||||
allGenomeLocsSeen = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
|
||||
walker = new DummyActiveRegionWalker();
|
||||
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
activeIntervals.addAll(walker.isActiveCalls);
|
||||
}
|
||||
|
||||
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
|
||||
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
||||
WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
return activeIntervals;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testActiveRegionCoverage() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
|
||||
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
|
||||
verifyActiveRegionCoverage(intervals, activeRegions);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
|
||||
List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
|
||||
|
||||
for (GenomeLoc interval : intervals) {
|
||||
intervalStarts.add(interval.getStartLocation());
|
||||
intervalStops.add(interval.getStopLocation());
|
||||
}
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> baseRegionMap = new HashMap<GenomeLoc, ActiveRegion>();
|
||||
|
||||
for (ActiveRegion activeRegion : activeRegions) {
|
||||
for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) {
|
||||
// Contract: Regions do not overlap
|
||||
Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region");
|
||||
baseRegionMap.put(activeLoc, activeRegion);
|
||||
}
|
||||
|
||||
GenomeLoc start = activeRegion.getLocation().getStartLocation();
|
||||
if (intervalStarts.contains(start))
|
||||
intervalStarts.remove(start);
|
||||
|
||||
GenomeLoc stop = activeRegion.getLocation().getStopLocation();
|
||||
if (intervalStops.contains(stop))
|
||||
intervalStops.remove(stop);
|
||||
}
|
||||
|
||||
for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) {
|
||||
// Contract: Each location in the interval(s) is in exactly one region
|
||||
// Contract: The total set of regions exactly matches the analysis interval(s)
|
||||
Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region");
|
||||
baseRegionMap.remove(baseLoc);
|
||||
}
|
||||
|
||||
// Contract: The total set of regions exactly matches the analysis interval(s)
|
||||
Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals");
|
||||
|
||||
// Contract: All explicit interval boundaries must also be region boundaries
|
||||
Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location");
|
||||
Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadMapping() {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
|
||||
// Contract: Each read has the Primary state in a single region (or none)
|
||||
// This is the region of maximum overlap for the read (earlier if tied)
|
||||
|
||||
// Contract: Each read has the Non-Primary state in all other regions it overlaps
|
||||
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
|
||||
|
||||
// simple: Primary in 1:1-999
|
||||
// overlap_equal: Primary in 1:1-999
|
||||
// overlap_unequal: Primary in 1:1-999
|
||||
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_only: Extended in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
|
||||
// TODO
|
||||
// simple20: Primary in 20:10000-20000
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
ActiveRegion region;
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
|
||||
verifyReadPrimary(region, "simple");
|
||||
verifyReadPrimary(region, "overlap_equal");
|
||||
verifyReadPrimary(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
// TODO: fail verifyReadPrimary(region, "boundary_equal");
|
||||
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_only");
|
||||
// TODO: fail verifyReadPrimary(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
|
||||
verifyReadPrimary(region, "boundary_unequal");
|
||||
// TODO: fail verifyReadExtended(region, "extended_only");
|
||||
// TODO: fail verifyReadExtended(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private void verifyReadPrimary(ActiveRegion region, String readName) {
|
||||
SAMRecord read = getRead(region, readName);
|
||||
Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
|
||||
}
|
||||
|
||||
private void verifyReadNonPrimary(ActiveRegion region, String readName) {
|
||||
SAMRecord read = getRead(region, readName);
|
||||
Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
|
||||
}
|
||||
|
||||
private void verifyReadExtended(ActiveRegion region, String readName) {
|
||||
Assert.fail("The Extended read state has not been implemented");
|
||||
}
|
||||
|
||||
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
|
||||
for (SAMRecord read : region.getReads()) {
|
||||
if (read.getReadName().equals(readName))
|
||||
Assert.fail("Read " + readName + " found in active region " + region);
|
||||
}
|
||||
}
|
||||
|
||||
private SAMRecord getRead(ActiveRegion region, String readName) {
|
||||
for (SAMRecord read : region.getReads()) {
|
||||
if (read.getReadName().equals(readName))
|
||||
return read;
|
||||
}
|
||||
|
||||
Assert.fail("Read " + readName + " not found in active region " + region);
|
||||
return null;
|
||||
}
|
||||
|
||||
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
return walker.mappedActiveRegions;
|
||||
}
|
||||
|
||||
private Collection<GenomeLoc> toSingleBaseLocs(GenomeLoc interval) {
|
||||
List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
|
||||
if (interval.size() == 1)
|
||||
bases.add(interval);
|
||||
else {
|
||||
for (int location = interval.getStart(); location <= interval.getStop(); location++)
|
||||
bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
|
||||
}
|
||||
|
||||
return bases;
|
||||
}
|
||||
|
||||
private Collection<GenomeLoc> toSingleBaseLocs(List<GenomeLoc> intervals) {
|
||||
Set<GenomeLoc> bases = new TreeSet<GenomeLoc>(); // for sorting and uniqueness
|
||||
for (GenomeLoc interval : intervals)
|
||||
bases.addAll(toSingleBaseLocs(interval));
|
||||
|
||||
return bases;
|
||||
}
|
||||
|
||||
private void verifyEqualIntervals(List<GenomeLoc> aIntervals, List<GenomeLoc> bIntervals) {
|
||||
Collection<GenomeLoc> aBases = toSingleBaseLocs(aIntervals);
|
||||
Collection<GenomeLoc> bBases = toSingleBaseLocs(bIntervals);
|
||||
|
||||
Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size());
|
||||
|
||||
Iterator<GenomeLoc> aIter = aBases.iterator();
|
||||
Iterator<GenomeLoc> bIter = bBases.iterator();
|
||||
while (aIter.hasNext() && bIter.hasNext()) {
|
||||
GenomeLoc aLoc = aIter.next();
|
||||
GenomeLoc bLoc = bIter.next();
|
||||
Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc);
|
||||
}
|
||||
}
|
||||
|
||||
// copied from LocusViewTemplate
|
||||
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
header.setSequenceDictionary(dictionary);
|
||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||
|
||||
record.setReadName(readName);
|
||||
record.setReferenceIndex(dictionary.getSequenceIndex(contig));
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
|
||||
Cigar cigar = new Cigar();
|
||||
int len = alignmentEnd - alignmentStart + 1;
|
||||
cigar.add(new CigarElement(len, CigarOperator.M));
|
||||
record.setCigar(cigar);
|
||||
record.setReadBases(new byte[len]);
|
||||
record.setBaseQualities(new byte[len]);
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
//engine.setReferenceDataSource(reference);
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
t.initialize(engine);
|
||||
|
||||
return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
|
||||
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
|
||||
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
||||
|
||||
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
|
||||
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
|
||||
return providers;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -782,7 +782,7 @@ public class VariantContextTestProvider {
|
|||
Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
|
||||
Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
|
||||
Assert.assertEquals(actual.getID(), expected.getID(), "id");
|
||||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
|
||||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);
|
||||
|
||||
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
||||
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
|
|||
import java.util.*;
|
||||
|
||||
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||
Allele Aref, T, C, Cref, ATC, ATCATC;
|
||||
Allele Aref, T, C, G, Cref, ATC, ATCATC;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeSuite
|
||||
|
|
@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
Cref = Allele.create("C", true);
|
||||
T = Allele.create("T");
|
||||
C = Allele.create("C");
|
||||
G = Allele.create("G");
|
||||
ATC = Allele.create("ATC");
|
||||
ATCATC = Allele.create("ATCATC");
|
||||
}
|
||||
|
|
@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// test splitting into bi-allelics
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "SplitBiallelics")
|
||||
public Object[][] makeSplitBiallelics() throws CloneNotSupportedException {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C));
|
||||
|
||||
// biallelic -> biallelic
|
||||
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
|
||||
|
||||
// monos -> monos
|
||||
root.alleles(Arrays.asList(Aref));
|
||||
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
|
||||
|
||||
root.alleles(Arrays.asList(Aref, C, T));
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Aref, C)).make(),
|
||||
root.alleles(Arrays.asList(Aref, T)).make())});
|
||||
|
||||
root.alleles(Arrays.asList(Aref, C, T, G));
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Aref, C)).make(),
|
||||
root.alleles(Arrays.asList(Aref, T)).make(),
|
||||
root.alleles(Arrays.asList(Aref, G)).make())});
|
||||
|
||||
final Allele C = Allele.create("C");
|
||||
final Allele CA = Allele.create("CA");
|
||||
final Allele CAA = Allele.create("CAA");
|
||||
final Allele CAAAA = Allele.create("CAAAA");
|
||||
final Allele CAAAAA = Allele.create("CAAAAA");
|
||||
final Allele Cref = Allele.create("C", true);
|
||||
final Allele CAref = Allele.create("CA", true);
|
||||
final Allele CAAref = Allele.create("CAA", true);
|
||||
final Allele CAAAref = Allele.create("CAAA", true);
|
||||
|
||||
root.alleles(Arrays.asList(Cref, CA, CAA));
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Cref, CA)).make(),
|
||||
root.alleles(Arrays.asList(Cref, CAA)).make())});
|
||||
|
||||
root.alleles(Arrays.asList(CAAref, C, CA)).stop(12);
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(CAAref, C)).make(),
|
||||
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
|
||||
|
||||
root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13);
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(CAAAref, C)).make(),
|
||||
root.alleles(Arrays.asList(CAAref, C)).stop(12).make(),
|
||||
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
|
||||
|
||||
root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13);
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(),
|
||||
root.alleles(Arrays.asList(Cref, CA)).stop(10).make(),
|
||||
root.alleles(Arrays.asList(CAref, C)).stop(11).make(),
|
||||
root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SplitBiallelics")
|
||||
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
final VariantContext expected = expectedBiallelics.get(i);
|
||||
VariantContextTestProvider.assertEquals(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes")
|
||||
public void testSplitBiallelicsGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
||||
int sampleI = 0;
|
||||
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||
genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
|
||||
}
|
||||
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
||||
|
||||
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
|
||||
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
|
||||
|
||||
for ( final Genotype inputGenotype : genotypes ) {
|
||||
final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName());
|
||||
Assert.assertNotNull(actualGenotype);
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName()));
|
||||
else
|
||||
Assert.assertTrue(actualGenotype.isNoCall());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue