Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
4f243acaa6
|
|
@ -34,6 +34,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||||
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
|
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
|
||||||
|
|
||||||
|
// package access for unit testing
|
||||||
|
ActivityProfile profile;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getTraversalUnits() {
|
public String getTraversalUnits() {
|
||||||
return "active regions";
|
return "active regions";
|
||||||
|
|
@ -53,7 +56,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
|
|
||||||
int minStart = Integer.MAX_VALUE;
|
int minStart = Integer.MAX_VALUE;
|
||||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||||
|
|
||||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -277,8 +277,12 @@ public class VariantAnnotatorEngine {
|
||||||
if ( expression.fieldName.equals("ID") ) {
|
if ( expression.fieldName.equals("ID") ) {
|
||||||
if ( vc.hasID() )
|
if ( vc.hasID() )
|
||||||
infoAnnotations.put(expression.fullName, vc.getID());
|
infoAnnotations.put(expression.fullName, vc.getID());
|
||||||
|
} else if (expression.fieldName.equals("ALT")) {
|
||||||
|
infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString());
|
||||||
|
|
||||||
} else if ( vc.hasAttribute(expression.fieldName) ) {
|
} else if ( vc.hasAttribute(expression.fieldName) ) {
|
||||||
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -190,6 +190,10 @@ public class UnifiedGenotyperEngine {
|
||||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||||
if ( vc != null )
|
if ( vc != null )
|
||||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
|
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
|
||||||
|
// todo - uncomment if we want to also emit a null ref call (with no QUAL) if there's no evidence for REF and if EMIT_ALL_SITES is set
|
||||||
|
// else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
|
||||||
|
// results.add(generateEmptyContext(tracker, refContext, null, rawContext));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -183,6 +183,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
||||||
@Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
|
@Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
|
||||||
private boolean keepSitesWithAC0 = false;
|
private boolean keepSitesWithAC0 = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="numSamples", shortName="numSamples", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
|
||||||
|
private int numSamplesFromArgument = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
|
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
|
||||||
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
|
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
|
||||||
|
|
@ -589,6 +593,14 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
||||||
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
|
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
|
||||||
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
|
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
|
||||||
|
|
||||||
|
public int getNumberOfSamplesForEvaluation() {
|
||||||
|
if (sampleNamesForEvaluation!= null && !sampleNamesForEvaluation.isEmpty())
|
||||||
|
return sampleNamesForEvaluation.size();
|
||||||
|
else {
|
||||||
|
return numSamplesFromArgument;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
|
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
|
||||||
|
|
||||||
public List<RodBinding<VariantContext>> getComps() { return comps; }
|
public List<RodBinding<VariantContext>> getComps() { return comps; }
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier {
|
||||||
|
|
||||||
// There are ploidy x n sample chromosomes
|
// There are ploidy x n sample chromosomes
|
||||||
// TODO -- generalize to handle multiple ploidy
|
// TODO -- generalize to handle multiple ploidy
|
||||||
nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy();
|
nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy();
|
||||||
if ( nchrom < 2 )
|
if ( nchrom < 2 )
|
||||||
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
|
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,11 @@ public class ActivityProfile {
|
||||||
isActiveList.add(result);
|
isActiveList.add(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for unit testing
|
||||||
|
public List<ActivityProfileResult> getActiveList() {
|
||||||
|
return isActiveList;
|
||||||
|
}
|
||||||
|
|
||||||
public int size() {
|
public int size() {
|
||||||
return isActiveList.size();
|
return isActiveList.size();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,126 @@
|
||||||
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
|
import org.testng.Assert;
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: thibault
|
||||||
|
* Date: 11/13/12
|
||||||
|
* Time: 2:47 PM
|
||||||
|
*/
|
||||||
|
public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
|
|
||||||
|
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||||
|
private final double prob;
|
||||||
|
|
||||||
|
public DummyActiveRegionWalker() {
|
||||||
|
this.prob = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
return new ActivityProfileResult(ref.getLocus(), prob);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
|
||||||
|
|
||||||
|
private IndexedFastaSequenceFile reference;
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
private ActiveRegionWalker<Integer, Integer> walker;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
private void init() throws FileNotFoundException {
|
||||||
|
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||||
|
SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
|
||||||
|
genomeLocParser = new GenomeLocParser(dictionary);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAllIntervalsSeen() throws Exception {
|
||||||
|
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||||
|
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
|
||||||
|
intervals.add(interval);
|
||||||
|
|
||||||
|
LocusShardDataProvider dataProvider = createDataProvider(intervals);
|
||||||
|
|
||||||
|
t.traverse(walker, dataProvider, 0);
|
||||||
|
|
||||||
|
boolean allGenomeLocsSeen = true;
|
||||||
|
for (GenomeLoc loc : intervals) {
|
||||||
|
boolean thisGenomeLocSeen = false;
|
||||||
|
for (ActivityProfileResult active : t.profile.getActiveList()) {
|
||||||
|
if (loc.equals(active.getLoc())) {
|
||||||
|
thisGenomeLocSeen = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!thisGenomeLocSeen) {
|
||||||
|
allGenomeLocsSeen = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
|
||||||
|
}
|
||||||
|
|
||||||
|
private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
|
||||||
|
walker = new DummyActiveRegionWalker();
|
||||||
|
|
||||||
|
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
|
||||||
|
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
||||||
|
WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
|
||||||
|
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||||
|
|
||||||
|
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||||
|
//engine.setReferenceDataSource(reference);
|
||||||
|
engine.setGenomeLocParser(genomeLocParser);
|
||||||
|
t.initialize(engine);
|
||||||
|
|
||||||
|
return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue