Merge branch 'master' of ssh://gsa4/humgen/gsa-scr1/gsa-engineering/git/unstable
merging trivially after a commit
This commit is contained in:
commit
f4b39a7545
|
|
@ -18,3 +18,10 @@ queueScatterGather
|
|||
/bar*
|
||||
integrationtests/
|
||||
public/testdata/onTheFlyOutputTest.vcf
|
||||
private/testdata/onTheFlyOutputTest.vcf
|
||||
lib
|
||||
html
|
||||
gatkdocs
|
||||
dist
|
||||
build
|
||||
resources
|
||||
|
|
|
|||
|
|
@ -106,47 +106,49 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
}
|
||||
|
||||
@Override
|
||||
public synchronized void updateDataForRead(final GATKSAMRecord read, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
public synchronized void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(read);
|
||||
if( !skip[offset] ) {
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(read);
|
||||
|
||||
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
|
||||
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset];
|
||||
tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset];
|
||||
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
|
||||
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset];
|
||||
tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset];
|
||||
tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset];
|
||||
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.index;
|
||||
final byte qual = tempQualArray[eventIndex];
|
||||
final double isError = tempFractionalErrorArray[eventIndex];
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.index;
|
||||
final byte qual = tempQualArray[eventIndex];
|
||||
final double isError = tempFractionalErrorArray[eventIndex];
|
||||
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
else
|
||||
qualPreviousDatum.increment(1.0, isError);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
|
||||
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
|
||||
if (covPreviousDatum == null)
|
||||
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
|
||||
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
|
||||
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
|
||||
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
|
||||
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
|
||||
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
|
||||
else
|
||||
covPreviousDatum.increment(1.0, isError);
|
||||
rgPreviousDatum.combine(rgThisDatum);
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
|
||||
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
|
||||
if (qualPreviousDatum == null)
|
||||
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
|
||||
else
|
||||
qualPreviousDatum.increment(1.0, isError);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
|
||||
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
|
||||
if (covPreviousDatum == null)
|
||||
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
|
||||
else
|
||||
covPreviousDatum.increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
|
|
@ -247,7 +247,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
* @return a linked list with all the reads produced by the clipping operations
|
||||
*/
|
||||
@Override
|
||||
public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
LinkedList<GATKSAMRecord> mappedReads;
|
||||
totalReads++;
|
||||
if (!debugRead.isEmpty() && read.getReadName().contains(debugRead))
|
||||
|
|
|
|||
|
|
@ -27,24 +27,23 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -52,6 +51,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -101,7 +101,7 @@ import java.util.*;
|
|||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=300)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
|
|
@ -308,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
|
||||
if( !allelesToGenotype.contains(vc) ) {
|
||||
allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a ReadMetaDataTracker object
|
||||
allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a RefMetaDataTracker object
|
||||
}
|
||||
}
|
||||
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
|
||||
|
|
|
|||
|
|
@ -127,9 +127,9 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createPRTestData() {
|
||||
return new Object[][]{
|
||||
{new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")},
|
||||
{new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")},
|
||||
{new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")},
|
||||
{new PRTest("", "1532242f9fe90ef759a0faa5d85f61fb")},
|
||||
{new PRTest(" -qq -1", "3dd2c87915c96ac55c3872026574d8cb")},
|
||||
{new PRTest(" -qq 6", "5d012ee224f1cb4a7afac59e3655e20c")},
|
||||
{new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,4 +66,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e1f88fac91424740c0eaac1de48b3970");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("000fd36d5cf8090386bb2ac15e3ab0b5"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ public class FastaSequenceIndexBuilder {
|
|||
* Reset iterators and add contig to sequence index
|
||||
*/
|
||||
private void finishReadingContig(FastaSequenceIndex sequenceIndex) {
|
||||
sequenceIndex.add(new FastaSequenceIndexEntry(contig, location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
|
||||
sequenceIndex.add(new FastaSequenceIndexEntry(trimContigName(contig), location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
|
||||
status = Status.NONE;
|
||||
contig = "";
|
||||
size = 0;
|
||||
|
|
@ -258,6 +258,14 @@ public class FastaSequenceIndexBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Trims the contig name to the expected value by removing any characters after the first whitespace
|
||||
*/
|
||||
private static String trimContigName(final String contigName) {
|
||||
int whitespaceIndex = contigName.indexOf(' ');
|
||||
return ( whitespaceIndex == -1 ) ? contigName : contigName.substring(0, whitespaceIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores FastaSequenceIndex as a .fasta.fai file on local machine
|
||||
* Although method is public it cannot be called on any old FastaSequenceIndex - must be created by a FastaSequenceIndexBuilder
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -81,7 +81,7 @@ public class AlignmentValidation extends ReadWalker<Integer,Integer> {
|
|||
* @return Number of reads aligned by this map (aka 1).
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
//logger.info(String.format("examining read %s", read.getReadName()));
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
|
|
|
|||
|
|
@ -117,6 +117,15 @@ public final class RodBinding<T extends Feature> {
|
|||
this.bound = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing purposes only. Creates a RodBinding sufficient for looking up associations to rawName
|
||||
* @param type
|
||||
* @param rawName
|
||||
*/
|
||||
public RodBinding(Class<T> type, final String rawName) {
|
||||
this(type, rawName, "missing", type.getSimpleName(), new Tags());
|
||||
}
|
||||
|
||||
/**
|
||||
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
|
||||
* @param type class this unbound RodBinding creates
|
||||
|
|
|
|||
|
|
@ -42,6 +42,8 @@ import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
|||
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
|
|
@ -49,8 +51,8 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
|
|||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -131,6 +133,11 @@ public class GenomeAnalysisEngine {
|
|||
*/
|
||||
private Collection<ReadFilter> filters;
|
||||
|
||||
/**
|
||||
* Collection of the read transformers applied to the reads
|
||||
*/
|
||||
private List<ReadTransformer> readTransformers;
|
||||
|
||||
/**
|
||||
* Controls the allocation of threads between CPU vs IO.
|
||||
*/
|
||||
|
|
@ -354,6 +361,39 @@ public class GenomeAnalysisEngine {
|
|||
return Collections.unmodifiableList(filters);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of active, initialized read transformers
|
||||
*
|
||||
* @param walker the walker we need to apply read transformers too
|
||||
* @return a non-null list of read transformers
|
||||
*/
|
||||
public void initializeReadTransformers(final Walker walker) {
|
||||
final List<ReadTransformer> activeTransformers = new ArrayList<ReadTransformer>();
|
||||
|
||||
final ReadTransformersMode overrideMode = WalkerManager.getWalkerAnnotation(walker, ReadTransformersMode.class);
|
||||
final ReadTransformer.ApplicationTime overrideTime = overrideMode != null ? overrideMode.ApplicationTime() : null;
|
||||
|
||||
final PluginManager<ReadTransformer> pluginManager = new PluginManager<ReadTransformer>(ReadTransformer.class);
|
||||
|
||||
for ( final ReadTransformer transformer : pluginManager.createAllTypes() ) {
|
||||
transformer.initialize(overrideTime, this, walker);
|
||||
if ( transformer.enabled() )
|
||||
activeTransformers.add(transformer);
|
||||
}
|
||||
|
||||
setReadTransformers(activeTransformers);
|
||||
}
|
||||
|
||||
public List<ReadTransformer> getReadTransformers() {
|
||||
return readTransformers;
|
||||
}
|
||||
|
||||
private void setReadTransformers(final List<ReadTransformer> readTransformers) {
|
||||
if ( readTransformers == null )
|
||||
throw new ReviewedStingException("read transformers cannot be null");
|
||||
this.readTransformers = readTransformers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse out the thread allocation from the given command-line argument.
|
||||
*/
|
||||
|
|
@ -419,9 +459,6 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.setDownsamplingMethod(method);
|
||||
}
|
||||
|
||||
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
|
||||
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
|
||||
|
||||
protected boolean includeReadsWithDeletionAtLoci() {
|
||||
return walker.includeReadsWithDeletionAtLoci();
|
||||
}
|
||||
|
|
@ -702,13 +739,12 @@ public class GenomeAnalysisEngine {
|
|||
protected void initializeDataSources() {
|
||||
logger.info("Strictness is " + argCollection.strictnessLevel);
|
||||
|
||||
// TODO -- REMOVE ME
|
||||
BAQ.DEFAULT_GOP = argCollection.BAQGOP;
|
||||
|
||||
validateSuppliedReference();
|
||||
setReferenceDataSource(argCollection.referenceFile);
|
||||
|
||||
validateSuppliedReads();
|
||||
initializeReadTransformers(walker);
|
||||
|
||||
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
|
||||
|
||||
for (ReadFilter filter : filters)
|
||||
|
|
@ -795,9 +831,6 @@ public class GenomeAnalysisEngine {
|
|||
// interrogating for the downsample method during command line recreation.
|
||||
setDownsamplingMethod(method);
|
||||
|
||||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
||||
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
||||
|
||||
|
|
@ -817,11 +850,8 @@ public class GenomeAnalysisEngine {
|
|||
method,
|
||||
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
|
||||
filters,
|
||||
readTransformers,
|
||||
includeReadsWithDeletionAtLoci(),
|
||||
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
|
||||
getWalkerBAQQualityMode(),
|
||||
refReader,
|
||||
getBaseRecalibration(),
|
||||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,15 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
|
|
@ -34,12 +33,9 @@ public class ReadProperties {
|
|||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final ValidationExclusion exclusionList;
|
||||
private final Collection<ReadFilter> supplementalFilters;
|
||||
private final List<ReadTransformer> readTransformers;
|
||||
private final boolean includeReadsWithDeletionAtLoci;
|
||||
private final boolean useOriginalBaseQualities;
|
||||
private final BAQ.CalculationMode cmode;
|
||||
private final BAQ.QualityMode qmode;
|
||||
private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired
|
||||
private final BaseRecalibration bqsrApplier;
|
||||
private final byte defaultBaseQualities;
|
||||
|
||||
/**
|
||||
|
|
@ -95,6 +91,11 @@ public class ReadProperties {
|
|||
return supplementalFilters;
|
||||
}
|
||||
|
||||
|
||||
public List<ReadTransformer> getReadTransformers() {
|
||||
return readTransformers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether to use original base qualities.
|
||||
* @return Whether to use original base qualities.
|
||||
|
|
@ -103,16 +104,6 @@ public class ReadProperties {
|
|||
return useOriginalBaseQualities;
|
||||
}
|
||||
|
||||
|
||||
public BAQ.QualityMode getBAQQualityMode() { return qmode; }
|
||||
public BAQ.CalculationMode getBAQCalculationMode() { return cmode; }
|
||||
|
||||
public IndexedFastaSequenceFile getRefReader() {
|
||||
return refReader;
|
||||
}
|
||||
|
||||
public BaseRecalibration getBQSRApplier() { return bqsrApplier; }
|
||||
|
||||
/**
|
||||
* @return Default base quality value to fill reads missing base quality information.
|
||||
*/
|
||||
|
|
@ -134,9 +125,6 @@ public class ReadProperties {
|
|||
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
|
||||
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
|
||||
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
||||
* @param cmode How should we apply the BAQ calculation to the reads?
|
||||
* @param qmode How should we apply the BAQ calculation to the reads?
|
||||
* @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation
|
||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||
*/
|
||||
public ReadProperties( Collection<SAMReaderID> samFiles,
|
||||
|
|
@ -146,11 +134,8 @@ public class ReadProperties {
|
|||
DownsamplingMethod downsamplingMethod,
|
||||
ValidationExclusion exclusionList,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities) {
|
||||
this.readers = samFiles;
|
||||
this.header = header;
|
||||
|
|
@ -158,12 +143,9 @@ public class ReadProperties {
|
|||
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
|
||||
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
|
||||
this.supplementalFilters = supplementalFilters;
|
||||
this.readTransformers = readTransformers;
|
||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
this.useOriginalBaseQualities = useOriginalBaseQualities;
|
||||
this.cmode = cmode;
|
||||
this.qmode = qmode;
|
||||
this.refReader = refReader;
|
||||
this.bqsrApplier = bqsrApplier;
|
||||
this.defaultBaseQualities = defaultBaseQualities;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,13 +29,14 @@ import org.broadinstitute.sting.commandline.Hidden;
|
|||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -319,11 +320,11 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
return downsamplingMethod;
|
||||
}
|
||||
|
||||
public static BAQ.QualityMode getBAQQualityMode(Walker walker) {
|
||||
return walker.getClass().getAnnotation(BAQMode.class).QualityMode();
|
||||
public static <T extends Annotation> T getWalkerAnnotation(final Walker walker, final Class<T> clazz) {
|
||||
return walker.getClass().getAnnotation(clazz);
|
||||
}
|
||||
|
||||
public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) {
|
||||
public static ReadTransformer.ApplicationTime getBAQApplicationTime(Walker walker) {
|
||||
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ public class ReferenceContext {
|
|||
* @return The base at the given locus from the reference.
|
||||
*/
|
||||
public byte getBase() {
|
||||
return getBases()[(int)(locus.getStart() - window.getStart())];
|
||||
return getBases()[(locus.getStart() - window.getStart())];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,143 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ListIterator;
|
||||
|
||||
/**
|
||||
* Key algorithmic helper for ReadBasedReferenceOrderedData
|
||||
*
|
||||
* Takes a single iterator of features, and provides a single capability that returns
|
||||
* the list of RODs that overlap an interval. Allows sequential getOverlapping calls
|
||||
* from intervals provided that these intervals always have increasing getStart() values.
|
||||
*
|
||||
*/
|
||||
class IntervalOverlappingRODsFromStream {
|
||||
/**
|
||||
* Only held for QC purposes
|
||||
*/
|
||||
GenomeLoc lastQuery = null;
|
||||
|
||||
private final String name;
|
||||
private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();
|
||||
private final PeekableIterator<RODRecordList> futureFeatures;
|
||||
|
||||
/**
|
||||
* Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
|
||||
* returns RODRecordLists having name
|
||||
*
|
||||
* @param name
|
||||
* @param futureFeatures
|
||||
*/
|
||||
IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
|
||||
if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");
|
||||
|
||||
this.name = name;
|
||||
this.futureFeatures = futureFeatures;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of RODs overlapping loc from this stream of RODs.
|
||||
*
|
||||
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
|
||||
*
|
||||
* @param loc the interval to query
|
||||
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
|
||||
*/
|
||||
@Ensures({"overlaps(loc, result)",
|
||||
"! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
|
||||
"result != null"})
|
||||
public RODRecordList getOverlapping(final GenomeLoc loc) {
|
||||
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
|
||||
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
|
||||
|
||||
trimCurrentFeaturesToLoc(loc);
|
||||
readOverlappingFutureFeatures(loc);
|
||||
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For contract assurance. Checks that all bindings in loc overlap
|
||||
*
|
||||
* @param loc
|
||||
* @param bindings
|
||||
* @return
|
||||
*/
|
||||
@Requires({"loc != null", "bindings != null"})
|
||||
private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
|
||||
for ( final GATKFeature feature : bindings )
|
||||
if ( ! feature.getLocation().overlapsP(loc) )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subset the features in all to those that overlap with loc
|
||||
*
|
||||
* The current features list contains everything read that cannot be thrown away yet, but not
|
||||
* everything in there necessarily overlaps with loc. Subset to just those that do overlap
|
||||
*
|
||||
* @param loc the location that features must overlap
|
||||
* @param all the list of all features
|
||||
* @return a subset of all that overlaps with loc
|
||||
*/
|
||||
@Requires({"loc != null", "all != null"})
|
||||
@Ensures("result.size() <= all.size()")
|
||||
private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
|
||||
final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
|
||||
for ( final GATKFeature feature : all )
|
||||
if ( feature.getLocation().overlapsP(loc) )
|
||||
overlapping.add(feature);
|
||||
return overlapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update function. Remove all elements of currentFeatures that end before loc
|
||||
*
|
||||
* @param loc the location to use
|
||||
*/
|
||||
@Requires("loc != null")
|
||||
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
|
||||
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
|
||||
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
|
||||
while ( it.hasNext() ) {
|
||||
final GATKFeature feature = it.next();
|
||||
if ( feature.getLocation().isBefore(loc) )
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update function: Read all elements from futureFeatures that overlap with loc
|
||||
*
|
||||
* Stops at the first element that starts before the end of loc, or the stream empties
|
||||
*
|
||||
* @param loc
|
||||
*/
|
||||
@Requires("loc != null")
|
||||
@Ensures("currentFeatures.size() >= old(currentFeatures.size())")
|
||||
private void readOverlappingFutureFeatures(final GenomeLoc loc) {
|
||||
while ( futureFeatures.hasNext() ) {
|
||||
final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
|
||||
if ( nextLoc.isBefore(loc) ) {
|
||||
futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
|
||||
} else if ( nextLoc.isPast(loc) ) {
|
||||
break; // next element is past loc, stop looking but don't pop it
|
||||
} else if ( nextLoc.overlapsP(loc) ) {
|
||||
// add overlapping elements to our current features, removing from stream
|
||||
for ( final GATKFeature feature : futureFeatures.next() ) {
|
||||
currentFeatures.add(feature);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -58,7 +58,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
// todo -- warning, I removed the reference to the name from states
|
||||
bindings.add( state.iterator.seekForward(loc) );
|
||||
|
||||
return new RefMetaDataTracker(bindings, referenceContext);
|
||||
return new RefMetaDataTracker(bindings);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -23,40 +23,63 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/** a ROD view for reads. This provides the Read traversals a way of getting a ReadMetaDataTracker */
|
||||
/** a ROD view for reads. This provides the Read traversals a way of getting a RefMetaDataTracker */
|
||||
public class ReadBasedReferenceOrderedView implements View {
|
||||
private final WindowedData window;
|
||||
|
||||
public ReadBasedReferenceOrderedView(ShardDataProvider provider) {
|
||||
window = new WindowedData(provider);
|
||||
provider.register(this);
|
||||
}
|
||||
// a list of the RMDDataState (location->iterators)
|
||||
private final List<RMDDataState> states = new ArrayList<RMDDataState>(1);
|
||||
private final static RefMetaDataTracker EMPTY_TRACKER = new RefMetaDataTracker();
|
||||
|
||||
/**
|
||||
* for testing only please
|
||||
*
|
||||
* @param data the window provider
|
||||
* Used to get genome locs for reads
|
||||
*/
|
||||
ReadBasedReferenceOrderedView(WindowedData data) {
|
||||
window = data;
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* The total extent of all reads in this span. We create iterators from our RODs
|
||||
* from the start of this span, to the end.
|
||||
*/
|
||||
private final GenomeLoc shardSpan;
|
||||
|
||||
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
|
||||
this.genomeLocParser = provider.getGenomeLocParser();
|
||||
// conditional to optimize the case where we don't have any ROD data
|
||||
this.shardSpan = provider.getReferenceOrderedData() != null ? ((ReadShard)provider.getShard()).getReadsSpan() : null;
|
||||
provider.register(this);
|
||||
|
||||
if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) {
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
|
||||
}
|
||||
}
|
||||
|
||||
public ReadMetaDataTracker getReferenceOrderedDataForRead(SAMRecord read) {
|
||||
return window.getTracker(read);
|
||||
|
||||
/**
|
||||
* Testing constructor
|
||||
*/
|
||||
protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser,
|
||||
final GenomeLoc shardSpan,
|
||||
final List<String> names,
|
||||
final List<PeekableIterator<RODRecordList>> featureSources) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.shardSpan = shardSpan;
|
||||
for ( int i = 0; i < names.size(); i++ )
|
||||
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
|
||||
}
|
||||
|
||||
public Collection<Class<? extends View>> getConflictingViews() {
|
||||
|
|
@ -65,135 +88,72 @@ public class ReadBasedReferenceOrderedView implements View {
|
|||
return classes;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if (window != null) window.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
|
||||
class WindowedData {
|
||||
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
|
||||
private final TreeMap<Integer, RODMetaDataContainer> mapping = new TreeMap<Integer, RODMetaDataContainer>();
|
||||
|
||||
// our current location from the last read we processed
|
||||
private GenomeLoc currentLoc;
|
||||
|
||||
// a list of the RMDDataState (location->iterators)
|
||||
private List<RMDDataState> states;
|
||||
|
||||
// the provider; where we get all our information
|
||||
private final ShardDataProvider provider;
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(WindowedData.class);
|
||||
|
||||
/**
|
||||
* create a WindowedData given a shard provider
|
||||
*
|
||||
* @param provider the ShardDataProvider
|
||||
*/
|
||||
public WindowedData(ShardDataProvider provider) {
|
||||
this.provider = provider;
|
||||
}
|
||||
|
||||
/**
|
||||
* load the states dynamically, since the only way to get a genome loc is from the read (the shard doesn't have one)
|
||||
*
|
||||
* @param provider the ShardDataProvider
|
||||
* @param rec the current read
|
||||
*/
|
||||
private void getStates(ShardDataProvider provider, SAMRecord rec) {
|
||||
|
||||
int stop = Integer.MAX_VALUE;
|
||||
// figure out the appropriate alignment stop
|
||||
if (provider.hasReference()) {
|
||||
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
|
||||
}
|
||||
|
||||
// calculate the range of positions we need to look at
|
||||
GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(),
|
||||
rec.getAlignmentStart(),
|
||||
stop);
|
||||
states = new ArrayList<RMDDataState>();
|
||||
if (provider.getReferenceOrderedData() != null)
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
|
||||
}
|
||||
|
||||
/**
|
||||
* this function is for testing only
|
||||
*
|
||||
* @param states a list of RMDDataState to initialize with
|
||||
*/
|
||||
WindowedData(List<RMDDataState> states) {
|
||||
this.states = states;
|
||||
provider = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a ReadMetaDataTracker given the current read
|
||||
* create a RefMetaDataTracker given the current read
|
||||
*
|
||||
* @param rec the read
|
||||
*
|
||||
* @return a ReadMetaDataTracker for the read, from which you can get ROD -> read alignments
|
||||
* @return a RefMetaDataTracker for the read, from which you can get ROD -> read alignments
|
||||
*/
|
||||
public ReadMetaDataTracker getTracker(SAMRecord rec) {
|
||||
updatePosition(rec);
|
||||
return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping);
|
||||
@Requires("rec != null")
|
||||
@Ensures("result != null")
|
||||
public RefMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
|
||||
if ( rec.getReadUnmappedFlag() )
|
||||
// empty RODs for unmapped reads
|
||||
return new RefMetaDataTracker();
|
||||
else
|
||||
return getReferenceOrderedDataForInterval(genomeLocParser.createGenomeLoc(rec));
|
||||
}
|
||||
|
||||
/**
|
||||
* update the position we're storing
|
||||
*
|
||||
* @param rec the read to use for start and end
|
||||
*/
|
||||
private void updatePosition(SAMRecord rec) {
|
||||
if (states == null) getStates(this.provider, rec);
|
||||
currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec);
|
||||
|
||||
// flush the queue looking for records we've passed over
|
||||
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
|
||||
mapping.pollFirstEntry(); // toss away records that we've passed
|
||||
|
||||
// add new data to the queue
|
||||
for (RMDDataState state : states) {
|
||||
// move into position
|
||||
while (state.iterator.hasNext() && state.iterator.peekNextLocation().isBefore(currentLoc))
|
||||
state.iterator.next();
|
||||
while (state.iterator.hasNext() && state.iterator.peekNextLocation().overlapsP(currentLoc)) {
|
||||
RODRecordList list = state.iterator.next();
|
||||
for (GATKFeature datum : list) {
|
||||
if (!mapping.containsKey(list.getLocation().getStart()))
|
||||
mapping.put(list.getLocation().getStart(), new RODMetaDataContainer());
|
||||
mapping.get(list.getLocation().getStart()).addEntry(datum);
|
||||
}
|
||||
}
|
||||
@Requires({"interval != null", "shardSpan == null || shardSpan.isUnmapped() || shardSpan.containsP(interval)"})
|
||||
@Ensures("result != null")
|
||||
public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
|
||||
if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers)
|
||||
return EMPTY_TRACKER;
|
||||
else {
|
||||
final List<RODRecordList> bindings = new ArrayList<RODRecordList>(states.size());
|
||||
for ( final RMDDataState state : states )
|
||||
bindings.add(state.stream.getOverlapping(interval));
|
||||
return new RefMetaDataTracker(bindings);
|
||||
}
|
||||
}
|
||||
|
||||
/** Closes the current view. */
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
public void close() {
|
||||
if (states == null) return;
|
||||
for (RMDDataState state : states)
|
||||
state.dataSource.close( state.iterator );
|
||||
for (final RMDDataState state : states)
|
||||
state.close();
|
||||
|
||||
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
||||
states = null;
|
||||
states.clear();
|
||||
}
|
||||
|
||||
/** Models the traversal state of a given ROD lane. */
|
||||
private static class RMDDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final IntervalOverlappingRODsFromStream stream;
|
||||
private final LocationAwareSeekableRODIterator iterator;
|
||||
|
||||
}
|
||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<RODRecordList>(iterator));
|
||||
}
|
||||
|
||||
/** Models the traversal state of a given ROD lane. */
|
||||
class RMDDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final LocationAwareSeekableRODIterator iterator;
|
||||
/**
|
||||
* For testing
|
||||
*/
|
||||
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
|
||||
this.dataSource = null;
|
||||
this.iterator = null;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<RODRecordList>(iterator));
|
||||
}
|
||||
|
||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
public void close() {
|
||||
if ( dataSource != null )
|
||||
dataSource.close( iterator );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { allTracksHere.add(interval); }
|
||||
return new RefMetaDataTracker(allTracksHere, referenceContext);
|
||||
return new RefMetaDataTracker(allTracksHere);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
|
|||
|
|
@ -6,11 +6,9 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -125,4 +123,33 @@ public class ReadShard extends Shard {
|
|||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the full span from the start of the left most read to the end of the right most one
|
||||
*
|
||||
* Note this may be different than the getLocation() of the shard, as this reflects the
|
||||
* targeted span, not the actual span of reads
|
||||
*
|
||||
* @return the genome loc representing the span of these reads on the genome
|
||||
*/
|
||||
public GenomeLoc getReadsSpan() {
|
||||
if ( isUnmapped() || super.getGenomeLocs() == null || reads.isEmpty() )
|
||||
return super.getLocation();
|
||||
else {
|
||||
int start = Integer.MAX_VALUE;
|
||||
int stop = Integer.MIN_VALUE;
|
||||
String contig = null;
|
||||
|
||||
for ( final SAMRecord read : reads ) {
|
||||
if ( contig != null && ! read.getReferenceName().equals(contig) )
|
||||
throw new ReviewedStingException("ReadShard contains reads spanning contig boundaries, which is no longer allowed. "
|
||||
+ "First contig is " + contig + " next read was " + read.getReferenceName() );
|
||||
contig = read.getReferenceName();
|
||||
if ( read.getAlignmentStart() < start ) start = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > stop ) stop = read.getAlignmentEnd();
|
||||
}
|
||||
|
||||
return parser.createGenomeLoc(contig, start, stop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.sam.MergingSamRecordIterator;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
import net.sf.samtools.*;
|
||||
|
|
@ -42,12 +41,9 @@ import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
|
||||
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -200,11 +196,8 @@ public class SAMDataSource {
|
|||
downsamplingMethod,
|
||||
exclusionList,
|
||||
supplementalFilters,
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
includeReadsWithDeletionAtLoci,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
false);
|
||||
}
|
||||
|
|
@ -234,11 +227,8 @@ public class SAMDataSource {
|
|||
DownsamplingMethod downsamplingMethod,
|
||||
ValidationExclusion exclusionList,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
boolean includeReadsWithDeletionAtLoci,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
|
|
@ -262,7 +252,7 @@ public class SAMDataSource {
|
|||
else {
|
||||
// Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively
|
||||
// will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
|
||||
ReadShard.setReadBufferSize(Math.min(1000*samFiles.size(),250000));
|
||||
ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000));
|
||||
}
|
||||
|
||||
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
|
||||
|
|
@ -308,11 +298,8 @@ public class SAMDataSource {
|
|||
downsamplingMethod,
|
||||
exclusionList,
|
||||
supplementalFilters,
|
||||
readTransformers,
|
||||
includeReadsWithDeletionAtLoci,
|
||||
cmode,
|
||||
qmode,
|
||||
refReader,
|
||||
bqsrApplier,
|
||||
defaultBaseQualities);
|
||||
|
||||
// cache the read group id (original) -> read group id (merged)
|
||||
|
|
@ -486,9 +473,15 @@ public class SAMDataSource {
|
|||
|
||||
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
|
||||
while(!shard.isBufferFull() && iterator.hasNext()) {
|
||||
read = iterator.next();
|
||||
shard.addRead(read);
|
||||
noteFilePositionUpdate(positionUpdates,read);
|
||||
final SAMRecord nextRead = iterator.next();
|
||||
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
|
||||
// only add reads to the shard if they are on the same contig
|
||||
read = nextRead;
|
||||
shard.addRead(read);
|
||||
noteFilePositionUpdate(positionUpdates,read);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If the reads are sorted in queryname order, ensure that all reads
|
||||
|
|
@ -597,10 +590,7 @@ public class SAMDataSource {
|
|||
readProperties.getDownsamplingMethod().toFraction,
|
||||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
readProperties.getSupplementalFilters(),
|
||||
readProperties.getBAQCalculationMode(),
|
||||
readProperties.getBAQQualityMode(),
|
||||
readProperties.getRefReader(),
|
||||
readProperties.getBQSRApplier(),
|
||||
readProperties.getReadTransformers(),
|
||||
readProperties.defaultBaseQualities());
|
||||
}
|
||||
|
||||
|
|
@ -667,10 +657,7 @@ public class SAMDataSource {
|
|||
Double downsamplingFraction,
|
||||
Boolean noValidationOfReadOrder,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
BAQ.CalculationMode cmode,
|
||||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
List<ReadTransformer> readTransformers,
|
||||
byte defaultBaseQualities) {
|
||||
|
||||
// *********************************************************************************** //
|
||||
|
|
@ -692,11 +679,11 @@ public class SAMDataSource {
|
|||
// only wrap if we are replacing the original qualities or using a default base quality
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||
|
||||
if (bqsrApplier != null)
|
||||
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
|
||||
|
||||
if (cmode != BAQ.CalculationMode.OFF)
|
||||
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
|
||||
// set up read transformers
|
||||
for ( final ReadTransformer readTransformer : readTransformers ) {
|
||||
if ( readTransformer.enabled() && readTransformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_INPUT )
|
||||
wrappedIterator = new ReadTransformingIterator(wrappedIterator, readTransformer);
|
||||
}
|
||||
|
||||
return wrappedIterator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,22 +100,29 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
* @return The best-fit microscheduler.
|
||||
*/
|
||||
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
|
||||
if (walker instanceof TreeReducible && threadAllocation.getNumCPUThreads() > 1) {
|
||||
if(walker.isReduceByInterval())
|
||||
if (threadAllocation.getNumCPUThreads() > 1) {
|
||||
if (walker.isReduceByInterval())
|
||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||
|
||||
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
|
||||
|
||||
if ( walker instanceof ReadWalker )
|
||||
if ( walker instanceof ReadWalker ) {
|
||||
if ( ! (walker instanceof ThreadSafeMapReduce) ) badNT(engine, walker);
|
||||
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
|
||||
else
|
||||
} else {
|
||||
// TODO -- update test for when nano scheduling only is an option
|
||||
if ( ! (walker instanceof TreeReducible) ) badNT(engine, walker);
|
||||
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
|
||||
}
|
||||
} else {
|
||||
if(threadAllocation.getNumCPUThreads() > 1)
|
||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
|
||||
}
|
||||
}
|
||||
|
||||
private static void badNT(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a microscheduler given the reads and reference.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -25,9 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.Collections2;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Manage filters and filter options. Any requests for basic filtering classes
|
||||
|
|
@ -54,4 +59,39 @@ public class FilterManager extends PluginManager<ReadFilter> {
|
|||
public Collection<Class<? extends ReadFilter>> getValues() {
|
||||
return this.getPlugins();
|
||||
}
|
||||
|
||||
/**
|
||||
* Rather than use the default error message, print out a list of read filters as well.
|
||||
* @param pluginCategory - string, the category of the plugin (e.g. read filter)
|
||||
* @param pluginName - string, what we were trying to match (but failed to)
|
||||
* @return - A wall of text with the default message, followed by a listing of available read filters
|
||||
*/
|
||||
@Override
|
||||
protected String formatErrorMessage(String pluginCategory, String pluginName) {
|
||||
List<Class<? extends ReadFilter>> availableFilters = this.getPluginsImplementing(ReadFilter.class);
|
||||
|
||||
|
||||
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
|
||||
userFriendlyListofReadFilters(availableFilters),
|
||||
"Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
|
||||
}
|
||||
|
||||
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
|
||||
final String headName = "FilterName", headDoc = "Documentation";
|
||||
int longestNameLength = -1;
|
||||
for ( Class < ? extends ReadFilter> filter : filters ) {
|
||||
longestNameLength = Math.max(longestNameLength,this.getName(filter).length());
|
||||
}
|
||||
String format = " %"+longestNameLength+"s %s%n";
|
||||
|
||||
StringBuilder listBuilder = new StringBuilder();
|
||||
listBuilder.append(String.format(format,headName,headDoc));
|
||||
for ( Class<? extends ReadFilter> filter : filters ) {
|
||||
String helpLink = GATKDocUtils.helpLinksToGATKDocs(filter);
|
||||
String filterName = this.getName(filter);
|
||||
listBuilder.append(String.format(format,filterName,helpLink));
|
||||
}
|
||||
|
||||
return listBuilder.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,12 +31,16 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* A stub for routing and management of SAM file reading and writing.
|
||||
|
|
@ -116,15 +120,15 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
*/
|
||||
private boolean simplifyBAM = false;
|
||||
|
||||
private List<ReadTransformer> onOutputReadTransformers = null;
|
||||
|
||||
/**
|
||||
* Create a new stub given the requested SAM file and compression level.
|
||||
* @param engine source of header data, maybe other data about input files.
|
||||
* @param samFile SAM file to (ultimately) create.
|
||||
*/
|
||||
public SAMFileWriterStub( GenomeAnalysisEngine engine, File samFile ) {
|
||||
this.engine = engine;
|
||||
this.samFile = samFile;
|
||||
this.samOutputStream = null;
|
||||
this(engine, samFile, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -133,8 +137,12 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
* @param stream Output stream to which data should be written.
|
||||
*/
|
||||
public SAMFileWriterStub( GenomeAnalysisEngine engine, OutputStream stream ) {
|
||||
this(engine, null, stream);
|
||||
}
|
||||
|
||||
private SAMFileWriterStub(final GenomeAnalysisEngine engine, final File samFile, final OutputStream stream) {
|
||||
this.engine = engine;
|
||||
this.samFile = null;
|
||||
this.samFile = samFile;
|
||||
this.samOutputStream = stream;
|
||||
}
|
||||
|
||||
|
|
@ -274,17 +282,29 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
this.headerOverride = header;
|
||||
}
|
||||
|
||||
private void initializeReadTransformers() {
|
||||
this.onOutputReadTransformers = new ArrayList<ReadTransformer>(engine.getReadTransformers().size());
|
||||
for ( final ReadTransformer transformer : engine.getReadTransformers() ) {
|
||||
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT )
|
||||
onOutputReadTransformers.add(transformer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
public void addAlignment( SAMRecord alignment ) {
|
||||
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
|
||||
//System.out.printf("Writing BAQ at OUTPUT TIME%n");
|
||||
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
|
||||
}
|
||||
public void addAlignment( final SAMRecord readIn ) {
|
||||
if ( onOutputReadTransformers == null )
|
||||
initializeReadTransformers();
|
||||
|
||||
GATKSAMRecord workingRead = (GATKSAMRecord)readIn;
|
||||
|
||||
// run on output read transformers
|
||||
for ( final ReadTransformer transform : onOutputReadTransformers )
|
||||
workingRead = transform.apply(workingRead);
|
||||
|
||||
writeStarted = true;
|
||||
outputTracker.getStorage(this).addAlignment(alignment);
|
||||
outputTracker.getStorage(this).addAlignment(workingRead);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,144 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Baseclass used to describe a read transformer like BAQ and BQSR
|
||||
*
|
||||
* Read transformers are plugable infrastructure that modify read state
|
||||
* either on input, on output, or within walkers themselves.
|
||||
*
|
||||
* The function apply() is called on each read seen by the GATK (after passing
|
||||
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
|
||||
* to the read to change qualities, add tags, etc.
|
||||
*
|
||||
* Initialize is called once right before the GATK traversal begins providing
|
||||
* the ReadTransformer with the ability to collect and initialize data from the
|
||||
* engine.
|
||||
*
|
||||
* Note that all ReadTransformers within the classpath are created and initialized. If one
|
||||
* shouldn't be run it should look at the command line options of the engine and override
|
||||
* the enabled.
|
||||
*
|
||||
* @since 8/31/12
|
||||
* @author depristo
|
||||
*/
|
||||
abstract public class ReadTransformer {
|
||||
/**
|
||||
* When should this read transform be applied?
|
||||
*/
|
||||
private ApplicationTime applicationTime;
|
||||
|
||||
/**
|
||||
* Keep track of whether we've been initialized already, and ensure it's not called more than once.
|
||||
*/
|
||||
private boolean initialized = false;
|
||||
|
||||
protected ReadTransformer() {}
|
||||
|
||||
/**
|
||||
* Master initialization routine. Called to setup a ReadTransform, using it's overloaded initialialSub routine.
|
||||
*
|
||||
* @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself
|
||||
* @param engine the engine, for initializing values
|
||||
* @param walker the walker we intend to run
|
||||
*/
|
||||
@Requires({"initialized == false", "engine != null", "walker != null"})
|
||||
@Ensures("initialized == true")
|
||||
public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
if ( engine == null ) throw new IllegalArgumentException("engine cannot be null");
|
||||
if ( walker == null ) throw new IllegalArgumentException("walker cannot be null");
|
||||
|
||||
this.applicationTime = initializeSub(engine, walker);
|
||||
if ( overrideTime != null ) this.applicationTime = overrideTime;
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses must override this to initialize themeselves
|
||||
*
|
||||
* @param engine the engine, for initializing values
|
||||
* @param walker the walker we intend to run
|
||||
* @return the point of time we'd like this read transform to be run
|
||||
*/
|
||||
@Requires({"engine != null", "walker != null"})
|
||||
@Ensures("result != null")
|
||||
protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker);
|
||||
|
||||
/**
|
||||
* Should this ReadTransformer be activated? Called after initialize, which allows this
|
||||
* read transformer to look at its arguments and decide if it should be active. All
|
||||
* ReadTransformers must override this, as by default they are not enabled.
|
||||
*
|
||||
* @return true if this ReadTransformer should be used on the read stream
|
||||
*/
|
||||
public boolean enabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Has this transformer been initialized?
|
||||
*
|
||||
* @return true if it has
|
||||
*/
|
||||
public final boolean isInitialized() {
|
||||
return initialized;
|
||||
}
|
||||
|
||||
/**
|
||||
* When should we apply this read transformer?
|
||||
*
|
||||
* @return true if yes
|
||||
*/
|
||||
public final ApplicationTime getApplicationTime() {
|
||||
return applicationTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Primary interface function for a read transform to actually do some work
|
||||
*
|
||||
* The function apply() is called on each read seen by the GATK (after passing
|
||||
* all ReadFilters) and it can do as it sees fit (without modifying the alignment)
|
||||
* to the read to change qualities, add tags, etc.
|
||||
*
|
||||
* @param read the read to transform
|
||||
* @return the transformed read
|
||||
*/
|
||||
@Requires("read != null")
|
||||
@Ensures("result != null")
|
||||
abstract public GATKSAMRecord apply(final GATKSAMRecord read);
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName();
|
||||
}
|
||||
|
||||
/**
|
||||
* When should a read transformer be applied?
|
||||
*/
|
||||
public static enum ApplicationTime {
|
||||
/**
|
||||
* Walker does not tolerate this read transformer
|
||||
*/
|
||||
FORBIDDEN,
|
||||
|
||||
/**
|
||||
* apply the transformation to the incoming reads, the default
|
||||
*/
|
||||
ON_INPUT,
|
||||
|
||||
/**
|
||||
* apply the transformation to the outgoing read stream
|
||||
*/
|
||||
ON_OUTPUT,
|
||||
|
||||
/**
|
||||
* the walker will deal with the calculation itself
|
||||
*/
|
||||
HANDLED_IN_WALKER
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface ReadTransformersMode {
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
@ -48,9 +47,9 @@ public class VerifyingSamIterator implements StingSAMIterator {
|
|||
if(cur.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || cur.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START)
|
||||
throw new UserException.MalformedBAM(last,String.format("read %s has inconsistent mapping information.",cur.format()));
|
||||
|
||||
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
|
||||
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
|
||||
return curLoc.compareTo(lastLoc) == -1;
|
||||
return (last.getReferenceIndex() > cur.getReferenceIndex()) ||
|
||||
(last.getReferenceIndex().equals(cur.getReferenceIndex()) &&
|
||||
last.getAlignmentStart() > cur.getAlignmentStart());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,179 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadMetaDataTracker
|
||||
* <p/>
|
||||
* a read-based meta data tracker
|
||||
*/
|
||||
public class ReadMetaDataTracker {
|
||||
/**
|
||||
* The parser, used to create new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
private final SAMRecord record;
|
||||
|
||||
// the buffer of positions and RODs we've stored
|
||||
private final TreeMap<Integer, RODMetaDataContainer> mapping;
|
||||
|
||||
/**
|
||||
* create a read meta data tracker, given the read and a queue of RODatum positions
|
||||
*
|
||||
* @param record the read to create offset from
|
||||
* @param mapping the mapping of reference ordered datum
|
||||
*/
|
||||
public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.record = record;
|
||||
this.mapping = mapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* create an alignment of read position to reference ordered datum
|
||||
*
|
||||
* @param record the SAMRecord
|
||||
* @param queue the queue (as a tree set)
|
||||
* @param cl the class name, null if not filtered by classname
|
||||
* @param name the datum track name, null if not filtered by name
|
||||
*
|
||||
* @return a mapping from the position in the read to the reference ordered datum
|
||||
*/
|
||||
private Map<Integer, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> queue, Class cl, String name) {
|
||||
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
|
||||
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
|
||||
GenomeLoc location = genomeLocParser.createGenomeLoc(record);
|
||||
int length = record.getReadLength();
|
||||
for (Integer loc : queue.keySet()) {
|
||||
Integer position = loc - location.getStart();
|
||||
if (position >= 0 && position < length) {
|
||||
Collection<GATKFeature> set;
|
||||
if (cl != null)
|
||||
set = queue.get(loc).getSet(cl);
|
||||
else
|
||||
set = queue.get(loc).getSet(name);
|
||||
if (set != null && set.size() > 0)
|
||||
ret.put(position, set);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* create an alignment of read position to reference ordered datum
|
||||
*
|
||||
* @return a mapping from the position in the read to the reference ordered datum
|
||||
*/
|
||||
private Map<Integer, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping, Class cl, String name) {
|
||||
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
|
||||
int start = record.getAlignmentStart();
|
||||
int stop = record.getAlignmentEnd();
|
||||
for (Integer location : mapping.keySet()) {
|
||||
if (location >= start && location <= stop)
|
||||
if (cl != null)
|
||||
ret.put(location, mapping.get(location).getSet(cl));
|
||||
else
|
||||
ret.put(location, mapping.get(location).getSet(name));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of read offset to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping() {
|
||||
return createReadAlignment(record, mapping, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of genome loc position to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping() {
|
||||
return createGenomeLocAlignment(record, mapping, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of read offset to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(String name) {
|
||||
return createReadAlignment(record, mapping, null, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of genome loc position to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(String name) {
|
||||
return createGenomeLocAlignment(record, mapping, null, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of read offset to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
|
||||
return createReadAlignment(record, mapping, cl, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the position mapping, from read offset to ROD
|
||||
*
|
||||
* @return a mapping of genome loc position to ROD(s)
|
||||
*/
|
||||
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
|
||||
return createGenomeLocAlignment(record, mapping, cl, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the list of all the RODS overlapping this read, without any information about their position
|
||||
* @return a Collection (no order guaranteed), of all the RODs covering this read
|
||||
*/
|
||||
public List<GATKFeature> getAllCoveringRods() {
|
||||
List<GATKFeature> ret = new ArrayList<GATKFeature>();
|
||||
for (Map.Entry<Integer, RODMetaDataContainer> entry : mapping.entrySet())
|
||||
ret.addAll(entry.getValue().getSet());
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
@ -5,7 +5,6 @@ import com.google.java.contract.Requires;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -32,11 +31,10 @@ import java.util.*;
|
|||
* Time: 3:05:23 PM
|
||||
*/
|
||||
public class RefMetaDataTracker {
|
||||
// TODO: this should be a list, not a map, actually
|
||||
// TODO: this should be a list, not a bindings, actually
|
||||
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
|
||||
|
||||
final Map<String, RODRecordList> map;
|
||||
final ReferenceContext ref;
|
||||
final Map<String, RODRecordList> bindings;
|
||||
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
|
@ -48,28 +46,25 @@ public class RefMetaDataTracker {
|
|||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Only for testing -- not accesssible in any other context
|
||||
* Create an tracker with no bindings
|
||||
*/
|
||||
public RefMetaDataTracker() {
|
||||
ref = null;
|
||||
map = Collections.emptyMap();
|
||||
bindings = Collections.emptyMap();
|
||||
}
|
||||
|
||||
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
|
||||
this.ref = ref;
|
||||
|
||||
// set up the map
|
||||
public RefMetaDataTracker(final Collection<RODRecordList> allBindings) {
|
||||
// set up the bindings
|
||||
if ( allBindings.isEmpty() )
|
||||
map = Collections.emptyMap();
|
||||
bindings = Collections.emptyMap();
|
||||
else {
|
||||
Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
|
||||
final Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
|
||||
for ( RODRecordList rod : allBindings ) {
|
||||
if ( rod != null && ! rod.isEmpty() )
|
||||
tmap.put(canonicalName(rod.getName()), rod);
|
||||
}
|
||||
|
||||
// ensure that no one modifies the map itself
|
||||
map = Collections.unmodifiableMap(tmap);
|
||||
// ensure that no one modifies the bindings itself
|
||||
bindings = Collections.unmodifiableMap(tmap);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -99,7 +94,7 @@ public class RefMetaDataTracker {
|
|||
@Requires({"type != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
|
||||
return addValues(bindings.keySet(), type, new ArrayList<T>(), null, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -114,7 +109,7 @@ public class RefMetaDataTracker {
|
|||
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
return addValues(bindings.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -296,7 +291,7 @@ public class RefMetaDataTracker {
|
|||
*/
|
||||
@Requires({"rodBinding != null"})
|
||||
public boolean hasValues(final RodBinding rodBinding) {
|
||||
return map.containsKey(canonicalName(rodBinding.getName()));
|
||||
return bindings.containsKey(canonicalName(rodBinding.getName()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -306,7 +301,7 @@ public class RefMetaDataTracker {
|
|||
* @return List of all tracks
|
||||
*/
|
||||
public List<RODRecordList> getBoundRodTracks() {
|
||||
return new ArrayList<RODRecordList>(map.values());
|
||||
return new ArrayList<RODRecordList>(bindings.values());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -314,38 +309,30 @@ public class RefMetaDataTracker {
|
|||
* @return the number of tracks with at least one bound Feature
|
||||
*/
|
||||
public int getNTracksWithBoundFeatures() {
|
||||
return map.size();
|
||||
return bindings.size();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// old style accessors
|
||||
//
|
||||
// TODO -- DELETE ME
|
||||
//
|
||||
//
|
||||
// Protected accessors using strings for unit testing
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
@Deprecated
|
||||
public boolean hasValues(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
protected boolean hasValues(final String name) {
|
||||
return bindings.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
|
||||
protected <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
|
||||
protected <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
||||
|
||||
protected <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
||||
return safeGetFirst(getValues(type, name));
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
|
||||
protected <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
|
||||
}
|
||||
|
||||
|
|
@ -366,7 +353,7 @@ public class RefMetaDataTracker {
|
|||
* @return
|
||||
*/
|
||||
@Requires({"l != null"})
|
||||
final private <T extends Feature> T safeGetFirst(final List<T> l) {
|
||||
private <T extends Feature> T safeGetFirst(final List<T> l) {
|
||||
return l.isEmpty() ? null : l.get(0);
|
||||
}
|
||||
|
||||
|
|
@ -435,7 +422,7 @@ public class RefMetaDataTracker {
|
|||
*/
|
||||
private RODRecordList getTrackDataByName(final String name) {
|
||||
final String luName = canonicalName(name);
|
||||
RODRecordList l = map.get(luName);
|
||||
RODRecordList l = bindings.get(luName);
|
||||
return l == null ? EMPTY_ROD_RECORD_LIST : l;
|
||||
}
|
||||
|
||||
|
|
@ -448,7 +435,7 @@ public class RefMetaDataTracker {
|
|||
* @param name the name of the rod
|
||||
* @return canonical name of the rod
|
||||
*/
|
||||
private final String canonicalName(final String name) {
|
||||
private String canonicalName(final String name) {
|
||||
// todo -- remove me after switch to RodBinding syntax
|
||||
return name.toLowerCase();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion : workQueue ) {
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
|
|
@ -198,7 +198,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
while( workQueue.peek() != null ) {
|
||||
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
|
||||
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
|
||||
final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion = workQueue.remove();
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||
} else {
|
||||
break;
|
||||
|
|
@ -208,15 +208,15 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return sum;
|
||||
}
|
||||
|
||||
private T processActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
|
||||
org.broadinstitute.sting.utils.activeregion.ActiveRegion bestRegion = activeRegion;
|
||||
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) {
|
||||
ActiveRegion bestRegion = activeRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc );
|
||||
bestRegion = otherRegionToTest;
|
||||
|
|
@ -229,7 +229,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
if( !bestRegion.equals(activeRegion) ) {
|
||||
activeRegion.add( read );
|
||||
}
|
||||
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) {
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
otherRegionToTest.add( read );
|
||||
}
|
||||
|
|
@ -241,6 +241,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
}
|
||||
}
|
||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
|
||||
|
||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
||||
final M x = walker.map( activeRegion, null );
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrd
|
|||
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -91,7 +91,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// if the read is mapped, create a metadata tracker
|
||||
final ReadMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
|
||||
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
|
||||
if (keepMeP) {
|
||||
|
|
|
|||
|
|
@ -27,17 +27,21 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
|
||||
import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
|
|
@ -51,12 +55,13 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
/** our log, which we want to capture anything from this class */
|
||||
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
|
||||
private static final boolean DEBUG = false;
|
||||
final NanoScheduler<SAMRecord, M, T> nanoScheduler;
|
||||
private static final int MIN_GROUP_SIZE = 100;
|
||||
final NanoScheduler<MapData, M, T> nanoScheduler;
|
||||
|
||||
public TraverseReadsNano(int nThreads) {
|
||||
final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
|
||||
final int mapGroupSize = bufferSize / 10 + 1;
|
||||
nanoScheduler = new NanoScheduler<SAMRecord, M, T>(bufferSize, mapGroupSize, nThreads);
|
||||
final int mapGroupSize = (int)Math.max(Math.ceil(bufferSize / 50.0 + 1), MIN_GROUP_SIZE);
|
||||
nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, mapGroupSize, nThreads);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -80,24 +85,42 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
if( !dataProvider.hasReads() )
|
||||
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
|
||||
|
||||
if ( dataProvider.hasReferenceOrderedData() )
|
||||
throw new ReviewedStingException("Parallel read walkers currently don't support access to reference ordered data");
|
||||
|
||||
final ReadView reads = new ReadView(dataProvider);
|
||||
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||
|
||||
nanoScheduler.setDebug(DEBUG);
|
||||
final TraverseReadsMap myMap = new TraverseReadsMap(reads, reference, rodView, walker);
|
||||
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
|
||||
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
|
||||
|
||||
T result = nanoScheduler.execute(reads.iterator().iterator(), myMap, sum, myReduce);
|
||||
T result = nanoScheduler.execute(aggregateMapData(dataProvider).iterator(), myMap, sum, myReduce);
|
||||
// TODO -- how do we print progress?
|
||||
//printProgress(dataProvider.getShard(), ???);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
|
||||
final ReadView reads = new ReadView(dataProvider);
|
||||
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
|
||||
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||
|
||||
final List<MapData> mapData = new ArrayList<MapData>(); // TODO -- need size of reads
|
||||
for ( final SAMRecord read : reads ) {
|
||||
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
|
||||
? reference.getReferenceContext(read)
|
||||
: null;
|
||||
|
||||
// if the read is mapped, create a metadata tracker
|
||||
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0
|
||||
? rodView.getReferenceOrderedDataForRead(read)
|
||||
: null;
|
||||
|
||||
// update the number of reads we've seen
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
|
||||
}
|
||||
|
||||
return mapData;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void printOnTraversalDone() {
|
||||
nanoScheduler.shutdown();
|
||||
|
|
@ -117,36 +140,31 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
}
|
||||
}
|
||||
|
||||
private class TraverseReadsMap implements MapFunction<SAMRecord, M> {
|
||||
final ReadView reads;
|
||||
final ReadReferenceView reference;
|
||||
final ReadBasedReferenceOrderedView rodView;
|
||||
private class MapData {
|
||||
final GATKSAMRecord read;
|
||||
final ReferenceContext refContext;
|
||||
final RefMetaDataTracker tracker;
|
||||
|
||||
private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) {
|
||||
this.read = read;
|
||||
this.refContext = refContext;
|
||||
this.tracker = tracker;
|
||||
}
|
||||
}
|
||||
|
||||
private class TraverseReadsMap implements MapFunction<MapData, M> {
|
||||
final ReadWalker<M,T> walker;
|
||||
|
||||
private TraverseReadsMap(ReadView reads, ReadReferenceView reference, ReadBasedReferenceOrderedView rodView, ReadWalker<M, T> walker) {
|
||||
this.reads = reads;
|
||||
this.reference = reference;
|
||||
this.rodView = rodView;
|
||||
private TraverseReadsMap(ReadWalker<M, T> walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public M apply(final SAMRecord read) {
|
||||
public M apply(final MapData data) {
|
||||
if ( ! walker.isDone() ) {
|
||||
// ReferenceContext -- the reference bases covered by the read
|
||||
final ReferenceContext refContext = ! read.getReadUnmappedFlag() && reference != null
|
||||
? reference.getReferenceContext(read)
|
||||
: null;
|
||||
|
||||
// update the number of reads we've seen
|
||||
//dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// if the read is mapped, create a metadata tracker
|
||||
final ReadMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
|
||||
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
|
||||
final boolean keepMeP = walker.filter(data.refContext, data.read);
|
||||
if (keepMeP) {
|
||||
return walker.map(refContext, (GATKSAMRecord) read, tracker);
|
||||
return walker.map(data.refContext, data.read, data.tracker);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
|
|
@ -77,7 +78,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
|
||||
// Map over the ActiveRegion
|
||||
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||
|
||||
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
|
||||
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
|
|
@ -25,5 +27,5 @@ import java.lang.annotation.*;
|
|||
@Target(ElementType.TYPE)
|
||||
public @interface BAQMode {
|
||||
public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS;
|
||||
public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT;
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingOp;
|
||||
|
|
@ -299,7 +299,7 @@ public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipRea
|
|||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @return the ReadClipper object describing what should be done to clip this read
|
||||
*/
|
||||
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
||||
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES )
|
||||
read = ReadClipper.revertSoftClippedBases(read);
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ import java.text.NumberFormat;
|
|||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Requires({DataSource.READS})
|
||||
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements TreeReducible<FlagStat.FlagStatus> {
|
||||
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements ThreadSafeMapReduce {
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
|
|
@ -179,7 +179,7 @@ public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatu
|
|||
|
||||
|
||||
@Override
|
||||
public FlagStatus map( final ReferenceContext ref, final GATKSAMRecord read, final ReadMetaDataTracker metaDataTracker ) {
|
||||
public FlagStatus map( final ReferenceContext ref, final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker ) {
|
||||
return new FlagStatus().add(read);
|
||||
}
|
||||
|
||||
|
|
@ -193,11 +193,6 @@ public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatu
|
|||
return sum.add(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FlagStatus treeReduce(final FlagStatus value, final FlagStatus sum) {
|
||||
return sum.add(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(final FlagStatus result) {
|
||||
out.println(result.toString());
|
||||
|
|
|
|||
|
|
@ -45,19 +45,8 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Prints the alignment in the pileup format. In the pileup format, each line represents a genomic position,
|
||||
* consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping
|
||||
* qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all
|
||||
* encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand,
|
||||
* a comma for a match on the reverse strand, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the
|
||||
* reverse strand.
|
||||
*
|
||||
* A pattern '\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next
|
||||
* reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence.
|
||||
* Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference.
|
||||
* Also at the read base column, a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read
|
||||
* separated by 'N/S/H' CIGAR operations. The ASCII of the character following '^' minus 33 gives the mapping quality.
|
||||
* A symbol '$' marks the end of a read segment.
|
||||
* Prints the alignment in something similar to the samtools pileup format. Each line represents a genomic position,
|
||||
* consisting of chromosome name, coordinate, reference base, read bases, and read qualities.
|
||||
*
|
||||
* Associated command:
|
||||
* samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
|
||||
|
|
|
|||
|
|
@ -32,17 +32,16 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
|
||||
|
|
@ -91,9 +90,10 @@ import java.util.TreeSet;
|
|||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements TreeReducible<SAMFileWriter> {
|
||||
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
|
||||
|
||||
@Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
|
||||
SAMFileWriter out;
|
||||
|
|
@ -138,6 +138,7 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
|
|||
public boolean simplifyReads = false;
|
||||
|
||||
|
||||
List<ReadTransformer> readTransformers = Collections.emptyList();
|
||||
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
||||
private boolean SAMPLES_SPECIFIED = false;
|
||||
|
||||
|
|
@ -150,6 +151,9 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
|
|||
if ( platform != null )
|
||||
platform = platform.toUpperCase();
|
||||
|
||||
if ( getToolkit() != null )
|
||||
readTransformers = getToolkit().getReadTransformers();
|
||||
|
||||
Collection<String> samplesFromFile;
|
||||
if (!sampleFile.isEmpty()) {
|
||||
samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFile);
|
||||
|
|
@ -217,11 +221,20 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
|
|||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @param readIn the read itself, as a GATKSAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return simplifyReads ? read.simplify() : read;
|
||||
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord readIn, RefMetaDataTracker metaDataTracker ) {
|
||||
GATKSAMRecord workingRead = readIn;
|
||||
|
||||
for ( final ReadTransformer transformer : readTransformers ) {
|
||||
if ( logger.isDebugEnabled() ) logger.debug("Applying transformer " + transformer + " to read " + readIn.getReadName());
|
||||
workingRead = transformer.apply(workingRead);
|
||||
}
|
||||
|
||||
if ( simplifyReads ) workingRead = workingRead.simplify();
|
||||
|
||||
return workingRead;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -245,9 +258,4 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
|
|||
output.addAlignment(read);
|
||||
return output;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SAMFileWriter treeReduce(SAMFileWriter lhs, SAMFileWriter rhs) {
|
||||
return lhs; // nothing to do
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
|
|
@ -27,5 +26,5 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
|
|||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
|
||||
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -64,7 +64,7 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
|
|||
logger.info("SplitSamFile version: " + VERSION);
|
||||
}
|
||||
|
||||
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,31 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
/**
|
||||
* Root parallelism interface. Walkers that implement this
|
||||
* declare that their map function is thread-safe and so multiple
|
||||
* map calls can be run in parallel in the same JVM instance.
|
||||
*/
|
||||
public interface ThreadSafeMapReduce {
|
||||
}
|
||||
|
|
@ -13,7 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
* shards of the data can reduce with each other, and the composite result
|
||||
* can be reduced with other composite results.
|
||||
*/
|
||||
public interface TreeReducible<ReduceType> {
|
||||
public interface TreeReducible<ReduceType> extends ThreadSafeMapReduce {
|
||||
/**
|
||||
* A composite, 'reduce of reduces' function.
|
||||
* @param lhs 'left-most' portion of data in the composite reduce.
|
||||
|
|
|
|||
|
|
@ -30,12 +30,14 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRMode;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -48,7 +50,8 @@ import java.util.List;
|
|||
*/
|
||||
@ReadFilters(MalformedReadFilter.class)
|
||||
@PartitionBy(PartitionType.NONE)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class})
|
||||
public abstract class Walker<MapType, ReduceType> {
|
||||
final protected static Logger logger = Logger.getLogger(Walker.class);
|
||||
|
|
|
|||
|
|
@ -33,6 +33,9 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
|
|||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
|
||||
if ( stratifiedContext == null )
|
||||
return;
|
||||
|
||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||
if (ratio == null)
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
|
||||
return;
|
||||
|
||||
if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
|||
final Genotype g,
|
||||
final GenotypeBuilder gb,
|
||||
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
|
||||
if ( g == null || !g.isCalled() )
|
||||
if ( g == null || !g.isCalled() || stratifiedContext == null )
|
||||
return;
|
||||
|
||||
int mq0 = 0;
|
||||
|
|
|
|||
|
|
@ -300,16 +300,12 @@ public class VariantAnnotatorEngine {
|
|||
if (stratifiedPerReadAlleleLikelihoodMap != null)
|
||||
perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
|
||||
if ( context == null && perReadAlleleLikelihoodMap == null) {
|
||||
// no likelihoods nor pileup available: just move on to next sample
|
||||
genotypes.add(genotype);
|
||||
} else {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
genotypes.add(gb.make());
|
||||
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
|
||||
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
|
|
|
|||
|
|
@ -32,10 +32,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -46,6 +45,7 @@ import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
|
|||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ import java.util.ArrayList;
|
|||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
|
|
|
|||
|
|
@ -35,5 +35,5 @@ public interface RecalibrationEngine {
|
|||
|
||||
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase);
|
||||
|
||||
public void updateDataForRead(final GATKSAMRecord read, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
|
||||
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
}
|
||||
|
||||
@Override
|
||||
public synchronized void updateDataForRead( final GATKSAMRecord read, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
public synchronized void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -140,7 +140,7 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, ReadMetaDataTracker readMetaDataTracker) {
|
||||
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, RefMetaDataTracker RefMetaDataTracker) {
|
||||
final String rgID = read.getReadGroup().getId();
|
||||
final PerReadGroupInfo info = readGroupInfo.get(rgID);
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -74,7 +74,7 @@ public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
|
||||
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, RefMetaDataTracker RefMetaDataTracker) {
|
||||
GATKReportTable table = report.getTable("ReadLengthDistribution");
|
||||
|
||||
int length = Math.abs(samRecord.getReadLength());
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
|
|
@ -117,7 +118,7 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
|
||||
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.REFERENCE)
|
||||
|
|
|
|||
|
|
@ -36,8 +36,8 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -112,7 +112,7 @@ import java.util.*;
|
|||
* @author ebanks
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT)
|
||||
public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||
|
||||
public static final String ORIGINAL_CIGAR_TAG = "OC";
|
||||
|
|
@ -473,7 +473,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
readsActuallyCleaned.clear();
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
if ( currentInterval == null ) {
|
||||
emit(read);
|
||||
return 0;
|
||||
|
|
@ -540,7 +540,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// TODO -- it would be nice if we could use indels from 454/Ion reads as alternate consenses
|
||||
}
|
||||
|
||||
private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
|
||||
private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
|
||||
if ( readsToClean.size() > 0 ) {
|
||||
GenomeLoc earliestPossibleMove = getToolkit().getGenomeLocParser().createGenomeLoc(readsToClean.getReads().get(0));
|
||||
if ( manager.canMoveReads(earliestPossibleMove) )
|
||||
|
|
@ -619,17 +619,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private void populateKnownIndels(ReadMetaDataTracker metaDataTracker, ReferenceContext ref) {
|
||||
for ( Collection<GATKFeature> rods : metaDataTracker.getContigOffsetMapping().values() ) {
|
||||
Iterator<GATKFeature> rodIter = rods.iterator();
|
||||
while ( rodIter.hasNext() ) {
|
||||
Object rod = rodIter.next().getUnderlyingObject();
|
||||
if ( indelRodsSeen.contains(rod) )
|
||||
continue;
|
||||
indelRodsSeen.add(rod);
|
||||
if ( rod instanceof VariantContext )
|
||||
knownIndelsToTry.add((VariantContext)rod);
|
||||
}
|
||||
private void populateKnownIndels(RefMetaDataTracker metaDataTracker, ReferenceContext ref) {
|
||||
for ( final VariantContext vc : metaDataTracker.getValues(known) ) {
|
||||
if ( indelRodsSeen.contains(vc) )
|
||||
continue;
|
||||
indelRodsSeen.add(vc);
|
||||
knownIndelsToTry.add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,12 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
|||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
|
|
@ -80,9 +79,9 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
|
|||
writer.addAlignment(read);
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
// we can not deal with screwy records
|
||||
if ( read.getCigar().numCigarElements() == 0 ) {
|
||||
if ( read.getReadUnmappedFlag() || read.getCigar().numCigarElements() == 0 ) {
|
||||
emit(read);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,10 +33,10 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -101,7 +101,7 @@ import java.util.TreeSet;
|
|||
@Reference(window=@Window(start=-1,stop=50))
|
||||
@Allows(value={DataSource.READS, DataSource.REFERENCE})
|
||||
@By(DataSource.REFERENCE)
|
||||
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.EventPair> implements TreeReducible<RealignerTargetCreator.EventPair> {
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
|||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
|
|
@ -477,7 +477,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
|
||||
// if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ");
|
||||
|
||||
|
|
@ -1181,10 +1181,10 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
if ( event_length == 0 ) { // insertion
|
||||
|
||||
l.add( Allele.create(referencePaddingBase,true) );
|
||||
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false ));
|
||||
l.add( Allele.create((char)referencePaddingBase + new String(call.getVariant().getBases()), false ));
|
||||
|
||||
} else { //deletion:
|
||||
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true ));
|
||||
l.add( Allele.create((char)referencePaddingBase + new String(call.getVariant().getBases()), true ));
|
||||
l.add( Allele.create(referencePaddingBase,false) );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
|
|||
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountBases extends ReadWalker<Integer, Long> {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
|
||||
return read.getReadLength();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
|
|||
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Gender;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
|
|
@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountMales extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
Sample sample = getSampleDB().getSample(read);
|
||||
return sample.getGender() == Gender.MALE ? 1 : 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.CigarOperator;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
|
@ -47,7 +47,7 @@ public class CountReadEvents extends ReadWalker<Map<CigarOperator, ArrayList<Int
|
|||
@Output (doc = "GATKReport table output")
|
||||
PrintStream out;
|
||||
|
||||
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
return ReadUtils.getCigarOperatorForAllBases(read);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.qc;
|
|||
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.ThreadSafeMapReduce;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -41,12 +41,11 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountReads extends ReadWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public class CountReads extends ReadWalker<Integer, Integer> implements ThreadSafeMapReduce {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override public Integer reduceInit() { return 0; }
|
||||
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
@Override public Integer treeReduce(Integer lhs, Integer rhs) { return lhs + rhs; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -41,7 +41,7 @@ import java.util.List;
|
|||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountTerminusEvent extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
|
||||
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
List<CigarElement> cigarElements = read.getCigar().getCigarElements();
|
||||
|
||||
CigarElement lastElement = null;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -75,7 +75,7 @@ public class ReadClippingStats extends ReadWalker<ReadClippingStats.ReadClipping
|
|||
int readLength, nClippingEvents, nClippedBases;
|
||||
}
|
||||
|
||||
public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
|
||||
if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -39,11 +39,11 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -218,7 +218,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
String filterString = null;
|
||||
|
||||
// Annotate the new record with its VQSLOD and the worst performing annotation
|
||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lodString); // use the String representation so that we don't lose precision on output
|
||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
|
||||
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
|
||||
|
||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||
|
|
|
|||
|
|
@ -286,7 +286,6 @@ public class VariantDataManager {
|
|||
case INDEL:
|
||||
case MIXED:
|
||||
case SYMBOLIC:
|
||||
case STRUCTURAL_INDEL:
|
||||
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
|
||||
default:
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -125,6 +125,15 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
return ! discontinuousP( that );
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if this GenomeLoc represents the UNMAPPED location
|
||||
* @return
|
||||
*/
|
||||
public final boolean isUnmapped() {
|
||||
return isUnmapped(this);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a new GenomeLoc that represents the entire span of this and that. Requires that
|
||||
* this and that GenomeLoc are contiguous and both mapped
|
||||
|
|
@ -141,7 +150,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
}
|
||||
|
||||
if (!(this.contiguousP(that))) {
|
||||
throw new ReviewedStingException("The two genome loc's need to be contigous");
|
||||
throw new ReviewedStingException("The two genome loc's need to be contiguous");
|
||||
}
|
||||
|
||||
return new GenomeLoc(getContig(), this.contigIndex,
|
||||
|
|
@ -418,7 +427,10 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
result = cmpContig;
|
||||
} else {
|
||||
if ( this.getStart() < that.getStart() ) result = -1;
|
||||
if ( this.getStart() > that.getStart() ) result = 1;
|
||||
else if ( this.getStart() > that.getStart() ) result = 1;
|
||||
// these have the same start, so check the ends
|
||||
else if ( this.getStop() < that.getStop() ) result = -1;
|
||||
else if ( this.getStop() > that.getStop() ) result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,13 +52,6 @@ public class BAQ {
|
|||
DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function.
|
||||
}
|
||||
|
||||
public enum ApplicationTime {
|
||||
FORBIDDEN, // Walker does not tolerate BAQ input
|
||||
ON_INPUT, // apply the BAQ calculation to the incoming reads, the default
|
||||
ON_OUTPUT, // apply the BAQ calculation to outgoing read streams
|
||||
HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself
|
||||
}
|
||||
|
||||
public static final String BAQ_TAG = "BQ";
|
||||
|
||||
private static double[] qual2prob = new double[256];
|
||||
|
|
@ -68,7 +61,7 @@ public class BAQ {
|
|||
}
|
||||
|
||||
// Phred scaled now (changed 1/10/2011)
|
||||
public static double DEFAULT_GOP = 40;
|
||||
public static final double DEFAULT_GOP = 40;
|
||||
|
||||
/* Takes a Phred Scale quality score and returns the error probability.
|
||||
*
|
||||
|
|
@ -110,10 +103,19 @@ public class BAQ {
|
|||
* Use defaults for everything
|
||||
*/
|
||||
public BAQ() {
|
||||
cd = convertFromPhredScale(DEFAULT_GOP);
|
||||
this(DEFAULT_GOP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use defaults for everything
|
||||
*/
|
||||
public BAQ(final double gapOpenPenalty) {
|
||||
cd = convertFromPhredScale(gapOpenPenalty);
|
||||
initializeCachedData();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Create a new HmmGlocal object with specified parameters
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Applies Heng's BAQ calculation to a stream of incoming reads
|
||||
*/
|
||||
public class BAQReadTransformer extends ReadTransformer {
|
||||
private BAQ baqHMM;
|
||||
private IndexedFastaSequenceFile refReader;
|
||||
private BAQ.CalculationMode cmode;
|
||||
private BAQ.QualityMode qmode;
|
||||
|
||||
@Override
|
||||
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class);
|
||||
this.refReader = engine.getReferenceDataSource().getReference();
|
||||
this.cmode = engine.getArguments().BAQMode;
|
||||
this.qmode = mode.QualityMode();
|
||||
baqHMM = new BAQ(engine.getArguments().BAQGOP);
|
||||
|
||||
if ( qmode == BAQ.QualityMode.DONT_MODIFY )
|
||||
throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY");
|
||||
|
||||
if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() )
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested.");
|
||||
|
||||
return mode.ApplicationTime();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean enabled() {
|
||||
return cmode != BAQ.CalculationMode.OFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKSAMRecord apply(final GATKSAMRecord read) {
|
||||
baqHMM.baqRead(read, refReader, cmode, qmode);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Simple iterator that applies Heng's BAQ calculation to a stream of incoming reads
|
||||
*/
|
||||
public class BAQSamIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters
|
||||
private final IndexedFastaSequenceFile refReader;
|
||||
private final BAQ.CalculationMode cmode;
|
||||
private final BAQ.QualityMode qmode;
|
||||
|
||||
/**
|
||||
* Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming
|
||||
* in from it. See BAQ docs for baqType information.
|
||||
*
|
||||
* @param refReader
|
||||
* @param it
|
||||
* @param cmode
|
||||
* @param qmode
|
||||
*/
|
||||
@Requires({
|
||||
"refReader != null",
|
||||
"it != null",
|
||||
"cmode != null" ,
|
||||
"qmode != null"})
|
||||
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
|
||||
if ( cmode == BAQ.CalculationMode.OFF ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
|
||||
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
|
||||
|
||||
this.refReader = refReader;
|
||||
this.it = it;
|
||||
this.cmode = cmode;
|
||||
this.qmode = qmode;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
//System.out.printf("BAQing during input%n");
|
||||
SAMRecord read = it.next();
|
||||
baqHMM.baqRead(read, refReader, cmode, qmode);
|
||||
return read;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package org.broadinstitute.sting.utils.baq;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Iterator that applies a ReadTransformer to a stream of reads
|
||||
*/
|
||||
public class ReadTransformingIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final ReadTransformer transformer;
|
||||
|
||||
/**
|
||||
* Creates a new ReadTransforming iterator
|
||||
*/
|
||||
@Requires({"it != null", "transformer != null", "transformer.isInitialized()"})
|
||||
public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) {
|
||||
if ( ! transformer.isInitialized() )
|
||||
throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer);
|
||||
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN )
|
||||
throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer);
|
||||
|
||||
this.it = it;
|
||||
this.transformer = transformer;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
final GATKSAMRecord read = (GATKSAMRecord)it.next();
|
||||
return transformer.apply(read);
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -27,6 +27,8 @@ package org.broadinstitute.sting.utils.classloader;
|
|||
|
||||
import ch.qos.logback.classic.Level;
|
||||
import ch.qos.logback.classic.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -276,8 +278,16 @@ public class PluginManager<PluginType> {
|
|||
*/
|
||||
public PluginType createByName(String pluginName) {
|
||||
Class<? extends PluginType> plugin = pluginsByName.get(pluginName);
|
||||
if( plugin == null )
|
||||
throw new UserException(String.format("Could not find %s with name: %s", pluginCategory,pluginName));
|
||||
if( plugin == null ) {
|
||||
String errorMessage = formatErrorMessage(pluginCategory,pluginName);
|
||||
if ( this.getClass().isAssignableFrom(FilterManager.class) ) {
|
||||
throw new UserException.MalformedReadFilterException(errorMessage);
|
||||
} else if ( this.getClass().isAssignableFrom(WalkerManager.class) ) {
|
||||
throw new UserException.MalformedWalkerArgumentsException(errorMessage);
|
||||
} else {
|
||||
throw new UserException.CommandLineException(errorMessage);
|
||||
}
|
||||
}
|
||||
try {
|
||||
return plugin.newInstance();
|
||||
} catch (Exception e) {
|
||||
|
|
@ -330,4 +340,14 @@ public class PluginManager<PluginType> {
|
|||
|
||||
return pluginName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the error message for the plugin manager. The message is allowed to depend on the class.
|
||||
* @param pluginCategory - string, the category of the plugin (e.g. read filter)
|
||||
* @param pluginName - string, what we were trying to match (but failed to)
|
||||
* @return error message text describing the error
|
||||
*/
|
||||
protected String formatErrorMessage(String pluginCategory, String pluginName ) {
|
||||
return String.format("Could not find %s with name: %s", pluginCategory,pluginName);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,18 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class MalformedReadFilterException extends CommandLineException {
|
||||
public MalformedReadFilterException(String message) {
|
||||
super(String.format("Malformed read filter: %s",message));
|
||||
}
|
||||
}
|
||||
|
||||
public static class MalformedWalkerArgumentsException extends CommandLineException {
|
||||
public MalformedWalkerArgumentsException(String message) {
|
||||
super(String.format("Malformed walker argument: %s",message));
|
||||
}
|
||||
}
|
||||
|
||||
public static class MalformedGenomeLoc extends UserException {
|
||||
public MalformedGenomeLoc(String message, GenomeLoc loc) {
|
||||
super(String.format("Badly formed genome loc: %s: %s", message, loc));
|
||||
|
|
|
|||
|
|
@ -128,22 +128,13 @@ public class FragmentUtils {
|
|||
return create(reads, reads.size(), SamRecordGetter);
|
||||
}
|
||||
|
||||
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( List<GATKSAMRecord> overlappingPair ) {
|
||||
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) {
|
||||
final byte MIN_QUAL_BAD_OVERLAP = 16;
|
||||
if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); }
|
||||
|
||||
GATKSAMRecord firstRead = overlappingPair.get(0);
|
||||
GATKSAMRecord secondRead = overlappingPair.get(1);
|
||||
/*
|
||||
System.out.println("read 0 unclipped start:"+overlappingPair.get(0).getUnclippedStart());
|
||||
System.out.println("read 0 unclipped end:"+overlappingPair.get(0).getUnclippedEnd());
|
||||
System.out.println("read 1 unclipped start:"+overlappingPair.get(1).getUnclippedStart());
|
||||
System.out.println("read 1 unclipped end:"+overlappingPair.get(1).getUnclippedEnd());
|
||||
System.out.println("read 0 start:"+overlappingPair.get(0).getAlignmentStart());
|
||||
System.out.println("read 0 end:"+overlappingPair.get(0).getAlignmentEnd());
|
||||
System.out.println("read 1 start:"+overlappingPair.get(1).getAlignmentStart());
|
||||
System.out.println("read 1 end:"+overlappingPair.get(1).getAlignmentEnd());
|
||||
*/
|
||||
|
||||
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
|
||||
firstRead = overlappingPair.get(1); // swap them
|
||||
secondRead = overlappingPair.get(0);
|
||||
|
|
@ -155,15 +146,6 @@ public class FragmentUtils {
|
|||
return overlappingPair; // fragments contain indels so don't merge them
|
||||
}
|
||||
|
||||
/* // check for inconsistent start positions between uncliped/soft alignment starts
|
||||
if (secondRead.getAlignmentStart() >= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() < firstRead.getUnclippedStart())
|
||||
return overlappingPair;
|
||||
if (secondRead.getAlignmentStart() <= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() > firstRead.getUnclippedStart())
|
||||
return overlappingPair;
|
||||
|
||||
if (secondRead.getUnclippedStart() < firstRead.getAlignmentEnd() && secondRead.getAlignmentStart() >= firstRead.getAlignmentEnd())
|
||||
return overlappingPair;
|
||||
*/
|
||||
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
|
||||
|
||||
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
|
||||
|
|
@ -183,7 +165,7 @@ public class FragmentUtils {
|
|||
}
|
||||
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
|
||||
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
|
||||
return overlappingPair;// high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
|
||||
return overlappingPair; // high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
|
||||
}
|
||||
if( firstReadQuals[iii] < MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] < MIN_QUAL_BAD_OVERLAP ) {
|
||||
return overlappingPair; // both reads have low qual bases in the overlap region so don't merge them because don't know what is going on
|
||||
|
|
@ -197,7 +179,7 @@ public class FragmentUtils {
|
|||
}
|
||||
|
||||
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
|
||||
returnRead.setAlignmentStart( firstRead.getUnclippedStart() );
|
||||
returnRead.setAlignmentStart( firstRead.getSoftStart() );
|
||||
returnRead.setReadBases( bases );
|
||||
returnRead.setBaseQualities( quals );
|
||||
returnRead.setReadGroup( firstRead.getReadGroup() );
|
||||
|
|
|
|||
|
|
@ -43,7 +43,8 @@ import java.util.concurrent.*;
|
|||
* Time: 9:47 AM
|
||||
*/
|
||||
public class NanoScheduler<InputType, MapType, ReduceType> {
|
||||
private static Logger logger = Logger.getLogger(NanoScheduler.class);
|
||||
private final static Logger logger = Logger.getLogger(NanoScheduler.class);
|
||||
private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
|
||||
|
||||
final int bufferSize;
|
||||
final int mapGroupSize;
|
||||
|
|
@ -79,7 +80,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
|||
this.mapGroupSize = mapGroupSize;
|
||||
}
|
||||
|
||||
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads - 1);
|
||||
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -172,7 +173,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
|
|||
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
|
||||
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
|
||||
|
||||
if ( getnThreads() == 1 ) {
|
||||
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
|
||||
return executeSingleThreaded(inputReader, map, initialValue, reduce);
|
||||
} else {
|
||||
return executeMultiThreaded(inputReader, map, initialValue, reduce);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,30 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
* Time: 1:51:22 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Allows the walker to indicate what type of data it wants to consume.
|
||||
*/
|
||||
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface BQSRMode {
|
||||
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* A ReadTransformer that applies BQSR on the fly to reads
|
||||
*
|
||||
* User: rpoplin
|
||||
* Date: 2/13/12
|
||||
*/
|
||||
public class BQSRReadTransformer extends ReadTransformer {
|
||||
private boolean enabled;
|
||||
private BaseRecalibration bqsr;
|
||||
|
||||
@Override
|
||||
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
this.enabled = engine.hasBaseRecalibration();
|
||||
this.bqsr = engine.getBaseRecalibration();
|
||||
final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class);
|
||||
return mode.ApplicationTime();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean enabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads.
|
||||
*/
|
||||
@Override
|
||||
public GATKSAMRecord apply(GATKSAMRecord read) {
|
||||
bqsr.recalibrateRead(read);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 2/13/12
|
||||
*/
|
||||
|
||||
public class BQSRSamIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator it;
|
||||
private final BaseRecalibration bqsr;
|
||||
|
||||
/**
|
||||
* Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads.
|
||||
*
|
||||
* @param it The incoming SamIterator to wrap
|
||||
* @param bqsr The object which holds the BQSR table information and knows how to apply it
|
||||
*/
|
||||
@Requires({
|
||||
"it != null",
|
||||
"bqsr != null"})
|
||||
public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) {
|
||||
if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object");
|
||||
|
||||
this.it = it;
|
||||
this.bqsr = bqsr;
|
||||
}
|
||||
|
||||
@Requires("hasNext()")
|
||||
@Ensures("result != null")
|
||||
public SAMRecord next() {
|
||||
SAMRecord read = it.next();
|
||||
bqsr.recalibrateRead((GATKSAMRecord) read);
|
||||
return read;
|
||||
}
|
||||
|
||||
public boolean hasNext() { return this.it.hasNext(); }
|
||||
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
|
||||
public void close() { it.close(); }
|
||||
public Iterator<SAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -27,12 +27,11 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
|
||||
import net.sf.samtools.SAMTag;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -46,7 +45,6 @@ import java.io.File;
|
|||
|
||||
public class BaseRecalibration {
|
||||
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
|
||||
private final ReadCovariates readCovariates;
|
||||
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables;
|
||||
|
|
@ -56,10 +54,23 @@ public class BaseRecalibration {
|
|||
private final int preserveQLessThan;
|
||||
private final boolean emitOriginalQuals;
|
||||
|
||||
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
static {
|
||||
for (int i = 0; i < EventType.values().length; i++)
|
||||
qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
// TODO -- was this supposed to be used somewhere?
|
||||
// private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
||||
// static {
|
||||
// for (int i = 0; i < EventType.values().length; i++)
|
||||
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
// }
|
||||
|
||||
/**
|
||||
* Thread local cache to allow multi-threaded use of this class
|
||||
*/
|
||||
private ThreadLocal<ReadCovariates> readCovariatesCache;
|
||||
{
|
||||
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
|
||||
@Override protected ReadCovariates initialValue() {
|
||||
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,7 +92,6 @@ public class BaseRecalibration {
|
|||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
|
||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
this.disableIndelQuals = disableIndelQuals;
|
||||
this.preserveQLessThan = preserveQLessThan;
|
||||
this.emitOriginalQuals = emitOriginalQuals;
|
||||
|
|
@ -104,6 +114,11 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
// Compute all covariates for the read
|
||||
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
|
||||
// TODO -- needs to be fixed.
|
||||
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
|
|
@ -130,6 +145,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implements a serial recalibration of the reads using the combinational table.
|
||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||
|
|
@ -147,7 +163,7 @@ public class BaseRecalibration {
|
|||
* @param errorModel the event type
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||
private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
|
||||
|
||||
final byte qualFromRead = (byte)(long)key[1];
|
||||
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of it's covariates.
|
||||
*
|
||||
|
|
@ -21,6 +23,17 @@ public class ReadCovariates {
|
|||
currentCovariateIndex = index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Necessary due to bug in BaseRecalibration recalibrateRead function. It is clearly seeing space it's not supposed to
|
||||
* @return
|
||||
*/
|
||||
public ReadCovariates clear() {
|
||||
for ( int i = 0; i < keys.length; i++ )
|
||||
for ( int j = 0; j < keys[i].length; j++)
|
||||
Arrays.fill(keys[i][j], 0);
|
||||
return this;
|
||||
}
|
||||
|
||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||
|
|
|
|||
|
|
@ -228,8 +228,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
if( quals == null ) {
|
||||
quals = new byte[getBaseQualities().length];
|
||||
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||
setBaseQualities(quals, EventType.BASE_INSERTION);
|
||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
|
@ -246,7 +245,6 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
quals = new byte[getBaseQualities().length];
|
||||
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||
setBaseQualities(quals, EventType.BASE_DELETION);
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
|
@ -262,7 +260,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
|
||||
mReadGroup = readGroup;
|
||||
retrievedReadGroup = true;
|
||||
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
|
||||
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -367,15 +365,15 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* Clears all attributes except ReadGroup of the read.
|
||||
*/
|
||||
public GATKSAMRecord simplify () {
|
||||
GATKSAMReadGroupRecord rg = getReadGroup(); // save the read group information
|
||||
GATKSAMReadGroupRecord rg = getReadGroup(); // save the read group information
|
||||
byte[] insQuals = (this.getAttribute(BQSR_BASE_INSERTION_QUALITIES) == null) ? null : getBaseInsertionQualities();
|
||||
byte[] delQuals = (this.getAttribute(BQSR_BASE_DELETION_QUALITIES) == null) ? null : getBaseDeletionQualities();
|
||||
this.clearAttributes(); // clear all attributes from the read
|
||||
this.setReadGroup(rg); // restore read group
|
||||
this.clearAttributes(); // clear all attributes from the read
|
||||
this.setReadGroup(rg); // restore read group
|
||||
if (insQuals != null)
|
||||
this.setBaseQualities(insQuals, EventType.BASE_INSERTION); // restore base insertion if we had any
|
||||
this.setBaseQualities(insQuals, EventType.BASE_INSERTION); // restore base insertion if we had any
|
||||
if (delQuals != null)
|
||||
this.setBaseQualities(delQuals, EventType.BASE_DELETION); // restore base deletion if we had any
|
||||
this.setBaseQualities(delQuals, EventType.BASE_DELETION); // restore base deletion if we had any
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -457,7 +457,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
SNP,
|
||||
MNP, // a multi-nucleotide polymorphism
|
||||
INDEL,
|
||||
STRUCTURAL_INDEL,
|
||||
SYMBOLIC,
|
||||
MIXED,
|
||||
}
|
||||
|
|
@ -531,7 +530,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
public boolean isStructuralIndel() {
|
||||
return getType() == Type.STRUCTURAL_INDEL;
|
||||
if ( getType() == Type.INDEL ) {
|
||||
List<Integer> sizes = getIndelLengths();
|
||||
if ( sizes != null ) {
|
||||
for ( Integer length : sizes ) {
|
||||
if ( length > MAX_ALLELE_SIZE_FOR_NON_SV ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -716,7 +725,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* @return a list of indel lengths ( null if not of type indel or mixed )
|
||||
*/
|
||||
public List<Integer> getIndelLengths() {
|
||||
if ( getType() != Type.INDEL && getType() != Type.MIXED && getType() != Type.STRUCTURAL_INDEL ) {
|
||||
if ( getType() != Type.INDEL && getType() != Type.MIXED ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -1263,13 +1272,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
// is reserved for cases of multiple alternate alleles of different types). Therefore, if we've reached this point
|
||||
// in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
|
||||
|
||||
// Because a number of structural variation callers write the whole alternate allele into the VCF where possible,
|
||||
// this can result in insertion/deletion alleles of structural variant size, e.g. 151+. As of July 2012, we now
|
||||
// classify these as structural events, rather than indel events, as we think differently about the mechanism,
|
||||
// representation, and handling of these events. Check for this case here:
|
||||
if ( ref.length() > MAX_ALLELE_SIZE_FOR_NON_SV || allele.length() > MAX_ALLELE_SIZE_FOR_NON_SV )
|
||||
return Type.STRUCTURAL_INDEL;
|
||||
|
||||
return Type.INDEL;
|
||||
|
||||
// old incorrect logic:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 8/31/12
|
||||
* Time: 11:03 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class InvalidArgumentIntegrationTest extends WalkerTest {
|
||||
private static final String callsB36 = BaseTest.validationDataLocation + "lowpass.N3.chr1.raw.vcf";
|
||||
|
||||
private WalkerTest.WalkerTestSpec baseTest(String flag, String arg, Class exeption) {
|
||||
return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf "
|
||||
+ callsB36 + " -F POS,CHROM -R "
|
||||
+ b36KGReference + " -o %s " + flag + " " + arg,
|
||||
1, exeption);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnknownReadFilter() {
|
||||
executeTest("UnknownReadFilter",baseTest("-rf","TestUnknownReadFilter", UserException.MalformedReadFilterException.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMalformedWalkerArgs() {
|
||||
executeTest("MalformedWalkerArgs",
|
||||
new WalkerTest.WalkerTestSpec("-T UnknownWalkerName -M 10 --variant:vcf "
|
||||
+ callsB36 + " -F POS,CHROM -R "
|
||||
+ b36KGReference + " -o %s ",
|
||||
1, UserException.MalformedWalkerArgumentsException.class));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,207 +1,364 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.testng.Assert;
|
||||
import org.broad.tribble.BasicFeature;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTrackerUnitTest;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadBasedReferenceOrderedViewUnitTest
|
||||
* <p/>
|
||||
* test out the ReadBasedReferenceOrderedView class
|
||||
* @author depristo
|
||||
*/
|
||||
public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
private static int startingChr = 1;
|
||||
private static int endingChr = 2;
|
||||
private static int readCount = 100;
|
||||
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
|
||||
private static String contig;
|
||||
private static SAMFileHeader header;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
contig = header.getSequence(0).getSequenceName();
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
|
||||
initializeTests();
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void beforeEach() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateReadMetaDataTrackerOnePerSite() {
|
||||
// make ten reads,
|
||||
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||
for (int x = 1; x < 11; x++) {
|
||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10);
|
||||
private class CompareFeatures implements Comparator<Feature> {
|
||||
@Override
|
||||
public int compare(Feature o1, Feature o2) {
|
||||
return genomeLocParser.createGenomeLoc(o1).compareTo(genomeLocParser.createGenomeLoc(o2));
|
||||
}
|
||||
GenomeLoc start = genomeLocParser.createGenomeLoc(header.getSequenceDictionary().getSequence(0).getSequenceName(), 0, 0);
|
||||
List<RMDDataState> list = new ArrayList<RMDDataState>();
|
||||
list.add(new RMDDataState(null, new FakePeekingRODIterator(genomeLocParser,start, "fakeName")));
|
||||
ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list));
|
||||
}
|
||||
|
||||
for (SAMRecord rec : records) {
|
||||
ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec);
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping();
|
||||
for (Integer i : map.keySet()) {
|
||||
Assert.assertEquals(map.get(i).size(), 1);
|
||||
private class ReadMetaDataTrackerRODStreamTest extends TestDataProvider {
|
||||
final List<Feature> allFeatures;
|
||||
final List<GenomeLoc> intervals;
|
||||
|
||||
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final GenomeLoc interval) {
|
||||
this(allFeatures, Collections.singletonList(interval));
|
||||
}
|
||||
|
||||
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final List<GenomeLoc> intervals) {
|
||||
super(ReadMetaDataTrackerRODStreamTest.class);
|
||||
this.allFeatures = new ArrayList<Feature>(allFeatures);
|
||||
Collections.sort(this.allFeatures, new CompareFeatures());
|
||||
this.intervals = new ArrayList<GenomeLoc>(intervals);
|
||||
Collections.sort(this.intervals);
|
||||
setName(String.format("%s nFeatures %d intervals %s", getClass().getSimpleName(), allFeatures.size(),
|
||||
intervals.size() == 1 ? intervals.get(0) : "size " + intervals.size()));
|
||||
}
|
||||
|
||||
public PeekableIterator<RODRecordList> getIterator(final String name) {
|
||||
return new PeekableIterator<RODRecordList>(new TribbleIteratorFromCollection(name, genomeLocParser, allFeatures));
|
||||
}
|
||||
|
||||
public Set<Feature> getExpectedOverlaps(final GenomeLoc interval) {
|
||||
final Set<Feature> overlapping = new HashSet<Feature>();
|
||||
for ( final Feature f : allFeatures )
|
||||
if ( genomeLocParser.createGenomeLoc(f).overlapsP(interval) )
|
||||
overlapping.add(f);
|
||||
return overlapping;
|
||||
}
|
||||
}
|
||||
|
||||
public void initializeTests() {
|
||||
final List<Feature> handPickedFeatures = new ArrayList<Feature>();
|
||||
|
||||
handPickedFeatures.add(new BasicFeature(contig, 1, 1));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 2, 5));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 4, 4));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 6, 6));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 9, 10));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 10, 10));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 10, 11));
|
||||
handPickedFeatures.add(new BasicFeature(contig, 13, 20));
|
||||
|
||||
createTestsForFeatures(handPickedFeatures);
|
||||
|
||||
// test in the present of a large spanning element
|
||||
{
|
||||
List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
|
||||
oneLargeSpan.add(new BasicFeature(contig, 1, 30));
|
||||
createTestsForFeatures(oneLargeSpan);
|
||||
}
|
||||
|
||||
// test in the presence of a partially spanning element
|
||||
{
|
||||
List<Feature> partialSpanStart = new ArrayList<Feature>(handPickedFeatures);
|
||||
partialSpanStart.add(new BasicFeature(contig, 1, 6));
|
||||
createTestsForFeatures(partialSpanStart);
|
||||
}
|
||||
|
||||
// test in the presence of a partially spanning element at the end
|
||||
{
|
||||
List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
|
||||
partialSpanEnd.add(new BasicFeature(contig, 10, 30));
|
||||
createTestsForFeatures(partialSpanEnd);
|
||||
}
|
||||
|
||||
// no data at all
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, 5, 5);
|
||||
new ReadMetaDataTrackerRODStreamTest(Collections.<Feature>emptyList(), loc);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// tests for the lower level IntervalOverlappingRODsFromStream
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ReadMetaDataTrackerRODStreamTest")
|
||||
public Object[][] createReadMetaDataTrackerRODStreamTest() {
|
||||
return ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
|
||||
}
|
||||
|
||||
private GenomeLoc span(final List<GenomeLoc> features) {
|
||||
int featuresStart = 1; for ( final GenomeLoc f : features ) featuresStart = Math.min(featuresStart, f.getStart());
|
||||
int featuresStop = 1; for ( final GenomeLoc f : features ) featuresStop = Math.max(featuresStop, f.getStop());
|
||||
return genomeLocParser.createGenomeLoc(contig, featuresStart, featuresStop);
|
||||
}
|
||||
|
||||
private void createTestsForFeatures(final List<Feature> features) {
|
||||
int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
|
||||
int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());
|
||||
|
||||
for ( final int size : Arrays.asList(1, 5, 10, 100) ) {
|
||||
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
// regularly spaced
|
||||
for ( int start = featuresStart; start < featuresStop; start++) {
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, start, start + size - 1);
|
||||
allIntervals.add(loc);
|
||||
new ReadMetaDataTrackerRODStreamTest(features, loc);
|
||||
}
|
||||
Assert.assertEquals(map.keySet().size(), 10);
|
||||
|
||||
// starting and stopping at every feature
|
||||
for ( final Feature f : features ) {
|
||||
// just at the feature
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart(), f.getEnd()));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
|
||||
// up to end
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd()));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
|
||||
// missing by 1
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() + 1, f.getEnd() + 1));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
|
||||
// just spanning
|
||||
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd() + 1));
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
|
||||
}
|
||||
|
||||
new ReadMetaDataTrackerRODStreamTest(features, allIntervals);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest")
|
||||
public void runReadMetaDataTrackerRODStreamTest_singleQuery(final ReadMetaDataTrackerRODStreamTest data) {
|
||||
if ( data.intervals.size() == 1 ) {
|
||||
final String name = "testName";
|
||||
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
|
||||
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
|
||||
testRODStream(data, stream, Collections.singletonList(data.intervals.get(0)));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_singleQuery")
|
||||
public void runReadMetaDataTrackerRODStreamTest_multipleQueries(final ReadMetaDataTrackerRODStreamTest data) {
|
||||
if ( data.intervals.size() > 1 ) {
|
||||
final String name = "testName";
|
||||
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
|
||||
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
|
||||
testRODStream(data, stream, data.intervals);
|
||||
}
|
||||
}
|
||||
|
||||
private void testRODStream(final ReadMetaDataTrackerRODStreamTest test, final IntervalOverlappingRODsFromStream stream, final List<GenomeLoc> intervals) {
|
||||
for ( final GenomeLoc interval : intervals ) {
|
||||
final RODRecordList query = stream.getOverlapping(interval);
|
||||
final HashSet<Feature> queryFeatures = new HashSet<Feature>();
|
||||
for ( final GATKFeature f : query ) queryFeatures.add((Feature)f.getUnderlyingObject());
|
||||
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
|
||||
|
||||
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
|
||||
|
||||
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// tests for the higher level tracker itself
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ReadMetaDataTrackerTests")
|
||||
public Object[][] createTrackerTests() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final Object[][] singleTests = ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
|
||||
final List<ReadMetaDataTrackerRODStreamTest> multiSiteTests = new ArrayList<ReadMetaDataTrackerRODStreamTest>();
|
||||
for ( final Object[] singleTest : singleTests ) {
|
||||
if ( ((ReadMetaDataTrackerRODStreamTest)singleTest[0]).intervals.size() > 1 )
|
||||
multiSiteTests.add((ReadMetaDataTrackerRODStreamTest)singleTest[0]);
|
||||
}
|
||||
|
||||
for ( final boolean testStateless : Arrays.asList(true, false) ) {
|
||||
// all pairwise tests
|
||||
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 2, false)) {
|
||||
tests.add(new Object[]{singleTest, testStateless});
|
||||
}
|
||||
|
||||
// all 3 way pairwise tests
|
||||
//for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
|
||||
// tests.add(new Object[]{singleTest, testStateless});
|
||||
//}
|
||||
}
|
||||
|
||||
logger.warn("Creating " + tests.size() + " tests for ReadMetaDataTrackerTests");
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
}
|
||||
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerTests", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_multipleQueries")
|
||||
public void runReadMetaDataTrackerTest(final List<ReadMetaDataTrackerRODStreamTest> RODs, final boolean testStateless) {
|
||||
final List<String> names = new ArrayList<String>();
|
||||
final List<PeekableIterator<RODRecordList>> iterators = new ArrayList<PeekableIterator<RODRecordList>>();
|
||||
final List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
final List<RodBinding<Feature>> rodBindings = new ArrayList<RodBinding<Feature>>();
|
||||
|
||||
for ( int i = 0; i < RODs.size(); i++ ) {
|
||||
final RodBinding<Feature> rodBinding = new RodBinding<Feature>(Feature.class, "name"+i);
|
||||
rodBindings.add(rodBinding);
|
||||
final String name = rodBinding.getName();
|
||||
names.add(name);
|
||||
iterators.add(RODs.get(i).getIterator(name));
|
||||
intervals.addAll(RODs.get(i).intervals);
|
||||
}
|
||||
|
||||
class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
Collections.sort(intervals);
|
||||
final GenomeLoc span = span(intervals);
|
||||
final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators);
|
||||
|
||||
// current location
|
||||
private GenomeLoc location;
|
||||
private GATKFeature curROD;
|
||||
private final String name;
|
||||
if ( testStateless ) {
|
||||
// test each tracker is well formed, as each is created
|
||||
for ( final GenomeLoc interval : intervals ) {
|
||||
final RefMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
|
||||
testMetaDataTrackerBindings(tracker, interval, RODs, rodBindings);
|
||||
}
|
||||
} else {
|
||||
// tests all trackers are correct after reading them into an array
|
||||
// this checks that the trackers are be safely stored away and analyzed later (critical for nano-scheduling)
|
||||
final List<RefMetaDataTracker> trackers = new ArrayList<RefMetaDataTracker>();
|
||||
for ( final GenomeLoc interval : intervals ) {
|
||||
final RefMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
|
||||
trackers.add(tracker);
|
||||
}
|
||||
|
||||
public FakePeekingRODIterator(GenomeLocParser genomeLocParser, GenomeLoc startingLoc, String name) {
|
||||
this.name = name;
|
||||
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
|
||||
for ( int i = 0; i < trackers.size(); i++) {
|
||||
testMetaDataTrackerBindings(trackers.get(i), intervals.get(i), RODs, rodBindings);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header associated with the backing input stream.
|
||||
* @return the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public Object getHeader() {
|
||||
return null;
|
||||
private void testMetaDataTrackerBindings(final RefMetaDataTracker tracker,
|
||||
final GenomeLoc interval,
|
||||
final List<ReadMetaDataTrackerRODStreamTest> RODs,
|
||||
final List<RodBinding<Feature>> rodBindings) {
|
||||
for ( int i = 0; i < RODs.size(); i++ ) {
|
||||
final ReadMetaDataTrackerRODStreamTest test = RODs.get(i);
|
||||
final List<Feature> queryFeaturesList = tracker.getValues(rodBindings.get(i));
|
||||
final Set<Feature> queryFeatures = new HashSet<Feature>(queryFeaturesList);
|
||||
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
|
||||
|
||||
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
|
||||
|
||||
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
|
||||
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary associated with the backing input stream.
|
||||
* @return sequence dictionary from the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return null;
|
||||
}
|
||||
static class TribbleIteratorFromCollection implements Iterator<RODRecordList> {
|
||||
// current location
|
||||
private final String name;
|
||||
final Queue<GATKFeature> gatkFeatures;
|
||||
|
||||
public TribbleIteratorFromCollection(final String name, final GenomeLocParser genomeLocParser, final List<Feature> features) {
|
||||
this.name = name;
|
||||
|
||||
@Override
|
||||
public GenomeLoc peekNextLocation() {
|
||||
System.err.println("Peek Next -> " + location);
|
||||
return location;
|
||||
}
|
||||
this.gatkFeatures = new LinkedList<GATKFeature>();
|
||||
for ( final Feature f : features )
|
||||
gatkFeatures.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name));
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc position() {
|
||||
return location;
|
||||
}
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return ! gatkFeatures.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RODRecordList seekForward(GenomeLoc interval) {
|
||||
while (location.isBefore(interval))
|
||||
next();
|
||||
return next(); // we always move by one, we know the next location will be right
|
||||
}
|
||||
@Override
|
||||
public RODRecordList next() {
|
||||
final GATKFeature first = gatkFeatures.poll();
|
||||
final Collection<GATKFeature> myFeatures = new LinkedList<GATKFeature>();
|
||||
myFeatures.add(first);
|
||||
while ( gatkFeatures.peek() != null && gatkFeatures.peek().getLocation().getStart() == first.getStart() )
|
||||
myFeatures.add(gatkFeatures.poll());
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return true; // we always have next
|
||||
}
|
||||
GenomeLoc loc = first.getLocation();
|
||||
for ( final GATKFeature feature : myFeatures )
|
||||
loc = loc.merge(feature.getLocation());
|
||||
|
||||
@Override
|
||||
public RODRecordList next() {
|
||||
System.err.println("Next -> " + location);
|
||||
curROD = new ReadMetaDataTrackerUnitTest.FakeRODatum(location, name);
|
||||
location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1);
|
||||
FakeRODRecordList list = new FakeRODRecordList();
|
||||
list.add(curROD);
|
||||
return list;
|
||||
}
|
||||
return new RODRecordListImpl(name, myFeatures, loc); // is this safe?
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new IllegalStateException("GRRR");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// nothing to do
|
||||
@Override public void remove() { throw new IllegalStateException("GRRR"); }
|
||||
}
|
||||
}
|
||||
|
||||
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {
|
||||
private final List<GATKFeature> list = new ArrayList<GATKFeature>();
|
||||
|
||||
public boolean add(GATKFeature data) {
|
||||
return list.add(data);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKFeature get(int i) {
|
||||
return list.get(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return list.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return list.get(0).getLocation();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "test";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(RODRecordList rodRecordList) {
|
||||
return this.list.get(0).getLocation().compareTo(rodRecordList.getLocation());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
|
|
@ -89,7 +90,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
|
||||
TableFeature datum = tracker.getFirstValue(TableFeature.class, "tableTest");
|
||||
TableFeature datum = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest"));
|
||||
|
||||
Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect");
|
||||
Assert.assertEquals(datum.get("COL2"),"D","datum parameter for COL2 is incorrect");
|
||||
|
|
@ -115,13 +116,13 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
|
||||
TableFeature datum1 = tracker.getFirstValue(TableFeature.class, "tableTest1");
|
||||
TableFeature datum1 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest1"));
|
||||
|
||||
Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect");
|
||||
Assert.assertEquals(datum1.get("COL2"),"D","datum1 parameter for COL2 is incorrect");
|
||||
Assert.assertEquals(datum1.get("COL3"),"E","datum1 parameter for COL3 is incorrect");
|
||||
|
||||
TableFeature datum2 = tracker.getFirstValue(TableFeature.class, "tableTest2");
|
||||
TableFeature datum2 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest2"));
|
||||
|
||||
Assert.assertEquals(datum2.get("COL1"),"C","datum2 parameter for COL1 is incorrect");
|
||||
Assert.assertEquals(datum2.get("COL2"),"D","datum2 parameter for COL2 is incorrect");
|
||||
|
|
|
|||
|
|
@ -36,8 +36,8 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -69,18 +69,15 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
|
||||
reader.getFileHeader(),
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
downsampling.create(),
|
||||
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte)0);
|
||||
reader.getFileHeader(),
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
downsampling.create(),
|
||||
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
(byte)0);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
|
@ -123,7 +123,7 @@ class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
|
|||
private long Gs;
|
||||
private long Ts;
|
||||
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
|
||||
for(byte base: read.getReadBases()) {
|
||||
switch(base) {
|
||||
case 'A': As++; break;
|
||||
|
|
|
|||
|
|
@ -24,9 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
import static org.testng.Assert.fail;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
|
|
@ -35,24 +32,25 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.testng.Assert.*;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
|
|
@ -183,11 +181,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
|
||||
|
|
@ -205,11 +200,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
|
||||
|
|
|
|||
|
|
@ -4,24 +4,27 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* testing of the LocusIteratorByState
|
||||
|
|
@ -255,6 +258,90 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////
|
||||
// comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////
|
||||
|
||||
private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
|
||||
private static final int IS_BEFORE_DELETION_START_FLAG = 2;
|
||||
private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
|
||||
private static final int IS_AFTER_DELETION_END_FLAG = 8;
|
||||
private static final int IS_BEFORE_INSERTION_FLAG = 16;
|
||||
private static final int IS_AFTER_INSERTION_FLAG = 32;
|
||||
private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;
|
||||
|
||||
private static class LIBSTest {
|
||||
|
||||
|
||||
final String cigar;
|
||||
final int readLength;
|
||||
final List<Integer> offsets;
|
||||
final List<Integer> flags;
|
||||
|
||||
private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
|
||||
this.cigar = cigar;
|
||||
this.readLength = readLength;
|
||||
this.offsets = offsets;
|
||||
this.flags = flags;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "LIBSTest")
|
||||
public Object[][] createLIBSTestData() {
|
||||
return new Object[][]{
|
||||
{new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
|
||||
{new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
|
||||
{new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
|
||||
{new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
|
||||
//TODO -- uncomment these when LIBS is fixed
|
||||
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
|
||||
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
|
||||
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
|
||||
{new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
|
||||
{new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
|
||||
{new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
|
||||
{new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "LIBSTest")
|
||||
public void testLIBS(LIBSTest params) {
|
||||
final int locus = 44367788;
|
||||
|
||||
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
|
||||
read.setCigarString(params.cigar);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
|
||||
|
||||
int offset = 0;
|
||||
while ( li.hasNext() ) {
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
ReadBackedPileup p = alignmentContext.getBasePileup();
|
||||
Assert.assertTrue(p.getNumberOfElements() == 1);
|
||||
PileupElement pe = p.iterator().next();
|
||||
|
||||
final int flag = params.flags.get(offset);
|
||||
Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
|
||||
Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
|
||||
|
||||
Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
|
||||
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// End comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////////
|
||||
|
||||
private static ReadProperties createTestReadProperties() {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
|
|
@ -264,11 +351,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
null,
|
||||
new ValidationExclusion(),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,276 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ReadMetaDataTrackerUnitTest
|
||||
* <p/>
|
||||
* test out the ReadMetaDataTracker
|
||||
*/
|
||||
public class ReadMetaDataTrackerUnitTest extends BaseTest {
|
||||
private static int startingChr = 1;
|
||||
private static int endingChr = 2;
|
||||
private static int readCount = 100;
|
||||
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
|
||||
private static SAMFileHeader header;
|
||||
private Set<String> nameSet;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void beforeEach() {
|
||||
nameSet = new TreeSet<String>();
|
||||
nameSet.add("default");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void twoRodsAtEachReadBase() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void rodAtEachReadBase() {
|
||||
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filterByName() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping("default");
|
||||
for (Integer x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void filterByDupType() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
|
||||
for (Integer x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), 2);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
// @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system
|
||||
|
||||
public void filterByMassiveDupType() {
|
||||
|
||||
for (int y = 0; y < 20; y++) {
|
||||
nameSet.add("default" + String.valueOf(y));
|
||||
long firstTime = System.currentTimeMillis();
|
||||
for (int lp = 0; lp < 1000; lp++) {
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
|
||||
for (Integer x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), y + 2);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
System.err.println(y + " = " + (System.currentTimeMillis() - firstTime));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void filterByType() {
|
||||
nameSet.add("default2");
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(Fake2RODatum.class);
|
||||
for (int x : map.keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(map.get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void sparceRODsForRead() {
|
||||
ReadMetaDataTracker tracker = getRMDT(7, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void rodByGenomeLoc() {
|
||||
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
||||
|
||||
// count the positions
|
||||
int count = 0;
|
||||
for (Integer x : tracker.getContigOffsetMapping().keySet()) {
|
||||
count++;
|
||||
Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1);
|
||||
}
|
||||
Assert.assertEquals(count, 10);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a ReadMetaDataTracker given:
|
||||
*
|
||||
* @param incr the spacing between site locations
|
||||
* @param names the names of the reference ordered data to create: one will be created at every location for each name
|
||||
*
|
||||
* @return a ReadMetaDataTracker
|
||||
*/
|
||||
private ReadMetaDataTracker getRMDT(int incr, Set<String> names, boolean alternateTypes) {
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
|
||||
TreeMap<Integer, RODMetaDataContainer> data = new TreeMap<Integer, RODMetaDataContainer>();
|
||||
for (int x = 0; x < record.getAlignmentEnd(); x += incr) {
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getReferenceName(), record.getAlignmentStart() + x, record.getAlignmentStart() + x);
|
||||
RODMetaDataContainer set = new RODMetaDataContainer();
|
||||
|
||||
int cnt = 0;
|
||||
for (String name : names) {
|
||||
if (alternateTypes)
|
||||
set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name));
|
||||
else
|
||||
set.addEntry(new FakeRODatum(loc, name));
|
||||
cnt++;
|
||||
}
|
||||
data.put(record.getAlignmentStart() + x, set);
|
||||
}
|
||||
ReadMetaDataTracker tracker = new ReadMetaDataTracker(genomeLocParser, record, data);
|
||||
return tracker;
|
||||
}
|
||||
|
||||
|
||||
/** for testing, we want a fake rod with a different classname, for the get-by-class-name functions */
|
||||
static public class Fake2RODatum extends FakeRODatum {
|
||||
|
||||
public Fake2RODatum(GenomeLoc location, String name) {
|
||||
super(location, name);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** for testing only */
|
||||
static public class FakeRODatum extends GATKFeature {
|
||||
|
||||
final GenomeLoc location;
|
||||
final String name;
|
||||
|
||||
public FakeRODatum(GenomeLoc location, String name) {
|
||||
super(name);
|
||||
this.location = location;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return this.location;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getUnderlyingObject() {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getChr() {
|
||||
return location.getContig();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getStart() {
|
||||
return (int)this.location.getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEnd() {
|
||||
return (int)this.location.getStop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -133,7 +133,7 @@ public class RefMetaDataTrackerUnitTest {
|
|||
List<RODRecordList> x = new ArrayList<RODRecordList>();
|
||||
if ( AValues != null ) x.add(AValues);
|
||||
if ( BValues != null ) x.add(BValues);
|
||||
return new RefMetaDataTracker(x, context);
|
||||
return new RefMetaDataTracker(x);
|
||||
}
|
||||
|
||||
public int nBoundTracks() {
|
||||
|
|
|
|||
|
|
@ -5,15 +5,7 @@ import org.testng.annotations.Test;
|
|||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Dec 1, 2009
|
||||
* Time: 9:03:34 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PileupWalkerIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testGnarleyFHSPileup() {
|
||||
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
|
||||
|
|
@ -23,4 +15,14 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5));
|
||||
executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleReadAligningOffChromosome1() {
|
||||
String gatk_args = "-T Pileup "
|
||||
+ " -I " + privateTestDir + "readOffb37contig1.bam"
|
||||
+ " -R " + b37KGReference
|
||||
+ " -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("4a45fe1f85aaa8c4158782f2b6dee2bd"));
|
||||
executeTest("Testing single read spanning off chromosome 1", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@ public class PrintReadsIntegrationTest extends WalkerTest {
|
|||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e920b8505e7e95d67634b0905237dbc")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "13bb9a91b1d4dd2425f73302b8a1ac1c")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e920b8505e7e95d67634b0905237dbc")},
|
||||
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")}
|
||||
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")},
|
||||
{new PRTest(b37KGReference, "NA12878.1_10mb_2_10mb.bam", "", "c43380ac39b98853af457b90e52f8427")}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||
private static class VRTest {
|
||||
|
|
@ -28,7 +28,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"f360ce3eb2b0b887301be917a9843e2b", // tranches
|
||||
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
|
||||
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
|
||||
"afa297c743437551cc2bd36ddd6d6d75"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
@ -77,7 +77,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
|
||||
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches
|
||||
"74c10fc15f9739a938b7138909fbde04", // recal file
|
||||
"62fda105e14b619a1c263855cf56af1d"); // cut VCF
|
||||
"c30d163871a37f2bbf8ee7f761e870b4"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRBCFTest")
|
||||
public Object[][] createVRBCFTest() {
|
||||
|
|
@ -129,13 +129,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
|
||||
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
||||
"64f576881e21323dd4078262604717a2"); // cut VCF
|
||||
"b2c6827be592c24a4692b1753edc7d23"); // cut VCF
|
||||
|
||||
VRTest indelFiltered = new VRTest(
|
||||
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
|
||||
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
||||
"af22c55d91394c56a222fd40d6d54781"); // cut VCF
|
||||
"5d483fe1ba2ef36ee9e6c14cbd654706"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
public Object[][] createTestVariantRecalibratorIndel() {
|
||||
|
|
@ -193,7 +193,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
|
||||
Arrays.asList("018b3a5cc7cf0cb5468c6a0c80ccaa8b"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
|
@ -211,4 +212,59 @@ public class GenomeLocUnitTest extends BaseTest {
|
|||
Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing comparison, hashcode, and equals
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "GenomeLocComparisons")
|
||||
public Object[][] createGenomeLocComparisons() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final int start = 10;
|
||||
for ( int stop = start; stop < start + 3; stop++ ) {
|
||||
final GenomeLoc g1 = genomeLocParser.createGenomeLoc("chr2", start, stop);
|
||||
for ( final String contig : Arrays.asList("chr1", "chr2", "chr3")) {
|
||||
for ( int start2 = start - 1; start2 <= stop + 1; start2++ ) {
|
||||
for ( int stop2 = start2; stop2 < stop + 2; stop2++ ) {
|
||||
final GenomeLoc g2 = genomeLocParser.createGenomeLoc(contig, start2, stop2);
|
||||
|
||||
ComparisonResult cmp = ComparisonResult.EQUALS;
|
||||
if ( contig.equals("chr3") ) cmp = ComparisonResult.LESS_THAN;
|
||||
else if ( contig.equals("chr1") ) cmp = ComparisonResult.GREATER_THAN;
|
||||
else if ( start < start2 ) cmp = ComparisonResult.LESS_THAN;
|
||||
else if ( start > start2 ) cmp = ComparisonResult.GREATER_THAN;
|
||||
else if ( stop < stop2 ) cmp = ComparisonResult.LESS_THAN;
|
||||
else if ( stop > stop2 ) cmp = ComparisonResult.GREATER_THAN;
|
||||
|
||||
tests.add(new Object[]{g1, g2, cmp});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
private enum ComparisonResult {
|
||||
LESS_THAN(-1),
|
||||
EQUALS(0),
|
||||
GREATER_THAN(1);
|
||||
|
||||
final int cmp;
|
||||
|
||||
private ComparisonResult(int cmp) {
|
||||
this.cmp = cmp;
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "GenomeLocComparisons")
|
||||
public void testGenomeLocComparisons(GenomeLoc g1, GenomeLoc g2, ComparisonResult expected) {
|
||||
Assert.assertEquals(g1.compareTo(g2), expected.cmp, "Comparing genome locs failed");
|
||||
Assert.assertEquals(g1.equals(g2), expected == ComparisonResult.EQUALS);
|
||||
if ( expected == ComparisonResult.EQUALS )
|
||||
Assert.assertEquals(g1.hashCode(), g2.hashCode(), "Equal genome locs don't have the same hash code");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -122,20 +122,20 @@ class GATKResourcesBundle extends QScript {
|
|||
//
|
||||
// standard VCF files. Will be lifted to each reference
|
||||
//
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_135_b37.leftAligned.vcf",
|
||||
"dbsnp_135", b37, true, false))
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_137_b37.leftAligned.vcf",
|
||||
"dbsnp_137", b37, true, false))
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_genotypes_1525_samples.b37.vcf",
|
||||
"1000G_omni2.5", b37, true, true))
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_2141_samples.b37.vcf",
|
||||
"1000G_omni2.5", b37, true, false))
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf",
|
||||
"hapmap_3.3", b37, true, true))
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf",
|
||||
"hapmap_3.3", b37, true, false))
|
||||
|
||||
addResource(new Resource("/humgen/1kg/DCC/ftp/technical/working/20120312_phase1_v2_indel_cleaned_sites_list/ALL.wgs.phase1_release_v2.20101123.official_indel_calls.20120312.sites.vcf",
|
||||
"1000G_phase1.indels", b37, true, false))
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
|
||||
"Mills_and_1000G_gold_standard.indels", b37, true, true))
|
||||
"Mills_and_1000G_gold_standard.indels", b37, true, false))
|
||||
|
||||
//
|
||||
// example call set for wiki tutorial
|
||||
|
|
|
|||
Loading…
Reference in New Issue