Merge branch 'master' of ssh://gsa4/humgen/gsa-scr1/gsa-engineering/git/unstable

merging trivially after a commit
This commit is contained in:
Yossi Farjoun 2012-09-05 14:33:39 -04:00
commit f4b39a7545
93 changed files with 1755 additions and 1340 deletions

7
.gitignore vendored
View File

@ -18,3 +18,10 @@ queueScatterGather
/bar*
integrationtests/
public/testdata/onTheFlyOutputTest.vcf
private/testdata/onTheFlyOutputTest.vcf
lib
html
gatkdocs
dist
build
resources

View File

@ -106,47 +106,49 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
}
@Override
public synchronized void updateDataForRead(final GATKSAMRecord read, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
public synchronized void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
final ReadCovariates readCovariates = covariateKeySetFrom(read);
if( !skip[offset] ) {
final ReadCovariates readCovariates = covariateKeySetFrom(read);
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset];
tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset];
tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset];
tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset];
tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset];
tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset];
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
final byte qual = tempQualArray[eventIndex];
final double isError = tempFractionalErrorArray[eventIndex];
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
final byte qual = tempQualArray[eventIndex];
final double isError = tempFractionalErrorArray[eventIndex];
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
rgPreviousDatum.combine(rgThisDatum);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(1.0, isError);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
final NestedIntegerArray<RecalDatum> rgRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE);
final RecalDatum rgPreviousDatum = rgRecalTable.get(keys[0], eventIndex);
final RecalDatum rgThisDatum = createDatumObject(qual, isError);
if (rgPreviousDatum == null) // key doesn't exist yet in the map so make a new bucket and add it
rgRecalTable.put(rgThisDatum, keys[0], eventIndex);
else
covPreviousDatum.increment(1.0, isError);
rgPreviousDatum.combine(rgThisDatum);
final NestedIntegerArray<RecalDatum> qualRecalTable = recalibrationTables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE);
final RecalDatum qualPreviousDatum = qualRecalTable.get(keys[0], keys[1], eventIndex);
if (qualPreviousDatum == null)
qualRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], eventIndex);
else
qualPreviousDatum.increment(1.0, isError);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
final NestedIntegerArray<RecalDatum> covRecalTable = recalibrationTables.getTable(i);
final RecalDatum covPreviousDatum = covRecalTable.get(keys[0], keys[1], keys[i], eventIndex);
if (covPreviousDatum == null)
covRecalTable.put(createDatumObject(qual, isError), keys[0], keys[1], keys[i], eventIndex);
else
covPreviousDatum.increment(1.0, isError);
}
}
}
}

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
@ -247,7 +247,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
* @return a linked list with all the reads produced by the clipping operations
*/
@Override
public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
LinkedList<GATKSAMRecord> mappedReads;
totalReads++;
if (!debugRead.isEmpty() && read.getReadName().contains(debugRead))

View File

@ -27,24 +27,23 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.Pair;
@ -52,6 +51,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -101,7 +101,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS)
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
@ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
@ -308,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
if( !allelesToGenotype.contains(vc) ) {
allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a ReadMetaDataTracker object
allelesToGenotype.add(vc); // save for later for processing during the ActiveRegion's map call. Should be folded into a RefMetaDataTracker object
}
}
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {

View File

@ -127,9 +127,9 @@ public class BQSRIntegrationTest extends WalkerTest {
@DataProvider(name = "PRTest")
public Object[][] createPRTestData() {
return new Object[][]{
{new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")},
{new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")},
{new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")},
{new PRTest("", "1532242f9fe90ef759a0faa5d85f61fb")},
{new PRTest(" -qq -1", "3dd2c87915c96ac55c3872026574d8cb")},
{new PRTest(" -qq 6", "5d012ee224f1cb4a7afac59e3655e20c")},
{new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")}
};
}

View File

@ -66,4 +66,11 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "e1f88fac91424740c0eaac1de48b3970");
}
/**
 * Runs the HaplotypeCaller over privateTestDir/haplotype-problem-4.bam with
 * -minPruning 3 and pins the expected output md5 — presumably a regression
 * guard for reads modified during active-region processing (per the test
 * name); confirm intent against the commit that added the BAM fixture.
 */
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("000fd36d5cf8090386bb2ac15e3ab0b5"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
}

View File

@ -245,7 +245,7 @@ public class FastaSequenceIndexBuilder {
* Reset iterators and add contig to sequence index
*/
private void finishReadingContig(FastaSequenceIndex sequenceIndex) {
sequenceIndex.add(new FastaSequenceIndexEntry(contig, location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
sequenceIndex.add(new FastaSequenceIndexEntry(trimContigName(contig), location, size, (int) basesPerLine, (int) bytesPerLine, thisSequenceIndex++));
status = Status.NONE;
contig = "";
size = 0;
@ -258,6 +258,14 @@ public class FastaSequenceIndexBuilder {
}
}
/**
 * Trims the contig name by removing any characters after the first space.
 *
 * NOTE(review): despite the wording "whitespace", only the ASCII space
 * character (' ') is detected; a tab or other whitespace character is not
 * trimmed — confirm this matches the contig-name convention expected by the
 * index consumers.
 *
 * @param contigName the raw contig name, possibly followed by a space and a description
 * @return the substring of contigName up to (but excluding) the first space,
 *         or contigName unchanged when it contains no space
 */
private static String trimContigName(final String contigName) {
int whitespaceIndex = contigName.indexOf(' ');
return ( whitespaceIndex == -1 ) ? contigName : contigName.substring(0, whitespaceIndex);
}
/**
* Stores FastaSequenceIndex as a .fasta.fai file on local machine
* Although method is public it cannot be called on any old FastaSequenceIndex - must be created by a FastaSequenceIndexBuilder

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -81,7 +81,7 @@ public class AlignmentValidation extends ReadWalker<Integer,Integer> {
* @return Number of reads aligned by this map (aka 1).
*/
@Override
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
//logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases();

View File

@ -117,6 +117,15 @@ public final class RodBinding<T extends Feature> {
this.bound = true;
}
/**
 * For testing purposes only. Creates a RodBinding sufficient for looking up associations to rawName.
 *
 * The source string is set to the literal "missing" and the tags are empty,
 * so the resulting binding is suitable only where lookups key off the raw name.
 *
 * @param type the Feature class this binding is typed against
 * @param rawName the raw name this binding will be looked up by
 */
public RodBinding(Class<T> type, final String rawName) {
this(type, rawName, "missing", type.getSimpleName(), new Tags());
}
/**
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
* @param type class this unbound RodBinding creates

View File

@ -42,6 +42,8 @@ import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
@ -49,8 +51,8 @@ import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -131,6 +133,11 @@ public class GenomeAnalysisEngine {
*/
private Collection<ReadFilter> filters;
/**
* Collection of the read transformers applied to the reads
*/
private List<ReadTransformer> readTransformers;
/**
* Controls the allocation of threads between CPU vs IO.
*/
@ -354,6 +361,39 @@ public class GenomeAnalysisEngine {
return Collections.unmodifiableList(filters);
}
/**
 * Discovers all available read transformers, initializes each one against this
 * engine and the given walker, and records the enabled ones via
 * setReadTransformers(); retrieve them afterwards with getReadTransformers().
 *
 * (Note: returns nothing despite the original doc's claim of a return value —
 * the result is stored in the readTransformers field.)
 *
 * @param walker the walker to apply read transformers to; a ReadTransformersMode
 *               annotation on the walker, if present, overrides each
 *               transformer's application time
 */
public void initializeReadTransformers(final Walker walker) {
final List<ReadTransformer> activeTransformers = new ArrayList<ReadTransformer>();
// a walker-level @ReadTransformersMode annotation, when present, overrides the application time
final ReadTransformersMode overrideMode = WalkerManager.getWalkerAnnotation(walker, ReadTransformersMode.class);
final ReadTransformer.ApplicationTime overrideTime = overrideMode != null ? overrideMode.ApplicationTime() : null;
// discover every available ReadTransformer implementation through the plugin system
final PluginManager<ReadTransformer> pluginManager = new PluginManager<ReadTransformer>(ReadTransformer.class);
for ( final ReadTransformer transformer : pluginManager.createAllTypes() ) {
transformer.initialize(overrideTime, this, walker);
if ( transformer.enabled() ) // keep only transformers that report themselves enabled after initialization
activeTransformers.add(transformer);
}
setReadTransformers(activeTransformers);
}
/**
 * @return the list of active read transformers established by
 *         initializeReadTransformers(); null if that method has not yet run
 */
public List<ReadTransformer> getReadTransformers() {
return readTransformers;
}
/**
 * Records the engine's read transformers.
 *
 * @param readTransformers the non-null list of transformers to use
 * @throws ReviewedStingException if readTransformers is null
 */
private void setReadTransformers(final List<ReadTransformer> readTransformers) {
if ( readTransformers == null )
throw new ReviewedStingException("read transformers cannot be null");
this.readTransformers = readTransformers;
}
/**
* Parse out the thread allocation from the given command-line argument.
*/
@ -419,9 +459,6 @@ public class GenomeAnalysisEngine {
argCollection.setDownsamplingMethod(method);
}
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
protected boolean includeReadsWithDeletionAtLoci() {
return walker.includeReadsWithDeletionAtLoci();
}
@ -702,13 +739,12 @@ public class GenomeAnalysisEngine {
protected void initializeDataSources() {
logger.info("Strictness is " + argCollection.strictnessLevel);
// TODO -- REMOVE ME
BAQ.DEFAULT_GOP = argCollection.BAQGOP;
validateSuppliedReference();
setReferenceDataSource(argCollection.referenceFile);
validateSuppliedReads();
initializeReadTransformers(walker);
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
for (ReadFilter filter : filters)
@ -795,9 +831,6 @@ public class GenomeAnalysisEngine {
// interrogating for the downsample method during command line recreation.
setDownsamplingMethod(method);
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
@ -817,11 +850,8 @@ public class GenomeAnalysisEngine {
method,
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
filters,
readTransformers,
includeReadsWithDeletionAtLoci(),
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities,
removeProgramRecords);
}

View File

@ -1,15 +1,14 @@
package org.broadinstitute.sting.gatk;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import java.util.Collection;
import java.util.List;
/**
* User: hanna
* Date: May 14, 2009
@ -34,12 +33,9 @@ public class ReadProperties {
private final DownsamplingMethod downsamplingMethod;
private final ValidationExclusion exclusionList;
private final Collection<ReadFilter> supplementalFilters;
private final List<ReadTransformer> readTransformers;
private final boolean includeReadsWithDeletionAtLoci;
private final boolean useOriginalBaseQualities;
private final BAQ.CalculationMode cmode;
private final BAQ.QualityMode qmode;
private final IndexedFastaSequenceFile refReader; // read for BAQ, if desired
private final BaseRecalibration bqsrApplier;
private final byte defaultBaseQualities;
/**
@ -95,6 +91,11 @@ public class ReadProperties {
return supplementalFilters;
}
/**
 * @return the read transformers to apply to the reads, as supplied at construction
 */
public List<ReadTransformer> getReadTransformers() {
return readTransformers;
}
/**
* Return whether to use original base qualities.
* @return Whether to use original base qualities.
@ -103,16 +104,6 @@ public class ReadProperties {
return useOriginalBaseQualities;
}
public BAQ.QualityMode getBAQQualityMode() { return qmode; }
public BAQ.CalculationMode getBAQCalculationMode() { return cmode; }
public IndexedFastaSequenceFile getRefReader() {
return refReader;
}
public BaseRecalibration getBQSRApplier() { return bqsrApplier; }
/**
* @return Default base quality value to fill reads missing base quality information.
*/
@ -134,9 +125,6 @@ public class ReadProperties {
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently.
* @param cmode How should we apply the BAQ calculation to the reads?
* @param qmode How should we apply the BAQ calculation to the reads?
* @param refReader if applyBAQ is true, must be a valid pointer to a indexed fasta file reads so we can get the ref bases for BAQ calculation
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
*/
public ReadProperties( Collection<SAMReaderID> samFiles,
@ -146,11 +134,8 @@ public class ReadProperties {
DownsamplingMethod downsamplingMethod,
ValidationExclusion exclusionList,
Collection<ReadFilter> supplementalFilters,
List<ReadTransformer> readTransformers,
boolean includeReadsWithDeletionAtLoci,
BAQ.CalculationMode cmode,
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) {
this.readers = samFiles;
this.header = header;
@ -158,12 +143,9 @@ public class ReadProperties {
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
this.supplementalFilters = supplementalFilters;
this.readTransformers = readTransformers;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.useOriginalBaseQualities = useOriginalBaseQualities;
this.cmode = cmode;
this.qmode = qmode;
this.refReader = refReader;
this.bqsrApplier = bqsrApplier;
this.defaultBaseQualities = defaultBaseQualities;
}
}

View File

@ -29,13 +29,14 @@ import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.lang.annotation.Annotation;
import java.util.*;
/**
@ -319,11 +320,11 @@ public class WalkerManager extends PluginManager<Walker> {
return downsamplingMethod;
}
public static BAQ.QualityMode getBAQQualityMode(Walker walker) {
return walker.getClass().getAnnotation(BAQMode.class).QualityMode();
public static <T extends Annotation> T getWalkerAnnotation(final Walker walker, final Class<T> clazz) {
return walker.getClass().getAnnotation(clazz);
}
public static BAQ.ApplicationTime getBAQApplicationTime(Walker walker) {
public static ReadTransformer.ApplicationTime getBAQApplicationTime(Walker walker) {
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
}

View File

@ -177,7 +177,7 @@ public class ReferenceContext {
* @return The base at the given locus from the reference.
*/
public byte getBase() {
return getBases()[(int)(locus.getStart() - window.getStart())];
return getBases()[(locus.getStart() - window.getStart())];
}
/**

View File

@ -0,0 +1,143 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Collection;
import java.util.LinkedList;
import java.util.ListIterator;
/**
* Key algorithmic helper for ReadBasedReferenceOrderedData
*
* Takes a single iterator of features, and provides a single capability that returns
* the list of RODs that overlap an interval. Allows sequential getOverlapping calls
* from intervals provided that these intervals always have increasing getStart() values.
*
*/
class IntervalOverlappingRODsFromStream {
/**
* Only held for QC purposes
*/
GenomeLoc lastQuery = null;
private final String name;
private final LinkedList<GATKFeature> currentFeatures = new LinkedList<GATKFeature>();
private final PeekableIterator<RODRecordList> futureFeatures;
/**
* Create a new IntervalOverlappingRODsFromStream that reads elements from futureFeatures and
* returns RODRecordLists having name
*
* @param name
* @param futureFeatures
*/
IntervalOverlappingRODsFromStream(final String name, final PeekableIterator<RODRecordList> futureFeatures) {
if ( futureFeatures == null ) throw new IllegalArgumentException("futureFeatures cannot be null");
this.name = name;
this.futureFeatures = futureFeatures;
}
/**
* Get the list of RODs overlapping loc from this stream of RODs.
*
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
*
* @param loc the interval to query
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
*/
@Ensures({"overlaps(loc, result)",
"! futureFeatures.hasNext() || futureFeatures.peek().getLocation().isPast(loc)",
"result != null"})
public RODRecordList getOverlapping(final GenomeLoc loc) {
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
trimCurrentFeaturesToLoc(loc);
readOverlappingFutureFeatures(loc);
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
}
/**
* For contract assurance. Checks that all bindings in loc overlap
*
* @param loc
* @param bindings
* @return
*/
@Requires({"loc != null", "bindings != null"})
private boolean overlaps(final GenomeLoc loc, final RODRecordList bindings) {
for ( final GATKFeature feature : bindings )
if ( ! feature.getLocation().overlapsP(loc) )
return false;
return true;
}
/**
* Subset the features in all to those that overlap with loc
*
* The current features list contains everything read that cannot be thrown away yet, but not
* everything in there necessarily overlaps with loc. Subset to just those that do overlap
*
* @param loc the location that features must overlap
* @param all the list of all features
* @return a subset of all that overlaps with loc
*/
@Requires({"loc != null", "all != null"})
@Ensures("result.size() <= all.size()")
private Collection<GATKFeature> subsetToOverlapping(final GenomeLoc loc, final Collection<GATKFeature> all) {
final LinkedList<GATKFeature> overlapping = new LinkedList<GATKFeature>();
for ( final GATKFeature feature : all )
if ( feature.getLocation().overlapsP(loc) )
overlapping.add(feature);
return overlapping;
}
/**
* Update function. Remove all elements of currentFeatures that end before loc
*
* @param loc the location to use
*/
@Requires("loc != null")
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
while ( it.hasNext() ) {
final GATKFeature feature = it.next();
if ( feature.getLocation().isBefore(loc) )
it.remove();
}
}
/**
* Update function: Read all elements from futureFeatures that overlap with loc
*
* Stops at the first element that starts before the end of loc, or the stream empties
*
* @param loc
*/
@Requires("loc != null")
@Ensures("currentFeatures.size() >= old(currentFeatures.size())")
private void readOverlappingFutureFeatures(final GenomeLoc loc) {
while ( futureFeatures.hasNext() ) {
final GenomeLoc nextLoc = futureFeatures.peek().getLocation();
if ( nextLoc.isBefore(loc) ) {
futureFeatures.next(); // next rod element is before loc, throw it away and keep looking
} else if ( nextLoc.isPast(loc) ) {
break; // next element is past loc, stop looking but don't pop it
} else if ( nextLoc.overlapsP(loc) ) {
// add overlapping elements to our current features, removing from stream
for ( final GATKFeature feature : futureFeatures.next() ) {
currentFeatures.add(feature);
}
}
}
}
}

View File

@ -58,7 +58,7 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
// todo -- warning, I removed the reference to the name from states
bindings.add( state.iterator.seekForward(loc) );
return new RefMetaDataTracker(bindings, referenceContext);
return new RefMetaDataTracker(bindings);
}
/**

View File

@ -23,40 +23,63 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
/** a ROD view for reads. This provides the Read traversals a way of getting a ReadMetaDataTracker */
/** a ROD view for reads. This provides the Read traversals a way of getting a RefMetaDataTracker */
public class ReadBasedReferenceOrderedView implements View {
private final WindowedData window;
public ReadBasedReferenceOrderedView(ShardDataProvider provider) {
window = new WindowedData(provider);
provider.register(this);
}
// a list of the RMDDataState (location->iterators)
private final List<RMDDataState> states = new ArrayList<RMDDataState>(1);
private final static RefMetaDataTracker EMPTY_TRACKER = new RefMetaDataTracker();
/**
* for testing only please
*
* @param data the window provider
* Used to get genome locs for reads
*/
ReadBasedReferenceOrderedView(WindowedData data) {
window = data;
private final GenomeLocParser genomeLocParser;
/**
* The total extent of all reads in this span. We create iterators from our RODs
* from the start of this span, to the end.
*/
private final GenomeLoc shardSpan;
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
this.genomeLocParser = provider.getGenomeLocParser();
// conditional to optimize the case where we don't have any ROD data
this.shardSpan = provider.getReferenceOrderedData() != null ? ((ReadShard)provider.getShard()).getReadsSpan() : null;
provider.register(this);
if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) {
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
}
}
public ReadMetaDataTracker getReferenceOrderedDataForRead(SAMRecord read) {
return window.getTracker(read);
/**
* Testing constructor
*/
protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser,
final GenomeLoc shardSpan,
final List<String> names,
final List<PeekableIterator<RODRecordList>> featureSources) {
this.genomeLocParser = genomeLocParser;
this.shardSpan = shardSpan;
for ( int i = 0; i < names.size(); i++ )
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
}
public Collection<Class<? extends View>> getConflictingViews() {
@ -65,135 +88,72 @@ public class ReadBasedReferenceOrderedView implements View {
return classes;
}
public void close() {
if (window != null) window.close();
}
}
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
class WindowedData {
// the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope
private final TreeMap<Integer, RODMetaDataContainer> mapping = new TreeMap<Integer, RODMetaDataContainer>();
// our current location from the last read we processed
private GenomeLoc currentLoc;
// a list of the RMDDataState (location->iterators)
private List<RMDDataState> states;
// the provider; where we get all our information
private final ShardDataProvider provider;
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(WindowedData.class);
/**
* create a WindowedData given a shard provider
*
* @param provider the ShardDataProvider
*/
public WindowedData(ShardDataProvider provider) {
this.provider = provider;
}
/**
* load the states dynamically, since the only way to get a genome loc is from the read (the shard doesn't have one)
*
* @param provider the ShardDataProvider
* @param rec the current read
*/
private void getStates(ShardDataProvider provider, SAMRecord rec) {
int stop = Integer.MAX_VALUE;
// figure out the appropriate alignment stop
if (provider.hasReference()) {
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
}
// calculate the range of positions we need to look at
GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(),
rec.getAlignmentStart(),
stop);
states = new ArrayList<RMDDataState>();
if (provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
}
/**
* this function is for testing only
*
* @param states a list of RMDDataState to initialize with
*/
WindowedData(List<RMDDataState> states) {
this.states = states;
provider = null;
}
/**
* create a ReadMetaDataTracker given the current read
* create a RefMetaDataTracker given the current read
*
* @param rec the read
*
* @return a ReadMetaDataTracker for the read, from which you can get ROD -> read alignments
* @return a RefMetaDataTracker for the read, from which you can get ROD -> read alignments
*/
public ReadMetaDataTracker getTracker(SAMRecord rec) {
updatePosition(rec);
return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping);
/**
 * Get the reference-ordered data overlapping the given read.
 *
 * Unmapped reads have no genomic span, so they always receive an empty tracker.
 *
 * @param rec the read to look up; must not be null
 * @return a non-null RefMetaDataTracker with the RODs overlapping the read
 */
@Requires("rec != null")
@Ensures("result != null")
public RefMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
    if ( ! rec.getReadUnmappedFlag() ) {
        final GenomeLoc readSpan = genomeLocParser.createGenomeLoc(rec);
        return getReferenceOrderedDataForInterval(readSpan);
    }
    // empty RODs for unmapped reads
    return new RefMetaDataTracker();
}
/**
* update the position we're storing
*
* @param rec the read to use for start and end
*/
private void updatePosition(SAMRecord rec) {
if (states == null) getStates(this.provider, rec);
currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec);
// flush the queue looking for records we've passed over
while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
mapping.pollFirstEntry(); // toss away records that we've passed
// add new data to the queue
for (RMDDataState state : states) {
// move into position
while (state.iterator.hasNext() && state.iterator.peekNextLocation().isBefore(currentLoc))
state.iterator.next();
while (state.iterator.hasNext() && state.iterator.peekNextLocation().overlapsP(currentLoc)) {
RODRecordList list = state.iterator.next();
for (GATKFeature datum : list) {
if (!mapping.containsKey(list.getLocation().getStart()))
mapping.put(list.getLocation().getStart(), new RODMetaDataContainer());
mapping.get(list.getLocation().getStart()).addEntry(datum);
}
}
/**
 * Get the reference-ordered data overlapping an arbitrary interval within the shard.
 *
 * @param interval the interval to query; must lie within the shard's span
 * @return a non-null tracker holding one record list per ROD binding
 */
@Requires({"interval != null", "shardSpan == null || shardSpan.isUnmapped() || shardSpan.containsP(interval)"})
@Ensures("result != null")
public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
    // optimization for no bindings (common for read walkers)
    if ( states.isEmpty() || shardSpan.isUnmapped() )
        return EMPTY_TRACKER;

    final List<RODRecordList> overlapping = new ArrayList<RODRecordList>(states.size());
    for ( final RMDDataState state : states ) {
        overlapping.add(state.stream.getOverlapping(interval));
    }
    return new RefMetaDataTracker(overlapping);
}
/**
 * Closes the current view, releasing each ROD traversal state.
 *
 * The merged diff left both the legacy close path (dataSource.close(iterator),
 * states = null) and the new per-state close fused together, which would close
 * each lane twice and then NPE on states.clear(); this is the coherent new form.
 */
public void close() {
    for (final RMDDataState state : states)
        state.close();
    // Clear out the existing data so that post-close() accesses to this data will fail-fast.
    states.clear();
}
/** Models the traversal state of a given ROD lane. */
private static class RMDDataState {
public final ReferenceOrderedDataSource dataSource;
public final IntervalOverlappingRODsFromStream stream;
private final LocationAwareSeekableRODIterator iterator;
}
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
this.dataSource = dataSource;
this.iterator = iterator;
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<RODRecordList>(iterator));
}
/** Models the traversal state of a given ROD lane. */
class RMDDataState {
public final ReferenceOrderedDataSource dataSource;
public final LocationAwareSeekableRODIterator iterator;
/**
* For testing
*/
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
this.dataSource = null;
this.iterator = null;
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<RODRecordList>(iterator));
}
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
this.dataSource = dataSource;
this.iterator = iterator;
public void close() {
if ( dataSource != null )
dataSource.close( iterator );
}
}
}

View File

@ -101,7 +101,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
/**
 * Build the RefMetaDataTracker for the tracks overlapping the given locus.
 *
 * The merged diff kept both the old two-argument and the new one-argument
 * RefMetaDataTracker construction (two consecutive returns); the new
 * single-argument form is retained, matching the tracker constructor used
 * elsewhere in this change. referenceContext stays in the signature for
 * interface compatibility.
 */
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
    // special case the interval again -- add it into the ROD
    if ( interval != null ) { allTracksHere.add(interval); }
    return new RefMetaDataTracker(allTracksHere);
}
public boolean hasNext() {

View File

@ -6,11 +6,9 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
*
@ -125,4 +123,33 @@ public class ReadShard extends Shard {
}
return sb.toString();
}
/**
 * Get the full span from the start of the left-most read to the end of the right-most one.
 *
 * Note this may be different than the getLocation() of the shard, as this reflects the
 * targeted span, not the actual span of reads.
 *
 * @return the genome loc representing the span of these reads on the genome
 */
public GenomeLoc getReadsSpan() {
    if ( isUnmapped() || super.getGenomeLocs() == null || reads.isEmpty() )
        return super.getLocation();

    String contig = null;
    int leftmost = Integer.MAX_VALUE;
    int rightmost = Integer.MIN_VALUE;
    for ( final SAMRecord read : reads ) {
        final String readContig = read.getReferenceName();
        // reads crossing contigs cannot be represented as a single span
        if ( contig != null && ! readContig.equals(contig) )
            throw new ReviewedStingException("ReadShard contains reads spanning contig boundaries, which is no longer allowed. "
                    + "First contig is " + contig + " next read was " + readContig );
        contig = readContig;
        leftmost = Math.min(leftmost, read.getAlignmentStart());
        rightmost = Math.max(rightmost, read.getAlignmentEnd());
    }
    return parser.createGenomeLoc(contig, leftmost, rightmost);
}
}

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.sam.MergingSamRecordIterator;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.samtools.*;
@ -42,12 +41,9 @@ import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.BQSRSamIterator;
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File;
@ -200,11 +196,8 @@ public class SAMDataSource {
downsamplingMethod,
exclusionList,
supplementalFilters,
Collections.<ReadTransformer>emptyList(),
includeReadsWithDeletionAtLoci,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1,
false);
}
@ -234,11 +227,8 @@ public class SAMDataSource {
DownsamplingMethod downsamplingMethod,
ValidationExclusion exclusionList,
Collection<ReadFilter> supplementalFilters,
List<ReadTransformer> readTransformers,
boolean includeReadsWithDeletionAtLoci,
BAQ.CalculationMode cmode,
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
@ -262,7 +252,7 @@ public class SAMDataSource {
else {
// Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively
// will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
ReadShard.setReadBufferSize(Math.min(1000*samFiles.size(),250000));
ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000));
}
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
@ -308,11 +298,8 @@ public class SAMDataSource {
downsamplingMethod,
exclusionList,
supplementalFilters,
readTransformers,
includeReadsWithDeletionAtLoci,
cmode,
qmode,
refReader,
bqsrApplier,
defaultBaseQualities);
// cache the read group id (original) -> read group id (merged)
@ -486,9 +473,15 @@ public class SAMDataSource {
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
while(!shard.isBufferFull() && iterator.hasNext()) {
read = iterator.next();
shard.addRead(read);
noteFilePositionUpdate(positionUpdates,read);
final SAMRecord nextRead = iterator.next();
if ( read == null || (nextRead.getReferenceIndex().equals(read.getReferenceIndex())) ) {
// only add reads to the shard if they are on the same contig
read = nextRead;
shard.addRead(read);
noteFilePositionUpdate(positionUpdates,read);
} else {
break;
}
}
// If the reads are sorted in queryname order, ensure that all reads
@ -597,10 +590,7 @@ public class SAMDataSource {
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters(),
readProperties.getBAQCalculationMode(),
readProperties.getBAQQualityMode(),
readProperties.getRefReader(),
readProperties.getBQSRApplier(),
readProperties.getReadTransformers(),
readProperties.defaultBaseQualities());
}
@ -667,10 +657,7 @@ public class SAMDataSource {
Double downsamplingFraction,
Boolean noValidationOfReadOrder,
Collection<ReadFilter> supplementalFilters,
BAQ.CalculationMode cmode,
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
List<ReadTransformer> readTransformers,
byte defaultBaseQualities) {
// *********************************************************************************** //
@ -692,11 +679,11 @@ public class SAMDataSource {
// only wrap if we are replacing the original qualities or using a default base quality
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
if (bqsrApplier != null)
wrappedIterator = new BQSRSamIterator(wrappedIterator, bqsrApplier);
if (cmode != BAQ.CalculationMode.OFF)
wrappedIterator = new BAQSamIterator(refReader, wrappedIterator, cmode, qmode);
// set up read transformers
for ( final ReadTransformer readTransformer : readTransformers ) {
if ( readTransformer.enabled() && readTransformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_INPUT )
wrappedIterator = new ReadTransformingIterator(wrappedIterator, readTransformer);
}
return wrappedIterator;
}

View File

@ -100,22 +100,29 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
if (walker instanceof TreeReducible && threadAllocation.getNumCPUThreads() > 1) {
if(walker.isReduceByInterval())
if (threadAllocation.getNumCPUThreads() > 1) {
if (walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
if ( walker instanceof ReadWalker )
if ( walker instanceof ReadWalker ) {
if ( ! (walker instanceof ThreadSafeMapReduce) ) badNT(engine, walker);
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
else
} else {
// TODO -- update test for when nano scheduling only is an option
if ( ! (walker instanceof TreeReducible) ) badNT(engine, walker);
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
}
} else {
if(threadAllocation.getNumCPUThreads() > 1)
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
}
}
/**
 * Fail with the standard "no parallel execution" UserException for this walker.
 *
 * @param engine the engine, used to resolve the walker's user-facing name
 * @param walker the walker that cannot be run with -nt
 */
private static void badNT(final GenomeAnalysisEngine engine, final Walker walker) {
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
}
/**
* Create a microscheduler given the reads and reference.
*

View File

@ -25,9 +25,14 @@
package org.broadinstitute.sting.gatk.filters;
import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import java.util.Collection;
import java.util.List;
/**
* Manage filters and filter options. Any requests for basic filtering classes
@ -54,4 +59,39 @@ public class FilterManager extends PluginManager<ReadFilter> {
public Collection<Class<? extends ReadFilter>> getValues() {
return this.getPlugins();
}
/**
 * Rather than use the default error message, print out a list of read filters as well.
 *
 * @param pluginCategory - string, the category of the plugin (e.g. read filter)
 * @param pluginName - string, what we were trying to match (but failed to)
 * @return - A wall of text with the default message, followed by a listing of available read filters
 */
@Override
protected String formatErrorMessage(String pluginCategory, String pluginName) {
    final List<Class<? extends ReadFilter>> availableFilters = this.getPluginsImplementing(ReadFilter.class);
    final String filterListing = userFriendlyListofReadFilters(availableFilters);
    final String docsPointer = "Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.";
    return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",
            pluginName, filterListing, docsPointer);
}
/**
 * Render the available read filters as a two-column (name, documentation link) table.
 *
 * @param filters the read filter classes to list
 * @return one row per filter, names right-aligned to the longest filter name
 */
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
    final String headName = "FilterName", headDoc = "Documentation";

    // the longest filter name determines the column alignment width
    int longestNameLength = -1;
    for ( final Class<? extends ReadFilter> filter : filters ) {
        longestNameLength = Math.max(longestNameLength, this.getName(filter).length());
    }
    final String format = " %"+longestNameLength+"s %s%n";

    final StringBuilder table = new StringBuilder();
    table.append(String.format(format, headName, headDoc));
    for ( final Class<? extends ReadFilter> filter : filters ) {
        table.append(String.format(format, this.getName(filter), GATKDocUtils.helpLinksToGATKDocs(filter)));
    }
    return table.toString();
}
}

View File

@ -31,12 +31,16 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
/**
* A stub for routing and management of SAM file reading and writing.
@ -116,15 +120,15 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
*/
private boolean simplifyBAM = false;
private List<ReadTransformer> onOutputReadTransformers = null;
/**
 * Create a new stub given the requested SAM file and compression level.
 *
 * The merged diff fused the old direct field assignments with the new
 * delegating this(...) call; since a this(...) call must be the first
 * statement, only the delegation is kept.
 *
 * @param engine source of header data, maybe other data about input files.
 * @param samFile SAM file to (ultimately) create.
 */
public SAMFileWriterStub( GenomeAnalysisEngine engine, File samFile ) {
    this(engine, samFile, null);
}
/**
@ -133,8 +137,12 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
* @param stream Output stream to which data should be written.
*/
/**
 * Create a new stub that writes to the given output stream rather than a file.
 *
 * @param engine source of header data, maybe other data about input files.
 * @param stream Output stream to which data should be written.
 */
public SAMFileWriterStub( GenomeAnalysisEngine engine, OutputStream stream ) {
this(engine, null, stream);
}
/**
 * Shared constructor; exactly one of samFile / stream is expected to be non-null.
 *
 * The merged diff kept both the stale `this.samFile = null;` and the new
 * `this.samFile = samFile;` assignments — the stale one is dropped.
 *
 * @param engine  source of header data.
 * @param samFile SAM file to create, or null when writing to a stream.
 * @param stream  output stream to write to, or null when writing to a file.
 */
private SAMFileWriterStub(final GenomeAnalysisEngine engine, final File samFile, final OutputStream stream) {
    this.engine = engine;
    this.samFile = samFile;
    this.samOutputStream = stream;
}
@ -274,17 +282,29 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
this.headerOverride = header;
}
/**
 * Lazily cache the subset of the engine's read transformers that are
 * configured to run ON_OUTPUT, so addAlignment() doesn't re-filter per read.
 */
private void initializeReadTransformers() {
this.onOutputReadTransformers = new ArrayList<ReadTransformer>(engine.getReadTransformers().size());
for ( final ReadTransformer transformer : engine.getReadTransformers() ) {
if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.ON_OUTPUT )
onOutputReadTransformers.add(transformer);
}
}
/**
* @{inheritDoc}
*/
public void addAlignment( SAMRecord alignment ) {
if ( engine.getArguments().BAQMode != BAQ.CalculationMode.OFF && engine.getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_OUTPUT ) {
//System.out.printf("Writing BAQ at OUTPUT TIME%n");
baqHMM.baqRead(alignment, engine.getReferenceDataSource().getReference(), engine.getArguments().BAQMode, engine.getWalkerBAQQualityMode());
}
public void addAlignment( final SAMRecord readIn ) {
if ( onOutputReadTransformers == null )
initializeReadTransformers();
GATKSAMRecord workingRead = (GATKSAMRecord)readIn;
// run on output read transformers
for ( final ReadTransformer transform : onOutputReadTransformers )
workingRead = transform.apply(workingRead);
writeStarted = true;
outputTracker.getStorage(this).addAlignment(alignment);
outputTracker.getStorage(this).addAlignment(workingRead);
}
/**

View File

@ -0,0 +1,144 @@
package org.broadinstitute.sting.gatk.iterators;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
 * Base class used to describe a read transformer like BAQ and BQSR
 *
 * Read transformers are pluggable infrastructure that modify read state
 * either on input, on output, or within walkers themselves.
 *
 * The function apply() is called on each read seen by the GATK (after passing
 * all ReadFilters) and it can do as it sees fit (without modifying the alignment)
 * to the read to change qualities, add tags, etc.
 *
 * Initialize is called once right before the GATK traversal begins providing
 * the ReadTransformer with the ability to collect and initialize data from the
 * engine.
 *
 * Note that all ReadTransformers within the classpath are created and initialized. If one
 * shouldn't be run it should look at the command line options of the engine and override
 * enabled() accordingly.
 *
 * @since 8/31/12
 * @author depristo
 */
abstract public class ReadTransformer {
    /**
     * When should this read transform be applied?
     */
    private ApplicationTime applicationTime;

    /**
     * Keep track of whether we've been initialized already, and ensure it's not called more than once.
     */
    private boolean initialized = false;

    protected ReadTransformer() {}

    /**
     * Master initialization routine. Called to setup a ReadTransform, using its overloaded initializeSub routine.
     *
     * @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself
     * @param engine the engine, for initializing values
     * @param walker the walker we intend to run
     * @throws IllegalArgumentException if engine or walker is null
     * @throws IllegalStateException if this transformer has already been initialized
     */
    @Requires({"initialized == false", "engine != null", "walker != null"})
    @Ensures("initialized == true")
    public final void initialize(final ApplicationTime overrideTime, final GenomeAnalysisEngine engine, final Walker walker) {
        if ( engine == null ) throw new IllegalArgumentException("engine cannot be null");
        if ( walker == null ) throw new IllegalArgumentException("walker cannot be null");
        // enforce the documented single-initialization contract even when contracts are disabled
        if ( initialized ) throw new IllegalStateException("initialize() called more than once on " + this);

        this.applicationTime = initializeSub(engine, walker);
        if ( overrideTime != null ) this.applicationTime = overrideTime;
        initialized = true;
    }

    /**
     * Subclasses must override this to initialize themselves
     *
     * @param engine the engine, for initializing values
     * @param walker the walker we intend to run
     * @return the point of time we'd like this read transform to be run
     */
    @Requires({"engine != null", "walker != null"})
    @Ensures("result != null")
    protected abstract ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker);

    /**
     * Should this ReadTransformer be activated? Called after initialize, which allows this
     * read transformer to look at its arguments and decide if it should be active. All
     * ReadTransformers must override this, as by default they are not enabled.
     *
     * @return true if this ReadTransformer should be used on the read stream
     */
    public boolean enabled() {
        return false;
    }

    /**
     * Has this transformer been initialized?
     *
     * @return true if it has
     */
    public final boolean isInitialized() {
        return initialized;
    }

    /**
     * When should we apply this read transformer?
     *
     * @return the configured application time (set during initialize)
     */
    public final ApplicationTime getApplicationTime() {
        return applicationTime;
    }

    /**
     * Primary interface function for a read transform to actually do some work
     *
     * The function apply() is called on each read seen by the GATK (after passing
     * all ReadFilters) and it can do as it sees fit (without modifying the alignment)
     * to the read to change qualities, add tags, etc.
     *
     * @param read the read to transform
     * @return the transformed read
     */
    @Requires("read != null")
    @Ensures("result != null")
    abstract public GATKSAMRecord apply(final GATKSAMRecord read);

    @Override
    public String toString() {
        return getClass().getSimpleName();
    }

    /**
     * When should a read transformer be applied?
     */
    public static enum ApplicationTime {
        /**
         * Walker does not tolerate this read transformer
         */
        FORBIDDEN,

        /**
         * apply the transformation to the incoming reads, the default
         */
        ON_INPUT,

        /**
         * apply the transformation to the outgoing read stream
         */
        ON_OUTPUT,

        /**
         * the walker will deal with the calculation itself
         */
        HANDLED_IN_WALKER
    }
}

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.gatk.iterators;
import java.lang.annotation.*;
/**
* User: hanna
* Date: May 14, 2009
* Time: 1:51:22 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
 * Allows a walker type to declare when its read transformers (e.g. BAQ, BQSR)
 * should be applied to the read stream. Runtime-retained and inherited by
 * subclasses; the default is to transform reads on input.
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface ReadTransformersMode {
    /** When the engine should apply read transformers for the annotated type. */
    ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -48,9 +47,9 @@ public class VerifyingSamIterator implements StingSAMIterator {
if(cur.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || cur.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START)
throw new UserException.MalformedBAM(last,String.format("read %s has inconsistent mapping information.",cur.format()));
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
return curLoc.compareTo(lastLoc) == -1;
return (last.getReferenceIndex() > cur.getReferenceIndex()) ||
(last.getReferenceIndex().equals(cur.getReferenceIndex()) &&
last.getAlignmentStart() > cur.getAlignmentStart());
}
}

View File

@ -1,179 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
/**
* @author aaron
* <p/>
* Class ReadMetaDataTracker
* <p/>
* a read-based meta data tracker
*/
public class ReadMetaDataTracker {
/**
* The parser, used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final SAMRecord record;
// the buffer of positions and RODs we've stored
private final TreeMap<Integer, RODMetaDataContainer> mapping;
/**
* create a read meta data tracker, given the read and a queue of RODatum positions
*
* @param record the read to create offset from
* @param mapping the mapping of reference ordered datum
*/
public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping) {
this.genomeLocParser = genomeLocParser;
this.record = record;
this.mapping = mapping;
}
/**
* create an alignment of read position to reference ordered datum
*
* @param record the SAMRecord
* @param queue the queue (as a tree set)
* @param cl the class name, null if not filtered by classname
* @param name the datum track name, null if not filtered by name
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Integer, Collection<GATKFeature>> createReadAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> queue, Class cl, String name) {
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
GenomeLoc location = genomeLocParser.createGenomeLoc(record);
int length = record.getReadLength();
for (Integer loc : queue.keySet()) {
Integer position = loc - location.getStart();
if (position >= 0 && position < length) {
Collection<GATKFeature> set;
if (cl != null)
set = queue.get(loc).getSet(cl);
else
set = queue.get(loc).getSet(name);
if (set != null && set.size() > 0)
ret.put(position, set);
}
}
return ret;
}
/**
* create an alignment of read position to reference ordered datum
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Integer, Collection<GATKFeature>> createGenomeLocAlignment(SAMRecord record, TreeMap<Integer, RODMetaDataContainer> mapping, Class cl, String name) {
Map<Integer, Collection<GATKFeature>> ret = new LinkedHashMap<Integer, Collection<GATKFeature>>();
int start = record.getAlignmentStart();
int stop = record.getAlignmentEnd();
for (Integer location : mapping.keySet()) {
if (location >= start && location <= stop)
if (cl != null)
ret.put(location, mapping.get(location).getSet(cl));
else
ret.put(location, mapping.get(location).getSet(name));
}
return ret;
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping() {
return createReadAlignment(record, mapping, null, null);
}
/**
 * get the position mapping, from genomic position to ROD, over all tracks
 * (no filtering by class or track name)
 *
 * @return a mapping of genome loc position to ROD(s)
 */
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping() {
    return createGenomeLocAlignment(record, mapping, null, null);
}
/**
 * get the position mapping, from read offset to ROD, restricted to a single track
 *
 * @param name the datum track name to filter by
 *
 * @return a mapping of read offset to ROD(s)
 */
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(String name) {
    return createReadAlignment(record, mapping, null, name);
}
/**
 * get the position mapping, from genomic position to ROD, restricted to a single track
 *
 * @param name the datum track name to filter by
 *
 * @return a mapping of genome loc position to ROD(s)
 */
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(String name) {
    return createGenomeLocAlignment(record, mapping, null, name);
}
/**
 * get the position mapping, from read offset to ROD, restricted to data of a single class
 *
 * @param cl the class of reference ordered data to filter by
 *
 * @return a mapping of read offset to ROD(s)
 */
public Map<Integer, Collection<GATKFeature>> getReadOffsetMapping(Class cl) {
    return createReadAlignment(record, mapping, cl, null);
}
/**
 * get the position mapping, from genomic position to ROD, restricted to data of a single class
 *
 * @param cl the class of reference ordered data to filter by
 *
 * @return a mapping of genome loc position to ROD(s)
 */
public Map<Integer, Collection<GATKFeature>> getContigOffsetMapping(Class cl) {
    return createGenomeLocAlignment(record, mapping, cl, null);
}
/**
 * get the list of all the RODS overlapping this read, without any information about their position
 * @return a Collection (no order guaranteed), of all the RODs covering this read
 */
public List<GATKFeature> getAllCoveringRods() {
    final List<GATKFeature> result = new ArrayList<GATKFeature>();
    // only the containers matter here, not their positions, so walk values() directly
    for (final RODMetaDataContainer container : mapping.values())
        result.addAll(container.getSet());
    return result;
}
}

View File

@ -5,7 +5,6 @@ import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -32,11 +31,10 @@ import java.util.*;
* Time: 3:05:23 PM
*/
public class RefMetaDataTracker {
// TODO: this should be a list, not a map, actually
// TODO: this should be a list, not a map of bindings, actually
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
final Map<String, RODRecordList> map;
final ReferenceContext ref;
final Map<String, RODRecordList> bindings;
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
// ------------------------------------------------------------------------------------------
@ -48,28 +46,25 @@ public class RefMetaDataTracker {
// ------------------------------------------------------------------------------------------
/**
* Only for testing -- not accessible in any other context
* Create a tracker with no bindings
*/
public RefMetaDataTracker() {
ref = null;
map = Collections.emptyMap();
bindings = Collections.emptyMap();
}
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
this.ref = ref;
// set up the map
public RefMetaDataTracker(final Collection<RODRecordList> allBindings) {
// set up the bindings
if ( allBindings.isEmpty() )
map = Collections.emptyMap();
bindings = Collections.emptyMap();
else {
Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
final Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
for ( RODRecordList rod : allBindings ) {
if ( rod != null && ! rod.isEmpty() )
tmap.put(canonicalName(rod.getName()), rod);
}
// ensure that no one modifies the map itself
map = Collections.unmodifiableMap(tmap);
// ensure that no one modifies the bindings map itself
bindings = Collections.unmodifiableMap(tmap);
}
}
@ -99,7 +94,7 @@ public class RefMetaDataTracker {
@Requires({"type != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Class<T> type) {
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
return addValues(bindings.keySet(), type, new ArrayList<T>(), null, false, false);
}
/**
@ -114,7 +109,7 @@ public class RefMetaDataTracker {
@Requires({"type != null", "onlyAtThisLoc != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
return addValues(bindings.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
}
/**
@ -296,7 +291,7 @@ public class RefMetaDataTracker {
*/
@Requires({"rodBinding != null"})
public boolean hasValues(final RodBinding rodBinding) {
return map.containsKey(canonicalName(rodBinding.getName()));
return bindings.containsKey(canonicalName(rodBinding.getName()));
}
/**
@ -306,7 +301,7 @@ public class RefMetaDataTracker {
* @return List of all tracks
*/
public List<RODRecordList> getBoundRodTracks() {
return new ArrayList<RODRecordList>(map.values());
return new ArrayList<RODRecordList>(bindings.values());
}
/**
@ -314,38 +309,30 @@ public class RefMetaDataTracker {
* @return the number of tracks with at least one bound Feature
*/
public int getNTracksWithBoundFeatures() {
return map.size();
return bindings.size();
}
// ------------------------------------------------------------------------------------------
//
//
// old style accessors
//
// TODO -- DELETE ME
//
//
// Protected accessors using strings for unit testing
// ------------------------------------------------------------------------------------------
@Deprecated
public boolean hasValues(final String name) {
return map.containsKey(canonicalName(name));
protected boolean hasValues(final String name) {
return bindings.containsKey(canonicalName(name));
}
@Deprecated
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
protected <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
}
@Deprecated
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
protected <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
}
@Deprecated
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
protected <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
return safeGetFirst(getValues(type, name));
}
@Deprecated
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
protected <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
}
@ -366,7 +353,7 @@ public class RefMetaDataTracker {
* @return
*/
@Requires({"l != null"})
final private <T extends Feature> T safeGetFirst(final List<T> l) {
private <T extends Feature> T safeGetFirst(final List<T> l) {
return l.isEmpty() ? null : l.get(0);
}
@ -435,7 +422,7 @@ public class RefMetaDataTracker {
*/
private RODRecordList getTrackDataByName(final String name) {
final String luName = canonicalName(name);
RODRecordList l = map.get(luName);
RODRecordList l = bindings.get(luName);
return l == null ? EMPTY_ROD_RECORD_LIST : l;
}
@ -448,7 +435,7 @@ public class RefMetaDataTracker {
* @param name the name of the rod
* @return canonical name of the rod
*/
private final String canonicalName(final String name) {
private String canonicalName(final String name) {
// todo -- remove me after switch to RodBinding syntax
return name.toLowerCase();
}

View File

@ -185,7 +185,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
*/
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
// Just want to output the active regions to a file, not actually process them
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion : workQueue ) {
for( final ActiveRegion activeRegion : workQueue ) {
if( activeRegion.isActive ) {
walker.activeRegionOutStream.println( activeRegion.getLocation() );
}
@ -198,7 +198,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
while( workQueue.peek() != null ) {
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion = workQueue.remove();
final ActiveRegion activeRegion = workQueue.remove();
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
} else {
break;
@ -208,15 +208,15 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
return sum;
}
private T processActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord read : reads ) {
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
org.broadinstitute.sting.utils.activeregion.ActiveRegion bestRegion = activeRegion;
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) {
ActiveRegion bestRegion = activeRegion;
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc );
bestRegion = otherRegionToTest;
@ -229,7 +229,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
if( !bestRegion.equals(activeRegion) ) {
activeRegion.add( read );
}
for( final org.broadinstitute.sting.utils.activeregion.ActiveRegion otherRegionToTest : workQueue ) {
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
otherRegionToTest.add( read );
}
@ -241,6 +241,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
}
}
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
final M x = walker.map( activeRegion, null );

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrd
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -91,7 +91,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
dataProvider.getShard().getReadMetrics().incrementNumIterations();
// if the read is mapped, create a metadata tracker
final ReadMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
if (keepMeP) {

View File

@ -27,17 +27,21 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
import java.util.List;
/**
* @author aaron
* @version 1.0
@ -51,12 +55,13 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
/** our log, which we want to capture anything from this class */
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
private static final boolean DEBUG = false;
final NanoScheduler<SAMRecord, M, T> nanoScheduler;
private static final int MIN_GROUP_SIZE = 100;
final NanoScheduler<MapData, M, T> nanoScheduler;
public TraverseReadsNano(int nThreads) {
final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
final int mapGroupSize = bufferSize / 10 + 1;
nanoScheduler = new NanoScheduler<SAMRecord, M, T>(bufferSize, mapGroupSize, nThreads);
final int mapGroupSize = (int)Math.max(Math.ceil(bufferSize / 50.0 + 1), MIN_GROUP_SIZE);
nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, mapGroupSize, nThreads);
}
@Override
@ -80,24 +85,42 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
if ( dataProvider.hasReferenceOrderedData() )
throw new ReviewedStingException("Parallel read walkers currently don't support access to reference ordered data");
final ReadView reads = new ReadView(dataProvider);
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
nanoScheduler.setDebug(DEBUG);
final TraverseReadsMap myMap = new TraverseReadsMap(reads, reference, rodView, walker);
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
T result = nanoScheduler.execute(reads.iterator().iterator(), myMap, sum, myReduce);
T result = nanoScheduler.execute(aggregateMapData(dataProvider).iterator(), myMap, sum, myReduce);
// TODO -- how do we print progress?
//printProgress(dataProvider.getShard(), ???);
return result;
}
private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
final ReadView reads = new ReadView(dataProvider);
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
final List<MapData> mapData = new ArrayList<MapData>(); // TODO -- need size of reads
for ( final SAMRecord read : reads ) {
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
? reference.getReferenceContext(read)
: null;
// if the read is mapped, create a metadata tracker
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0
? rodView.getReferenceOrderedDataForRead(read)
: null;
// update the number of reads we've seen
dataProvider.getShard().getReadMetrics().incrementNumIterations();
mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
}
return mapData;
}
@Override
public void printOnTraversalDone() {
nanoScheduler.shutdown();
@ -117,36 +140,31 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
}
}
private class TraverseReadsMap implements MapFunction<SAMRecord, M> {
final ReadView reads;
final ReadReferenceView reference;
final ReadBasedReferenceOrderedView rodView;
private class MapData {
final GATKSAMRecord read;
final ReferenceContext refContext;
final RefMetaDataTracker tracker;
private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) {
this.read = read;
this.refContext = refContext;
this.tracker = tracker;
}
}
private class TraverseReadsMap implements MapFunction<MapData, M> {
final ReadWalker<M,T> walker;
private TraverseReadsMap(ReadView reads, ReadReferenceView reference, ReadBasedReferenceOrderedView rodView, ReadWalker<M, T> walker) {
this.reads = reads;
this.reference = reference;
this.rodView = rodView;
private TraverseReadsMap(ReadWalker<M, T> walker) {
this.walker = walker;
}
@Override
public M apply(final SAMRecord read) {
public M apply(final MapData data) {
if ( ! walker.isDone() ) {
// ReferenceContext -- the reference bases covered by the read
final ReferenceContext refContext = ! read.getReadUnmappedFlag() && reference != null
? reference.getReferenceContext(read)
: null;
// update the number of reads we've seen
//dataProvider.getShard().getReadMetrics().incrementNumIterations();
// if the read is mapped, create a metadata tracker
final ReadMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
final boolean keepMeP = walker.filter(data.refContext, data.read);
if (keepMeP) {
return walker.map(refContext, (GATKSAMRecord) read, tracker);
return walker.map(data.refContext, data.read, data.tracker);
}
}

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
@ -77,7 +78,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
public final GenomeLocSortedSet extendIntervals( final GenomeLocSortedSet intervals, final GenomeLocParser genomeLocParser, IndexedFastaSequenceFile reference ) {
final int activeRegionExtension = this.getClass().getAnnotation(ActiveRegionExtension.class).extension();

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import java.lang.annotation.*;
/**
@ -25,5 +27,5 @@ import java.lang.annotation.*;
@Target(ElementType.TYPE)
public @interface BAQMode {
public abstract org.broadinstitute.sting.utils.baq.BAQ.QualityMode QualityMode() default org.broadinstitute.sting.utils.baq.BAQ.QualityMode.OVERWRITE_QUALS;
public abstract org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime ApplicationTime() default org.broadinstitute.sting.utils.baq.BAQ.ApplicationTime.ON_INPUT;
public abstract ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
}

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ClippingOp;
@ -299,7 +299,7 @@ public class ClipReads extends ReadWalker<ClipReads.ReadClipperWithData, ClipRea
* @param read the read itself, as a GATKSAMRecord
* @return the ReadClipper object describing what should be done to clip this read
*/
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES )
read = ReadClipper.revertSoftClippedBases(read);

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -45,7 +45,7 @@ import java.text.NumberFormat;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements TreeReducible<FlagStat.FlagStatus> {
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements ThreadSafeMapReduce {
@Output
PrintStream out;
@ -179,7 +179,7 @@ public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatu
@Override
public FlagStatus map( final ReferenceContext ref, final GATKSAMRecord read, final ReadMetaDataTracker metaDataTracker ) {
public FlagStatus map( final ReferenceContext ref, final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker ) {
return new FlagStatus().add(read);
}
@ -193,11 +193,6 @@ public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatu
return sum.add(value);
}
@Override
public FlagStatus treeReduce(final FlagStatus value, final FlagStatus sum) {
return sum.add(value);
}
@Override
public void onTraversalDone(final FlagStatus result) {
out.println(result.toString());

View File

@ -45,19 +45,8 @@ import java.util.Collections;
import java.util.List;
/**
* Prints the alignment in the pileup format. In the pileup format, each line represents a genomic position,
* consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping
* qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all
* encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand,
* a comma for a match on the reverse strand, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the
* reverse strand.
*
* A pattern '\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next
* reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence.
* Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference.
* Also at the read base column, a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read
* separated by 'N/S/H' CIGAR operations. The ASCII of the character following '^' minus 33 gives the mapping quality.
* A symbol '$' marks the end of a read segment.
* Prints the alignment in something similar to the samtools pileup format. Each line represents a genomic position,
* consisting of chromosome name, coordinate, reference base, read bases, and read qualities.
*
* Associated command:
* samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>

View File

@ -32,17 +32,16 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.ReadTransformersMode;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
import java.util.Collection;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.*;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
@ -91,9 +90,10 @@ import java.util.TreeSet;
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements TreeReducible<SAMFileWriter> {
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
@Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
SAMFileWriter out;
@ -138,6 +138,7 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
public boolean simplifyReads = false;
List<ReadTransformer> readTransformers = Collections.emptyList();
private TreeSet<String> samplesToChoose = new TreeSet<String>();
private boolean SAMPLES_SPECIFIED = false;
@ -150,6 +151,9 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
if ( platform != null )
platform = platform.toUpperCase();
if ( getToolkit() != null )
readTransformers = getToolkit().getReadTransformers();
Collection<String> samplesFromFile;
if (!sampleFile.isEmpty()) {
samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFile);
@ -217,11 +221,20 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a GATKSAMRecord
* @param readIn the read itself, as a GATKSAMRecord
* @return the read itself
*/
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return simplifyReads ? read.simplify() : read;
public GATKSAMRecord map( ReferenceContext ref, GATKSAMRecord readIn, RefMetaDataTracker metaDataTracker ) {
GATKSAMRecord workingRead = readIn;
for ( final ReadTransformer transformer : readTransformers ) {
if ( logger.isDebugEnabled() ) logger.debug("Applying transformer " + transformer + " to read " + readIn.getReadName());
workingRead = transformer.apply(workingRead);
}
if ( simplifyReads ) workingRead = workingRead.simplify();
return workingRead;
}
/**
@ -245,9 +258,4 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
output.addAlignment(read);
return output;
}
@Override
public SAMFileWriter treeReduce(SAMFileWriter lhs, SAMFileWriter rhs) {
return lhs; // nothing to do
}
}

View File

@ -1,8 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
@ -27,5 +26,5 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
}
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker);
}

View File

@ -33,7 +33,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -64,7 +64,7 @@ public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWrite
logger.info("SplitSamFile version: " + VERSION);
}
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
return read;
}

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers;
/**
* Root parallelism interface. Walkers that implement this
* declare that their map function is thread-safe and so multiple
* map calls can be run in parallel in the same JVM instance.
*/
public interface ThreadSafeMapReduce {
}

View File

@ -13,7 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
* shards of the data can reduce with each other, and the composite result
* can be reduced with other composite results.
*/
public interface TreeReducible<ReduceType> {
public interface TreeReducible<ReduceType> extends ThreadSafeMapReduce {
/**
* A composite, 'reduce of reduces' function.
* @param lhs 'left-most' portion of data in the composite reduce.

View File

@ -30,12 +30,14 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.recalibration.BQSRMode;
import java.util.List;
@ -48,7 +50,8 @@ import java.util.List;
*/
@ReadFilters(MalformedReadFilter.class)
@PartitionBy(PartitionType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@BQSRMode(ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@DocumentedGATKFeature(groupName = "Uncategorized", extraDocs = {CommandLineGATK.class})
public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class);

View File

@ -33,6 +33,9 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
final Genotype g,
final GenotypeBuilder gb,
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
if ( stratifiedContext == null )
return;
Double ratio = annotateSNP(stratifiedContext, vc, g);
if (ratio == null)
return;

View File

@ -54,7 +54,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
final Genotype g,
final GenotypeBuilder gb,
final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
if ( g == null || !g.isCalled() )
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
return;
if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())

View File

@ -55,7 +55,7 @@ public class MappingQualityZeroBySample extends GenotypeAnnotation {
final Genotype g,
final GenotypeBuilder gb,
final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
if ( g == null || !g.isCalled() )
if ( g == null || !g.isCalled() || stratifiedContext == null )
return;
int mq0 = 0;

View File

@ -300,16 +300,12 @@ public class VariantAnnotatorEngine {
if (stratifiedPerReadAlleleLikelihoodMap != null)
perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
if ( context == null && perReadAlleleLikelihoodMap == null) {
// no likelihoods nor pileup available: just move on to next sample
genotypes.add(genotype);
} else {
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
}
genotypes.add(gb.make());
final GenotypeBuilder gb = new GenotypeBuilder(genotype);
for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
}
genotypes.add(gb.make());
}
return genotypes;

View File

@ -32,10 +32,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -46,6 +45,7 @@ import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -104,7 +104,7 @@ import java.util.ArrayList;
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
@By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality

View File

@ -35,5 +35,5 @@ public interface RecalibrationEngine {
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase);
public void updateDataForRead(final GATKSAMRecord read, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
}

View File

@ -93,7 +93,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
}
@Override
public synchronized void updateDataForRead( final GATKSAMRecord read, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
public synchronized void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version");
}

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -140,7 +140,7 @@ public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
}
@Override
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, ReadMetaDataTracker readMetaDataTracker) {
public Integer map(ReferenceContext referenceContext, GATKSAMRecord read, RefMetaDataTracker RefMetaDataTracker) {
final String rgID = read.getReadGroup().getId();
final PerReadGroupInfo info = readGroupInfo.get(rgID);

View File

@ -4,7 +4,7 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -74,7 +74,7 @@ public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
}
@Override
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, RefMetaDataTracker RefMetaDataTracker) {
GATKReportTable table = report.getTable("ReadLengthDistribution");
int length = Math.abs(samRecord.getReadLength());

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
@ -117,7 +118,7 @@ import java.util.*;
*/
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_INPUT)
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)

View File

@ -36,8 +36,8 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.*;
@ -112,7 +112,7 @@ import java.util.*;
* @author ebanks
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.ON_OUTPUT)
public class IndelRealigner extends ReadWalker<Integer, Integer> {
public static final String ORIGINAL_CIGAR_TAG = "OC";
@ -473,7 +473,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
readsActuallyCleaned.clear();
}
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
if ( currentInterval == null ) {
emit(read);
return 0;
@ -540,7 +540,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// TODO -- it would be nice if we could use indels from 454/Ion reads as alternate consenses
}
private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
if ( readsToClean.size() > 0 ) {
GenomeLoc earliestPossibleMove = getToolkit().getGenomeLocParser().createGenomeLoc(readsToClean.getReads().get(0));
if ( manager.canMoveReads(earliestPossibleMove) )
@ -619,17 +619,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
}
private void populateKnownIndels(ReadMetaDataTracker metaDataTracker, ReferenceContext ref) {
for ( Collection<GATKFeature> rods : metaDataTracker.getContigOffsetMapping().values() ) {
Iterator<GATKFeature> rodIter = rods.iterator();
while ( rodIter.hasNext() ) {
Object rod = rodIter.next().getUnderlyingObject();
if ( indelRodsSeen.contains(rod) )
continue;
indelRodsSeen.add(rod);
if ( rod instanceof VariantContext )
knownIndelsToTry.add((VariantContext)rod);
}
private void populateKnownIndels(RefMetaDataTracker metaDataTracker, ReferenceContext ref) {
for ( final VariantContext vc : metaDataTracker.getValues(known) ) {
if ( indelRodsSeen.contains(vc) )
continue;
indelRodsSeen.add(vc);
knownIndelsToTry.add(vc);
}
}

View File

@ -27,12 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.Cigar;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
@ -80,9 +79,9 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
writer.addAlignment(read);
}
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
// we can not deal with screwy records
if ( read.getCigar().numCigarElements() == 0 ) {
if ( read.getReadUnmappedFlag() || read.getCigar().numCigarElements() == 0 ) {
emit(read);
return 0;
}

View File

@ -33,10 +33,10 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -101,7 +101,7 @@ import java.util.TreeSet;
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Event, RealignerTargetCreator.EventPair> implements TreeReducible<RealignerTargetCreator.EventPair> {
/**

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
@ -477,7 +477,7 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
@Override
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
// if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ");
@ -1181,10 +1181,10 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
if ( event_length == 0 ) { // insertion
l.add( Allele.create(referencePaddingBase,true) );
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), false ));
l.add( Allele.create((char)referencePaddingBase + new String(call.getVariant().getBases()), false ));
} else { //deletion:
l.add( Allele.create(referencePaddingBase + call.getVariant().getBases(), true ));
l.add( Allele.create((char)referencePaddingBase + new String(call.getVariant().getBases()), true ));
l.add( Allele.create(referencePaddingBase,false) );
}
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountBases extends ReadWalker<Integer, Long> {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return read.getReadLength();
}

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource;
@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountMales extends ReadWalker<Integer, Integer> {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
Sample sample = getSampleDB().getSample(read);
return sample.getGender() == Gender.MALE ? 1 : 0;
}

View File

@ -4,7 +4,7 @@ import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -47,7 +47,7 @@ public class CountReadEvents extends ReadWalker<Map<CigarOperator, ArrayList<Int
@Output (doc = "GATKReport table output")
PrintStream out;
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return ReadUtils.getCigarOperatorForAllBases(read);
}

View File

@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.ThreadSafeMapReduce;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -41,12 +41,11 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReads extends ReadWalker<Integer, Integer> implements TreeReducible<Integer> {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
public class CountReads extends ReadWalker<Integer, Integer> implements ThreadSafeMapReduce {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return 1;
}
@Override public Integer reduceInit() { return 0; }
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override public Integer treeReduce(Integer lhs, Integer rhs) { return lhs + rhs; }
}

View File

@ -4,7 +4,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -41,7 +41,7 @@ import java.util.List;
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountTerminusEvent extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
List<CigarElement> cigarElements = read.getCigar().getCigarElements();
CigarElement lastElement = null;

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -75,7 +75,7 @@ public class ReadClippingStats extends ReadWalker<ReadClippingStats.ReadClipping
int readLength, nClippingEvents, nClippedBases;
}
public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
return null;

View File

@ -39,11 +39,11 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import java.io.File;
import java.util.*;
@ -218,7 +218,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
String filterString = null;
// Annotate the new record with its VQSLOD and the worst performing annotation
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lodString); // use the String representation so that we don't lose precision on output
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
for( int i = tranches.size() - 1; i >= 0; i-- ) {

View File

@ -286,7 +286,6 @@ public class VariantDataManager {
case INDEL:
case MIXED:
case SYMBOLIC:
case STRUCTURAL_INDEL:
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
default:
return false;

View File

@ -125,6 +125,15 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
return ! discontinuousP( that );
}
/**
* Return true if this GenomeLoc represents the UNMAPPED location
* @return
*/
public final boolean isUnmapped() {
return isUnmapped(this);
}
/**
* Returns a new GenomeLoc that represents the entire span of this and that. Requires that
* this and that GenomeLoc are contiguous and both mapped
@ -141,7 +150,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
}
if (!(this.contiguousP(that))) {
throw new ReviewedStingException("The two genome loc's need to be contigous");
throw new ReviewedStingException("The two genome loc's need to be contiguous");
}
return new GenomeLoc(getContig(), this.contigIndex,
@ -418,7 +427,10 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
result = cmpContig;
} else {
if ( this.getStart() < that.getStart() ) result = -1;
if ( this.getStart() > that.getStart() ) result = 1;
else if ( this.getStart() > that.getStart() ) result = 1;
// these have the same start, so check the ends
else if ( this.getStop() < that.getStop() ) result = -1;
else if ( this.getStop() > that.getStop() ) result = 1;
}
}

View File

@ -52,13 +52,6 @@ public class BAQ {
DONT_MODIFY // do the BAQ, but don't modify the quality scores themselves, just return them in the function.
}
public enum ApplicationTime {
FORBIDDEN, // Walker does not tolerate BAQ input
ON_INPUT, // apply the BAQ calculation to the incoming reads, the default
ON_OUTPUT, // apply the BAQ calculation to outgoing read streams
HANDLED_IN_WALKER // the walker will deal with the BAQ calculation status itself
}
public static final String BAQ_TAG = "BQ";
private static double[] qual2prob = new double[256];
@ -68,7 +61,7 @@ public class BAQ {
}
// Phred scaled now (changed 1/10/2011)
public static double DEFAULT_GOP = 40;
public static final double DEFAULT_GOP = 40;
/* Takes a Phred Scale quality score and returns the error probability.
*
@ -110,10 +103,19 @@ public class BAQ {
* Use defaults for everything
*/
public BAQ() {
cd = convertFromPhredScale(DEFAULT_GOP);
this(DEFAULT_GOP);
}
/**
* Use defaults for everything
*/
public BAQ(final double gapOpenPenalty) {
cd = convertFromPhredScale(gapOpenPenalty);
initializeCachedData();
}
/**
* Create a new HmmGlocal object with specified parameters
*

View File

@ -0,0 +1,49 @@
package org.broadinstitute.sting.utils.baq;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Applies Heng's BAQ calculation to a stream of incoming reads
*/
public class BAQReadTransformer extends ReadTransformer {
private BAQ baqHMM;
private IndexedFastaSequenceFile refReader;
private BAQ.CalculationMode cmode;
private BAQ.QualityMode qmode;
@Override
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
final BAQMode mode = WalkerManager.getWalkerAnnotation(walker, BAQMode.class);
this.refReader = engine.getReferenceDataSource().getReference();
this.cmode = engine.getArguments().BAQMode;
this.qmode = mode.QualityMode();
baqHMM = new BAQ(engine.getArguments().BAQGOP);
if ( qmode == BAQ.QualityMode.DONT_MODIFY )
throw new ReviewedStingException("BUG: shouldn't create BAQ transformer with quality mode DONT_MODIFY");
if ( mode.ApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN && enabled() )
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + cmode + " was requested.");
return mode.ApplicationTime();
}
@Override
public boolean enabled() {
return cmode != BAQ.CalculationMode.OFF;
}
@Override
public GATKSAMRecord apply(final GATKSAMRecord read) {
baqHMM.baqRead(read, refReader, cmode, qmode);
return read;
}
}

View File

@ -1,59 +0,0 @@
package org.broadinstitute.sting.utils.baq;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Iterator;
/**
* Simple iterator that applies Heng's BAQ calculation to a stream of incoming reads
*/
public class BAQSamIterator implements StingSAMIterator {
private final StingSAMIterator it;
private final BAQ baqHMM = new BAQ(); // creates a BAQ creator with default parameters
private final IndexedFastaSequenceFile refReader;
private final BAQ.CalculationMode cmode;
private final BAQ.QualityMode qmode;
/**
* Creates a new BAMSamIterator using the reference getter refReader and applies the BAM to the reads coming
* in from it. See BAQ docs for baqType information.
*
* @param refReader
* @param it
* @param cmode
* @param qmode
*/
@Requires({
"refReader != null",
"it != null",
"cmode != null" ,
"qmode != null"})
public BAQSamIterator(IndexedFastaSequenceFile refReader, StingSAMIterator it, BAQ.CalculationMode cmode, BAQ.QualityMode qmode) {
if ( cmode == BAQ.CalculationMode.OFF ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with calculation mode OFF");
if ( qmode == BAQ.QualityMode.DONT_MODIFY ) throw new ReviewedStingException("BUG: shouldn't create BAQSamIterator with quailty mode DONT_MODIFY");
this.refReader = refReader;
this.it = it;
this.cmode = cmode;
this.qmode = qmode;
}
@Requires("hasNext()")
@Ensures("result != null")
public SAMRecord next() {
//System.out.printf("BAQing during input%n");
SAMRecord read = it.next();
baqHMM.baqRead(read, refReader, cmode, qmode);
return read;
}
public boolean hasNext() { return this.it.hasNext(); }
public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
public void close() { it.close(); }
public Iterator<SAMRecord> iterator() { return this; }
}

View File

@ -0,0 +1,44 @@
package org.broadinstitute.sting.utils.baq;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
/**
 * Iterator that applies a ReadTransformer to a stream of reads
 */
public class ReadTransformingIterator implements StingSAMIterator {
    private final StingSAMIterator underlyingIterator;
    private final ReadTransformer readTransformer;

    /**
     * Creates a new ReadTransforming iterator wrapping {@code it}.
     * The transformer must already be initialized and must not be FORBIDDEN.
     */
    @Requires({"it != null", "transformer != null", "transformer.isInitialized()"})
    public ReadTransformingIterator(final StingSAMIterator it, final ReadTransformer transformer) {
        if ( ! transformer.isInitialized() )
            throw new IllegalStateException("Creating a read transformer stream for an uninitialized read transformer: " + transformer);
        if ( transformer.getApplicationTime() == ReadTransformer.ApplicationTime.FORBIDDEN )
            throw new IllegalStateException("Creating a read transformer stream for a forbidden transformer " + transformer);
        this.underlyingIterator = it;
        this.readTransformer = transformer;
    }

    /** Pulls the next read and returns the transformer's result for it. */
    @Requires("hasNext()")
    @Ensures("result != null")
    public SAMRecord next() {
        return readTransformer.apply((GATKSAMRecord) underlyingIterator.next());
    }

    public boolean hasNext() { return underlyingIterator.hasNext(); }
    public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
    public void close() { underlyingIterator.close(); }
    public Iterator<SAMRecord> iterator() { return this; }
}

View File

@ -27,6 +27,8 @@ package org.broadinstitute.sting.utils.classloader;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -276,8 +278,16 @@ public class PluginManager<PluginType> {
*/
public PluginType createByName(String pluginName) {
Class<? extends PluginType> plugin = pluginsByName.get(pluginName);
if( plugin == null )
throw new UserException(String.format("Could not find %s with name: %s", pluginCategory,pluginName));
if( plugin == null ) {
String errorMessage = formatErrorMessage(pluginCategory,pluginName);
if ( this.getClass().isAssignableFrom(FilterManager.class) ) {
throw new UserException.MalformedReadFilterException(errorMessage);
} else if ( this.getClass().isAssignableFrom(WalkerManager.class) ) {
throw new UserException.MalformedWalkerArgumentsException(errorMessage);
} else {
throw new UserException.CommandLineException(errorMessage);
}
}
try {
return plugin.newInstance();
} catch (Exception e) {
@ -330,4 +340,14 @@ public class PluginManager<PluginType> {
return pluginName;
}
/**
 * Generate the error message for the plugin manager; subclasses may override to
 * tailor the message to their plugin category.
 *
 * @param pluginCategory the category of the plugin (e.g. read filter)
 * @param pluginName     the name we tried (but failed) to match
 * @return error message text describing the failed lookup
 */
protected String formatErrorMessage(String pluginCategory, String pluginName ) {
    return "Could not find " + pluginCategory + " with name: " + pluginName;
}
}

View File

@ -63,6 +63,18 @@ public class UserException extends ReviewedStingException {
}
}
/** Thrown when the named read filter cannot be resolved; see PluginManager.createByName. */
public static class MalformedReadFilterException extends CommandLineException {
    public MalformedReadFilterException(String message) {
        super("Malformed read filter: " + message);
    }
}
/** Thrown when the named walker cannot be resolved; see PluginManager.createByName. */
public static class MalformedWalkerArgumentsException extends CommandLineException {
    public MalformedWalkerArgumentsException(String message) {
        super("Malformed walker argument: " + message);
    }
}
public static class MalformedGenomeLoc extends UserException {
public MalformedGenomeLoc(String message, GenomeLoc loc) {
super(String.format("Badly formed genome loc: %s: %s", message, loc));

View File

@ -128,22 +128,13 @@ public class FragmentUtils {
return create(reads, reads.size(), SamRecordGetter);
}
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( List<GATKSAMRecord> overlappingPair ) {
public final static List<GATKSAMRecord> mergeOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) {
final byte MIN_QUAL_BAD_OVERLAP = 16;
if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); }
GATKSAMRecord firstRead = overlappingPair.get(0);
GATKSAMRecord secondRead = overlappingPair.get(1);
/*
System.out.println("read 0 unclipped start:"+overlappingPair.get(0).getUnclippedStart());
System.out.println("read 0 unclipped end:"+overlappingPair.get(0).getUnclippedEnd());
System.out.println("read 1 unclipped start:"+overlappingPair.get(1).getUnclippedStart());
System.out.println("read 1 unclipped end:"+overlappingPair.get(1).getUnclippedEnd());
System.out.println("read 0 start:"+overlappingPair.get(0).getAlignmentStart());
System.out.println("read 0 end:"+overlappingPair.get(0).getAlignmentEnd());
System.out.println("read 1 start:"+overlappingPair.get(1).getAlignmentStart());
System.out.println("read 1 end:"+overlappingPair.get(1).getAlignmentEnd());
*/
if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) {
firstRead = overlappingPair.get(1); // swap them
secondRead = overlappingPair.get(0);
@ -155,15 +146,6 @@ public class FragmentUtils {
return overlappingPair; // fragments contain indels so don't merge them
}
/* // check for inconsistent start positions between uncliped/soft alignment starts
if (secondRead.getAlignmentStart() >= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() < firstRead.getUnclippedStart())
return overlappingPair;
if (secondRead.getAlignmentStart() <= firstRead.getAlignmentStart() && secondRead.getUnclippedStart() > firstRead.getUnclippedStart())
return overlappingPair;
if (secondRead.getUnclippedStart() < firstRead.getAlignmentEnd() && secondRead.getAlignmentStart() >= firstRead.getAlignmentEnd())
return overlappingPair;
*/
final Pair<Integer, Boolean> pair = ReadUtils.getReadCoordinateForReferenceCoordinate(firstRead, secondRead.getSoftStart());
final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() );
@ -183,7 +165,7 @@ public class FragmentUtils {
}
for(int iii = firstReadStop; iii < firstRead.getReadLength(); iii++) {
if( firstReadQuals[iii] > MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] > MIN_QUAL_BAD_OVERLAP && firstReadBases[iii] != secondReadBases[iii-firstReadStop] ) {
return overlappingPair;// high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
return overlappingPair; // high qual bases don't match exactly, probably indel in only one of the fragments, so don't merge them
}
if( firstReadQuals[iii] < MIN_QUAL_BAD_OVERLAP && secondReadQuals[iii-firstReadStop] < MIN_QUAL_BAD_OVERLAP ) {
return overlappingPair; // both reads have low qual bases in the overlap region so don't merge them because don't know what is going on
@ -197,7 +179,7 @@ public class FragmentUtils {
}
final GATKSAMRecord returnRead = new GATKSAMRecord( firstRead.getHeader() );
returnRead.setAlignmentStart( firstRead.getUnclippedStart() );
returnRead.setAlignmentStart( firstRead.getSoftStart() );
returnRead.setReadBases( bases );
returnRead.setBaseQualities( quals );
returnRead.setReadGroup( firstRead.getReadGroup() );

View File

@ -43,7 +43,8 @@ import java.util.concurrent.*;
* Time: 9:47 AM
*/
public class NanoScheduler<InputType, MapType, ReduceType> {
private static Logger logger = Logger.getLogger(NanoScheduler.class);
private final static Logger logger = Logger.getLogger(NanoScheduler.class);
private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
final int bufferSize;
final int mapGroupSize;
@ -79,7 +80,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
this.mapGroupSize = mapGroupSize;
}
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads - 1);
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
}
/**
@ -172,7 +173,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
if ( getnThreads() == 1 ) {
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
return executeSingleThreaded(inputReader, map, initialValue, reduce);
} else {
return executeMultiThreaded(inputReader, map, initialValue, reduce);

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import java.lang.annotation.*;
/**
* User: hanna
* Date: May 14, 2009
* Time: 1:51:22 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
 * Type annotation consumed by BQSRReadTransformer to decide when on-the-fly
 * base-quality recalibration should be applied to reads for the annotated walker.
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface BQSRMode {
    // Element name is intentionally UpperCamelCase to match existing call sites
    // (mode.ApplicationTime()).  `public abstract` is implicit on annotation elements.
    ReadTransformer.ApplicationTime ApplicationTime() default ReadTransformer.ApplicationTime.ON_INPUT;
}

View File

@ -0,0 +1,40 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
 * A ReadTransformer that applies BQSR on the fly to reads
 *
 * User: rpoplin
 * Date: 2/13/12
 */
public class BQSRReadTransformer extends ReadTransformer {
    private boolean enabled;          // true only when the engine has BQSR tables available
    private BaseRecalibration bqsr;   // holds the BQSR tables and applies them to reads

    /**
     * Captures the engine's BQSR state and determines, from the walker's
     * {@link BQSRMode} annotation, when this transformer should be applied.
     *
     * @param engine the engine providing BaseRecalibration (absence disables this transformer)
     * @param walker the walker whose BQSRMode annotation supplies the application time
     * @return the application time declared by the walker's BQSRMode annotation
     */
    @Override
    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
        this.enabled = engine.hasBaseRecalibration();
        this.bqsr = engine.getBaseRecalibration();
        final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class);
        return mode.ApplicationTime();
    }

    @Override
    public boolean enabled() {
        return enabled;
    }

    /**
     * Recalibrates the read's quality scores in place using the engine's BQSR tables.
     * NOTE(review): the original javadoc here ("initialize a new BQSRReadTransformer...")
     * was misplaced constructor-style text; corrected to describe apply().
     *
     * @param read the read to recalibrate; modified in place
     * @return the same (now recalibrated) read
     */
    @Override
    public GATKSAMRecord apply(GATKSAMRecord read) {
        bqsr.recalibrateRead(read);
        return read;
    }
}

View File

@ -1,50 +0,0 @@
package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
/**
 * Iterator wrapper that recalibrates (BQSR) each read in place as it streams by.
 *
 * User: rpoplin
 * Date: 2/13/12
 */
public class BQSRSamIterator implements StingSAMIterator {
    private final StingSAMIterator wrappedIterator;
    private final BaseRecalibration recalibrator;

    /**
     * Creates a new BQSRSamIterator and applies BQSR on the fly to incoming reads.
     *
     * @param it The incoming SamIterator to wrap
     * @param bqsr The object which holds the BQSR table information and knows how to apply it
     */
    @Requires({
            "it != null",
            "bqsr != null"})
    public BQSRSamIterator(StingSAMIterator it, BaseRecalibration bqsr) {
        if ( bqsr == null ) throw new ReviewedStingException("BUG: shouldn't create BQSRSamIterator with null recalibration object");
        this.wrappedIterator = it;
        this.recalibrator = bqsr;
    }

    /** Pulls the next read from the wrapped iterator and recalibrates it in place. */
    @Requires("hasNext()")
    @Ensures("result != null")
    public SAMRecord next() {
        final GATKSAMRecord read = (GATKSAMRecord) wrappedIterator.next();
        recalibrator.recalibrateRead(read);
        return read;
    }

    public boolean hasNext() { return wrappedIterator.hasNext(); }
    public void remove() { throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!"); }
    public void close() { wrappedIterator.close(); }
    public Iterator<SAMRecord> iterator() { return this; }
}

View File

@ -27,12 +27,11 @@ package org.broadinstitute.sting.utils.recalibration;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -46,7 +45,6 @@ import java.io.File;
public class BaseRecalibration {
private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000;
private final ReadCovariates readCovariates;
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
private final RecalibrationTables recalibrationTables;
@ -56,10 +54,23 @@ public class BaseRecalibration {
private final int preserveQLessThan;
private final boolean emitOriginalQuals;
private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
static {
for (int i = 0; i < EventType.values().length; i++)
qualityScoreByFullCovariateKey[i] = new NestedHashMap();
// TODO -- was this supposed to be used somewhere?
// private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
// static {
// for (int i = 0; i < EventType.values().length; i++)
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
// }
/**
* Thread local cache to allow multi-threaded use of this class
*/
private ThreadLocal<ReadCovariates> readCovariatesCache;
{
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
@Override protected ReadCovariates initialValue() {
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
}
};
}
/**
@ -81,7 +92,6 @@ public class BaseRecalibration {
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wants to use what's in the report.
quantizationInfo.quantizeQualityScores(quantizationLevels);
readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
this.disableIndelQuals = disableIndelQuals;
this.preserveQLessThan = preserveQLessThan;
this.emitOriginalQuals = emitOriginalQuals;
@ -104,6 +114,11 @@ public class BaseRecalibration {
}
// Compute all covariates for the read
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
// TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here
// TODO -- needs to be fixed.
final ReadCovariates readCovariates = readCovariatesCache.get().clear();
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
@ -130,6 +145,7 @@ public class BaseRecalibration {
}
}
/**
* Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
@ -147,7 +163,7 @@ public class BaseRecalibration {
* @param errorModel the event type
* @return A recalibrated quality score as a byte
*/
protected byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) {
final byte qualFromRead = (byte)(long)key[1];
final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE), key, errorModel);

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils.recalibration;
import java.util.Arrays;
/**
* The object temporarily held by a read that describes all of it's covariates.
*
@ -21,6 +23,17 @@ public class ReadCovariates {
currentCovariateIndex = index;
}
/**
 * Zeroes every covariate key for all event types and read offsets.
 * Necessary due to a bug in BaseRecalibration's recalibrateRead: it is clearly
 * seeing space it's not supposed to, so stale keys must be wiped before reuse.
 * @return this object, to allow call chaining
 */
public ReadCovariates clear() {
    for ( final int[][] eventKeys : keys )
        for ( final int[] offsetKeys : eventKeys )
            Arrays.fill(offsetKeys, 0);
    return this;
}
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;

View File

@ -228,8 +228,7 @@ public class GATKSAMRecord extends BAMRecord {
if( quals == null ) {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setBaseQualities(quals, EventType.BASE_INSERTION);
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
}
return quals;
}
@ -246,7 +245,6 @@ public class GATKSAMRecord extends BAMRecord {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setBaseQualities(quals, EventType.BASE_DELETION);
}
return quals;
}
@ -262,7 +260,7 @@ public class GATKSAMRecord extends BAMRecord {
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup;
retrievedReadGroup = true;
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
}
///////////////////////////////////////////////////////////////////////////////
@ -367,15 +365,15 @@ public class GATKSAMRecord extends BAMRecord {
* Clears all attributes except ReadGroup of the read.
*/
public GATKSAMRecord simplify () {
GATKSAMReadGroupRecord rg = getReadGroup(); // save the read group information
GATKSAMReadGroupRecord rg = getReadGroup(); // save the read group information
byte[] insQuals = (this.getAttribute(BQSR_BASE_INSERTION_QUALITIES) == null) ? null : getBaseInsertionQualities();
byte[] delQuals = (this.getAttribute(BQSR_BASE_DELETION_QUALITIES) == null) ? null : getBaseDeletionQualities();
this.clearAttributes(); // clear all attributes from the read
this.setReadGroup(rg); // restore read group
this.clearAttributes(); // clear all attributes from the read
this.setReadGroup(rg); // restore read group
if (insQuals != null)
this.setBaseQualities(insQuals, EventType.BASE_INSERTION); // restore base insertion if we had any
this.setBaseQualities(insQuals, EventType.BASE_INSERTION); // restore base insertion if we had any
if (delQuals != null)
this.setBaseQualities(delQuals, EventType.BASE_DELETION); // restore base deletion if we had any
this.setBaseQualities(delQuals, EventType.BASE_DELETION); // restore base deletion if we had any
return this;
}

View File

@ -457,7 +457,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
SNP,
MNP, // a multi-nucleotide polymorphism
INDEL,
STRUCTURAL_INDEL,
SYMBOLIC,
MIXED,
}
@ -531,7 +530,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
public boolean isStructuralIndel() {
return getType() == Type.STRUCTURAL_INDEL;
if ( getType() == Type.INDEL ) {
List<Integer> sizes = getIndelLengths();
if ( sizes != null ) {
for ( Integer length : sizes ) {
if ( length > MAX_ALLELE_SIZE_FOR_NON_SV ) {
return true;
}
}
}
}
return false;
}
/**
@ -716,7 +725,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @return a list of indel lengths ( null if not of type indel or mixed )
*/
public List<Integer> getIndelLengths() {
if ( getType() != Type.INDEL && getType() != Type.MIXED && getType() != Type.STRUCTURAL_INDEL ) {
if ( getType() != Type.INDEL && getType() != Type.MIXED ) {
return null;
}
@ -1263,13 +1272,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
// is reserved for cases of multiple alternate alleles of different types). Therefore, if we've reached this point
// in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
// Because a number of structural variation callers write the whole alternate allele into the VCF where possible,
// this can result in insertion/deletion alleles of structural variant size, e.g. 151+. As of July 2012, we now
// classify these as structural events, rather than indel events, as we think differently about the mechanism,
// representation, and handling of these events. Check for this case here:
if ( ref.length() > MAX_ALLELE_SIZE_FOR_NON_SV || allele.length() > MAX_ALLELE_SIZE_FOR_NON_SV )
return Type.STRUCTURAL_INDEL;
return Type.INDEL;
// old incorrect logic:

View File

@ -0,0 +1,41 @@
package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
/**
 * Integration tests verifying that invalid command-line arguments (an unknown
 * read filter, an unknown walker name) fail with the expected specific
 * UserException subtype rather than a generic error.
 *
 * User: chartl
 * Date: 8/31/12
 */
public class InvalidArgumentIntegrationTest extends WalkerTest {
    private static final String callsB36 = BaseTest.validationDataLocation + "lowpass.N3.chr1.raw.vcf";

    /**
     * Builds a VariantsToTable test spec carrying one extra flag/value pair that is
     * expected to make the run fail with the given exception type.
     * NOTE(review): parameter renamed from misspelled "exeption" and raw Class
     * replaced with Class<?> (internal to this private helper; callers unchanged).
     *
     * @param flag      the extra command-line flag (e.g. "-rf")
     * @param arg       the (invalid) value supplied for the flag
     * @param exception the exception class the run is expected to raise
     * @return the configured test spec
     */
    private WalkerTest.WalkerTestSpec baseTest(String flag, String arg, Class<?> exception) {
        return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf "
                + callsB36 + " -F POS,CHROM -R "
                + b36KGReference + " -o %s " + flag + " " + arg,
                1, exception);
    }

    @Test
    public void testUnknownReadFilter() {
        executeTest("UnknownReadFilter",baseTest("-rf","TestUnknownReadFilter", UserException.MalformedReadFilterException.class));
    }

    @Test
    public void testMalformedWalkerArgs() {
        executeTest("MalformedWalkerArgs",
                new WalkerTest.WalkerTestSpec("-T UnknownWalkerName -M 10 --variant:vcf "
                        + callsB36 + " -F POS,CHROM -R "
                        + b36KGReference + " -o %s ",
                        1, UserException.MalformedWalkerArgumentsException.class));
    }
}

View File

@ -1,207 +1,364 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.testng.Assert;
import org.broad.tribble.BasicFeature;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTrackerUnitTest;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeMethod;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* @author aaron
* <p/>
* Class ReadBasedReferenceOrderedViewUnitTest
* <p/>
* test out the ReadBasedReferenceOrderedView class
* @author depristo
*/
public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
private static int startingChr = 1;
private static int endingChr = 2;
private static int readCount = 100;
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
private static String contig;
private static SAMFileHeader header;
private GenomeLocParser genomeLocParser;
@BeforeClass
public void beforeClass() {
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
contig = header.getSequence(0).getSequenceName();
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
initializeTests();
}
@BeforeMethod
public void beforeEach() {
}
@Test
public void testCreateReadMetaDataTrackerOnePerSite() {
// make ten reads,
List<SAMRecord> records = new ArrayList<SAMRecord>();
for (int x = 1; x < 11; x++) {
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10);
private class CompareFeatures implements Comparator<Feature> {
@Override
public int compare(Feature o1, Feature o2) {
return genomeLocParser.createGenomeLoc(o1).compareTo(genomeLocParser.createGenomeLoc(o2));
}
GenomeLoc start = genomeLocParser.createGenomeLoc(header.getSequenceDictionary().getSequence(0).getSequenceName(), 0, 0);
List<RMDDataState> list = new ArrayList<RMDDataState>();
list.add(new RMDDataState(null, new FakePeekingRODIterator(genomeLocParser,start, "fakeName")));
ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list));
}
for (SAMRecord rec : records) {
ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec);
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping();
for (Integer i : map.keySet()) {
Assert.assertEquals(map.get(i).size(), 1);
private class ReadMetaDataTrackerRODStreamTest extends TestDataProvider {
final List<Feature> allFeatures;
final List<GenomeLoc> intervals;
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final GenomeLoc interval) {
this(allFeatures, Collections.singletonList(interval));
}
public ReadMetaDataTrackerRODStreamTest(final List<Feature> allFeatures, final List<GenomeLoc> intervals) {
super(ReadMetaDataTrackerRODStreamTest.class);
this.allFeatures = new ArrayList<Feature>(allFeatures);
Collections.sort(this.allFeatures, new CompareFeatures());
this.intervals = new ArrayList<GenomeLoc>(intervals);
Collections.sort(this.intervals);
setName(String.format("%s nFeatures %d intervals %s", getClass().getSimpleName(), allFeatures.size(),
intervals.size() == 1 ? intervals.get(0) : "size " + intervals.size()));
}
public PeekableIterator<RODRecordList> getIterator(final String name) {
return new PeekableIterator<RODRecordList>(new TribbleIteratorFromCollection(name, genomeLocParser, allFeatures));
}
public Set<Feature> getExpectedOverlaps(final GenomeLoc interval) {
final Set<Feature> overlapping = new HashSet<Feature>();
for ( final Feature f : allFeatures )
if ( genomeLocParser.createGenomeLoc(f).overlapsP(interval) )
overlapping.add(f);
return overlapping;
}
}
public void initializeTests() {
final List<Feature> handPickedFeatures = new ArrayList<Feature>();
handPickedFeatures.add(new BasicFeature(contig, 1, 1));
handPickedFeatures.add(new BasicFeature(contig, 2, 5));
handPickedFeatures.add(new BasicFeature(contig, 4, 4));
handPickedFeatures.add(new BasicFeature(contig, 6, 6));
handPickedFeatures.add(new BasicFeature(contig, 9, 10));
handPickedFeatures.add(new BasicFeature(contig, 10, 10));
handPickedFeatures.add(new BasicFeature(contig, 10, 11));
handPickedFeatures.add(new BasicFeature(contig, 13, 20));
createTestsForFeatures(handPickedFeatures);
// test in the present of a large spanning element
{
List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
oneLargeSpan.add(new BasicFeature(contig, 1, 30));
createTestsForFeatures(oneLargeSpan);
}
// test in the presence of a partially spanning element
{
List<Feature> partialSpanStart = new ArrayList<Feature>(handPickedFeatures);
partialSpanStart.add(new BasicFeature(contig, 1, 6));
createTestsForFeatures(partialSpanStart);
}
// test in the presence of a partially spanning element at the end
{
List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
partialSpanEnd.add(new BasicFeature(contig, 10, 30));
createTestsForFeatures(partialSpanEnd);
}
// no data at all
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, 5, 5);
new ReadMetaDataTrackerRODStreamTest(Collections.<Feature>emptyList(), loc);
}
// --------------------------------------------------------------------------------
//
// tests for the lower level IntervalOverlappingRODsFromStream
//
// --------------------------------------------------------------------------------
@DataProvider(name = "ReadMetaDataTrackerRODStreamTest")
public Object[][] createReadMetaDataTrackerRODStreamTest() {
return ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
}
private GenomeLoc span(final List<GenomeLoc> features) {
int featuresStart = 1; for ( final GenomeLoc f : features ) featuresStart = Math.min(featuresStart, f.getStart());
int featuresStop = 1; for ( final GenomeLoc f : features ) featuresStop = Math.max(featuresStop, f.getStop());
return genomeLocParser.createGenomeLoc(contig, featuresStart, featuresStop);
}
private void createTestsForFeatures(final List<Feature> features) {
int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());
for ( final int size : Arrays.asList(1, 5, 10, 100) ) {
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
// regularly spaced
for ( int start = featuresStart; start < featuresStop; start++) {
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, start, start + size - 1);
allIntervals.add(loc);
new ReadMetaDataTrackerRODStreamTest(features, loc);
}
Assert.assertEquals(map.keySet().size(), 10);
// starting and stopping at every feature
for ( final Feature f : features ) {
// just at the feature
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart(), f.getEnd()));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
// up to end
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd()));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
// missing by 1
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() + 1, f.getEnd() + 1));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
// just spanning
allIntervals.add(genomeLocParser.createGenomeLoc(contig, f.getStart() - 1, f.getEnd() + 1));
new ReadMetaDataTrackerRODStreamTest(features, allIntervals.get(allIntervals.size() - 1));
}
new ReadMetaDataTrackerRODStreamTest(features, allIntervals);
}
}
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest")
public void runReadMetaDataTrackerRODStreamTest_singleQuery(final ReadMetaDataTrackerRODStreamTest data) {
if ( data.intervals.size() == 1 ) {
final String name = "testName";
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
testRODStream(data, stream, Collections.singletonList(data.intervals.get(0)));
}
}
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerRODStreamTest", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_singleQuery")
public void runReadMetaDataTrackerRODStreamTest_multipleQueries(final ReadMetaDataTrackerRODStreamTest data) {
if ( data.intervals.size() > 1 ) {
final String name = "testName";
final PeekableIterator<RODRecordList> iterator = data.getIterator(name);
final IntervalOverlappingRODsFromStream stream = new IntervalOverlappingRODsFromStream(name, iterator);
testRODStream(data, stream, data.intervals);
}
}
private void testRODStream(final ReadMetaDataTrackerRODStreamTest test, final IntervalOverlappingRODsFromStream stream, final List<GenomeLoc> intervals) {
for ( final GenomeLoc interval : intervals ) {
final RODRecordList query = stream.getOverlapping(interval);
final HashSet<Feature> queryFeatures = new HashSet<Feature>();
for ( final GATKFeature f : query ) queryFeatures.add((Feature)f.getUnderlyingObject());
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
}
}
// --------------------------------------------------------------------------------
//
// tests for the higher level tracker itself
//
// --------------------------------------------------------------------------------
@DataProvider(name = "ReadMetaDataTrackerTests")
public Object[][] createTrackerTests() {
List<Object[]> tests = new ArrayList<Object[]>();
final Object[][] singleTests = ReadMetaDataTrackerRODStreamTest.getTests(ReadMetaDataTrackerRODStreamTest.class);
final List<ReadMetaDataTrackerRODStreamTest> multiSiteTests = new ArrayList<ReadMetaDataTrackerRODStreamTest>();
for ( final Object[] singleTest : singleTests ) {
if ( ((ReadMetaDataTrackerRODStreamTest)singleTest[0]).intervals.size() > 1 )
multiSiteTests.add((ReadMetaDataTrackerRODStreamTest)singleTest[0]);
}
for ( final boolean testStateless : Arrays.asList(true, false) ) {
// all pairwise tests
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 2, false)) {
tests.add(new Object[]{singleTest, testStateless});
}
// all 3 way pairwise tests
//for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
// tests.add(new Object[]{singleTest, testStateless});
//}
}
logger.warn("Creating " + tests.size() + " tests for ReadMetaDataTrackerTests");
return tests.toArray(new Object[][]{});
}
}
@Test(enabled = true, dataProvider = "ReadMetaDataTrackerTests", dependsOnMethods = "runReadMetaDataTrackerRODStreamTest_multipleQueries")
public void runReadMetaDataTrackerTest(final List<ReadMetaDataTrackerRODStreamTest> RODs, final boolean testStateless) {
final List<String> names = new ArrayList<String>();
final List<PeekableIterator<RODRecordList>> iterators = new ArrayList<PeekableIterator<RODRecordList>>();
final List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
final List<RodBinding<Feature>> rodBindings = new ArrayList<RodBinding<Feature>>();
for ( int i = 0; i < RODs.size(); i++ ) {
final RodBinding<Feature> rodBinding = new RodBinding<Feature>(Feature.class, "name"+i);
rodBindings.add(rodBinding);
final String name = rodBinding.getName();
names.add(name);
iterators.add(RODs.get(i).getIterator(name));
intervals.addAll(RODs.get(i).intervals);
}
class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
private GenomeLocParser genomeLocParser;
Collections.sort(intervals);
final GenomeLoc span = span(intervals);
final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators);
// current location
private GenomeLoc location;
private GATKFeature curROD;
private final String name;
if ( testStateless ) {
// test each tracker is well formed, as each is created
for ( final GenomeLoc interval : intervals ) {
final RefMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
testMetaDataTrackerBindings(tracker, interval, RODs, rodBindings);
}
} else {
// tests all trackers are correct after reading them into an array
// this checks that the trackers are be safely stored away and analyzed later (critical for nano-scheduling)
final List<RefMetaDataTracker> trackers = new ArrayList<RefMetaDataTracker>();
for ( final GenomeLoc interval : intervals ) {
final RefMetaDataTracker tracker = view.getReferenceOrderedDataForInterval(interval);
trackers.add(tracker);
}
public FakePeekingRODIterator(GenomeLocParser genomeLocParser, GenomeLoc startingLoc, String name) {
this.name = name;
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
for ( int i = 0; i < trackers.size(); i++) {
testMetaDataTrackerBindings(trackers.get(i), intervals.get(i), RODs, rodBindings);
}
}
}
/**
* Gets the header associated with the backing input stream.
* @return the ROD header.
*/
@Override
public Object getHeader() {
return null;
private void testMetaDataTrackerBindings(final RefMetaDataTracker tracker,
final GenomeLoc interval,
final List<ReadMetaDataTrackerRODStreamTest> RODs,
final List<RodBinding<Feature>> rodBindings) {
for ( int i = 0; i < RODs.size(); i++ ) {
final ReadMetaDataTrackerRODStreamTest test = RODs.get(i);
final List<Feature> queryFeaturesList = tracker.getValues(rodBindings.get(i));
final Set<Feature> queryFeatures = new HashSet<Feature>(queryFeaturesList);
final Set<Feature> overlaps = test.getExpectedOverlaps(interval);
Assert.assertEquals(queryFeatures.size(), overlaps.size(), "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected size = " + overlaps.size() + " but saw " + queryFeatures.size());
BaseTest.assertEqualsSet(queryFeatures, overlaps, "IntervalOverlappingRODsFromStream didn't return the expected set of overlapping features." +
" Expected = " + Utils.join(",", overlaps) + " but saw " + Utils.join(",", queryFeatures));
}
}
/**
* Gets the sequence dictionary associated with the backing input stream.
* @return sequence dictionary from the ROD header.
*/
@Override
public SAMSequenceDictionary getSequenceDictionary() {
return null;
}
static class TribbleIteratorFromCollection implements Iterator<RODRecordList> {
// current location
private final String name;
final Queue<GATKFeature> gatkFeatures;
public TribbleIteratorFromCollection(final String name, final GenomeLocParser genomeLocParser, final List<Feature> features) {
this.name = name;
@Override
public GenomeLoc peekNextLocation() {
System.err.println("Peek Next -> " + location);
return location;
}
this.gatkFeatures = new LinkedList<GATKFeature>();
for ( final Feature f : features )
gatkFeatures.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name));
}
@Override
public GenomeLoc position() {
return location;
}
@Override
public boolean hasNext() {
return ! gatkFeatures.isEmpty();
}
@Override
public RODRecordList seekForward(GenomeLoc interval) {
while (location.isBefore(interval))
next();
return next(); // we always move by one, we know the next location will be right
}
@Override
public RODRecordList next() {
final GATKFeature first = gatkFeatures.poll();
final Collection<GATKFeature> myFeatures = new LinkedList<GATKFeature>();
myFeatures.add(first);
while ( gatkFeatures.peek() != null && gatkFeatures.peek().getLocation().getStart() == first.getStart() )
myFeatures.add(gatkFeatures.poll());
@Override
public boolean hasNext() {
return true; // we always have next
}
GenomeLoc loc = first.getLocation();
for ( final GATKFeature feature : myFeatures )
loc = loc.merge(feature.getLocation());
@Override
public RODRecordList next() {
System.err.println("Next -> " + location);
curROD = new ReadMetaDataTrackerUnitTest.FakeRODatum(location, name);
location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1);
FakeRODRecordList list = new FakeRODRecordList();
list.add(curROD);
return list;
}
return new RODRecordListImpl(name, myFeatures, loc); // is this safe?
}
@Override
public void remove() {
throw new IllegalStateException("GRRR");
}
@Override
public void close() {
// nothing to do
@Override public void remove() { throw new IllegalStateException("GRRR"); }
}
}
class FakeRODRecordList extends AbstractList<GATKFeature> implements RODRecordList {
private final List<GATKFeature> list = new ArrayList<GATKFeature>();
public boolean add(GATKFeature data) {
return list.add(data);
}
@Override
public GATKFeature get(int i) {
return list.get(i);
}
@Override
public int size() {
return list.size();
}
@Override
public GenomeLoc getLocation() {
return list.get(0).getLocation();
}
@Override
public String getName() {
return "test";
}
@Override
public int compareTo(RODRecordList rodRecordList) {
return this.list.get(0).getLocation().compareTo(rodRecordList.getLocation());
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
@ -89,7 +90,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
TableFeature datum = tracker.getFirstValue(TableFeature.class, "tableTest");
TableFeature datum = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest"));
Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect");
Assert.assertEquals(datum.get("COL2"),"D","datum parameter for COL2 is incorrect");
@ -115,13 +116,13 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
TableFeature datum1 = tracker.getFirstValue(TableFeature.class, "tableTest1");
TableFeature datum1 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest1"));
Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect");
Assert.assertEquals(datum1.get("COL2"),"D","datum1 parameter for COL2 is incorrect");
Assert.assertEquals(datum1.get("COL3"),"E","datum1 parameter for COL3 is incorrect");
TableFeature datum2 = tracker.getFirstValue(TableFeature.class, "tableTest2");
TableFeature datum2 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest2"));
Assert.assertEquals(datum2.get("COL1"),"C","datum2 parameter for COL1 is incorrect");
Assert.assertEquals(datum2.get("COL2"),"D","datum2 parameter for COL2 is incorrect");

View File

@ -36,8 +36,8 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.util.Collections;
import java.util.Iterator;
@ -69,18 +69,15 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
for(int i = 0; i < reps; i++) {
SAMFileReader reader = new SAMFileReader(inputFile);
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
reader.getFileHeader(),
false,
SAMFileReader.ValidationStringency.SILENT,
downsampling.create(),
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
Collections.<ReadFilter>emptyList(),
false,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte)0);
reader.getFileHeader(),
false,
SAMFileReader.ValidationStringency.SILENT,
downsampling.create(),
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
Collections.<ReadFilter>emptyList(),
Collections.<ReadTransformer>emptyList(),
false,
(byte)0);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
@ -123,7 +123,7 @@ class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
private long Gs;
private long Ts;
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
for(byte base: read.getReadBases()) {
switch(base) {
case 'A': As++; break;

View File

@ -24,9 +24,6 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMProgramRecord;
@ -35,24 +32,25 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Collections;
import java.util.List;
import static org.testng.Assert.*;
/**
* @author aaron
* @version 1.0
@ -183,11 +181,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
Collections.<ReadTransformer>emptyList(),
false,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1,
removeProgramRecords);
@ -205,11 +200,8 @@ public class SAMDataSourceUnitTest extends BaseTest {
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
Collections.<ReadTransformer>emptyList(),
false,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1,
removeProgramRecords);

View File

@ -4,24 +4,27 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* testing of the LocusIteratorByState
@ -255,6 +258,90 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
}
}
////////////////////////////////////////////
// comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////
private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
private static final int IS_BEFORE_DELETION_START_FLAG = 2;
private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
private static final int IS_AFTER_DELETION_END_FLAG = 8;
private static final int IS_BEFORE_INSERTION_FLAG = 16;
private static final int IS_AFTER_INSERTION_FLAG = 32;
private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;
private static class LIBSTest {
final String cigar;
final int readLength;
final List<Integer> offsets;
final List<Integer> flags;
private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
this.cigar = cigar;
this.readLength = readLength;
this.offsets = offsets;
this.flags = flags;
}
}
@DataProvider(name = "LIBSTest")
public Object[][] createLIBSTestData() {
return new Object[][]{
{new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
{new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
{new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
{new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
//TODO -- uncomment these when LIBS is fixed
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
{new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
{new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
{new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
{new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
};
}
@Test(dataProvider = "LIBSTest")
public void testLIBS(LIBSTest params) {
final int locus = 44367788;
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
read.setCigarString(params.cigar);
// create the iterator by state with the fake reads and fake records
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
int offset = 0;
while ( li.hasNext() ) {
AlignmentContext alignmentContext = li.next();
ReadBackedPileup p = alignmentContext.getBasePileup();
Assert.assertTrue(p.getNumberOfElements() == 1);
PileupElement pe = p.iterator().next();
final int flag = params.flags.get(offset);
Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
offset++;
}
}
////////////////////////////////////////////////
// End comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////////
private static ReadProperties createTestReadProperties() {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(),
@ -264,11 +351,8 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
null,
new ValidationExclusion(),
Collections.<ReadFilter>emptyList(),
Collections.<ReadTransformer>emptyList(),
false,
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1
);
}

View File

@ -1,276 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.*;
/**
* @author aaron
* <p/>
* Class ReadMetaDataTrackerUnitTest
* <p/>
* test out the ReadMetaDataTracker
*/
public class ReadMetaDataTrackerUnitTest extends BaseTest {
private static int startingChr = 1;
private static int endingChr = 2;
private static int readCount = 100;
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
private static SAMFileHeader header;
private Set<String> nameSet;
private GenomeLocParser genomeLocParser;
@BeforeClass
public void beforeClass() {
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
@BeforeMethod
public void beforeEach() {
nameSet = new TreeSet<String>();
nameSet.add("default");
}
@Test
public void twoRodsAtEachReadBase() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2);
}
Assert.assertEquals(count, 10);
}
@Test
public void rodAtEachReadBase() {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
@Test
public void filterByName() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping("default");
for (Integer x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
@Test
public void filterByDupType() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
for (Integer x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), 2);
}
Assert.assertEquals(count, 10);
}
// @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system
public void filterByMassiveDupType() {
for (int y = 0; y < 20; y++) {
nameSet.add("default" + String.valueOf(y));
long firstTime = System.currentTimeMillis();
for (int lp = 0; lp < 1000; lp++) {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
for (Integer x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), y + 2);
}
Assert.assertEquals(count, 10);
}
System.err.println(y + " = " + (System.currentTimeMillis() - firstTime));
}
}
@Test
public void filterByType() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
Map<Integer, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(Fake2RODatum.class);
for (int x : map.keySet()) {
count++;
Assert.assertEquals(map.get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
@Test
public void sparceRODsForRead() {
ReadMetaDataTracker tracker = getRMDT(7, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getReadOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
}
Assert.assertEquals(count, 2);
}
@Test
public void rodByGenomeLoc() {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Integer x : tracker.getContigOffsetMapping().keySet()) {
count++;
Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1);
}
Assert.assertEquals(count, 10);
}
/**
* create a ReadMetaDataTracker given:
*
* @param incr the spacing between site locations
* @param names the names of the reference ordered data to create: one will be created at every location for each name
*
* @return a ReadMetaDataTracker
*/
private ReadMetaDataTracker getRMDT(int incr, Set<String> names, boolean alternateTypes) {
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
TreeMap<Integer, RODMetaDataContainer> data = new TreeMap<Integer, RODMetaDataContainer>();
for (int x = 0; x < record.getAlignmentEnd(); x += incr) {
GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getReferenceName(), record.getAlignmentStart() + x, record.getAlignmentStart() + x);
RODMetaDataContainer set = new RODMetaDataContainer();
int cnt = 0;
for (String name : names) {
if (alternateTypes)
set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name));
else
set.addEntry(new FakeRODatum(loc, name));
cnt++;
}
data.put(record.getAlignmentStart() + x, set);
}
ReadMetaDataTracker tracker = new ReadMetaDataTracker(genomeLocParser, record, data);
return tracker;
}
/**
 * A FakeRODatum with a distinct runtime class name; exists solely so tests can
 * exercise the tracker's get-by-class-name lookup functions. Behavior is
 * identical to FakeRODatum.
 */
static public class Fake2RODatum extends FakeRODatum {
    public Fake2RODatum(GenomeLoc location, String name) {
        super(location, name);
    }
}
/**
 * Minimal GATKFeature implementation for testing only: a named feature pinned
 * to a single GenomeLoc, wrapping no underlying record.
 */
static public class FakeRODatum extends GATKFeature {

    final GenomeLoc location;
    final String name;

    public FakeRODatum(GenomeLoc location, String name) {
        super(name);
        this.location = location;
        this.name = name;
    }

    @Override
    public String getName() {
        return this.name;
    }

    @Override
    public GenomeLoc getLocation() {
        return this.location;
    }

    @Override
    public String getChr() {
        return this.location.getContig();
    }

    @Override
    public int getStart() {
        return (int) this.location.getStart();
    }

    @Override
    public int getEnd() {
        return (int) this.location.getStop();
    }

    /** These fakes wrap no real record, so there is nothing to return. */
    @Override
    public Object getUnderlyingObject() {
        return null;
    }
}
}

View File

@ -133,7 +133,7 @@ public class RefMetaDataTrackerUnitTest {
List<RODRecordList> x = new ArrayList<RODRecordList>();
if ( AValues != null ) x.add(AValues);
if ( BValues != null ) x.add(BValues);
return new RefMetaDataTracker(x, context);
return new RefMetaDataTracker(x);
}
public int nBoundTracks() {

View File

@ -5,15 +5,7 @@ import org.testng.annotations.Test;
import java.util.Arrays;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Dec 1, 2009
* Time: 9:03:34 AM
* To change this template use File | Settings | File Templates.
*/
public class PileupWalkerIntegrationTest extends WalkerTest {
@Test
public void testGnarleyFHSPileup() {
String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam "
@ -23,4 +15,14 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(expected_md5));
executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec);
}
@Test
public void testSingleReadAligningOffChromosome1() {
String gatk_args = "-T Pileup "
+ " -I " + privateTestDir + "readOffb37contig1.bam"
+ " -R " + b37KGReference
+ " -o %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("4a45fe1f85aaa8c4158782f2b6dee2bd"));
executeTest("Testing single read spanning off chromosome 1", spec);
}
}

View File

@ -38,7 +38,8 @@ public class PrintReadsIntegrationTest extends WalkerTest {
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e920b8505e7e95d67634b0905237dbc")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "13bb9a91b1d4dd2425f73302b8a1ac1c")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e920b8505e7e95d67634b0905237dbc")},
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")}
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")},
{new PRTest(b37KGReference, "NA12878.1_10mb_2_10mb.bam", "", "c43380ac39b98853af457b90e52f8427")}
};
}

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import java.util.Arrays;
public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
private static class VRTest {
@ -28,7 +28,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
"afa297c743437551cc2bd36ddd6d6d75"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -77,7 +77,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches
"74c10fc15f9739a938b7138909fbde04", // recal file
"62fda105e14b619a1c263855cf56af1d"); // cut VCF
"c30d163871a37f2bbf8ee7f761e870b4"); // cut VCF
@DataProvider(name = "VRBCFTest")
public Object[][] createVRBCFTest() {
@ -129,13 +129,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"64f576881e21323dd4078262604717a2"); // cut VCF
"b2c6827be592c24a4692b1753edc7d23"); // cut VCF
VRTest indelFiltered = new VRTest(
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"af22c55d91394c56a222fd40d6d54781"); // cut VCF
"5d483fe1ba2ef36ee9e6c14cbd654706"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createTestVariantRecalibratorIndel() {
@ -193,7 +193,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
Arrays.asList("018b3a5cc7cf0cb5468c6a0c80ccaa8b"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -211,4 +212,59 @@ public class GenomeLocUnitTest extends BaseTest {
Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction);
}
}
// -------------------------------------------------------------------------------------
//
// testing comparison, hashcode, and equals
//
// -------------------------------------------------------------------------------------
@DataProvider(name = "GenomeLocComparisons")
public Object[][] createGenomeLocComparisons() {
List<Object[]> tests = new ArrayList<Object[]>();
final int start = 10;
for ( int stop = start; stop < start + 3; stop++ ) {
final GenomeLoc g1 = genomeLocParser.createGenomeLoc("chr2", start, stop);
for ( final String contig : Arrays.asList("chr1", "chr2", "chr3")) {
for ( int start2 = start - 1; start2 <= stop + 1; start2++ ) {
for ( int stop2 = start2; stop2 < stop + 2; stop2++ ) {
final GenomeLoc g2 = genomeLocParser.createGenomeLoc(contig, start2, stop2);
ComparisonResult cmp = ComparisonResult.EQUALS;
if ( contig.equals("chr3") ) cmp = ComparisonResult.LESS_THAN;
else if ( contig.equals("chr1") ) cmp = ComparisonResult.GREATER_THAN;
else if ( start < start2 ) cmp = ComparisonResult.LESS_THAN;
else if ( start > start2 ) cmp = ComparisonResult.GREATER_THAN;
else if ( stop < stop2 ) cmp = ComparisonResult.LESS_THAN;
else if ( stop > stop2 ) cmp = ComparisonResult.GREATER_THAN;
tests.add(new Object[]{g1, g2, cmp});
}
}
}
}
return tests.toArray(new Object[][]{});
}
private enum ComparisonResult {
LESS_THAN(-1),
EQUALS(0),
GREATER_THAN(1);
final int cmp;
private ComparisonResult(int cmp) {
this.cmp = cmp;
}
}
@Test(dataProvider = "GenomeLocComparisons")
public void testGenomeLocComparisons(GenomeLoc g1, GenomeLoc g2, ComparisonResult expected) {
Assert.assertEquals(g1.compareTo(g2), expected.cmp, "Comparing genome locs failed");
Assert.assertEquals(g1.equals(g2), expected == ComparisonResult.EQUALS);
if ( expected == ComparisonResult.EQUALS )
Assert.assertEquals(g1.hashCode(), g2.hashCode(), "Equal genome locs don't have the same hash code");
}
}

View File

@ -122,20 +122,20 @@ class GATKResourcesBundle extends QScript {
//
// standard VCF files. Will be lifted to each reference
//
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_135_b37.leftAligned.vcf",
"dbsnp_135", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_137_b37.leftAligned.vcf",
"dbsnp_137", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_genotypes_1525_samples.b37.vcf",
"1000G_omni2.5", b37, true, true))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_2141_samples.b37.vcf",
"1000G_omni2.5", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf",
"hapmap_3.3", b37, true, true))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf",
"hapmap_3.3", b37, true, false))
addResource(new Resource("/humgen/1kg/DCC/ftp/technical/working/20120312_phase1_v2_indel_cleaned_sites_list/ALL.wgs.phase1_release_v2.20101123.official_indel_calls.20120312.sites.vcf",
"1000G_phase1.indels", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
"Mills_and_1000G_gold_standard.indels", b37, true, true))
"Mills_and_1000G_gold_standard.indels", b37, true, false))
//
// example call set for wiki tutorial