2009-05-16 05:02:12 +08:00
|
|
|
package org.broadinstitute.sting.gatk;
|
|
|
|
|
|
2009-09-01 12:21:58 +08:00
|
|
|
import net.sf.picard.filter.SamRecordFilter;
|
|
|
|
|
import net.sf.samtools.SAMFileReader;
|
|
|
|
|
|
2009-05-16 05:02:12 +08:00
|
|
|
import java.io.File;
|
2009-07-10 07:59:53 +08:00
|
|
|
import java.util.ArrayList;
|
2009-09-01 12:21:58 +08:00
|
|
|
import java.util.List;
|
2009-11-11 07:36:17 +08:00
|
|
|
import java.util.Collection;
|
2009-05-16 05:02:12 +08:00
|
|
|
/**
|
|
|
|
|
* User: hanna
|
|
|
|
|
* Date: May 14, 2009
|
|
|
|
|
* Time: 4:06:26 PM
|
|
|
|
|
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
|
|
|
|
* Software and documentation are copyright 2005 by the Broad Institute.
|
|
|
|
|
* All rights are reserved.
|
|
|
|
|
*
|
|
|
|
|
* Users acknowledge that this software is supplied without any warranty or support.
|
|
|
|
|
* The Broad Institute is not responsible for its use, misuse, or
|
|
|
|
|
* functionality.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A data structure containing information about the reads data sources as well as
|
|
|
|
|
* information about how they should be downsampled, sorted, and filtered.
|
|
|
|
|
*/
|
|
|
|
|
public class Reads {
|
|
|
|
|
private List<File> readsFiles = null;
|
2009-06-10 21:39:32 +08:00
|
|
|
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
|
2009-05-16 05:02:12 +08:00
|
|
|
private Double downsamplingFraction = null;
|
|
|
|
|
private Integer downsampleToCoverage = null;
|
|
|
|
|
private Boolean beSafe = null;
|
2009-11-11 07:36:17 +08:00
|
|
|
private Collection<SamRecordFilter> supplementalFilters = null;
|
2009-09-01 12:21:58 +08:00
|
|
|
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
2009-09-09 23:36:12 +08:00
|
|
|
private boolean includeReadsWithDeletionAtLoci = false;
|
2009-12-29 03:52:44 +08:00
|
|
|
private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels)
|
|
|
|
|
// immediately after the reference base such event is associated with?
|
2009-09-09 23:36:12 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Return true if the walker wants to see reads that contain deletions when looking at locus pileups
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public boolean includeReadsWithDeletionAtLoci() {
|
|
|
|
|
return includeReadsWithDeletionAtLoci;
|
|
|
|
|
}
|
2009-05-16 05:02:12 +08:00
|
|
|
|
2009-12-29 03:52:44 +08:00
|
|
|
/**
|
|
|
|
|
* Return true if the walker wants to see additional piles of "extended" events (indels). An indel is associated,
|
|
|
|
|
* by convention, with the reference base immediately preceding the insertion/deletion, and if this flag is set
|
|
|
|
|
* to 'true', any locus with an indel associated with it will cause exactly two subsequent calls to walker's map(): first call
|
|
|
|
|
* will be made with a "conventional" base pileup, the next call will be made with a pileup of extended (indel/noevent)
|
|
|
|
|
* events.
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public boolean generateExtendedEvents() {
|
|
|
|
|
return generateExtendedEvents;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-16 05:02:12 +08:00
|
|
|
/**
|
|
|
|
|
* Gets a list of the files acting as sources of reads.
|
|
|
|
|
* @return A list of files storing reads data.
|
|
|
|
|
*/
|
|
|
|
|
public List<File> getReadsFiles() {
|
|
|
|
|
return readsFiles;
|
|
|
|
|
}
|
|
|
|
|
|
2009-06-10 21:39:32 +08:00
|
|
|
/**
|
|
|
|
|
* How strict should validation be?
|
|
|
|
|
* @return Stringency of validation.
|
|
|
|
|
*/
|
|
|
|
|
public SAMFileReader.ValidationStringency getValidationStringency() {
|
|
|
|
|
return validationStringency;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-16 05:02:12 +08:00
|
|
|
/**
|
|
|
|
|
* Get the fraction of reads to downsample.
|
|
|
|
|
* @return Downsample fraction.
|
|
|
|
|
*/
|
|
|
|
|
public Double getDownsamplingFraction() {
|
|
|
|
|
return downsamplingFraction;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Downsample each locus to the specified coverage.
|
|
|
|
|
* @return Coverage to which to downsample.
|
|
|
|
|
*/
|
|
|
|
|
public Integer getDownsampleToCoverage() {
|
|
|
|
|
return downsampleToCoverage;
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-01 12:21:58 +08:00
|
|
|
/**
|
|
|
|
|
* get the maximum number of reads we allow at a locus for locus-by-hanger
|
|
|
|
|
* @return the maximum reads allowed in a pile-up
|
|
|
|
|
*/
|
|
|
|
|
public Integer getMaxReadsAtLocus() {
|
|
|
|
|
return maximumReadsAtLocus;
|
|
|
|
|
}
|
|
|
|
|
|
2009-05-16 05:02:12 +08:00
|
|
|
/**
|
|
|
|
|
* Return whether to 'verify' the reads as we pass through them.
|
|
|
|
|
* @return Whether to verify the reads.
|
|
|
|
|
*/
|
|
|
|
|
public Boolean getSafetyChecking() {
|
|
|
|
|
return beSafe;
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-11 07:36:17 +08:00
|
|
|
public Collection<SamRecordFilter> getSupplementalFilters() {
|
2009-07-10 07:59:53 +08:00
|
|
|
return supplementalFilters;
|
2009-05-29 22:51:08 +08:00
|
|
|
}
|
|
|
|
|
|
2009-05-16 05:02:12 +08:00
|
|
|
/**
|
|
|
|
|
* Simple constructor for unit testing.
|
|
|
|
|
* @param readsFiles List of reads files to open.
|
|
|
|
|
*/
|
|
|
|
|
public Reads( List<File> readsFiles ) {
|
|
|
|
|
this.readsFiles = readsFiles;
|
2009-07-10 07:59:53 +08:00
|
|
|
this.supplementalFilters = new ArrayList<SamRecordFilter>();
|
2009-05-16 05:02:12 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Extract the command-line arguments having to do with reads input
|
|
|
|
|
* files and store them in an easy-to-work-with package. Constructor
|
|
|
|
|
* is package protected.
|
2009-06-10 21:39:32 +08:00
|
|
|
* @param samFiles list of reads files.
|
|
|
|
|
* @param strictness Stringency of reads file parsing.
|
|
|
|
|
* @param downsampleFraction fraction of reads to downsample.
|
|
|
|
|
* @param downsampleCoverage downsampling per-locus.
|
|
|
|
|
* @param beSafe Whether to enable safety checking.
|
2009-07-10 07:59:53 +08:00
|
|
|
* @param supplementalFilters additional filters to dynamically apply.
|
2009-12-29 03:52:44 +08:00
|
|
|
* @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with
|
|
|
|
|
* a pile of indel/noevent extended events at every locus with at least one indel associated with it
|
|
|
|
|
* (in addition to a "regular" call to map() at this locus performed with base pileup)
|
|
|
|
|
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
|
|
|
|
|
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
|
|
|
|
|
* bases will be seen in the pileups, and the deletions will be skipped silently.
|
2009-05-16 05:02:12 +08:00
|
|
|
*/
|
2009-06-10 21:39:32 +08:00
|
|
|
Reads( List<File> samFiles,
|
|
|
|
|
SAMFileReader.ValidationStringency strictness,
|
|
|
|
|
Double downsampleFraction,
|
|
|
|
|
Integer downsampleCoverage,
|
|
|
|
|
Boolean beSafe,
|
2009-11-11 07:36:17 +08:00
|
|
|
Collection<SamRecordFilter> supplementalFilters,
|
2009-09-09 23:36:12 +08:00
|
|
|
int maximumReadsAtLocus,
|
2009-12-29 03:52:44 +08:00
|
|
|
boolean includeReadsWithDeletionAtLoci,
|
|
|
|
|
boolean generateExtendedEvents) {
|
2009-06-10 21:39:32 +08:00
|
|
|
this.readsFiles = samFiles;
|
|
|
|
|
this.validationStringency = strictness;
|
|
|
|
|
this.downsamplingFraction = downsampleFraction;
|
|
|
|
|
this.downsampleToCoverage = downsampleCoverage;
|
|
|
|
|
this.beSafe = beSafe;
|
2009-07-10 07:59:53 +08:00
|
|
|
this.supplementalFilters = supplementalFilters;
|
2009-09-01 12:21:58 +08:00
|
|
|
this.maximumReadsAtLocus = maximumReadsAtLocus;
|
2009-09-09 23:36:12 +08:00
|
|
|
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
2009-12-29 03:52:44 +08:00
|
|
|
this.generateExtendedEvents = generateExtendedEvents;
|
2009-05-16 05:02:12 +08:00
|
|
|
}
|
|
|
|
|
}
|