min mapping quality and min base quality arguments for UG

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2354 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-12-15 03:57:27 +00:00
parent faa638532a
commit 2cbc85cc7a
3 changed files with 27 additions and 5 deletions

View File

@ -87,4 +87,11 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
// Mark's filtering arguments
//@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public Integer MIN_BASE_QUALTY_SCORE = -1;
//@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public Integer MIN_MAPPING_QUALTY_SCORE = -1;
}

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.alignment.Alignment;
import net.sf.samtools.SAMReadGroupRecord;
@ -196,20 +197,21 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param context contextual information around the locus
* @param rawContext contextual information around the locus
*/
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
// an optimization to speed things up when there is no coverage or when overly covered
if ( context.getPileup().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && context.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
if ( rawContext.getPileup().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && rawContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
return null;
// are there too many deletions in the pileup?
ReadBackedPileup pileup = context.getPileup();
ReadBackedPileup pileup = rawContext.getPileup().getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
AlignmentContext context = new AlignmentContext(rawContext.getLocation(), pileup);
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getPileupWithoutMappingQualityZeroReads().getNumberOfDeletions() / (double)(pileup.size() - pileup.getNumberOfMappingQualityZeroReads()) > UAC.MAX_DELETION_FRACTION )
return null;

View File

@ -153,6 +153,8 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @return
*/
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
// todo -- eric, this can be more efficient, FYI
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
List<Integer> newOffsets = new ArrayList<Integer>();
@ -169,6 +171,17 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
}
}
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() >= minMapQ && p.getQual() >= minBaseQ ) {
filteredPileup.add(p);
}
}
return new ReadBackedPileup(loc, filteredPileup);
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*