min mapping quality and min base quality arguments for UG
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2354 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
faa638532a
commit
2cbc85cc7a
|
|
@ -87,4 +87,11 @@ public class UnifiedArgumentCollection {
|
||||||
|
|
||||||
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
|
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
|
||||||
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
|
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
|
||||||
|
|
||||||
|
// Mark's filtering arguments
|
||||||
|
//@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||||
|
public Integer MIN_BASE_QUALTY_SCORE = -1;
|
||||||
|
|
||||||
|
//@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||||
|
public Integer MIN_MAPPING_QUALTY_SCORE = -1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.genotype.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||||
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
|
||||||
|
|
@ -196,20 +197,21 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
*
|
*
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
* @param refContext the reference base
|
* @param refContext the reference base
|
||||||
* @param context contextual information around the locus
|
* @param rawContext contextual information around the locus
|
||||||
*/
|
*/
|
||||||
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
char ref = Character.toUpperCase(refContext.getBase());
|
char ref = Character.toUpperCase(refContext.getBase());
|
||||||
if ( !BaseUtils.isRegularBase(ref) )
|
if ( !BaseUtils.isRegularBase(ref) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// an optimization to speed things up when there is no coverage or when overly covered
|
// an optimization to speed things up when there is no coverage or when overly covered
|
||||||
if ( context.getPileup().size() == 0 ||
|
if ( rawContext.getPileup().size() == 0 ||
|
||||||
(UAC.MAX_READS_IN_PILEUP > 0 && context.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
|
(UAC.MAX_READS_IN_PILEUP > 0 && rawContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// are there too many deletions in the pileup?
|
// are there too many deletions in the pileup?
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = rawContext.getPileup().getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
AlignmentContext context = new AlignmentContext(rawContext.getLocation(), pileup);
|
||||||
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||||
(double)pileup.getPileupWithoutMappingQualityZeroReads().getNumberOfDeletions() / (double)(pileup.size() - pileup.getNumberOfMappingQualityZeroReads()) > UAC.MAX_DELETION_FRACTION )
|
(double)pileup.getPileupWithoutMappingQualityZeroReads().getNumberOfDeletions() / (double)(pileup.size() - pileup.getNumberOfMappingQualityZeroReads()) > UAC.MAX_DELETION_FRACTION )
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,8 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
|
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
|
||||||
|
// todo -- eric, this can be more efficient, FYI
|
||||||
|
|
||||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||||
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
|
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
|
||||||
List<Integer> newOffsets = new ArrayList<Integer>();
|
List<Integer> newOffsets = new ArrayList<Integer>();
|
||||||
|
|
@ -169,6 +171,17 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) {
|
||||||
|
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||||
|
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
|
if ( p.getRead().getMappingQuality() >= minMapQ && p.getQual() >= minBaseQ ) {
|
||||||
|
filteredPileup.add(p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ReadBackedPileup(loc, filteredPileup);
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue