Useful debugging argument added to VariantRecalibrator to only use sites whose qual field is above --qual
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4406 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
575c38fc04
commit
2f7892601c
|
|
@ -312,7 +312,7 @@ public class VariantContextUtils {
|
||||||
String rsID = null;
|
String rsID = null;
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
|
|
||||||
// counting the number of filtered and varint VCs
|
// counting the number of filtered and variant VCs
|
||||||
int nFiltered = 0, nVariant = 0;
|
int nFiltered = 0, nVariant = 0;
|
||||||
|
|
||||||
Allele refAllele = determineReferenceAllele(VCs);
|
Allele refAllele = determineReferenceAllele(VCs);
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "NoByHapMapValidationStatus", shortName = "NoByHapMapValidationStatus", doc = "Don't consider sites in dbsnp rod tagged as by-hapmap validation status as real HapMap sites. FOR DEBUGGING PURPOSES ONLY.", required=false)
|
@Argument(fullName = "NoByHapMapValidationStatus", shortName = "NoByHapMapValidationStatus", doc = "Don't consider sites in dbsnp rod tagged as by-hapmap validation status as real HapMap sites. FOR DEBUGGING PURPOSES ONLY.", required=false)
|
||||||
private Boolean NO_BY_HAPMAP_VALIDATION_STATUS = false;
|
private Boolean NO_BY_HAPMAP_VALIDATION_STATUS = false;
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName = "qual", shortName = "qual", doc = "Don't use sites below the qual threshold. FOR DEBUGGING PURPOSES ONLY.", required=false)
|
||||||
|
private double QUAL_THRESHOLD = 0.0;
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Private Member Variables
|
// Private Member Variables
|
||||||
|
|
@ -217,77 +220,79 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), false, false) ) {
|
for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), false, false) ) {
|
||||||
if( vc != null && vc.isSNP() ) {
|
if( vc != null && vc.isSNP() ) {
|
||||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||||
final VariantDatum variantDatum = new VariantDatum();
|
if( vc.getPhredScaledQual() >= QUAL_THRESHOLD ) {
|
||||||
variantDatum.isTransition = VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
final VariantDatum variantDatum = new VariantDatum();
|
||||||
|
variantDatum.isTransition = VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||||
|
|
||||||
final DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
final DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||||
final VariantContext vcHapMap = tracker.getVariantContext(ref, "hapmap", null, context.getLocation(), false);
|
final VariantContext vcHapMap = tracker.getVariantContext(ref, "hapmap", null, context.getLocation(), false);
|
||||||
final VariantContext vc1KG = tracker.getVariantContext(ref, "1kg", null, context.getLocation(), false);
|
final VariantContext vc1KG = tracker.getVariantContext(ref, "1kg", null, context.getLocation(), false);
|
||||||
|
|
||||||
variantDatum.isKnown = ( dbsnp != null );
|
variantDatum.isKnown = ( dbsnp != null );
|
||||||
double knownPrior_qScore = PRIOR_NOVELS;
|
double knownPrior_qScore = PRIOR_NOVELS;
|
||||||
if( vcHapMap != null || ( !NO_BY_HAPMAP_VALIDATION_STATUS && dbsnp != null && DbSNPHelper.isHapmap(dbsnp) ) ) {
|
if( vcHapMap != null || ( !NO_BY_HAPMAP_VALIDATION_STATUS && dbsnp != null && DbSNPHelper.isHapmap(dbsnp) ) ) {
|
||||||
knownPrior_qScore = PRIOR_HAPMAP;
|
knownPrior_qScore = PRIOR_HAPMAP;
|
||||||
} else if( vc1KG != null ) {
|
} else if( vc1KG != null ) {
|
||||||
knownPrior_qScore = PRIOR_1KG;
|
knownPrior_qScore = PRIOR_1KG;
|
||||||
} else if( dbsnp != null ) {
|
} else if( dbsnp != null ) {
|
||||||
knownPrior_qScore = PRIOR_DBSNP;
|
knownPrior_qScore = PRIOR_DBSNP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// If we can trust the AC field then use it instead of parsing all the genotypes. Results in a substantial speed up.
|
// If we can trust the AC field then use it instead of parsing all the genotypes. Results in a substantial speed up.
|
||||||
final int alleleCount = (trustACField ? Integer.parseInt(vc.getAttribute("AC",-1).toString()) : vc.getChromosomeCount(vc.getAlternateAllele(0)));
|
final int alleleCount = (trustACField ? Integer.parseInt(vc.getAttribute("AC",-1).toString()) : vc.getChromosomeCount(vc.getAlternateAllele(0)));
|
||||||
if( !trustACField && !NEVER_TRUST_AC_FIELD ) {
|
if( !trustACField && !NEVER_TRUST_AC_FIELD ) {
|
||||||
if( !vc.getAttributes().containsKey("AC") ) {
|
if( !vc.getAttributes().containsKey("AC") ) {
|
||||||
NEVER_TRUST_AC_FIELD = true;
|
NEVER_TRUST_AC_FIELD = true;
|
||||||
} else {
|
} else {
|
||||||
if( alleleCount == Integer.parseInt( vc.getAttribute("AC").toString()) ) {
|
if( alleleCount == Integer.parseInt( vc.getAttribute("AC").toString()) ) {
|
||||||
// The AC field is correct at this record so we trust it forever
|
// The AC field is correct at this record so we trust it forever
|
||||||
trustACField = true;
|
trustACField = true;
|
||||||
} else { // We found a record in which the AC field wasn't correct but we are trying to trust it
|
} else { // We found a record in which the AC field wasn't correct but we are trying to trust it
|
||||||
throw new UserException.BadInput("AC info field doesn't match the variant chromosome count so we can't trust it! Please run with --dontTrustACField which will force the walker to parse the genotypes for each record, drastically increasing the runtime." +
|
throw new UserException.BadInput("AC info field doesn't match the variant chromosome count so we can't trust it! Please run with --dontTrustACField which will force the walker to parse the genotypes for each record, drastically increasing the runtime." +
|
||||||
"First observed at " + vc.getChr() + ":" + vc.getStart());
|
"First observed at " + vc.getChr() + ":" + vc.getStart());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if( trustACField && alleleCount == -1 ) {
|
||||||
if( trustACField && alleleCount == -1 ) {
|
throw new UserException.BadInput("AC info field doesn't exist for all records (although it does for some) so we can't trust it! Please run with --dontTrustACField which will force the walker to parse the genotypes for each record, drastically increasing the runtime." +
|
||||||
throw new UserException.BadInput("AC info field doesn't exist for all records (although it does for some) so we can't trust it! Please run with --dontTrustACField which will force the walker to parse the genotypes for each record, drastically increasing the runtime." +
|
"First observed at " + vc.getChr() + ":" + vc.getStart());
|
||||||
"First observed at " + vc.getChr() + ":" + vc.getStart());
|
|
||||||
}
|
|
||||||
|
|
||||||
final Object[] priorKey = new Object[2];
|
|
||||||
priorKey[0] = alleleCount;
|
|
||||||
priorKey[1] = knownPrior_qScore;
|
|
||||||
|
|
||||||
Double priorLodFactor = (Double)priorCache.get( priorKey );
|
|
||||||
|
|
||||||
// If this prior factor hasn't been calculated before, do so now
|
|
||||||
if(priorLodFactor == null) {
|
|
||||||
final double knownPrior = QualityUtils.qualToProb(knownPrior_qScore);
|
|
||||||
final double acPrior = theModel.getAlleleCountPrior( alleleCount );
|
|
||||||
final double totalPrior = 1.0 - ((1.0 - acPrior) * (1.0 - knownPrior));
|
|
||||||
|
|
||||||
if( MathUtils.compareDoubles(totalPrior, 1.0, 1E-8) == 0 || MathUtils.compareDoubles(totalPrior, 0.0, 1E-8) == 0 ) {
|
|
||||||
throw new UserException.CommandLineException("Something is wrong with the prior that was entered by the user: Prior = " + totalPrior); // TODO - fix this up later
|
|
||||||
}
|
}
|
||||||
|
|
||||||
priorLodFactor = Math.log10(totalPrior) - Math.log10(1.0 - totalPrior) - Math.log10(1.0);
|
final Object[] priorKey = new Object[2];
|
||||||
|
priorKey[0] = alleleCount;
|
||||||
|
priorKey[1] = knownPrior_qScore;
|
||||||
|
|
||||||
priorCache.put( priorLodFactor, false, priorKey );
|
Double priorLodFactor = (Double)priorCache.get( priorKey );
|
||||||
|
|
||||||
|
// If this prior factor hasn't been calculated before, do so now
|
||||||
|
if(priorLodFactor == null) {
|
||||||
|
final double knownPrior = QualityUtils.qualToProb(knownPrior_qScore);
|
||||||
|
final double acPrior = theModel.getAlleleCountPrior( alleleCount );
|
||||||
|
final double totalPrior = 1.0 - ((1.0 - acPrior) * (1.0 - knownPrior));
|
||||||
|
|
||||||
|
if( MathUtils.compareDoubles(totalPrior, 1.0, 1E-8) == 0 || MathUtils.compareDoubles(totalPrior, 0.0, 1E-8) == 0 ) {
|
||||||
|
throw new UserException.CommandLineException("Something is wrong with the prior that was entered by the user: Prior = " + totalPrior); // TODO - fix this up later
|
||||||
|
}
|
||||||
|
|
||||||
|
priorLodFactor = Math.log10(totalPrior) - Math.log10(1.0 - totalPrior) - Math.log10(1.0);
|
||||||
|
|
||||||
|
priorCache.put( priorLodFactor, false, priorKey );
|
||||||
|
}
|
||||||
|
|
||||||
|
final double pVar = theModel.evaluateVariant( vc );
|
||||||
|
final double lod = priorLodFactor + Math.log10(pVar);
|
||||||
|
variantDatum.qual = Math.abs( QUALITY_SCALE_FACTOR * QualityUtils.lodToPhredScaleErrorRate(lod) );
|
||||||
|
|
||||||
|
mapList.add( variantDatum );
|
||||||
|
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
||||||
|
attrs.put("OQ", String.format("%.2f", vc.getPhredScaledQual()));
|
||||||
|
attrs.put("LOD", String.format("%.4f", lod));
|
||||||
|
VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, variantDatum.qual / 10.0, new HashSet<String>(), attrs);
|
||||||
|
|
||||||
|
vcfWriter.add( newVC, ref.getBase() );
|
||||||
}
|
}
|
||||||
|
|
||||||
final double pVar = theModel.evaluateVariant( vc );
|
|
||||||
final double lod = priorLodFactor + Math.log10(pVar);
|
|
||||||
variantDatum.qual = Math.abs( QUALITY_SCALE_FACTOR * QualityUtils.lodToPhredScaleErrorRate(lod) );
|
|
||||||
|
|
||||||
mapList.add( variantDatum );
|
|
||||||
final Map<String, Object> attrs = new HashMap<String, Object>(vc.getAttributes());
|
|
||||||
attrs.put("OQ", String.format("%.2f", vc.getPhredScaledQual()));
|
|
||||||
attrs.put("LOD", String.format("%.4f", lod));
|
|
||||||
VariantContext newVC = VariantContext.modifyPErrorFiltersAndAttributes(vc, variantDatum.qual / 10.0, new HashSet<String>(), attrs);
|
|
||||||
|
|
||||||
vcfWriter.add( newVC, ref.getBase() );
|
|
||||||
|
|
||||||
} else { // not a SNP or is filtered so just dump it out to the VCF file
|
} else { // not a SNP or is filtered so just dump it out to the VCF file
|
||||||
vcfWriter.add( vc, ref.getBase() );
|
vcfWriter.add( vc, ref.getBase() );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue