a) Hidden, experimental argument to UnifiedGenotyper that, in GenotypeGivenAlleles mode, makes the code ignore SNP alleles mixed in with indels in complex records. The theory is that SNP sites behave statistically differently when doing VQSR, so those alleles/sites should be treated separately.
b) Bug fix: multiallelic indel records were not being treated properly by VQSR because vc.isIndel() returns false for them. The correct general treatment for now is to use (vc.isIndel()||vc.isMixed()). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5973 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
17e17d3c3c
commit
f8ffda6835
|
|
@ -58,6 +58,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
private boolean DEBUG = false;
|
||||
|
||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
|
||||
|
|
@ -111,7 +113,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
||||
|
||||
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||
|
||||
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -333,8 +335,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return null;
|
||||
|
||||
alleleList.clear();
|
||||
for (Allele a : vc.getAlleles())
|
||||
alleleList.add(a);
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||
for (Allele a : vc.getAlleles())
|
||||
if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
|
||||
continue;
|
||||
else
|
||||
alleleList.add(a);
|
||||
|
||||
}
|
||||
else {
|
||||
for (Allele a : vc.getAlleles())
|
||||
alleleList.add(a);
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -124,6 +124,11 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
|
||||
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
||||
public boolean IGNORE_SNP_ALLELES = false;
|
||||
|
||||
|
||||
@Deprecated
|
||||
@Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
|
||||
private Boolean ALL_BASES_DEPRECATED = false;
|
||||
|
|
@ -165,6 +170,7 @@ public class UnifiedArgumentCollection {
|
|||
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
|
||||
uac.dovit = dovit;
|
||||
uac.GSA_PRODUCTION_ONLY = GSA_PRODUCTION_ONLY;
|
||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||
|
||||
return uac;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -240,7 +240,7 @@ public class VariantDataManager {
|
|||
datum.consensusCount = 0;
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) {
|
||||
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP())) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
datum.isKnown = datum.isKnown || trainingSet.isKnown;
|
||||
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
|
||||
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
|
||||
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && vc.isIndel());
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Reference in New Issue