a) Hidden, experimental argument to UnifiedGenotyper that makes the code, when in GenotypeGivenAlleles mode, ignore SNP alleles mixed in with indels in complex records. The theory is that SNP sites behave statistically differently when doing VQSR, so those alleles/sites should be treated separately.

b) Bug fix: multiallelic indel records were not being treated properly by VQSR because vc.isIndel() returns false for them. The correct general treatment for now is to use (vc.isIndel()||vc.isMixed()).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5973 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2011-06-09 19:19:23 +00:00
parent 17e17d3c3c
commit f8ffda6835
4 changed files with 24 additions and 5 deletions

View File

@ -58,6 +58,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
@ -111,7 +113,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
}
@ -333,8 +335,19 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return null;
alleleList.clear();
for (Allele a : vc.getAlleles())
alleleList.add(a);
if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
for (Allele a : vc.getAlleles())
if (a.isNonReference() && a.getBases().length == vc.getReference().getBases().length)
continue;
else
alleleList.add(a);
}
else {
for (Allele a : vc.getAlleles())
alleleList.add(a);
}
}
else {

View File

@ -124,6 +124,11 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
@Deprecated
@Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
private Boolean ALL_BASES_DEPRECATED = false;
@ -165,6 +170,7 @@ public class UnifiedArgumentCollection {
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
uac.dovit = dovit;
uac.GSA_PRODUCTION_ONLY = GSA_PRODUCTION_ONLY;
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
return uac;
}

View File

@ -240,7 +240,7 @@ public class VariantDataManager {
datum.consensusCount = 0;
for( final TrainingSet trainingSet : trainingSets ) {
for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) {
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP()) || (evalVC.isIndel() && trainVC.isIndel())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP())) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed())) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
datum.isKnown = datum.isKnown || trainingSet.isKnown;
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;

View File

@ -194,7 +194,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && vc.isIndel());
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
}
//---------------------------------------------------------------------------------------------------------------