Merge pull request #506 from broadinstitute/eb_quick_fixes_to_single_sample_pipeline
Several improvements to the single sample combining steps.
This commit is contained in:
commit
6facf695ab
|
|
@ -54,6 +54,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
|
|
@ -86,7 +87,8 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
int depth = 0;
|
||||
int standardDepth = 0;
|
||||
int ADrestrictedDepth = 0;
|
||||
|
||||
for ( final Genotype genotype : genotypes ) {
|
||||
|
||||
|
|
@ -101,39 +103,43 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
// TODO -- annotations must come afterwards (so that QD can use the AD).
|
||||
if ( genotype.hasAD() ) {
|
||||
final int[] AD = genotype.getAD();
|
||||
int variantDepth = 0;
|
||||
for ( int i = 1; i < AD.length; i++ )
|
||||
variantDepth += AD[i];
|
||||
if ( variantDepth <= 1 )
|
||||
continue;
|
||||
final int totalADdepth = (int)MathUtils.sum(AD);
|
||||
if ( totalADdepth - AD[0] > 1 )
|
||||
ADrestrictedDepth += totalADdepth;
|
||||
standardDepth += totalADdepth;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (stratifiedContexts!= null && !stratifiedContexts.isEmpty()) {
|
||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||
if ( context == null )
|
||||
continue;
|
||||
depth += context.getBasePileup().depthOfCoverage();
|
||||
standardDepth += context.getBasePileup().depthOfCoverage();
|
||||
|
||||
} else if (perReadAlleleLikelihoodMap != null) {
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||
if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty())
|
||||
continue;
|
||||
|
||||
depth += perReadAlleleLikelihoods.getNumberOfStoredElements();
|
||||
standardDepth += perReadAlleleLikelihoods.getNumberOfStoredElements();
|
||||
} else if ( genotype.hasDP() ) {
|
||||
depth += genotype.getDP();
|
||||
standardDepth += genotype.getDP();
|
||||
}
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
// if the AD-restricted depth is a usable value (i.e. not zero), then we should use that one going forward
|
||||
if ( ADrestrictedDepth > 0 )
|
||||
standardDepth = ADrestrictedDepth;
|
||||
|
||||
if ( standardDepth == 0 )
|
||||
return null;
|
||||
|
||||
final double altAlleleLength = GATKVariantContextUtils.getMeanAltAlleleLength(vc);
|
||||
// Hack: when refContext == null then we know we are coming from the HaplotypeCaller and do not want to do a
|
||||
// full length-based normalization (because the indel length problem is present only in the UnifiedGenotyper)
|
||||
double QD = -10.0 * vc.getLog10PError() / ((double)depth * indelNormalizationFactor(altAlleleLength, ref != null));
|
||||
// Hack: when refContext == null then we know we are coming from the HaplotypeCaller and do not want to do a
|
||||
// full length-based normalization (because the indel length problem is present only in the UnifiedGenotyper)
|
||||
double QD = -10.0 * vc.getLog10PError() / ((double)standardDepth * indelNormalizationFactor(altAlleleLength, ref != null));
|
||||
|
||||
// Hack: see note in the fixTooHighQD method below
|
||||
// Hack: see note in the fixTooHighQD method below
|
||||
QD = fixTooHighQD(QD);
|
||||
|
||||
final Map<String, Object> map = new HashMap<>();
|
||||
|
|
@ -149,7 +155,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
* @return a possitive double
|
||||
*/
|
||||
private double indelNormalizationFactor(final double altAlleleLength, final boolean increaseNormalizationAsLengthIncreases) {
|
||||
return ( increaseNormalizationAsLengthIncreases ? Math.max(altAlleleLength / 3.0, 1.0) : 1.0);
|
||||
return ( increaseNormalizationAsLengthIncreases ? Math.max(altAlleleLength / 3.0, 1.0) : 1.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -205,12 +205,25 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
if ( VCs == null ) throw new IllegalArgumentException("The list of VariantContexts cannot be null");
|
||||
|
||||
for ( final VariantContext vc : VCs ) {
|
||||
if ( vc.getEnd() == pos )
|
||||
if ( isEndingContext(vc, pos) )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the given variant context end (in terms of reference blocks, not necessarily formally) at the given position.
|
||||
* Note that for the purposes of this method/tool, deletions are considered to be single base events (as opposed to
|
||||
* reference blocks), hence the check for the number of alleles (because we know there will always be a <NON_REF> allele).
|
||||
*
|
||||
* @param vc the variant context
|
||||
* @param pos the position to query against
|
||||
* @return true if this variant context "ends" at this position, false otherwise
|
||||
*/
|
||||
private boolean isEndingContext(final VariantContext vc, final int pos) {
|
||||
return vc.getNAlleles() > 2 || vc.getEnd() == pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Disrupt the VariantContexts so that they all stop at the given pos, write them out, and put the remainder back in the list.
|
||||
*
|
||||
|
|
@ -228,10 +241,8 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
|
||||
stoppedVCs.add(vc);
|
||||
|
||||
// if it was ending anyways, then remove it from the future state;
|
||||
// note that for the purposes of this method, deletions are considered to be single base events (as opposed
|
||||
// to ref blocks), hence the check for the number of alleles (because we know there will always be a <NON_REF> allele)
|
||||
if ( vc.getNAlleles() > 2 || vc.getEnd() == pos )
|
||||
// if it was ending anyways, then remove it from the future state
|
||||
if ( isEndingContext(vc, pos) )
|
||||
state.VCs.remove(i);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,10 +56,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -154,10 +152,14 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
|
||||
|
||||
public void initialize() {
|
||||
// create the annotation engine
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, getToolkit());
|
||||
|
||||
// take care of the VCF headers
|
||||
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
|
||||
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
|
||||
headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.MLE_ALLELE_COUNT_KEY, VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
|
||||
|
||||
|
|
@ -168,9 +170,6 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
// create the genotyping engine
|
||||
genotypingEngine = new UnifiedGenotyperEngine(getToolkit(), new UnifiedArgumentCollection(), logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// create the annotation engine
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, getToolkit());
|
||||
|
||||
// collect the actual rod bindings into a list for use later
|
||||
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )
|
||||
variants.addAll(variantCollection.getRodBindings());
|
||||
|
|
@ -206,7 +205,13 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
final VariantContext regenotypedVC = genotypingEngine.calculateGenotypes(result);
|
||||
if ( regenotypedVC == null )
|
||||
return null;
|
||||
result = new VariantContextBuilder(regenotypedVC).attributes(originalVC.getAttributes()).make();
|
||||
|
||||
// we want to carry forward the attributes from the original VC but make sure to add the MLE-based annotations
|
||||
final Map<String, Object> attrs = new HashMap<>(originalVC.getAttributes());
|
||||
attrs.put(VCFConstants.MLE_ALLELE_COUNT_KEY, regenotypedVC.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
attrs.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, regenotypedVC.getAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
|
||||
result = new VariantContextBuilder(regenotypedVC).attributes(attrs).make();
|
||||
}
|
||||
|
||||
// if it turned monomorphic and we don't want such sites, quit
|
||||
|
|
|
|||
|
|
@ -69,9 +69,6 @@ public class QualByDepthUnitTest extends WalkerTest {
|
|||
|
||||
final List<Allele> AA = Arrays.asList(A,A);
|
||||
final List<Allele> AC = Arrays.asList(A,C);
|
||||
final List<Allele> CC = Arrays.asList(C,C);
|
||||
final List<Allele> AG = Arrays.asList(A,G);
|
||||
final List<Allele> CG = Arrays.asList(C,G);
|
||||
final List<Allele> GG = Arrays.asList(G,G);
|
||||
final List<Allele> ACG = Arrays.asList(A,C,G);
|
||||
|
||||
|
|
@ -81,6 +78,7 @@ public class QualByDepthUnitTest extends WalkerTest {
|
|||
final Genotype gGG = new GenotypeBuilder("4", GG).DP(10).AD(new int[]{1,9}).make();
|
||||
|
||||
tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gAC)).make(), 5.0});
|
||||
tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gACerror)).make(), 5.0});
|
||||
tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gAA, gAC)).make(), 5.0});
|
||||
tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gAC, gACerror)).make(), 5.0});
|
||||
tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, ACG).log10PError(-5).genotypes(Arrays.asList(gAA, gAC, gACerror, gGG)).make(), 2.5});
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
final File gVCF = executeTest("testOneHasDeletionAndTwoHasRefBlock", spec).first.get(0);
|
||||
final List<VariantContext> allVCs = GATKVCFUtils.readVCF(gVCF).getSecond();
|
||||
|
||||
Assert.assertEquals(allVCs.size(), 2);
|
||||
Assert.assertEquals(allVCs.size(), 3);
|
||||
|
||||
final VariantContext first = allVCs.get(0);
|
||||
Assert.assertEquals(first.getStart(), 69772);
|
||||
|
|
@ -149,14 +149,19 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
|
||||
final VariantContext second = allVCs.get(1);
|
||||
Assert.assertEquals(second.getStart(), 69773);
|
||||
Assert.assertEquals(second.getEnd(), 69783);
|
||||
Assert.assertEquals(second.getEnd(), 69774);
|
||||
Assert.assertEquals(second.getGenotypes().size(), 2);
|
||||
|
||||
final VariantContext third = allVCs.get(2);
|
||||
Assert.assertEquals(third.getStart(), 69775);
|
||||
Assert.assertEquals(third.getEnd(), 69783);
|
||||
Assert.assertEquals(third.getGenotypes().size(), 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMD5s() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d2a5ca67a8ef6e27854e7a439883f05d"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("aecdfa9eb32b802cd629e9f811ef15fd"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMD5s", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("54487ea151c49d36a15eac8097a7e460"));
|
||||
Arrays.asList("ebda39d3343b34d21490a78284ed88e8"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("9e6ef126d5e872e5b2a68c3d73471566"));
|
||||
Arrays.asList("8eeda24a07f66d67b7639a31fda5c903"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
@ -99,7 +99,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("dd0e2846b3be9692ecb94f152b45c316"));
|
||||
Arrays.asList("2541e164056d5632ad7de784a9af3880"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
|
|
@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("a4f76a094af4708fc7f96a9b7a1b8726"));
|
||||
Arrays.asList("9daf9602338db9d06c075c6e9a15ee2c"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue