diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 087218cfb..c9ea7a3b5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -68,8 +68,9 @@ import java.util.*; * -T VariantAnnotator \ * -I input.bam \ * -o output.vcf \ - * -A DepthOfCoverage + * -A DepthOfCoverage \ * --variant input.vcf \ + * -L input.vcf \ * --dbsnp dbsnp.vcf * * @@ -221,8 +222,33 @@ public class VariantAnnotator extends RodWalker implements Ann if ( isUniqueHeaderLine(line, hInfo) ) hInfo.add(line); } - for ( String expression : expressionsToUse ) - hInfo.add(new VCFInfoHeaderLine(expression, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); + // for the expressions, pull the info header line from the header of the resource rod + for ( VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) { + // special case the ID field + if ( expression.fieldName.equals("ID") ) { + hInfo.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource")); + continue; + } + VCFInfoHeaderLine targetHeaderLine = null; + for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { + if ( line instanceof VCFInfoHeaderLine ) { + VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line; + if ( infoline.getName().equals(expression.fieldName) ) { + targetHeaderLine = infoline; + break; + } + } + } + + if ( targetHeaderLine != null ) { + if ( targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER ) + hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); + else + hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); + } else { + hInfo.add(new VCFInfoHeaderLine(expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); + } + } engine.invokeAnnotationInitializationMethods(hInfo); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 9fb25d605..6f481b872 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -50,20 +50,20 @@ public class VariantAnnotatorEngine { private AnnotatorCompatibleWalker walker; private GenomeAnalysisEngine toolkit; - private static class VAExpression { + protected static class VAExpression { public String fullName, fieldName; public RodBinding binding; - public VAExpression(String fullEpression, List> bindings) { - int indexOfDot = fullEpression.lastIndexOf("."); + public VAExpression(String fullExpression, List> bindings) { + int indexOfDot = fullExpression.lastIndexOf("."); if ( indexOfDot == -1 ) - throw new UserException.BadArgumentValue(fullEpression, "it should be in rodname.value format"); + throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format"); - fullName = fullEpression; - fieldName = fullEpression.substring(indexOfDot+1); + fullName = fullExpression; + fieldName = fullExpression.substring(indexOfDot+1); - String bindingName = fullEpression.substring(0, indexOfDot); + String bindingName = fullExpression.substring(0, indexOfDot); for ( RodBinding rod : bindings ) { if ( rod.getName().equals(bindingName) ) { binding = rod; @@ -98,6 +98,8 @@ public class VariantAnnotatorEngine { requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings())); } + protected List getRequestedExpressions() { return requestedExpressions; } + private void initializeAnnotations(List annotationGroupsToUse, List annotationsToUse, List annotationsToExclude) { AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse); requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse); @@ -212,8 +214,13 @@ public class VariantAnnotatorEngine { continue; VariantContext vc = VCs.iterator().next(); - if ( vc.hasAttribute(expression.fieldName) ) + // special-case the ID field + if ( expression.fieldName.equals("ID") ) { + if ( vc.hasID() ) + infoAnnotations.put(expression.fullName, vc.getID()); + } else if ( vc.hasAttribute(expression.fieldName) ) { infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName)); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index e10334a77..6b4fec04e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -6,9 +6,7 @@ import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.Arrays; import java.util.EnumSet; -import java.util.List; /* * Copyright (c) 2009 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 035d8d2ca..088c4ddc4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -7,10 +7,7 @@ import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -126,6 +123,11 @@ public class ValidationAmplicons extends RodWalker { @Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false) boolean doNotUseBWA = false; + @Hidden + @Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false) + boolean sequenomOutput = false; + + GenomeLoc prevInterval; GenomeLoc allelePos; String probeName; @@ -485,6 +487,13 @@ public class ValidationAmplicons extends RodWalker { } String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D'); - out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + + if (!sequenomOutput) + out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity); + else { + seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record + probeName = probeName.replace("amplicon_","a"); + out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 3fa9c3883..82776ca2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -52,6 +52,7 @@ public class GaussianMixtureModel { private final double[] empiricalMu; private final Matrix empiricalSigma; public boolean isModelReadyForEvaluation; + public boolean failedToConverge = false; public GaussianMixtureModel( final int numGaussians, final int numAnnotations, final double shrinkage, final double dirichletParameter, final double priorCounts ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index f60a94a22..20b000978 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -309,8 +309,23 @@ public class VariantRecalibrator extends RodWalker negativeTrainingData = dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ); + GaussianMixtureModel badModel = engine.generateModel( negativeTrainingData ); engine.evaluateData( dataManager.getData(), badModel, true ); + + // Detect if the negative model failed to converge because of too few points and/or too many Gaussians and try again + while( badModel.failedToConverge && VRAC.MAX_GAUSSIANS > 4 ) { + logger.info("Negative model failed to converge. Retrying..."); + VRAC.MAX_GAUSSIANS--; + badModel = engine.generateModel( negativeTrainingData ); + engine.evaluateData( dataManager.getData(), goodModel, false ); + engine.evaluateData( dataManager.getData(), badModel, true ); + } + + if( badModel.failedToConverge || goodModel.failedToConverge ) { + throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)"); + } + engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel ); // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java index adfb38a25..6d2ac643b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -67,14 +67,20 @@ public class VariantRecalibratorEngine { public void evaluateData( final List data, final GaussianMixtureModel model, final boolean evaluateContrastively ) { if( !model.isModelReadyForEvaluation ) { - model.precomputeDenominatorForEvaluation(); + try { + model.precomputeDenominatorForEvaluation(); + } catch( Exception e ) { + model.failedToConverge = true; + return; + } } logger.info("Evaluating full set of " + data.size() + " variants..."); for( final VariantDatum datum : data ) { final double thisLod = evaluateDatum( datum, model ); if( Double.isNaN(thisLod) ) { - throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)"); + model.failedToConverge = true; + return; } datum.lod = ( evaluateContrastively ? diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index daf6606ef..2d13d6e59 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -95,11 +95,12 @@ public class PileupElement implements Comparable { // -------------------------------------------------------------------------- public boolean isReducedRead() { - return ((GATKSAMRecord)read).isReducedRead(); + return read.isReducedRead(); } public int getRepresentativeCount() { - return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1; + // TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed + return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index e125b8c80..8d9018045 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -243,7 +243,7 @@ public class ReadUtils { public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) { Pair adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength); - GATKSAMRecord result = (GATKSAMRecord)read; + GATKSAMRecord result = read; if ( adaptorBoundaries != null ) { if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() ) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 28c0a31a6..b2786117f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpressionWithID() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1, - Arrays.asList("4a6f0675242f685e9072c1da5ad9e715")); + Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5")); executeTest("using expression with ID", spec); }