Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
2e9d5363e7
|
|
@ -68,8 +68,9 @@ import java.util.*;
|
|||
* -T VariantAnnotator \
|
||||
* -I input.bam \
|
||||
* -o output.vcf \
|
||||
* -A DepthOfCoverage
|
||||
* -A DepthOfCoverage \
|
||||
* --variant input.vcf \
|
||||
* -L input.vcf \
|
||||
* --dbsnp dbsnp.vcf
|
||||
* </pre>
|
||||
*
|
||||
|
|
@ -221,8 +222,33 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
if ( isUniqueHeaderLine(line, hInfo) )
|
||||
hInfo.add(line);
|
||||
}
|
||||
for ( String expression : expressionsToUse )
|
||||
hInfo.add(new VCFInfoHeaderLine(expression, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource"));
|
||||
// for the expressions, pull the info header line from the header of the resource rod
|
||||
for ( VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) {
|
||||
// special case the ID field
|
||||
if ( expression.fieldName.equals("ID") ) {
|
||||
hInfo.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource"));
|
||||
continue;
|
||||
}
|
||||
VCFInfoHeaderLine targetHeaderLine = null;
|
||||
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line;
|
||||
if ( infoline.getName().equals(expression.fieldName) ) {
|
||||
targetHeaderLine = infoline;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( targetHeaderLine != null ) {
|
||||
if ( targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER )
|
||||
hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription()));
|
||||
else
|
||||
hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription()));
|
||||
} else {
|
||||
hInfo.add(new VCFInfoHeaderLine(expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource"));
|
||||
}
|
||||
}
|
||||
|
||||
engine.invokeAnnotationInitializationMethods(hInfo);
|
||||
|
||||
|
|
|
|||
|
|
@ -50,20 +50,20 @@ public class VariantAnnotatorEngine {
|
|||
private AnnotatorCompatibleWalker walker;
|
||||
private GenomeAnalysisEngine toolkit;
|
||||
|
||||
private static class VAExpression {
|
||||
protected static class VAExpression {
|
||||
|
||||
public String fullName, fieldName;
|
||||
public RodBinding<VariantContext> binding;
|
||||
|
||||
public VAExpression(String fullEpression, List<RodBinding<VariantContext>> bindings) {
|
||||
int indexOfDot = fullEpression.lastIndexOf(".");
|
||||
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings) {
|
||||
int indexOfDot = fullExpression.lastIndexOf(".");
|
||||
if ( indexOfDot == -1 )
|
||||
throw new UserException.BadArgumentValue(fullEpression, "it should be in rodname.value format");
|
||||
throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
|
||||
|
||||
fullName = fullEpression;
|
||||
fieldName = fullEpression.substring(indexOfDot+1);
|
||||
fullName = fullExpression;
|
||||
fieldName = fullExpression.substring(indexOfDot+1);
|
||||
|
||||
String bindingName = fullEpression.substring(0, indexOfDot);
|
||||
String bindingName = fullExpression.substring(0, indexOfDot);
|
||||
for ( RodBinding<VariantContext> rod : bindings ) {
|
||||
if ( rod.getName().equals(bindingName) ) {
|
||||
binding = rod;
|
||||
|
|
@ -98,6 +98,8 @@ public class VariantAnnotatorEngine {
|
|||
requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings()));
|
||||
}
|
||||
|
||||
protected List<VAExpression> getRequestedExpressions() { return requestedExpressions; }
|
||||
|
||||
private void initializeAnnotations(List<String> annotationGroupsToUse, List<String> annotationsToUse, List<String> annotationsToExclude) {
|
||||
AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse);
|
||||
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse);
|
||||
|
|
@ -212,8 +214,13 @@ public class VariantAnnotatorEngine {
|
|||
continue;
|
||||
|
||||
VariantContext vc = VCs.iterator().next();
|
||||
if ( vc.hasAttribute(expression.fieldName) )
|
||||
// special-case the ID field
|
||||
if ( expression.fieldName.equals("ID") ) {
|
||||
if ( vc.hasID() )
|
||||
infoAnnotations.put(expression.fullName, vc.getID());
|
||||
} else if ( vc.hasAttribute(expression.fieldName) ) {
|
||||
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,9 +6,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
|
|||
|
|
@ -7,10 +7,7 @@ import org.broadinstitute.sting.alignment.Alignment;
|
|||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -126,6 +123,11 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
@Argument(doc="Do not use BWA, lower-case repeats only",fullName="doNotUseBWA",required=false)
|
||||
boolean doNotUseBWA = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(doc="Use Sequenom output format instead of regular FASTA",fullName="sqnm",required=false)
|
||||
boolean sequenomOutput = false;
|
||||
|
||||
|
||||
GenomeLoc prevInterval;
|
||||
GenomeLoc allelePos;
|
||||
String probeName;
|
||||
|
|
@ -485,6 +487,13 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
String seqIdentity = sequence.toString().replace('n', 'N').replace('i', 'I').replace('d', 'D');
|
||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||
|
||||
if (!sequenomOutput)
|
||||
out.printf(">%s %s %s%n%s%n", allelePos != null ? allelePos.toString() : "multiple", valid, probeName, seqIdentity);
|
||||
else {
|
||||
seqIdentity = seqIdentity.replace("*",""); // identifier < 20 letters long, no * in ref allele, one line per record
|
||||
probeName = probeName.replace("amplicon_","a");
|
||||
out.printf("%s_%s %s%n", allelePos != null ? allelePos.toString() : "multiple", probeName, seqIdentity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ public class GaussianMixtureModel {
|
|||
private final double[] empiricalMu;
|
||||
private final Matrix empiricalSigma;
|
||||
public boolean isModelReadyForEvaluation;
|
||||
public boolean failedToConverge = false;
|
||||
|
||||
public GaussianMixtureModel( final int numGaussians, final int numAnnotations,
|
||||
final double shrinkage, final double dirichletParameter, final double priorCounts ) {
|
||||
|
|
|
|||
|
|
@ -309,8 +309,23 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
engine.evaluateData( dataManager.getData(), goodModel, false );
|
||||
|
||||
// Generate the negative model using the worst performing data and evaluate each variant contrastively
|
||||
final GaussianMixtureModel badModel = engine.generateModel( dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ) );
|
||||
final ExpandingArrayList<VariantDatum> negativeTrainingData = dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS );
|
||||
GaussianMixtureModel badModel = engine.generateModel( negativeTrainingData );
|
||||
engine.evaluateData( dataManager.getData(), badModel, true );
|
||||
|
||||
// Detect if the negative model failed to converge because of too few points and/or too many Gaussians and try again
|
||||
while( badModel.failedToConverge && VRAC.MAX_GAUSSIANS > 4 ) {
|
||||
logger.info("Negative model failed to converge. Retrying...");
|
||||
VRAC.MAX_GAUSSIANS--;
|
||||
badModel = engine.generateModel( negativeTrainingData );
|
||||
engine.evaluateData( dataManager.getData(), goodModel, false );
|
||||
engine.evaluateData( dataManager.getData(), badModel, true );
|
||||
}
|
||||
|
||||
if( badModel.failedToConverge || goodModel.failedToConverge ) {
|
||||
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)");
|
||||
}
|
||||
|
||||
engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel );
|
||||
|
||||
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
|
||||
|
|
|
|||
|
|
@ -67,14 +67,20 @@ public class VariantRecalibratorEngine {
|
|||
|
||||
public void evaluateData( final List<VariantDatum> data, final GaussianMixtureModel model, final boolean evaluateContrastively ) {
|
||||
if( !model.isModelReadyForEvaluation ) {
|
||||
model.precomputeDenominatorForEvaluation();
|
||||
try {
|
||||
model.precomputeDenominatorForEvaluation();
|
||||
} catch( Exception e ) {
|
||||
model.failedToConverge = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("Evaluating full set of " + data.size() + " variants...");
|
||||
for( final VariantDatum datum : data ) {
|
||||
final double thisLod = evaluateDatum( datum, model );
|
||||
if( Double.isNaN(thisLod) ) {
|
||||
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)");
|
||||
model.failedToConverge = true;
|
||||
return;
|
||||
}
|
||||
|
||||
datum.lod = ( evaluateContrastively ?
|
||||
|
|
|
|||
|
|
@ -95,11 +95,12 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
// --------------------------------------------------------------------------
|
||||
|
||||
public boolean isReducedRead() {
|
||||
return ((GATKSAMRecord)read).isReducedRead();
|
||||
return read.isReducedRead();
|
||||
}
|
||||
|
||||
public int getRepresentativeCount() {
|
||||
return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1;
|
||||
// TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed
|
||||
return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -243,7 +243,7 @@ public class ReadUtils {
|
|||
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) {
|
||||
|
||||
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
|
||||
GATKSAMRecord result = (GATKSAMRecord)read;
|
||||
GATKSAMRecord result = read;
|
||||
|
||||
if ( adaptorBoundaries != null ) {
|
||||
if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() )
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715"));
|
||||
Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue