diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index ea11391d9..c9ea7a3b5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -222,8 +222,33 @@ public class VariantAnnotator extends RodWalker implements Ann if ( isUniqueHeaderLine(line, hInfo) ) hInfo.add(line); } - for ( String expression : expressionsToUse ) - hInfo.add(new VCFInfoHeaderLine(expression, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); + // for the expressions, pull the info header line from the header of the resource rod + for ( VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) { + // special case the ID field + if ( expression.fieldName.equals("ID") ) { + hInfo.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource")); + continue; + } + VCFInfoHeaderLine targetHeaderLine = null; + for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { + if ( line instanceof VCFInfoHeaderLine ) { + VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line; + if ( infoline.getName().equals(expression.fieldName) ) { + targetHeaderLine = infoline; + break; + } + } + } + + if ( targetHeaderLine != null ) { + if ( targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER ) + hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); + else + hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); + } else { + hInfo.add(new VCFInfoHeaderLine(expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); + } + } engine.invokeAnnotationInitializationMethods(hInfo); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index e4bc0d5d5..20f28007a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -49,20 +49,20 @@ public class VariantAnnotatorEngine { private AnnotatorCompatibleWalker walker; private GenomeAnalysisEngine toolkit; - private static class VAExpression { + protected static class VAExpression { public String fullName, fieldName; public RodBinding binding; - public VAExpression(String fullEpression, List> bindings) { - int indexOfDot = fullEpression.lastIndexOf("."); + public VAExpression(String fullExpression, List> bindings) { + int indexOfDot = fullExpression.lastIndexOf("."); if ( indexOfDot == -1 ) - throw new UserException.BadArgumentValue(fullEpression, "it should be in rodname.value format"); + throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format"); - fullName = fullEpression; - fieldName = fullEpression.substring(indexOfDot+1); + fullName = fullExpression; + fieldName = fullExpression.substring(indexOfDot+1); - String bindingName = fullEpression.substring(0, indexOfDot); + String bindingName = fullExpression.substring(0, indexOfDot); for ( RodBinding rod : bindings ) { if ( rod.getName().equals(bindingName) ) { binding = rod; @@ -97,6 +97,8 @@ public class VariantAnnotatorEngine { requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings())); } + protected List getRequestedExpressions() { return requestedExpressions; } + private void initializeAnnotations(List annotationGroupsToUse, List annotationsToUse, List annotationsToExclude) { AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse); requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse); @@ -211,8 +213,13 @@ public class VariantAnnotatorEngine { continue; VariantContext vc = VCs.iterator().next(); - if ( vc.hasAttribute(expression.fieldName) ) + // special-case the ID field + if ( expression.fieldName.equals("ID") ) { + if ( vc.hasID() ) + infoAnnotations.put(expression.fullName, vc.getID()); + } else if ( vc.hasAttribute(expression.fieldName) ) { infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName)); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index e10334a77..6b4fec04e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -6,9 +6,7 @@ import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import java.util.Arrays; import java.util.EnumSet; -import java.util.List; /* * Copyright (c) 2009 The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index daf6606ef..2d13d6e59 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -95,11 +95,12 @@ public class PileupElement implements Comparable { // -------------------------------------------------------------------------- public boolean isReducedRead() { - return ((GATKSAMRecord)read).isReducedRead(); + return read.isReducedRead(); } public int getRepresentativeCount() { - return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1; + // TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed + return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 3fe1060dd..6d7c8dad9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord { } public final byte getReducedCount(final int i) { - return getReducedReadCounts()[i]; + byte firstCount = getReducedReadCounts()[0]; + byte offsetCount = getReducedReadCounts()[i]; + return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index e125b8c80..8d9018045 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -243,7 +243,7 @@ public class ReadUtils { public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) { Pair adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength); - GATKSAMRecord result = (GATKSAMRecord)read; + GATKSAMRecord result = read; if ( adaptorBoundaries != null ) { if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() ) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index f52a7087b..204b4b841 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -223,7 +223,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param referenceBaseForIndel padded reference base */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { - this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false); + this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true); } /** @@ -240,7 +240,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false); + this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true); } /** @@ -261,7 +261,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param referenceBaseForIndel padded reference base */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { - this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true); + this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true); } /** @@ -278,7 +278,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, null, false); + this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, null, false, true); } /** @@ -291,7 +291,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param alleles alleles */ public VariantContext(String source, String contig, long start, long stop, Collection alleles) { - this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false); + this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false, true); } /** @@ -314,7 +314,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param other the VariantContext to copy */ public VariantContext(VariantContext other) { - this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false); + this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true); } /** @@ -331,11 +331,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes * @param referenceBaseForIndel padded reference base * @param genotypesAreUnparsed true if the genotypes have not yet been parsed + * @param performValidation if true, call validate() as the final step in construction */ private VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, - Byte referenceBaseForIndel, boolean genotypesAreUnparsed) { + Byte referenceBaseForIndel, boolean genotypesAreUnparsed, + boolean performValidation ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } this.contig = contig; this.start = start; @@ -371,39 +373,57 @@ public class VariantContext implements Feature { // to enable tribble intergrati } } - validate(); + if ( performValidation ) { + validate(); + } } // --------------------------------------------------------------------------------------------------------- // // Partial-cloning routines (because Variant Context is immutable). + // + // IMPORTANT: These routines assume that the VariantContext on which they're called is already valid. + // Due to this assumption, they explicitly tell the constructor NOT to perform validation by + // calling validate(), and instead perform validation only on the data that's changed. + // // Note that we don't call vc.getGenotypes() because that triggers the lazy loading. // Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it. // // --------------------------------------------------------------------------------------------------------- public static VariantContext modifyGenotypes(VariantContext vc, Map genotypes) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), false); + VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), false, false); + modifiedVC.validateGenotypes(); + return modifiedVC; } public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) { - return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); + VariantContext modifiedVC = new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false); + + // Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(), + // since those validation routines rely on the values of start and end: + modifiedVC.validateAlleles(); + modifiedVC.validateReferencePadding(); + + return modifiedVC; } public static VariantContext modifyFilters(VariantContext vc, Set filters) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false); } public static VariantContext modifyAttributes(VariantContext vc, Map attributes) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false); } public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true); + VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false); + modifiedVC.validateReferencePadding(); + return modifiedVC; } public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set filters, Map attributes) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false); } // --------------------------------------------------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 189f643d4..bde4c4a8f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testUsingExpressionWithID() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1, - Arrays.asList("4a6f0675242f685e9072c1da5ad9e715")); + Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5")); executeTest("using expression with ID", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java index 46134cd24..53368c339 100755 --- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java @@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest { GATKSAMRecord read, reducedRead; final static String BASES = "ACTG"; final static String QUALS = "!+5?"; - final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; + final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; + final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets @BeforeTest public void init() { @@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest { reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); reducedRead.setReadBases(BASES.getBytes()); reducedRead.setBaseQualityString(QUALS); - reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS); + reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); } private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {