Merge branch 'master' of ssh://nickel/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Roger Zurawicki 2011-11-15 00:13:37 -05:00
commit 8e91e19229
9 changed files with 87 additions and 33 deletions

View File

@ -222,8 +222,33 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
if ( isUniqueHeaderLine(line, hInfo) ) if ( isUniqueHeaderLine(line, hInfo) )
hInfo.add(line); hInfo.add(line);
} }
for ( String expression : expressionsToUse ) // for the expressions, pull the info header line from the header of the resource rod
hInfo.add(new VCFInfoHeaderLine(expression, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); for ( VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) {
// special case the ID field
if ( expression.fieldName.equals("ID") ) {
hInfo.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource"));
continue;
}
VCFInfoHeaderLine targetHeaderLine = null;
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) {
if ( line instanceof VCFInfoHeaderLine ) {
VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line;
if ( infoline.getName().equals(expression.fieldName) ) {
targetHeaderLine = infoline;
break;
}
}
}
if ( targetHeaderLine != null ) {
if ( targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER )
hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription()));
else
hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription()));
} else {
hInfo.add(new VCFInfoHeaderLine(expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource"));
}
}
engine.invokeAnnotationInitializationMethods(hInfo); engine.invokeAnnotationInitializationMethods(hInfo);

View File

@ -49,20 +49,20 @@ public class VariantAnnotatorEngine {
private AnnotatorCompatibleWalker walker; private AnnotatorCompatibleWalker walker;
private GenomeAnalysisEngine toolkit; private GenomeAnalysisEngine toolkit;
private static class VAExpression { protected static class VAExpression {
public String fullName, fieldName; public String fullName, fieldName;
public RodBinding<VariantContext> binding; public RodBinding<VariantContext> binding;
public VAExpression(String fullEpression, List<RodBinding<VariantContext>> bindings) { public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings) {
int indexOfDot = fullEpression.lastIndexOf("."); int indexOfDot = fullExpression.lastIndexOf(".");
if ( indexOfDot == -1 ) if ( indexOfDot == -1 )
throw new UserException.BadArgumentValue(fullEpression, "it should be in rodname.value format"); throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
fullName = fullEpression; fullName = fullExpression;
fieldName = fullEpression.substring(indexOfDot+1); fieldName = fullExpression.substring(indexOfDot+1);
String bindingName = fullEpression.substring(0, indexOfDot); String bindingName = fullExpression.substring(0, indexOfDot);
for ( RodBinding<VariantContext> rod : bindings ) { for ( RodBinding<VariantContext> rod : bindings ) {
if ( rod.getName().equals(bindingName) ) { if ( rod.getName().equals(bindingName) ) {
binding = rod; binding = rod;
@ -97,6 +97,8 @@ public class VariantAnnotatorEngine {
requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings())); requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings()));
} }
protected List<VAExpression> getRequestedExpressions() { return requestedExpressions; }
private void initializeAnnotations(List<String> annotationGroupsToUse, List<String> annotationsToUse, List<String> annotationsToExclude) { private void initializeAnnotations(List<String> annotationGroupsToUse, List<String> annotationsToUse, List<String> annotationsToExclude) {
AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse); AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse);
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse); requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse);
@ -211,8 +213,13 @@ public class VariantAnnotatorEngine {
continue; continue;
VariantContext vc = VCs.iterator().next(); VariantContext vc = VCs.iterator().next();
if ( vc.hasAttribute(expression.fieldName) ) // special-case the ID field
if ( expression.fieldName.equals("ID") ) {
if ( vc.hasID() )
infoAnnotations.put(expression.fullName, vc.getID());
} else if ( vc.hasAttribute(expression.fieldName) ) {
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName)); infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
}
} }
} }

View File

@ -6,9 +6,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.List;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute

View File

@ -95,11 +95,12 @@ public class PileupElement implements Comparable<PileupElement> {
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
public boolean isReducedRead() { public boolean isReducedRead() {
return ((GATKSAMRecord)read).isReducedRead(); return read.isReducedRead();
} }
public int getRepresentativeCount() { public int getRepresentativeCount() {
return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1; // TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed
return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1;
} }
} }

View File

@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
} }
public final byte getReducedCount(final int i) { public final byte getReducedCount(final int i) {
return getReducedReadCounts()[i]; byte firstCount = getReducedReadCounts()[0];
byte offsetCount = getReducedReadCounts()[i];
return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
} }

View File

@ -243,7 +243,7 @@ public class ReadUtils {
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) { public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) {
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength); Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
GATKSAMRecord result = (GATKSAMRecord)read; GATKSAMRecord result = read;
if ( adaptorBoundaries != null ) { if ( adaptorBoundaries != null ) {
if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() ) if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() )

View File

@ -223,7 +223,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base * @param referenceBaseForIndel padded reference base
*/ */
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) { public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false); this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true);
} }
/** /**
@ -240,7 +240,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes * @param attributes attributes
*/ */
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) { public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false); this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true);
} }
/** /**
@ -261,7 +261,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base * @param referenceBaseForIndel padded reference base
*/ */
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) { public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true); this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true);
} }
/** /**
@ -278,7 +278,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes * @param attributes attributes
*/ */
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) { public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false); this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false, true);
} }
/** /**
@ -291,7 +291,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param alleles alleles * @param alleles alleles
*/ */
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) { public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false); this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false, true);
} }
/** /**
@ -314,7 +314,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param other the VariantContext to copy * @param other the VariantContext to copy
*/ */
public VariantContext(VariantContext other) { public VariantContext(VariantContext other) {
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false); this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true);
} }
/** /**
@ -331,11 +331,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes * @param attributes attributes
* @param referenceBaseForIndel padded reference base * @param referenceBaseForIndel padded reference base
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed * @param genotypesAreUnparsed true if the genotypes have not yet been parsed
* @param performValidation if true, call validate() as the final step in construction
*/ */
private VariantContext(String source, String contig, long start, long stop, private VariantContext(String source, String contig, long start, long stop,
Collection<Allele> alleles, Map<String, Genotype> genotypes, Collection<Allele> alleles, Map<String, Genotype> genotypes,
double negLog10PError, Set<String> filters, Map<String, ?> attributes, double negLog10PError, Set<String> filters, Map<String, ?> attributes,
Byte referenceBaseForIndel, boolean genotypesAreUnparsed) { Byte referenceBaseForIndel, boolean genotypesAreUnparsed,
boolean performValidation ) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
this.contig = contig; this.contig = contig;
this.start = start; this.start = start;
@ -371,39 +373,57 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
} }
validate(); if ( performValidation ) {
validate();
}
} }
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
// Partial-cloning routines (because Variant Context is immutable). // Partial-cloning routines (because Variant Context is immutable).
//
// IMPORTANT: These routines assume that the VariantContext on which they're called is already valid.
// Due to this assumption, they explicitly tell the constructor NOT to perform validation by
// calling validate(), and instead perform validation only on the data that's changed.
//
// Note that we don't call vc.getGenotypes() because that triggers the lazy loading. // Note that we don't call vc.getGenotypes() because that triggers the lazy loading.
// Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it. // Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it.
// //
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) { public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false); VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false, false);
modifiedVC.validateGenotypes();
return modifiedVC;
} }
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) { public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); VariantContext modifiedVC = new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
// Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(),
// since those validation routines rely on the values of start and end:
modifiedVC.validateAlleles();
modifiedVC.validateReferencePadding();
return modifiedVC;
} }
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) { public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
} }
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) { public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true); return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false);
} }
public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) { public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true); VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false);
modifiedVC.validateReferencePadding();
return modifiedVC;
} }
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) { public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true); return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false);
} }
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------

View File

@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() { public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1, baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715")); Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
executeTest("using expression with ID", spec); executeTest("using expression with ID", spec);
} }

View File

@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead; GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG"; final static String BASES = "ACTG";
final static String QUALS = "!+5?"; final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
@BeforeTest @BeforeTest
public void init() { public void init() {
@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes()); reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS); reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS); reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
} }
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) { private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {