Merge branch 'master' of ssh://nickel/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Roger Zurawicki 2011-11-15 00:13:37 -05:00
commit 8e91e19229
9 changed files with 87 additions and 33 deletions

View File

@ -222,8 +222,33 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
if ( isUniqueHeaderLine(line, hInfo) )
hInfo.add(line);
}
for ( String expression : expressionsToUse )
hInfo.add(new VCFInfoHeaderLine(expression, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource"));
// for the expressions, pull the info header line from the header of the resource rod
for ( VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions() ) {
// special case the ID field
if ( expression.fieldName.equals("ID") ) {
hInfo.add(new VCFInfoHeaderLine(expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource"));
continue;
}
VCFInfoHeaderLine targetHeaderLine = null;
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) {
if ( line instanceof VCFInfoHeaderLine ) {
VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line;
if ( infoline.getName().equals(expression.fieldName) ) {
targetHeaderLine = infoline;
break;
}
}
}
if ( targetHeaderLine != null ) {
if ( targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER )
hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription()));
else
hInfo.add(new VCFInfoHeaderLine(expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription()));
} else {
hInfo.add(new VCFInfoHeaderLine(expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource"));
}
}
engine.invokeAnnotationInitializationMethods(hInfo);

View File

@ -49,20 +49,20 @@ public class VariantAnnotatorEngine {
private AnnotatorCompatibleWalker walker;
private GenomeAnalysisEngine toolkit;
private static class VAExpression {
protected static class VAExpression {
public String fullName, fieldName;
public RodBinding<VariantContext> binding;
public VAExpression(String fullEpression, List<RodBinding<VariantContext>> bindings) {
int indexOfDot = fullEpression.lastIndexOf(".");
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings) {
int indexOfDot = fullExpression.lastIndexOf(".");
if ( indexOfDot == -1 )
throw new UserException.BadArgumentValue(fullEpression, "it should be in rodname.value format");
throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
fullName = fullEpression;
fieldName = fullEpression.substring(indexOfDot+1);
fullName = fullExpression;
fieldName = fullExpression.substring(indexOfDot+1);
String bindingName = fullEpression.substring(0, indexOfDot);
String bindingName = fullExpression.substring(0, indexOfDot);
for ( RodBinding<VariantContext> rod : bindings ) {
if ( rod.getName().equals(bindingName) ) {
binding = rod;
@ -97,6 +97,8 @@ public class VariantAnnotatorEngine {
requestedExpressions.add(new VAExpression(expression, walker.getResourceRodBindings()));
}
// Accessor for the expressions requested on this engine. Returns the requestedExpressions
// reference itself (no defensive copy), so changes made by a caller are visible to the engine.
protected List<VAExpression> getRequestedExpressions() { return requestedExpressions; }
private void initializeAnnotations(List<String> annotationGroupsToUse, List<String> annotationsToUse, List<String> annotationsToExclude) {
AnnotationInterfaceManager.validateAnnotations(annotationGroupsToUse, annotationsToUse);
requestedInfoAnnotations = AnnotationInterfaceManager.createInfoFieldAnnotations(annotationGroupsToUse, annotationsToUse);
@ -211,8 +213,13 @@ public class VariantAnnotatorEngine {
continue;
VariantContext vc = VCs.iterator().next();
if ( vc.hasAttribute(expression.fieldName) )
// special-case the ID field
if ( expression.fieldName.equals("ID") ) {
if ( vc.hasID() )
infoAnnotations.put(expression.fullName, vc.getID());
} else if ( vc.hasAttribute(expression.fieldName) ) {
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
}
}
}

View File

@ -6,9 +6,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute

View File

@ -95,11 +95,12 @@ public class PileupElement implements Comparable<PileupElement> {
// --------------------------------------------------------------------------
/**
 * Reports whether the read behind this pileup element is a reduced read, as answered by
 * the read's own isReducedRead().
 *
 * NOTE(review): this text is a rendered diff without +/- markers; the two return statements
 * are presumably the old (with cast) and new (without cast) versions of one line — confirm
 * against the real file.
 *
 * @return the value of isReducedRead() on the underlying read
 */
public boolean isReducedRead() {
return ((GATKSAMRecord)read).isReducedRead();
return read.isReducedRead();
}
/**
 * Returns the representative count for this pileup element.
 *
 * NOTE(review): this text is a rendered diff without +/- markers; the first return is
 * presumably the pre-change line and the last return the post-change line — confirm.
 * In the later form the reduced count at this element's offset is used only when the
 * element is not a deletion and the read is reduced; every other case yields 1.
 *
 * @return read.getReducedCount(offset) for a non-deletion element of a reduced read, otherwise 1
 */
public int getRepresentativeCount() {
return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1;
// TODO -- if we ever decide to reduce the representation of deletions then this will need to be fixed
return (!isDeletion() && isReducedRead()) ? read.getReducedCount(offset) : 1;
}
}

View File

@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
}
/**
 * Returns the reduced-read count for position i.
 *
 * NOTE(review): this text is a rendered diff without +/- markers; the first return is
 * presumably the pre-change line (raw array lookup) and the remaining statements the
 * post-change body — confirm against the real file.
 *
 * In the later form, position 0 returns entry 0 of getReducedReadCounts() unchanged; any
 * other position returns entry 0 plus entry i, capped at Byte.MAX_VALUE. This presumably
 * means entries after the first are stored as offsets relative to the first — verify
 * against the code that writes the array.
 *
 * @param i index into the array returned by getReducedReadCounts()
 * @return the count for position i as described above
 */
public final byte getReducedCount(final int i) {
return getReducedReadCounts()[i];
byte firstCount = getReducedReadCounts()[0];
byte offsetCount = getReducedReadCounts()[i];
// byte + byte is evaluated as int, so the sum cannot wrap before Math.min applies the upper cap.
// NOTE(review): there is no lower bound, so a sum below Byte.MIN_VALUE would wrap on the cast — confirm offsets cannot be that negative.
return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
}

View File

@ -243,7 +243,7 @@ public class ReadUtils {
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) {
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
GATKSAMRecord result = (GATKSAMRecord)read;
GATKSAMRecord result = read;
if ( adaptorBoundaries != null ) {
if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() )

View File

@ -223,7 +223,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
// NOTE(review): rendered diff without +/- markers; the two this(...) lines are presumably the old and new forms of one call.
// The longer call passes genotypesAreUnparsed=false and performValidation=true to the private constructor.
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false);
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false, true);
}
/**
@ -240,7 +240,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
// NOTE(review): rendered diff without +/- markers; the two this(...) lines are presumably the old and new forms of one call.
// No reference base is supplied (null); the longer call passes genotypesAreUnparsed=false and performValidation=true.
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false);
this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false, true);
}
/**
@ -261,7 +261,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param referenceBaseForIndel padded reference base
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, Byte referenceBaseForIndel) {
// NOTE(review): rendered diff without +/- markers; the two this(...) lines are presumably the old and new forms of one call.
// Genotypes default to NO_GENOTYPES; the longer call passes genotypesAreUnparsed=true and performValidation=true.
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true);
this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, referenceBaseForIndel, true, true);
}
/**
@ -278,7 +278,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
// NOTE(review): rendered diff without +/- markers; the two this(...) lines are presumably the old and new forms of one call.
// A non-null genotype collection is handed to genotypeCollectionToMap together with a fresh TreeMap; a null collection is forwarded as null.
// The longer call passes genotypesAreUnparsed=false and performValidation=true.
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false);
this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes, null, false, true);
}
/**
@ -291,7 +291,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param alleles alleles
*/
public VariantContext(String source, String contig, long start, long stop, Collection<Allele> alleles) {
// NOTE(review): rendered diff without +/- markers; the two this(...) lines are presumably the old and new forms of one call.
// Everything beyond location and alleles is defaulted: NO_GENOTYPES, NO_NEG_LOG_10PERROR, and null filters, attributes and reference base.
// The longer call passes genotypesAreUnparsed=false and performValidation=true.
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false);
this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false, true);
}
/**
@ -314,7 +314,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param other the VariantContext to copy
*/
public VariantContext(VariantContext other) {
// NOTE(review): rendered diff without +/- markers; the two this(...) lines are presumably the old and new forms of one call.
// Copies every field from 'other'; filters are forwarded only when other.filtersWereApplied(), otherwise null is passed.
// The longer call passes genotypesAreUnparsed=false and performValidation=true.
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false);
this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, true);
}
/**
@ -331,11 +331,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param attributes attributes
* @param referenceBaseForIndel padded reference base
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
* @param performValidation if true, call validate() as the final step in construction
*/
private VariantContext(String source, String contig, long start, long stop,
Collection<Allele> alleles, Map<String, Genotype> genotypes,
double negLog10PError, Set<String> filters, Map<String, ?> attributes,
Byte referenceBaseForIndel, boolean genotypesAreUnparsed) {
Byte referenceBaseForIndel, boolean genotypesAreUnparsed,
boolean performValidation ) {
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
this.contig = contig;
this.start = start;
@ -371,39 +373,57 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
}
validate();
if ( performValidation ) {
validate();
}
}
// ---------------------------------------------------------------------------------------------------------
//
// Partial-cloning routines (because Variant Context is immutable).
//
// IMPORTANT: These routines assume that the VariantContext on which they're called is already valid.
// Due to this assumption, they explicitly tell the constructor NOT to perform validation by
// calling validate(), and instead perform validation only on the data that's changed.
//
// Note that we don't call vc.getGenotypes() because that triggers the lazy loading.
// Also note that we need to create a new attributes map because it's unmodifiable and the constructor may try to modify it.
//
// ---------------------------------------------------------------------------------------------------------
/**
 * Builds a copy of vc that carries the supplied genotypes in place of its own.
 *
 * NOTE(review): rendered diff without +/- markers; the single-line return is presumably the
 * pre-change body and the modifiedVC statements the post-change body — confirm.
 * In the later form the constructor is told to skip validate() (performValidation=false)
 * and only validateGenotypes() is run on the result.
 *
 * @param vc        the context to copy
 * @param genotypes the genotypes for the new context
 * @return a new VariantContext with the given genotypes
 */
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false);
// attributes are copied into a fresh HashMap; genotypesAreUnparsed=false, performValidation=false
VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), false, false);
modifiedVC.validateGenotypes();
return modifiedVC;
}
/**
 * Builds a copy of vc placed at a different contig/start/end.
 *
 * NOTE(review): rendered diff without +/- markers; the single-line return is presumably the
 * pre-change body and the modifiedVC statements the post-change body — confirm.
 * In the later form the constructor skips validate() (performValidation=false) and the
 * result is checked with validateAlleles() and validateReferencePadding() only.
 *
 * @param vc    the context to copy
 * @param chr   contig for the new context
 * @param start start position for the new context
 * @param end   end position for the new context
 * @return a new VariantContext at the given location
 */
public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) {
return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
// reads the vc.genotypes field directly rather than calling getGenotypes(); genotypesAreUnparsed=true, performValidation=false
VariantContext modifiedVC = new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
// Since start and end have changed, we need to call both validateAlleles() and validateReferencePadding(),
// since those validation routines rely on the values of start and end:
modifiedVC.validateAlleles();
modifiedVC.validateReferencePadding();
return modifiedVC;
}
/**
 * Builds a copy of vc that carries the supplied filter set.
 *
 * NOTE(review): rendered diff without +/- markers; the two returns are presumably the old and
 * new forms of one line — confirm. The later form passes performValidation=false and runs
 * no validation routine afterwards.
 *
 * @param vc      the context to copy
 * @param filters the filters for the new context (forwarded as given)
 * @return a new VariantContext with the given filters
 */
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap<String, Object>(vc.getAttributes()), vc.getReferenceBaseForIndel(), true, false);
}
/**
 * Builds a copy of vc that carries the supplied attribute map.
 *
 * NOTE(review): rendered diff without +/- markers; the two returns are presumably the old and
 * new forms of one line — confirm. The later form passes performValidation=false and runs
 * no validation routine afterwards. The caller's map is forwarded as given, not copied here.
 *
 * @param vc         the context to copy
 * @param attributes the attributes for the new context
 * @return a new VariantContext with the given attributes
 */
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true, false);
}
/**
 * Builds a copy of vc that uses b as its reference base for indels.
 *
 * NOTE(review): rendered diff without +/- markers; the single-line return is presumably the
 * pre-change body and the modifiedVC statements the post-change body — confirm.
 * In the later form the constructor skips validate() (performValidation=false) and only
 * validateReferencePadding() is run on the result.
 *
 * @param vc the context to copy
 * @param b  the reference base to use for the new context
 * @return a new VariantContext with the given reference padding base
 */
public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true);
// NOTE(review): vc.getAttributes() is passed as-is here, while modifyGenotypes/modifyLocation/modifyFilters wrap it in a
// new HashMap because the map is described as unmodifiable — confirm whether the missing copy is intentional.
VariantContext modifiedVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true, false);
modifiedVC.validateReferencePadding();
return modifiedVC;
}
/**
 * Builds a copy of vc with a new negLog10PError, filter set and attribute map in one step.
 *
 * NOTE(review): rendered diff without +/- markers; the two returns are presumably the old and
 * new forms of one line — confirm. The later form passes performValidation=false and runs
 * no validation routine afterwards. Filters and attributes are forwarded as given.
 *
 * @param vc             the context to copy
 * @param negLog10PError the error value for the new context
 * @param filters        the filters for the new context
 * @param attributes     the attributes for the new context
 * @return a new VariantContext with the three fields replaced
 */
public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set<String> filters, Map<String, Object> attributes) {
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true);
return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true, false);
}
// ---------------------------------------------------------------------------------------------------------

View File

@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
// Runs the walker with a resource bound as "foo" and the expression "-E foo.ID", i.e. the ID field of that resource.
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
// NOTE(review): rendered diff without +/- markers; the two Arrays.asList lines are presumably the old and new
// expected checksums (they look like MD5 hex strings — confirm) for the single output file.
Arrays.asList("4a6f0675242f685e9072c1da5ad9e715"));
Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
executeTest("using expression with ID", spec);
}

View File

@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
@BeforeTest
public void init() {
@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {