From 998ed8fff39cfe499ed08a461b4cd3701f1d9868 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 27 Feb 2012 14:56:10 -0500 Subject: [PATCH] Bug fix to deal with VCF records that don't have GTs. While in there, optimized a bunch of related functions (including removing a copy of the method calculateChromosomeCounts(); why did we have 2 copies? very dangerous). --- .../walkers/variantutils/SelectVariants.java | 2 +- .../sting/utils/codecs/vcf/VCFCodec.java | 2 +- .../sting/utils/variantcontext/Genotype.java | 12 ++-- .../variantcontext/VariantContextUtils.java | 58 +++++-------------- 4 files changed, 21 insertions(+), 53 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 5eef7fb66..204851e1f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -682,7 +682,7 @@ public class SelectVariants extends RodWalker implements TreeR for (String sample : sub.getSampleNames()) { Genotype g = sub.getGenotype(sample); - if (g.isNotFiltered() && g.isCalled()) { + if ( g.isNotFiltered() ) { String dp = (String) g.getAttribute("DP"); if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 453155be7..01cc367c4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -203,7 +203,7 @@ public class VCFCodec extends AbstractVCFCodec { if ( genotypeAlleleLocation > 0 ) generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present"); - List GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); + List GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; // add it to the list diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 1691129c9..13c4ff3d8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -89,9 +89,6 @@ public class Genotype implements Comparable { } public List getAlleles(Allele allele) { - if ( getType() == Type.UNAVAILABLE ) - throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); - List al = new ArrayList(); for ( Allele a : alleles ) if ( a.equals(allele) ) @@ -112,7 +109,7 @@ public class Genotype implements Comparable { * @return the ploidy of this genotype */ public int getPloidy() { - if ( alleles == null ) + if ( alleles.size() == 0 ) throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype"); return alleles.size(); } @@ -134,7 +131,7 @@ public class Genotype implements Comparable { } protected Type determineType() { - if ( alleles == null ) + if ( alleles.size() == 0 ) return Type.UNAVAILABLE; boolean sawNoCall = false, sawMultipleAlleles = false; @@ -234,8 +231,7 @@ public class Genotype implements Comparable { } public void validate() { - if ( alleles == null ) return; - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0"); + if ( alleles.size() == 0) return; // int nNoCalls = 0; for ( Allele allele : alleles ) { @@ -254,7 +250,7 @@ public class Genotype implements Comparable { } public String getGenotypeString(boolean ignoreRefState) { - if ( alleles == null ) + if ( alleles.size() == 0 ) return null; // Notes: diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index c79bbaace..fc50df3a5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -65,8 +65,10 @@ public class VariantContextUtils { * @return the attributes map provided as input, returned for programming convenience */ public static Map calculateChromosomeCounts(VariantContext vc, Map attributes, boolean removeStaleValues) { + final int AN = vc.getCalledChrCount(); + // if everyone is a no-call, remove the old attributes if requested - if ( vc.getCalledChrCount() == 0 && removeStaleValues ) { + if ( AN == 0 && removeStaleValues ) { if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) attributes.remove(VCFConstants.ALLELE_COUNT_KEY); if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) @@ -77,19 +79,22 @@ public class VariantContextUtils { } if ( vc.hasGenotypes() ) { - attributes.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount()); + attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN); // if there are alternate alleles, record the relevant tags if ( vc.getAlternateAlleles().size() > 0 ) { - ArrayList alleleFreqs = new ArrayList(); - ArrayList alleleCounts = new ArrayList(); - double totalChromosomes = (double)vc.getCalledChrCount(); + final ArrayList alleleFreqs = new ArrayList(); + final ArrayList alleleCounts = new ArrayList(); for ( Allele allele : vc.getAlternateAlleles() ) { int altChromosomes = vc.getCalledChrCount(allele); alleleCounts.add(altChromosomes); - // todo -- this is a performance problem - String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes)); - alleleFreqs.add(freq); + if ( AN == 0 ) { + alleleFreqs.add("0.0"); + } else { + // todo -- this is a performance problem + final String freq = String.format(makePrecisionFormatStringFromDenominatorValue((double)AN), ((double)altChromosomes / (double)AN)); + alleleFreqs.add(freq); + } } attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); @@ -113,41 +118,8 @@ public class VariantContextUtils { */ public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) { final VariantContext vc = builder.make(); - - // if everyone is a no-call, remove the old attributes if requested - if ( vc.getCalledChrCount() == 0 && removeStaleValues ) { - if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) - builder.rmAttribute(VCFConstants.ALLELE_COUNT_KEY); - if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) - builder.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY); - if ( vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) - builder.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY); - return; - } - - if ( vc.hasGenotypes() ) { - builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount()); - - // if there are alternate alleles, record the relevant tags - if ( vc.getAlternateAlleles().size() > 0 ) { - ArrayList alleleFreqs = new ArrayList(); - ArrayList alleleCounts = new ArrayList(); - double totalChromosomes = (double)vc.getCalledChrCount(); - for ( Allele allele : vc.getAlternateAlleles() ) { - int altChromosomes = vc.getCalledChrCount(allele); - alleleCounts.add(altChromosomes); - String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes)); - alleleFreqs.add(freq); - } - - builder.attribute(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); - builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs); - } - else { - builder.attribute(VCFConstants.ALLELE_COUNT_KEY, 0); - builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); - } - } + final Map attrs = calculateChromosomeCounts(vc, new HashMap(vc.getAttributes()), removeStaleValues); + builder.attributes(attrs); } private static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {