Bug fix to deal with VCF records that don't have GTs. While in there, optimized a bunch of related functions (including removing a copy of the method calculateChromosomeCounts(); why did we have 2 copies? very dangerous).

This commit is contained in:
Eric Banks 2012-02-27 14:56:10 -05:00
parent 1ea34058c2
commit 998ed8fff3
4 changed files with 21 additions and 53 deletions

View File

@ -682,7 +682,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
for (String sample : sub.getSampleNames()) { for (String sample : sub.getSampleNames()) {
Genotype g = sub.getGenotype(sample); Genotype g = sub.getGenotype(sample);
if (g.isNotFiltered() && g.isCalled()) { if ( g.isNotFiltered() ) {
String dp = (String) g.getAttribute("DP"); String dp = (String) g.getAttribute("DP");
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {

View File

@ -203,7 +203,7 @@ public class VCFCodec extends AbstractVCFCodec {
if ( genotypeAlleleLocation > 0 ) if ( genotypeAlleleLocation > 0 )
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present"); generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList<Allele>(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
// add it to the list // add it to the list

View File

@ -89,9 +89,6 @@ public class Genotype implements Comparable<Genotype> {
} }
public List<Allele> getAlleles(Allele allele) { public List<Allele> getAlleles(Allele allele) {
if ( getType() == Type.UNAVAILABLE )
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
List<Allele> al = new ArrayList<Allele>(); List<Allele> al = new ArrayList<Allele>();
for ( Allele a : alleles ) for ( Allele a : alleles )
if ( a.equals(allele) ) if ( a.equals(allele) )
@ -112,7 +109,7 @@ public class Genotype implements Comparable<Genotype> {
* @return the ploidy of this genotype * @return the ploidy of this genotype
*/ */
public int getPloidy() { public int getPloidy() {
if ( alleles == null ) if ( alleles.size() == 0 )
throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype"); throw new ReviewedStingException("Requesting ploidy for an UNAVAILABLE genotype");
return alleles.size(); return alleles.size();
} }
@ -134,7 +131,7 @@ public class Genotype implements Comparable<Genotype> {
} }
protected Type determineType() { protected Type determineType() {
if ( alleles == null ) if ( alleles.size() == 0 )
return Type.UNAVAILABLE; return Type.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false; boolean sawNoCall = false, sawMultipleAlleles = false;
@ -234,8 +231,7 @@ public class Genotype implements Comparable<Genotype> {
} }
public void validate() { public void validate() {
if ( alleles == null ) return; if ( alleles.size() == 0) return;
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
// int nNoCalls = 0; // int nNoCalls = 0;
for ( Allele allele : alleles ) { for ( Allele allele : alleles ) {
@ -254,7 +250,7 @@ public class Genotype implements Comparable<Genotype> {
} }
public String getGenotypeString(boolean ignoreRefState) { public String getGenotypeString(boolean ignoreRefState) {
if ( alleles == null ) if ( alleles.size() == 0 )
return null; return null;
// Notes: // Notes:

View File

@ -65,8 +65,10 @@ public class VariantContextUtils {
* @return the attributes map provided as input, returned for programming convenience * @return the attributes map provided as input, returned for programming convenience
*/ */
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) { public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
final int AN = vc.getCalledChrCount();
// if everyone is a no-call, remove the old attributes if requested // if everyone is a no-call, remove the old attributes if requested
if ( vc.getCalledChrCount() == 0 && removeStaleValues ) { if ( AN == 0 && removeStaleValues ) {
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )
attributes.remove(VCFConstants.ALLELE_COUNT_KEY); attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) )
@ -77,19 +79,22 @@ public class VariantContextUtils {
} }
if ( vc.hasGenotypes() ) { if ( vc.hasGenotypes() ) {
attributes.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount()); attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN);
// if there are alternate alleles, record the relevant tags // if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) { if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<String> alleleFreqs = new ArrayList<String>(); final ArrayList<String> alleleFreqs = new ArrayList<String>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>(); final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
double totalChromosomes = (double)vc.getCalledChrCount();
for ( Allele allele : vc.getAlternateAlleles() ) { for ( Allele allele : vc.getAlternateAlleles() ) {
int altChromosomes = vc.getCalledChrCount(allele); int altChromosomes = vc.getCalledChrCount(allele);
alleleCounts.add(altChromosomes); alleleCounts.add(altChromosomes);
// todo -- this is a performance problem if ( AN == 0 ) {
String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes)); alleleFreqs.add("0.0");
alleleFreqs.add(freq); } else {
// todo -- this is a performance problem
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue((double)AN), ((double)altChromosomes / (double)AN));
alleleFreqs.add(freq);
}
} }
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts); attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
@ -113,41 +118,8 @@ public class VariantContextUtils {
*/ */
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) { public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
final VariantContext vc = builder.make(); final VariantContext vc = builder.make();
final Map<String, Object> attrs = calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues);
// if everyone is a no-call, remove the old attributes if requested builder.attributes(attrs);
if ( vc.getCalledChrCount() == 0 && removeStaleValues ) {
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
builder.rmAttribute(VCFConstants.ALLELE_COUNT_KEY);
if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
builder.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
if ( vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
builder.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY);
return;
}
if ( vc.hasGenotypes() ) {
builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount());
// if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<String> alleleFreqs = new ArrayList<String>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
double totalChromosomes = (double)vc.getCalledChrCount();
for ( Allele allele : vc.getAlternateAlleles() ) {
int altChromosomes = vc.getCalledChrCount(allele);
alleleCounts.add(altChromosomes);
String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes));
alleleFreqs.add(freq);
}
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
}
else {
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, 0);
builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
}
}
} }
private static String makePrecisionFormatStringFromDenominatorValue(double maxValue) { private static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {