Fixes to ValidateVariants as per GS post: the ref base of mixed alleles was sometimes wrong, the error printout for bad ACs was throwing a RuntimeException, and ACs are no longer validated if there are no genotypes.

This commit is contained in:
Eric Banks 2012-02-07 13:15:58 -05:00
parent a6477e558a
commit 718da7757e
2 changed files with 12 additions and 7 deletions

View File

@ -128,13 +128,13 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// get the true reference allele // get the true reference allele
Allele reportedRefAllele = vc.getReference(); Allele reportedRefAllele = vc.getReference();
Allele observedRefAllele; Allele observedRefAllele = null;
// insertions // insertions
if ( vc.isSimpleInsertion() ) { if ( vc.isSimpleInsertion() ) {
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING); observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
} }
// deletions // deletions
else if ( vc.isSimpleDeletion() || vc.isMixed() || vc.isMNP() ) { else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
// we can't validate arbitrarily long deletions // we can't validate arbitrarily long deletions
if ( reportedRefAllele.length() > 100 ) { if ( reportedRefAllele.length() > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart())); logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
@ -143,16 +143,15 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// deletions are associated with the (position of) the last (preceding) non-deleted base; // deletions are associated with the (position of) the last (preceding) non-deleted base;
// hence to get actually deleted bases we need offset = 1 // hence to get actually deleted bases we need offset = 1
int offset = 1 ; int offset = vc.isMNP() ? 0 : 1;
if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base
byte[] refBytes = ref.getBases(); byte[] refBytes = ref.getBases();
byte[] trueRef = new byte[reportedRefAllele.length()]; byte[] trueRef = new byte[reportedRefAllele.length()];
for (int i = 0; i < reportedRefAllele.length(); i++) for (int i = 0; i < reportedRefAllele.length(); i++)
trueRef[i] = refBytes[i+offset]; trueRef[i] = refBytes[i+offset];
observedRefAllele = Allele.create(trueRef, true); observedRefAllele = Allele.create(trueRef, true);
} }
// SNPs, etc. // SNPs, etc. but not mixed types because they are too difficult
else { else if ( !vc.isMixed() ) {
byte[] refByte = new byte[1]; byte[] refByte = new byte[1];
refByte[0] = ref.getBase(); refByte[0] = ref.getBase();
observedRefAllele = Allele.create(refByte, true); observedRefAllele = Allele.create(refByte, true);

View File

@ -920,6 +920,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
public void validateReferenceBases(Allele reference, Byte paddedRefBase) { public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
if ( reference == null )
return;
// don't validate if we're a complex event // don't validate if we're a complex event
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) { if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
@ -963,6 +966,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
public void validateChromosomeCounts() { public void validateChromosomeCounts() {
if ( !hasGenotypes() )
return;
// AN // AN
if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) { if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString()); int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString());
@ -993,7 +999,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have the correct number of values for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.size(), observedACs.size())); throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have the correct number of values for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.size(), observedACs.size()));
for (int i = 0; i < observedACs.size(); i++) { for (int i = 0; i < observedACs.size(); i++) {
if ( Integer.valueOf(reportedACs.get(i).toString()) != observedACs.get(i) ) if ( Integer.valueOf(reportedACs.get(i).toString()) != observedACs.get(i) )
throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i))); throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i)));
} }
} else { } else {
if ( observedACs.size() != 1 ) if ( observedACs.size() != 1 )