Fixes to ValidateVariants as per GS post: the ref base computed for mixed alleles was sometimes wrong, the error printout for bad ACs was throwing a RuntimeException, and ACs are no longer validated if there are no genotypes.

This commit is contained in:
Eric Banks 2012-02-07 13:15:58 -05:00
parent a6477e558a
commit 718da7757e
2 changed files with 12 additions and 7 deletions

View File

@ -128,13 +128,13 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// get the true reference allele
Allele reportedRefAllele = vc.getReference();
Allele observedRefAllele;
Allele observedRefAllele = null;
// insertions
if ( vc.isSimpleInsertion() ) {
observedRefAllele = Allele.create(Allele.NULL_ALLELE_STRING);
}
// deletions
else if ( vc.isSimpleDeletion() || vc.isMixed() || vc.isMNP() ) {
else if ( vc.isSimpleDeletion() || vc.isMNP() ) {
// we can't validate arbitrarily long deletions
if ( reportedRefAllele.length() > 100 ) {
logger.info(String.format("Reference allele is too long (%d) at position %s:%d; skipping that record.", reportedRefAllele.length(), vc.getChr(), vc.getStart()));
@ -143,16 +143,15 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// deletions are associated with the (position of) the last (preceding) non-deleted base;
// hence to get actually deleted bases we need offset = 1
int offset = 1 ;
if ( vc.isMNP() ) offset = 0; // if it's an MNP, the reported position IS the first modified base
int offset = vc.isMNP() ? 0 : 1;
byte[] refBytes = ref.getBases();
byte[] trueRef = new byte[reportedRefAllele.length()];
for (int i = 0; i < reportedRefAllele.length(); i++)
trueRef[i] = refBytes[i+offset];
observedRefAllele = Allele.create(trueRef, true);
}
// SNPs, etc.
else {
// SNPs, etc. but not mixed types because they are too difficult
else if ( !vc.isMixed() ) {
byte[] refByte = new byte[1];
refByte[0] = ref.getBase();
observedRefAllele = Allele.create(refByte, true);

View File

@ -920,6 +920,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
if ( reference == null )
return;
// don't validate if we're a complex event
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
@ -963,6 +966,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
public void validateChromosomeCounts() {
if ( !hasGenotypes() )
return;
// AN
if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString());
@ -993,7 +999,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have the correct number of values for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.size(), observedACs.size()));
for (int i = 0; i < observedACs.size(); i++) {
if ( Integer.valueOf(reportedACs.get(i).toString()) != observedACs.get(i) )
throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i)));
throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i)));
}
} else {
if ( observedACs.size() != 1 )