diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java index df79f57e2..13c3ee8e8 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java @@ -11,7 +11,9 @@ import java.util.TreeMap; *

* Class VCFValidator *

- * validate a VCF file + * This is the main class for providing a lightweight validation of a VCF file. + * It has two parameters, an optional -A flag meaning that you'd like to collect all + * the errors and present them at the end, and the VCF file itself (a required parameter). */ public class VCFValidator { @@ -20,9 +22,9 @@ public class VCFValidator { /** * about as simple as things come right now. We open the file, process all the entries in the file, * and if no errors pop up in processing, well hey, looks good to us. - * TODO: add validation to individual records fields as they make sense * - * @param args the vcf file is the only parameter + * @param args the vcf file is the only required parameter, with the optional -A indicating that errors + * should be held until the end of processing */ public static void main(String[] args) { boolean catchAll = false; @@ -43,25 +45,36 @@ public class VCFValidator { } // count hom many records we see int recordCount = 0; - Map problems = new TreeMap(); + Map problems = new TreeMap(); try { // open up our reader VCFReader reader = new VCFReader(vcfFile); + // the number of samples should be set in the header and consistent over all records + final int sampleCount = reader.getHeader().getGenotypeSamples().size(); while (reader.hasNext()) { try { recordCount++; VCFRecord rec = reader.next(); // if the header indicates we have genotyping data, try to extract it for all samples if (reader.getHeader().hasGenotypingData()) { + int sampleCounter = 0; for (VCFGenotypeRecord genorec : rec.getVCFGenotypeRecords()) { - // just cycle through them, more checks go here + sampleCounter++; + /** + * just cycle through the records right now; any additional checks for + * the records should go in this block. 
+ **/ } + if (sampleCounter != sampleCount) + throw new RuntimeException("Record " + recordCount + " does not have the required number " + + "of records (" + sampleCounter + " in the record, " + sampleCount + " in the header)"); + } } catch (Exception e) { if (catchAll) - problems.put(recordCount,e); + problems.put(recordCount, e); else { validationFailed(e, recordCount); return; @@ -70,15 +83,15 @@ public class VCFValidator { } } catch (Exception e) { if (catchAll) - problems.put(new Integer(0),e); + problems.put(new Integer(0), e); else validationFailed(e, recordCount); } System.err.println("Viewed " + recordCount + " VCF record entries."); - if (problems.size() > 0) { + if (problems.size() > 0) { System.err.println("Encountered " + problems.size() + " number of issues. (record zero indicates a header problem)"); for (Integer e : problems.keySet()) { - System.err.println("\tProblem at record " + e + " : " + problems.get(e)); + System.err.println("\tProblem at record " + e + " : " + problems.get(e)); } } } @@ -95,7 +108,9 @@ public class VCFValidator { e.printStackTrace(); } - /** print the usage information for the VCF validator */ + /** + * print the usage information for the VCF validator + */ public static void printUsage() { System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")"); System.err.println("Usage:");