From 2a116bb5d68dbbed4d04f86b30a766aa426edaa7 Mon Sep 17 00:00:00 2001
From: ebanks
Date: Fri, 15 Jan 2010 06:39:06 +0000
Subject: [PATCH] Made the VCF validator a simple rod walker instead of having it be in a separate package.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2588 348d0f76-0448-11de-a6fe-93d51630548a
---
Review note: the added walker below was revised (generic walker types restored,
empty-record and empty-reference guards, Javadoc), so its line counts differ from
the original commit and the blob id on its index line predates the revision.

 .../sting/gatk/walkers/qc/VCFValidator.java   |  82 ++++++++++
 .../utils/genotype/vcf/VCFValidator.java      | 143 ------------------
 packages/VCFValidator.xml                     |   8 -
 3 files changed, 82 insertions(+), 151 deletions(-)
 create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java
 delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java
 delete mode 100644 packages/VCFValidator.xml

diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java
new file mode 100644
index 000000000..70f56c292
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/VCFValidator.java
@@ -0,0 +1,82 @@
+package org.broadinstitute.sting.gatk.walkers.qc;
+
+
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.gatk.walkers.Requires;
+import org.broadinstitute.sting.gatk.walkers.RMD;
+import org.broadinstitute.sting.gatk.refdata.RodVCF;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.RODRecordList;
+import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.utils.StingException;
+
+/**
+ * A light-weight validator for a VCF file.
+ *
+ * It's about as simple as things come right now.  We let the rod system process all of the
+ * entries in the file, and if no errors pop up in processing, then it validates!  On top of
+ * that, SNP and reference records are compared against the reference base at their locus.
+ */
+@Requires(value={},referenceMetaData=@RMD(name="vcf",type= RodVCF.class))
+public class VCFValidator extends RodWalker<Integer, Integer> {
+
+    /**
+     * Compares the first VCF record bound at the current locus against the reference base.
+     *
+     * @param tracker  the reference meta data tracker; when null nothing is checked
+     * @param ref      the reference context, which supplies the reference base to compare against
+     * @param context  the alignment context (not used by this walker)
+     * @return always 1
+     * @throws StingException if a SNP or reference record has an empty reference allele, or one
+     *                        whose first base differs from the reference base
+     */
+    @Override
+    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        if ( tracker == null )
+            return 1;
+
+        RODRecordList rodlist = tracker.getTrackData("vcf", null);
+        // no track data (or an empty record list) at this locus means there is nothing to compare
+        if ( rodlist == null || rodlist.getRecords().isEmpty() )
+            return 1;
+
+        // only the first record at the locus is inspected
+        RodVCF rod = (RodVCF)rodlist.getRecords().get(0);
+        if ( rod.isSNP() || rod.isReference() ) {
+            // report an empty reference allele as a validation failure instead of letting charAt(0) blow up
+            if ( rod.getReference().length() == 0 )
+                throw new StingException("The VCF record has an empty reference allele at a site whose reference base is " + ref.getBase());
+            if ( rod.getReference().charAt(0) != ref.getBase() )
+                throw new StingException("The reference base (" + ref.getBase() + ") does not match the base from the VCF record (" + rod.getReference() + ")");
+        }
+        return 1;
+    }
+
+    /** @return the initial value of the running total, zero */
+    @Override
+    public Integer reduceInit() { return 0; }
+
+    /**
+     * Adds the value produced by map() to the running total.
+     *
+     * @param value the result of the latest map() call
+     * @param sum   the running total so far
+     * @return the updated total
+     */
+    @Override
+    public Integer reduce(Integer value, Integer sum) {
+        return sum + value;
+    }
+
+    /**
+     * Prints the success message once the traversal is done.
+     *
+     * @param result the final total from reduce() (not used)
+     */
+    @Override
+    public void onTraversalDone(Integer result) {
+        out.println("The input file is a valid VCF");
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java
deleted file mode 100644
index f5f14b04c..000000000
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java
+++ /dev/null
@@ -1,143 +0,0 @@
-package org.broadinstitute.sting.utils.genotype.vcf;
-
-
-import java.io.File;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Map;
-import java.util.TreeMap;
-
-
-/**
- * @author aaron
- *

- * Class VCFValidator - *

- * This is the main class for providing a light weight validation of a VCF file. - * It has two parameters, an optional -A flag meaning that you'd like to collect all - * the errors and present them at the end, and the VCF file itself (a required parameter). - */ -public class VCFValidator { - - private static final String VCF_VERSION = "VCRv3.2"; - - /** - * about as simple as things come right now. We open the file, process all the entries in the file, - * and if no errors pop up in processing, well hey, looks good to us. - * - * @param args the vcf file is the only required parameter, with the optional -A indicating that errors - * should be held until the end of processing - */ - public static void main(String[] args) { - boolean catchAll = false; - - if (args.length == 2 && args[0].equals("-A")) - catchAll = true; - else if (args.length == 1) - catchAll = false; - else { - printUsage(); - return; - } - printHeader(args[(catchAll) ? 1 : 0]); - File vcfFile = new File(args[(catchAll) ? 
1 : 0]); - if (!vcfFile.exists()) { - System.err.println("Specified VCF file doesn't exist, please check the input file\n"); - printUsage(); - return; - } - // count hom many records we've see - int recordCount = 0; - Map problems = new TreeMap(); - - try { - // open up our reader - VCFReader reader = new VCFReader(vcfFile); - - // the number of samples should be set in the header and consistant over all records - final int sampleCount = reader.getHeader().getGenotypeSamples().size(); - boolean keepGoing = true; - while (keepGoing) { - try { - recordCount++; - keepGoing = reader.hasNext(); - if (keepGoing) { - VCFRecord rec = reader.next(); - // if the header indicates we have genotyping data, try to extract it for all samples - if (reader.getHeader().hasGenotypingData()) { - int sampleCounter = 0; - for (VCFGenotypeRecord genorec : rec.getVCFGenotypeRecords()) { - sampleCounter++; - /** - * just cycle through the records right now; any additional checks for - * the records should go in this block. - **/ - } - if (sampleCounter != sampleCount) - throw new RuntimeException("Record " + recordCount + " does not have the required number " + - "of records (" + sampleCounter + " in the record, " + sampleCount + " in the header)"); - - } - } - } catch (Exception e) { - if (catchAll) - problems.put(recordCount, e); - else { - validationFailed(e, recordCount); - return; - } - } - } - } catch (Exception e) { - if (catchAll) { - problems.put(new Integer(0), e); - e.printStackTrace(); - } else - validationFailed(e, recordCount); - } - System.err.println("Viewed " + recordCount + " VCF record entries."); - if (problems.size() > 0) { - System.err.println("Encountered " + problems.size() + " number of issues. 
(record zero indicates a header problem)"); - for (Integer e : problems.keySet()) { - System.err.println("\tProblem at record " + e + " : " + problems.get(e)); - } - } - } - - /** - * validation failed - * - * @param e the exception - * @param count the current record count - */ - public static void validationFailed(Exception e, int count) { - System.err.println("VCF Validation failed, after parsing " + count + " entries."); - System.err.println("The reason given was: " + e.getMessage()); - e.printStackTrace(); - } - - /** print the usage information for the VCF validator */ - public static void printUsage() { - System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")"); - System.err.println("Usage:"); - System.err.println("vcfvalidator <-A> "); - System.err.println(""); - System.err.println("\t<-A>\tTell the validator to attempt to catch all the problems, and not stop at the first. Some may be too fatal to continue."); - System.err.println("\t\tThe vcf file. Required."); - System.err.println(""); - } - - public static void printHeader(String file) { - System.err.println("-------------------------------------------"); - System.err.println("VCF Validator v1.0\n"); - System.err.println("Run on file " + file + " at " + getDateTime()); - System.err.println("-------------------------------------------"); - } - - private static String getDateTime() { - DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); - Date date = new Date(); - return dateFormat.format(date); - } -} diff --git a/packages/VCFValidator.xml b/packages/VCFValidator.xml deleted file mode 100644 index e47944d4d..000000000 --- a/packages/VCFValidator.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - VCFValidator - - VCFValidator - org.broadinstitute.sting.utils.genotype.vcf.VCFValidator - -