Made the VCF validator a simple rod walker instead of having it be in a separate package.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2588 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b19bb19f3d
commit
2a116bb5d6
|
|
@ -0,0 +1,47 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A light-weight validator for a VCF file.
|
||||||
|
*/
|
||||||
|
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= RodVCF.class))
|
||||||
|
public class VCFValidator extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* It's about as simple as things come right now. We let the rod system process all of the
|
||||||
|
* entries in the file, and if no errors pop up in processing, then it validates!
|
||||||
|
*/
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( tracker != null ) {
|
||||||
|
RODRecordList<ReferenceOrderedDatum> rodlist = tracker.getTrackData("vcf", null);
|
||||||
|
if ( rodlist != null ) {
|
||||||
|
RodVCF rod = (RodVCF)rodlist.getRecords().get(0);
|
||||||
|
if ( (rod.isSNP() || rod.isReference()) && rod.getReference().charAt(0) != ref.getBase() )
|
||||||
|
throw new StingException("The reference base (" + ref.getBase() + ") does not match the base from the VCF record (" + rod.getReference() + ")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return sum + value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
out.println("The input file is a valid VCF");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
|
||||||
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.text.DateFormat;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
* <p/>
|
|
||||||
* Class VCFValidator
|
|
||||||
* <p/>
|
|
||||||
* This is the main class for providing a light weight validation of a VCF file.
|
|
||||||
* It has two parameters, an optional -A flag meaning that you'd like to collect all
|
|
||||||
* the errors and present them at the end, and the VCF file itself (a required parameter).
|
|
||||||
*/
|
|
||||||
public class VCFValidator {
|
|
||||||
|
|
||||||
private static final String VCF_VERSION = "VCRv3.2";
|
|
||||||
|
|
||||||
/**
|
|
||||||
* about as simple as things come right now. We open the file, process all the entries in the file,
|
|
||||||
* and if no errors pop up in processing, well hey, looks good to us.
|
|
||||||
*
|
|
||||||
* @param args the vcf file is the only required parameter, with the optional -A indicating that errors
|
|
||||||
* should be held until the end of processing
|
|
||||||
*/
|
|
||||||
public static void main(String[] args) {
|
|
||||||
boolean catchAll = false;
|
|
||||||
|
|
||||||
if (args.length == 2 && args[0].equals("-A"))
|
|
||||||
catchAll = true;
|
|
||||||
else if (args.length == 1)
|
|
||||||
catchAll = false;
|
|
||||||
else {
|
|
||||||
printUsage();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
printHeader(args[(catchAll) ? 1 : 0]);
|
|
||||||
File vcfFile = new File(args[(catchAll) ? 1 : 0]);
|
|
||||||
if (!vcfFile.exists()) {
|
|
||||||
System.err.println("Specified VCF file doesn't exist, please check the input file\n");
|
|
||||||
printUsage();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// count hom many records we've see
|
|
||||||
int recordCount = 0;
|
|
||||||
Map<Integer, Exception> problems = new TreeMap<Integer, Exception>();
|
|
||||||
|
|
||||||
try {
|
|
||||||
// open up our reader
|
|
||||||
VCFReader reader = new VCFReader(vcfFile);
|
|
||||||
|
|
||||||
// the number of samples should be set in the header and consistant over all records
|
|
||||||
final int sampleCount = reader.getHeader().getGenotypeSamples().size();
|
|
||||||
boolean keepGoing = true;
|
|
||||||
while (keepGoing) {
|
|
||||||
try {
|
|
||||||
recordCount++;
|
|
||||||
keepGoing = reader.hasNext();
|
|
||||||
if (keepGoing) {
|
|
||||||
VCFRecord rec = reader.next();
|
|
||||||
// if the header indicates we have genotyping data, try to extract it for all samples
|
|
||||||
if (reader.getHeader().hasGenotypingData()) {
|
|
||||||
int sampleCounter = 0;
|
|
||||||
for (VCFGenotypeRecord genorec : rec.getVCFGenotypeRecords()) {
|
|
||||||
sampleCounter++;
|
|
||||||
/**
|
|
||||||
* just cycle through the records right now; any additional checks for
|
|
||||||
* the records should go in this block.
|
|
||||||
**/
|
|
||||||
}
|
|
||||||
if (sampleCounter != sampleCount)
|
|
||||||
throw new RuntimeException("Record " + recordCount + " does not have the required number " +
|
|
||||||
"of records (" + sampleCounter + " in the record, " + sampleCount + " in the header)");
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
if (catchAll)
|
|
||||||
problems.put(recordCount, e);
|
|
||||||
else {
|
|
||||||
validationFailed(e, recordCount);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
if (catchAll) {
|
|
||||||
problems.put(new Integer(0), e);
|
|
||||||
e.printStackTrace();
|
|
||||||
} else
|
|
||||||
validationFailed(e, recordCount);
|
|
||||||
}
|
|
||||||
System.err.println("Viewed " + recordCount + " VCF record entries.");
|
|
||||||
if (problems.size() > 0) {
|
|
||||||
System.err.println("Encountered " + problems.size() + " number of issues. (record zero indicates a header problem)");
|
|
||||||
for (Integer e : problems.keySet()) {
|
|
||||||
System.err.println("\tProblem at record " + e + " : " + problems.get(e));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* validation failed
|
|
||||||
*
|
|
||||||
* @param e the exception
|
|
||||||
* @param count the current record count
|
|
||||||
*/
|
|
||||||
public static void validationFailed(Exception e, int count) {
|
|
||||||
System.err.println("VCF Validation failed, after parsing " + count + " entries.");
|
|
||||||
System.err.println("The reason given was: " + e.getMessage());
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** print the usage information for the VCF validator */
|
|
||||||
public static void printUsage() {
|
|
||||||
System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")");
|
|
||||||
System.err.println("Usage:");
|
|
||||||
System.err.println("vcfvalidator <-A> <file.vcf>");
|
|
||||||
System.err.println("");
|
|
||||||
System.err.println("\t<-A>\tTell the validator to attempt to catch all the problems, and not stop at the first. Some may be too fatal to continue.");
|
|
||||||
System.err.println("\t<file.vcf>\tThe vcf file. Required.");
|
|
||||||
System.err.println("");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void printHeader(String file) {
|
|
||||||
System.err.println("-------------------------------------------");
|
|
||||||
System.err.println("VCF Validator v1.0\n");
|
|
||||||
System.err.println("Run on file " + file + " at " + getDateTime());
|
|
||||||
System.err.println("-------------------------------------------");
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String getDateTime() {
|
|
||||||
DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
|
|
||||||
Date date = new Date();
|
|
||||||
return dateFormat.format(date);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,8 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!-- Package descriptor for the stand-alone VCF validator: declares a package named
     VCFValidator with a single executable of the same name, whose entry point is
     org.broadinstitute.sting.utils.genotype.vcf.VCFValidator. -->
<package>
|
|
||||||
<name>VCFValidator</name>
|
|
||||||
<executable>
|
|
||||||
<name>VCFValidator</name>
|
|
||||||
<main-class>org.broadinstitute.sting.utils.genotype.vcf.VCFValidator</main-class>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
Loading…
Reference in New Issue