some documentation changes, add a couple of simple checks
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1445 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
026e09ec07
commit
0386e110cf
|
|
@ -11,7 +11,9 @@ import java.util.TreeMap;
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class VCFValidator
|
* Class VCFValidator
|
||||||
* <p/>
|
* <p/>
|
||||||
* validate a VCF file
|
* This is the main class for providing a lightweight validation of a VCF file.
|
||||||
|
* It has two parameters, an optional -A flag meaning that you'd like to collect all
|
||||||
|
* the errors and present them at the end, and the VCF file itself (a required parameter).
|
||||||
*/
|
*/
|
||||||
public class VCFValidator {
|
public class VCFValidator {
|
||||||
|
|
||||||
|
|
@ -20,9 +22,9 @@ public class VCFValidator {
|
||||||
/**
|
/**
|
||||||
* about as simple as things come right now. We open the file, process all the entries in the file,
|
* about as simple as things come right now. We open the file, process all the entries in the file,
|
||||||
* and if no errors pop up in processing, well hey, looks good to us.
|
* and if no errors pop up in processing, well hey, looks good to us.
|
||||||
* TODO: add validation to individual records fields as they make sense
|
|
||||||
*
|
*
|
||||||
* @param args the vcf file is the only parameter
|
* @param args the vcf file is the only required parameter, with the optional -A indicating that errors
|
||||||
|
* should be held until the end of processing
|
||||||
*/
|
*/
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
boolean catchAll = false;
|
boolean catchAll = false;
|
||||||
|
|
@ -43,25 +45,36 @@ public class VCFValidator {
|
||||||
}
|
}
|
||||||
// count how many records we see
|
// count how many records we see
|
||||||
int recordCount = 0;
|
int recordCount = 0;
|
||||||
Map<Integer,Exception> problems = new TreeMap<Integer,Exception>();
|
Map<Integer, Exception> problems = new TreeMap<Integer, Exception>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// open up our reader
|
// open up our reader
|
||||||
VCFReader reader = new VCFReader(vcfFile);
|
VCFReader reader = new VCFReader(vcfFile);
|
||||||
|
|
||||||
|
// the number of samples should be set in the header and consistent over all records
|
||||||
|
final int sampleCount = reader.getHeader().getGenotypeSamples().size();
|
||||||
while (reader.hasNext()) {
|
while (reader.hasNext()) {
|
||||||
try {
|
try {
|
||||||
recordCount++;
|
recordCount++;
|
||||||
VCFRecord rec = reader.next();
|
VCFRecord rec = reader.next();
|
||||||
// if the header indicates we have genotyping data, try to extract it for all samples
|
// if the header indicates we have genotyping data, try to extract it for all samples
|
||||||
if (reader.getHeader().hasGenotypingData()) {
|
if (reader.getHeader().hasGenotypingData()) {
|
||||||
|
int sampleCounter = 0;
|
||||||
for (VCFGenotypeRecord genorec : rec.getVCFGenotypeRecords()) {
|
for (VCFGenotypeRecord genorec : rec.getVCFGenotypeRecords()) {
|
||||||
// just cycle through them, more checks go here
|
sampleCounter++;
|
||||||
|
/**
|
||||||
|
* just cycle through the records right now; any additional checks for
|
||||||
|
* the records should go in this block.
|
||||||
|
**/
|
||||||
}
|
}
|
||||||
|
if (sampleCounter != sampleCount)
|
||||||
|
throw new RuntimeException("Record " + recordCount + " does not have the required number " +
|
||||||
|
"of records (" + sampleCounter + " in the record, " + sampleCount + " in the header)");
|
||||||
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
if (catchAll)
|
if (catchAll)
|
||||||
problems.put(recordCount,e);
|
problems.put(recordCount, e);
|
||||||
else {
|
else {
|
||||||
validationFailed(e, recordCount);
|
validationFailed(e, recordCount);
|
||||||
return;
|
return;
|
||||||
|
|
@ -70,15 +83,15 @@ public class VCFValidator {
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
if (catchAll)
|
if (catchAll)
|
||||||
problems.put(new Integer(0),e);
|
problems.put(new Integer(0), e);
|
||||||
else
|
else
|
||||||
validationFailed(e, recordCount);
|
validationFailed(e, recordCount);
|
||||||
}
|
}
|
||||||
System.err.println("Viewed " + recordCount + " VCF record entries.");
|
System.err.println("Viewed " + recordCount + " VCF record entries.");
|
||||||
if (problems.size() > 0) {
|
if (problems.size() > 0) {
|
||||||
System.err.println("Encountered " + problems.size() + " number of issues. (record zero indicates a header problem)");
|
System.err.println("Encountered " + problems.size() + " number of issues. (record zero indicates a header problem)");
|
||||||
for (Integer e : problems.keySet()) {
|
for (Integer e : problems.keySet()) {
|
||||||
System.err.println("\tProblem at record " + e + " : " + problems.get(e));
|
System.err.println("\tProblem at record " + e + " : " + problems.get(e));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -95,7 +108,9 @@ public class VCFValidator {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** print the usage information for the VCF validator */
|
/**
|
||||||
|
* print the usage information for the VCF validator
|
||||||
|
*/
|
||||||
public static void printUsage() {
|
public static void printUsage() {
|
||||||
System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")");
|
System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")");
|
||||||
System.err.println("Usage:");
|
System.err.println("Usage:");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue