diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java index 648468ccc..9c21b5fff 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java @@ -42,8 +42,11 @@ public class SequenomValidationConverter extends RodWalker { // max allowable indel size (based on ref window) private static final int MAX_INDEL_SIZE = 40; - // the VCF writer - private VCFWriter vcfWriter = null; + // sample names + private TreeSet sampleNames = null; + + // vcf records + private ArrayList records = new ArrayList(); // statistics private int numRecords = 0; @@ -83,18 +86,26 @@ public class SequenomValidationConverter extends RodWalker { if ( plinkRod == null ) return null; - if ( vcfWriter == null ) - initializeWriter(plinkRod); + if ( sampleNames == null ) + sampleNames = new TreeSet(plinkRod.getSampleNames()); return addVariantInformationToCall(ref, plinkRod); } - private void initializeWriter(PlinkRod plinkRod) { - vcfWriter = new VCFWriter(vcfFile); + public Integer reduce(VCFRecord call, Integer numVariants) { + if ( call != null ) { + numVariants++; + records.add(call); + } + return numVariants; + } + + public void onTraversalDone(Integer finalReduce) { + VCFWriter vcfWriter = new VCFWriter(vcfFile); // set up the info and filter headers Set hInfo = new HashSet(); - hInfo.add(new VCFHeaderLine("source", "PlinkToVCF")); + hInfo.add(new VCFHeaderLine("source", "SequenomValidationConverter")); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); hInfo.add(new VCFInfoHeaderLine("NoCallPct", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Percent of no-calls")); hInfo.add(new VCFInfoHeaderLine("HomRefPct", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Percent of homozygous reference genotypes")); @@ -107,28 +118,29 @@ public class SequenomValidationConverter extends RodWalker { hInfo.add(new VCFFilterHeaderLine("HighNoCallRate", "The validation no-call rate is too high")); hInfo.add(new VCFFilterHeaderLine("TooManyHomVars", "The validation homozygous variant rate is too high")); - VCFHeader header = new VCFHeader(hInfo, new TreeSet(plinkRod.getSampleNames())); - vcfWriter.writeHeader(header); - } - - public Integer reduce(VCFRecord call, Integer numVariants) { - if ( call != null ) { - numVariants++; - vcfWriter.addRecord(call); - } - return numVariants; - } - - public void onTraversalDone(Integer finalReduce) { - if ( vcfWriter != null ) - vcfWriter.close(); + // print out (and add to headers) the validation metrics + System.out.println(String.format("Total number of samples assayed:\t\t\t%d", sampleNames.size())); + hInfo.add(new VCFHeaderLine("ValidationMetrics_SamplesAssayed", String.format("%d", sampleNames.size()))); System.out.println(String.format("Total number of records processed:\t\t\t%d", numRecords)); + hInfo.add(new VCFHeaderLine("ValidationMetrics_RecordsProcessed", String.format("%d", numRecords))); System.out.println(String.format("Number of Hardy-Weinberg violations:\t\t\t%d (%d%%)", numHWViolations, 100*numHWViolations/numRecords)); + hInfo.add(new VCFHeaderLine("ValidationMetrics_HardyWeinbergViolations", String.format("\"%d (%d%%)\"", numHWViolations, 100*numHWViolations/numRecords))); System.out.println(String.format("Number of no-call violations:\t\t\t\t%d (%d%%)", numNoCallViolations, 100*numNoCallViolations/numRecords)); + hInfo.add(new VCFHeaderLine("ValidationMetrics_NoCallViolations", String.format("\"%d (%d%%)\"", numNoCallViolations, 100*numNoCallViolations/numRecords))); System.out.println(String.format("Number of homozygous variant violations:\t\t%d (%d%%)", numHomVarViolations, 100*numHomVarViolations/numRecords)); + hInfo.add(new VCFHeaderLine("ValidationMetrics_HomVarViolations", String.format("\"%d (%d%%)\"", numHomVarViolations, 100*numHomVarViolations/numRecords))); int goodRecords = numRecords - numHWViolations - numNoCallViolations - numHomVarViolations; System.out.println(String.format("Number of records passing all filters:\t\t\t%d (%d%%)", goodRecords, 100*goodRecords/numRecords)); - System.out.println(String.format("Number of passing records that validated as true:\t%d (%d%%)", numTrueVariants, 100*numTrueVariants/goodRecords)); + hInfo.add(new VCFHeaderLine("ValidationMetrics_RecordsPassingFilters", String.format("\"%d (%d%%)\"", goodRecords, 100*goodRecords/numRecords))); + System.out.println(String.format("Number of passing records that are polymorphic:\t\t%d (%d%%)", numTrueVariants, 100*numTrueVariants/goodRecords)); + hInfo.add(new VCFHeaderLine("ValidationMetrics_PolymorphicPassingRecords", String.format("\"%d (%d%%)\"", numTrueVariants, 100*numTrueVariants/goodRecords))); + + VCFHeader header = new VCFHeader(hInfo, sampleNames); + vcfWriter.writeHeader(header); + + for ( VCFRecord record : records ) + vcfWriter.addRecord(record); + vcfWriter.close(); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java index d4c73b987..5d4325375 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java @@ -11,7 +11,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest { String testPedFile = validationDataLocation + "Sequenom_Test_File.txt"; String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B input,Plink,"+testPedFile+" -vcf %s"; WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("02dcb683db503599efed0b76daa8fcba")); + Arrays.asList("d19f28fdbe3e731522a52c5329777a9f")); executeTest("Test SNPs", spec); } @@ -20,7 +20,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest { String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped"; String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B input,Plink,"+testPedFile+" -vcf %s"; WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("e72c0ab95b279a4d39cc14d40770a801")); + Arrays.asList("257fcd5e345f2853813e37b88fbc707c")); executeTest("Test Indels", spec); } }