diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java index 249f87092..bce78234f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java @@ -478,11 +478,11 @@ class PlinkVariantInfo implements Comparable { if ( pieces.length < 2 ) throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)"); - String chrom = pieces[0]; + String chrom = pieces[0].trim(); if ( pieces[1].charAt(0) != 'p' ) throw new IllegalArgumentException("Variant name " + variantName + " does not adhere to required convention (...|c..._p...)"); - String pos = pieces[1].substring(1); + String pos = pieces[1].substring(1).trim(); loc = GenomeLocParser.parseGenomeLoc(chrom+":"+pos); if ( pieces.length > 2 && (pieces[2].startsWith("gI") || pieces[2].startsWith("gD")) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 767210974..3cc058e92 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -146,7 +146,7 @@ public class VariantContextAdaptors { private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, Allele refAllele) { - if ( vcf.isSNP() || vcf.isIndel() ) { + if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) { // add the reference allele if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) { System.out.printf("Excluding vcf record %s%n", vcf); @@ -165,7 +165,10 @@ public class VariantContextAdaptors { //System.out.printf("Excluding vcf record %s%n", vcf); return null; } - alleles.add(new Allele(alt, false)); + + Allele allele = new Allele(alt, false); + if ( ! allele.isNoCall() ) + alleles.add(allele); } Map genotypes = new HashMap(); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java new file mode 100755 index 000000000..93fb68ed2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java @@ -0,0 +1,102 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; + +import java.util.ArrayList; +import java.util.List; +import java.util.Arrays; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +public class ValidationRate extends VariantEvaluator { + class SiteStats { + long nPoly = 0, nMono = 0, nNoCall = 0; + + double polyPercent() { return rate(nPoly, nPoly + nMono + nNoCall); } + } + + private SiteStats validationStats = new SiteStats(); + private SiteStats evalOverlapAtMono = new SiteStats(); + private SiteStats evalOverlapAtPoly = new SiteStats(); + + public ValidationRate(VariantEval2Walker parent) { + // don't do anything + } + + public String getName() { + return "validationRate"; + } + + public int getComparisonOrder() { + return 2; // we need to see each eval track and each comp track + } + + public String toString() { + return getName() + ": " + summaryLine(); + } + + private String summaryLine() { + return String.format("%d %d %.2f %d %d %d %.2f %d %d %d %.2f", + validationStats.nMono, validationStats.nPoly, validationStats.polyPercent(), + evalOverlapAtMono.nMono, evalOverlapAtMono.nPoly, evalOverlapAtMono.nNoCall, evalOverlapAtMono.polyPercent(), + evalOverlapAtPoly.nMono, evalOverlapAtPoly.nPoly, evalOverlapAtPoly.nNoCall, evalOverlapAtPoly.polyPercent()); + } + + private static List HEADER = + Arrays.asList("n_mono_in_comp", "n_poly_in_comp", "percent_poly_in_comp", + "n_mono_calls_at_mono_sites", "n_poly_calls_at_mono_sites", "n_nocalls_at_mono_sites", "percent_mono_sites_called_poly", + "n_mono_calls_at_poly_sites", "n_poly_calls_at_poly_sites", "n_nocalls_at_poly_sites", "percent_poly_sites_called_poly"); + + // making it a table + public List getTableHeader() { + return HEADER; + } + + public boolean enabled() { return true; } + + public List> getTableRows() { + return Arrays.asList(Arrays.asList(summaryLine().split(" "))); + } + + public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + String interesting = null; + + if ( validation != null && validation.hasGenotypes() && validation.isNotFiltered() ) { + SiteStats overlap = null; + + if ( validation.isPolymorphic() ) { + validationStats.nPoly++; + overlap = evalOverlapAtPoly; + if ( eval == null || eval.isMonomorphic() ) + interesting = "ValidationStatus=FN"; + } + else { + validationStats.nMono++; + overlap = evalOverlapAtMono; + + if ( eval != null && eval.isPolymorphic() ) + interesting = "ValidationStatus=FP"; + } + + if ( eval == null ) + overlap.nNoCall++; + else if ( eval.isPolymorphic() ) + overlap.nPoly++; + else + overlap.nMono++; + } + + return interesting; // we don't capture any interesting sites + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java index ec1717457..8023a3413 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java @@ -123,7 +123,7 @@ public class VariantEval2Walker extends RodWalker { protected String outputVCF = null; /** Right now we will only be looking at SNPS */ - EnumSet ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP); + EnumSet ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION); @Argument(shortName="rsID", fullName="rsID", doc="If provided, list of rsID and build number for capping known snps by their build date", required=false) protected String rsIDFile = null; @@ -366,7 +366,7 @@ public class VariantEval2Walker extends RodWalker { for ( EvaluationContext group : contexts ) { VariantContext vc = vcs.get(group.evalTrackName); - //logger.debug(String.format("Updating %s of %s with variant", group.name, vc)); + //logger.debug(String.format("Updating %s with variant", vc)); Set evaluations = group.evaluations; boolean evalWantsVC = applyVCtoEvaluation(vc, vcs, group); List interestingReasons = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 7dead9ed1..b5b6dc7c1 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -163,27 +163,34 @@ public class GenomeLocParser { Matcher match = mPattern.matcher(str); try { - if (match.matches() && match.groupCount() == 4) { - if (match.group(1) != null) contig = match.group(1); - if (match.group(2) != null) start = parsePosition(match.group(2)); - if ((match.group(3) != null && match.group(3).equals("+")) || // chr:1+ - (match.group(3) == null && match.group(4) == null && match.group(2) == null)) // chr1 - stop = Integer.MAX_VALUE; - else if (match.group(3) != null && match.group(3).equals("-")) // chr1:1-1 - stop = parsePosition(match.group(4)); - else if (match.group(3) == null && match.group(4) == null) // chr1:1 - stop = start; - else { - bad = true; - } - } - } catch (Exception e) { + if (match.matches() && match.groupCount() == 4) { + if (match.group(1) != null) contig = match.group(1); + if (match.group(2) != null) start = parsePosition(match.group(2)); + if ((match.group(3) != null && match.group(3).equals("+")) || // chr:1+ + (match.group(3) == null && match.group(4) == null && match.group(2) == null)) // chr1 + stop = Integer.MAX_VALUE; + else if (match.group(3) != null && match.group(3).equals("-")) // chr1:1-1 + stop = parsePosition(match.group(4)); + else if (match.group(3) == null && match.group(4) == null) // chr1:1 + stop = start; + else { + bad = true; + } + } + } catch (Exception e) { bad = true; } - if (bad || start < 0 || stop < 0 || contig == null) - throw new StingException("Invalid Genome Location string: " + str); - + if (bad) + throw new StingException("Failed to parse Genome Location string: " + str); + if (start < 0) + throw new StingException("Invalid Genome Location start < 0: " + str + ' ' + start); + if (stop < 0) + throw new StingException("Invalid Genome Location stop < 0: " + str + ' ' + stop); + if (contig == null) + throw new StingException("Invalid Genome Location contig == null : " + str); + + if (!isContigValid(contig)) throw new StingException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference."); diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java index cdad0a0bb..159a6c6ff 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -20,8 +20,8 @@ public class VariantEval2IntegrationTest extends WalkerTest { @Test public void testVE2Simple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "d58a2a22e5fb3a3d8d90ba02de37f62b"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "03cddae2afbe0d1a8f5e3490aebc7c9c"); + expectations.put("-L 1:1-10,000,000", "d83605861576db9bc0d50d5c11b67a90"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "be922212c7cb8f8070158dab86949c4b"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -41,10 +41,10 @@ public class VariantEval2IntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "1606e285d9bd586dc6662b1ace0a3a0e"; // next two examples should be the same! + String eqMD5s = "f03913dacc5e9938a0aa00f1e5a031de"; // next two examples should be the same! expectations.put("", eqMD5s); expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "44773a96d1c5904a57e0e983836768e4"); + expectations.put(" -known comp_hapmap", "0f886e7042c3999c3d87f848e0b58eb8"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -62,7 +62,7 @@ public class VariantEval2IntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("1f2e04f8af061b7190758679a7840f12", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("8162dcf2539e8241fb27cba2055631bf", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVE2WriteVCF", spec); } }