diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java index 5c19f587b..907be2226 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordList.java @@ -64,13 +64,24 @@ public class RODRecordList implements Iterabl public Iterator iterator() { return records.iterator() ; } public void clear() { records.clear(); } public boolean isEmpty() { return records.isEmpty(); } - public void add(ROD record) { + + public void add(ROD record) { add(record, false); } + + public void add(ROD record, boolean allowNameMismatch) { if ( record != null ) { - if ( ! name.equals(record.getName() ) ) + if ( ! allowNameMismatch && ! name.equals(record.getName() ) ) throw new StingException("Attempt to add ROD with non-matching name "+record.getName()+" to the track "+name); } records.add(record); } + + public void add(RODRecordList records ) { add( records, false ); } + + public void add(RODRecordList records, boolean allowNameMismatch) { + for ( ROD record : records ) + add(record, allowNameMismatch); + } + public int size() { return records.size() ; } /** @@ -104,7 +115,7 @@ public class RODRecordList implements Iterabl * 0, or 1 according to whether the value of * expression is negative, zero or positive. * - * @param o the object to be compared. + * @param that the object to be compared. * @return a negative integer, zero, or a positive integer as this object * is less than, equal to, or greater than the specified object. * @throws ClassCastException if the specified object's type prevents it diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 715ef82e5..9ca54741a 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -60,20 +60,41 @@ public class RefMetaDataTracker { * @param defaultValue * @return */ - public RODRecordList getTrackData(final String name, ReferenceOrderedDatum defaultValue) { + public RODRecordList getTrackData(final String name, ReferenceOrderedDatum defaultValue, boolean requireExactMatch) { //logger.debug(String.format("Lookup %s%n", name)); - final String luName = canonicalName(name); - if ( map.containsKey(luName) ) - return map.get(luName); - else { - if ( defaultValue == null ) - return null; - return new RODRecordList(defaultValue.getName(), - Collections.singletonList(defaultValue), - defaultValue.getLocation()); + final String luName = canonicalName(name); + RODRecordList trackData = null; + + if ( requireExactMatch ) { + if ( map.containsKey(luName) ) + trackData = map.get(luName); + } else { + for ( Map.Entry> datum : map.entrySet() ) { + final String rodName = datum.getKey(); + if ( rodName.startsWith(luName) ) { + if ( trackData == null ) trackData = new RODRecordList(name); + //System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation()); + trackData.add(datum.getValue(), true); + } + } } + + if ( trackData != null ) + return trackData; + else if ( defaultValue == null ) + return null; + else + return new RODRecordList(defaultValue.getName(), + Collections.singletonList(defaultValue), + defaultValue.getLocation()); } + + public RODRecordList getTrackData(final String name, ReferenceOrderedDatum defaultValue) { + return getTrackData(name, defaultValue, true); + } + + /** * @see this.lookup * @param name diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java index 3aa380205..b292ce0b7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantDBCoverage.java @@ -58,11 +58,26 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA return nConcordant() / (1.0 * nSNPsAtdbSNPs()); } + public static Variation getFirstRealSNP(RODRecordList dbsnpList) { + if (dbsnpList == null) + return null; + + Variation dbsnp = null; + for (ReferenceOrderedDatum d : dbsnpList) { + if (((Variation) d).isSNP()) { + dbsnp = (Variation)d; + break; + } + } + + return dbsnp; + } + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - rodDbSNP dbSNP = rodDbSNP.getFirstRealSNP(tracker.getTrackData( dbName, null )); + Variation dbSNP = getFirstRealSNP(tracker.getTrackData( dbName, null, false )); String result = null; - if (dbSNP != null) nDBSNPs++; // count the number of real dbSNP events + if ( dbSNP != null ) nDBSNPs++; // count the number of real dbSNP events if ( eval != null && eval.isSNP() ) { // ignore indels right now nEvalObs++; // count the number of eval snps we've seen @@ -76,13 +91,13 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA } } - if ( dbSNP != null && dbSNP.isSNP() ) { - BrokenRODSimulator.attach("dbSNP"); - rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker); - if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) { - System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp); - } - } +// if ( dbSNP != null && dbSNP.isSNP() ) { +// BrokenRODSimulator.attach("dbSNP"); +// rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker); +// if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) { +// System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp); +// } +// } return result; } @@ -97,7 +112,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA s.add(String.format("n_concordant %d", nConcordant())); s.add(String.format("n_novel_sites %d", nNovelSites())); - s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * dbSNPRate())); + s.add(String.format("%s_rate %.2f # percent eval snps at dbsnp snps", dbName.toLowerCase(), 100 * dbSNPRate())); s.add(String.format("concordance_rate %.2f", 100 * concordanceRate())); return s; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 66d19b2f5..7062c6e84 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -71,6 +71,7 @@ public class VariantEvalWalker extends RodWalker { String analysisFilenameBase = null; final String knownSNPDBName = "dbSNP"; + final String One1KGSNPNames = "1kg"; final String genotypeChipName = "hapmap-chip"; HashMap> analysisSets; @@ -164,6 +165,7 @@ public class VariantEvalWalker extends RodWalker { analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); + analyses.add(new VariantDBCoverage(One1KGSNPNames)); if ( samplesFile != null ) { //if ( numPeopleInPool < 1 ) diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java index 4d77f6539..b3be55ce8 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java @@ -18,8 +18,8 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantROD() { HashMap md5 = new HashMap(); - md5.put("", "d6b8c2d6c37d42d1ca2288799a8bd8e4"); - md5.put("-A", "0294b2e3915e88dfe2547e9db64ed1b3"); + md5.put("", "4b5a43cb4fa1b82e1bb361632754eaa1"); + md5.put("-A", "7dd6aa5379ea5422fe7db4e79baad1f7"); /** * the above MD5 was calculated from running the following command: @@ -52,7 +52,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODConfSix() { List md5 = new ArrayList(); - md5.add("85cfefcac2dfb06545792605a3043a52"); + md5.add("cfcc2a8e0587cb288f0472118a9bdc1d"); /** * the above MD5 was calculated from running the following command: @@ -84,7 +84,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalVariantRODOutputViolations() { List md5 = new ArrayList(); - md5.add("e24732ffd95a78385a2c6986d1d3a359"); + md5.add("ce708258676bbd4e71e8714828e7d695"); /** * the above MD5 was calculated from running the following command: @@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalGenotypeROD() { List md5 = new ArrayList(); - md5.add("010d1c7ce773b39f3de1355eb9682e4d"); + md5.add("4df6a9c9d09a7bf034f29db24fb7a923"); /** * the above MD5 was calculated after running the following command: * @@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalMarksGenotypingExample() { List md5 = new ArrayList(); - md5.add("7d5a98c01051f96a684a383786da3d76"); + md5.add("f79448f380cfb56be3585547b794a48c"); /** * Run with the following commands: * @@ -177,7 +177,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalRuntimeWithLotsOfIntervals() { List md5 = new ArrayList(); - md5.add("d11ea079fc1835514d392056a2c2a28d"); + md5.add("4d01485ed2d3cb5edbff24cc0c05b205"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " + "-B eval,Variants," + validationDataLocation + "NA12878.pilot_3.all.geli.calls " + @@ -193,11 +193,11 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testVCFVariantEvals() { HashMap md5 = new HashMap(); - md5.put("", "3dda57ac7a9c8f3800726c9affb9d9bd"); - md5.put("-A", "d985e61fd0d7fc34c9c1a553e2881c67"); - md5.put("-A --includeFilteredRecords", "434c60986aa54c5fd07c22df1910ec44"); - md5.put("-A --sampleName NA12878", "aff844b88f71824a6cd3cce553325b17"); - md5.put("-A -vcfInfoSelector AF=0.50", "9ab9fa5d89cd6e3278d0d2b13cabbd51"); + md5.put("", "9702cba801cf216324607c8ecb7871e4"); + md5.put("-A", "e9e02eba2e3423bde18ddb0a33a50e34"); + md5.put("-A --includeFilteredRecords", "f0cb51e2c7917e502ca510cc9a3382a9"); + md5.put("-A --sampleName NA12878", "b936723b0de69ed2698ab2af4b70a1e8"); + md5.put("-A -vcfInfoSelector AF=0.50", "a8299c6ba370bf9334327ec03e8885b7"); for ( Map.Entry e : md5.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(