-B 1kg_ceu,VCF,CEU.vcf -B 1kg_yri,VCF,YRI.vcf bindings are now supported, allowing a 1KG % to be reported (like the dbSNP %)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2632 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e936cbff1b
commit
9e0ae993c7
|
|
@ -64,13 +64,24 @@ public class RODRecordList<ROD extends ReferenceOrderedDatum> implements Iterabl
|
||||||
public Iterator<ROD> iterator() { return records.iterator() ; }
|
public Iterator<ROD> iterator() { return records.iterator() ; }
|
||||||
public void clear() { records.clear(); }
|
public void clear() { records.clear(); }
|
||||||
public boolean isEmpty() { return records.isEmpty(); }
|
public boolean isEmpty() { return records.isEmpty(); }
|
||||||
public void add(ROD record) {
|
|
||||||
|
public void add(ROD record) { add(record, false); }
|
||||||
|
|
||||||
|
public void add(ROD record, boolean allowNameMismatch) {
|
||||||
if ( record != null ) {
|
if ( record != null ) {
|
||||||
if ( ! name.equals(record.getName() ) )
|
if ( ! allowNameMismatch && ! name.equals(record.getName() ) )
|
||||||
throw new StingException("Attempt to add ROD with non-matching name "+record.getName()+" to the track "+name);
|
throw new StingException("Attempt to add ROD with non-matching name "+record.getName()+" to the track "+name);
|
||||||
}
|
}
|
||||||
records.add(record);
|
records.add(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void add(RODRecordList<ROD> records ) { add( records, false ); }
|
||||||
|
|
||||||
|
public void add(RODRecordList<ROD> records, boolean allowNameMismatch) {
|
||||||
|
for ( ROD record : records )
|
||||||
|
add(record, allowNameMismatch);
|
||||||
|
}
|
||||||
|
|
||||||
public int size() { return records.size() ; }
|
public int size() { return records.size() ; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -104,7 +115,7 @@ public class RODRecordList<ROD extends ReferenceOrderedDatum> implements Iterabl
|
||||||
* <tt>0</tt>, or <tt>1</tt> according to whether the value of
|
* <tt>0</tt>, or <tt>1</tt> according to whether the value of
|
||||||
* <i>expression</i> is negative, zero or positive.
|
* <i>expression</i> is negative, zero or positive.
|
||||||
*
|
*
|
||||||
* @param o the object to be compared.
|
* @param that the object to be compared.
|
||||||
* @return a negative integer, zero, or a positive integer as this object
|
* @return a negative integer, zero, or a positive integer as this object
|
||||||
* is less than, equal to, or greater than the specified object.
|
* is less than, equal to, or greater than the specified object.
|
||||||
* @throws ClassCastException if the specified object's type prevents it
|
* @throws ClassCastException if the specified object's type prevents it
|
||||||
|
|
|
||||||
|
|
@ -60,20 +60,41 @@ public class RefMetaDataTracker {
|
||||||
* @param defaultValue
|
* @param defaultValue
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue) {
|
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue, boolean requireExactMatch) {
|
||||||
//logger.debug(String.format("Lookup %s%n", name));
|
//logger.debug(String.format("Lookup %s%n", name));
|
||||||
final String luName = canonicalName(name);
|
|
||||||
if ( map.containsKey(luName) )
|
|
||||||
return map.get(luName);
|
|
||||||
else {
|
|
||||||
|
|
||||||
if ( defaultValue == null )
|
final String luName = canonicalName(name);
|
||||||
return null;
|
RODRecordList<ReferenceOrderedDatum> trackData = null;
|
||||||
return new RODRecordList<ReferenceOrderedDatum>(defaultValue.getName(),
|
|
||||||
Collections.singletonList(defaultValue),
|
if ( requireExactMatch ) {
|
||||||
defaultValue.getLocation());
|
if ( map.containsKey(luName) )
|
||||||
|
trackData = map.get(luName);
|
||||||
|
} else {
|
||||||
|
for ( Map.Entry<String, RODRecordList<ReferenceOrderedDatum>> datum : map.entrySet() ) {
|
||||||
|
final String rodName = datum.getKey();
|
||||||
|
if ( rodName.startsWith(luName) ) {
|
||||||
|
if ( trackData == null ) trackData = new RODRecordList<ReferenceOrderedDatum>(name);
|
||||||
|
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
|
||||||
|
trackData.add(datum.getValue(), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( trackData != null )
|
||||||
|
return trackData;
|
||||||
|
else if ( defaultValue == null )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
return new RODRecordList<ReferenceOrderedDatum>(defaultValue.getName(),
|
||||||
|
Collections.singletonList(defaultValue),
|
||||||
|
defaultValue.getLocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue) {
|
||||||
|
return getTrackData(name, defaultValue, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see this.lookup
|
* @see this.lookup
|
||||||
* @param name
|
* @param name
|
||||||
|
|
|
||||||
|
|
@ -58,11 +58,26 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
||||||
return nConcordant() / (1.0 * nSNPsAtdbSNPs());
|
return nConcordant() / (1.0 * nSNPsAtdbSNPs());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Variation getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {
|
||||||
|
if (dbsnpList == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
Variation dbsnp = null;
|
||||||
|
for (ReferenceOrderedDatum d : dbsnpList) {
|
||||||
|
if (((Variation) d).isSNP()) {
|
||||||
|
dbsnp = (Variation)d;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dbsnp;
|
||||||
|
}
|
||||||
|
|
||||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
rodDbSNP dbSNP = rodDbSNP.getFirstRealSNP(tracker.getTrackData( dbName, null ));
|
Variation dbSNP = getFirstRealSNP(tracker.getTrackData( dbName, null, false ));
|
||||||
String result = null;
|
String result = null;
|
||||||
|
|
||||||
if (dbSNP != null) nDBSNPs++; // count the number of real dbSNP events
|
if ( dbSNP != null ) nDBSNPs++; // count the number of real dbSNP events
|
||||||
if ( eval != null && eval.isSNP() ) { // ignore indels right now
|
if ( eval != null && eval.isSNP() ) { // ignore indels right now
|
||||||
nEvalObs++; // count the number of eval snps we've seen
|
nEvalObs++; // count the number of eval snps we've seen
|
||||||
|
|
||||||
|
|
@ -76,13 +91,13 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( dbSNP != null && dbSNP.isSNP() ) {
|
// if ( dbSNP != null && dbSNP.isSNP() ) {
|
||||||
BrokenRODSimulator.attach("dbSNP");
|
// BrokenRODSimulator.attach("dbSNP");
|
||||||
rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker);
|
// rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker);
|
||||||
if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) {
|
// if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) {
|
||||||
System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp);
|
// System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
@ -97,7 +112,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
||||||
s.add(String.format("n_concordant %d", nConcordant()));
|
s.add(String.format("n_concordant %d", nConcordant()));
|
||||||
s.add(String.format("n_novel_sites %d", nNovelSites()));
|
s.add(String.format("n_novel_sites %d", nNovelSites()));
|
||||||
|
|
||||||
s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * dbSNPRate()));
|
s.add(String.format("%s_rate %.2f # percent eval snps at dbsnp snps", dbName.toLowerCase(), 100 * dbSNPRate()));
|
||||||
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
|
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
String analysisFilenameBase = null;
|
String analysisFilenameBase = null;
|
||||||
|
|
||||||
final String knownSNPDBName = "dbSNP";
|
final String knownSNPDBName = "dbSNP";
|
||||||
|
final String One1KGSNPNames = "1kg";
|
||||||
final String genotypeChipName = "hapmap-chip";
|
final String genotypeChipName = "hapmap-chip";
|
||||||
|
|
||||||
HashMap<ANALYSIS_TYPE, ArrayList<VariantAnalysis>> analysisSets;
|
HashMap<ANALYSIS_TYPE, ArrayList<VariantAnalysis>> analysisSets;
|
||||||
|
|
@ -164,6 +165,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
analyses.add(new VariantCounter());
|
analyses.add(new VariantCounter());
|
||||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||||
|
analyses.add(new VariantDBCoverage(One1KGSNPNames));
|
||||||
|
|
||||||
if ( samplesFile != null ) {
|
if ( samplesFile != null ) {
|
||||||
//if ( numPeopleInPool < 1 )
|
//if ( numPeopleInPool < 1 )
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,8 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEvalVariantROD() {
|
public void testEvalVariantROD() {
|
||||||
HashMap<String, String> md5 = new HashMap<String, String>();
|
HashMap<String, String> md5 = new HashMap<String, String>();
|
||||||
md5.put("", "d6b8c2d6c37d42d1ca2288799a8bd8e4");
|
md5.put("", "4b5a43cb4fa1b82e1bb361632754eaa1");
|
||||||
md5.put("-A", "0294b2e3915e88dfe2547e9db64ed1b3");
|
md5.put("-A", "7dd6aa5379ea5422fe7db4e79baad1f7");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the above MD5 was calculated from running the following command:
|
* the above MD5 was calculated from running the following command:
|
||||||
|
|
@ -52,7 +52,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEvalVariantRODConfSix() {
|
public void testEvalVariantRODConfSix() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("85cfefcac2dfb06545792605a3043a52");
|
md5.add("cfcc2a8e0587cb288f0472118a9bdc1d");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the above MD5 was calculated from running the following command:
|
* the above MD5 was calculated from running the following command:
|
||||||
|
|
@ -84,7 +84,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEvalVariantRODOutputViolations() {
|
public void testEvalVariantRODOutputViolations() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("e24732ffd95a78385a2c6986d1d3a359");
|
md5.add("ce708258676bbd4e71e8714828e7d695");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the above MD5 was calculated from running the following command:
|
* the above MD5 was calculated from running the following command:
|
||||||
|
|
@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEvalGenotypeROD() {
|
public void testEvalGenotypeROD() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("010d1c7ce773b39f3de1355eb9682e4d");
|
md5.add("4df6a9c9d09a7bf034f29db24fb7a923");
|
||||||
/**
|
/**
|
||||||
* the above MD5 was calculated after running the following command:
|
* the above MD5 was calculated after running the following command:
|
||||||
*
|
*
|
||||||
|
|
@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEvalMarksGenotypingExample() {
|
public void testEvalMarksGenotypingExample() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("7d5a98c01051f96a684a383786da3d76");
|
md5.add("f79448f380cfb56be3585547b794a48c");
|
||||||
/**
|
/**
|
||||||
* Run with the following commands:
|
* Run with the following commands:
|
||||||
*
|
*
|
||||||
|
|
@ -177,7 +177,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEvalRuntimeWithLotsOfIntervals() {
|
public void testEvalRuntimeWithLotsOfIntervals() {
|
||||||
List<String> md5 = new ArrayList<String>();
|
List<String> md5 = new ArrayList<String>();
|
||||||
md5.add("d11ea079fc1835514d392056a2c2a28d");
|
md5.add("4d01485ed2d3cb5edbff24cc0c05b205");
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " +
|
"-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " +
|
||||||
"-B eval,Variants," + validationDataLocation + "NA12878.pilot_3.all.geli.calls " +
|
"-B eval,Variants," + validationDataLocation + "NA12878.pilot_3.all.geli.calls " +
|
||||||
|
|
@ -193,11 +193,11 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testVCFVariantEvals() {
|
public void testVCFVariantEvals() {
|
||||||
HashMap<String, String> md5 = new HashMap<String, String>();
|
HashMap<String, String> md5 = new HashMap<String, String>();
|
||||||
md5.put("", "3dda57ac7a9c8f3800726c9affb9d9bd");
|
md5.put("", "9702cba801cf216324607c8ecb7871e4");
|
||||||
md5.put("-A", "d985e61fd0d7fc34c9c1a553e2881c67");
|
md5.put("-A", "e9e02eba2e3423bde18ddb0a33a50e34");
|
||||||
md5.put("-A --includeFilteredRecords", "434c60986aa54c5fd07c22df1910ec44");
|
md5.put("-A --includeFilteredRecords", "f0cb51e2c7917e502ca510cc9a3382a9");
|
||||||
md5.put("-A --sampleName NA12878", "aff844b88f71824a6cd3cce553325b17");
|
md5.put("-A --sampleName NA12878", "b936723b0de69ed2698ab2af4b70a1e8");
|
||||||
md5.put("-A -vcfInfoSelector AF=0.50", "9ab9fa5d89cd6e3278d0d2b13cabbd51");
|
md5.put("-A -vcfInfoSelector AF=0.50", "a8299c6ba370bf9334327ec03e8885b7");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> e : md5.entrySet() ) {
|
for ( Map.Entry<String, String> e : md5.entrySet() ) {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue