-B 1kg_ceu,VCF,CEU.vcf -B 1kg_yri,VCF,YRI.vcf system supported to allow 1KG % (like dbSNP%)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2632 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-19 21:33:13 +00:00
parent e936cbff1b
commit 9e0ae993c7
5 changed files with 84 additions and 35 deletions

View File

@ -64,13 +64,24 @@ public class RODRecordList<ROD extends ReferenceOrderedDatum> implements Iterabl
/**
 * Exposes the records of this list for iteration.
 *
 * @return the iterator obtained from the underlying {@code records} collection
 */
public Iterator<ROD> iterator() {
    return records.iterator();
}
/**
 * Removes every record from this list by clearing the underlying {@code records} collection.
 */
public void clear() {
    records.clear();
}
/**
 * Reports whether this list currently holds no records.
 *
 * @return the result of {@code records.isEmpty()}
 */
public boolean isEmpty() {
    return records.isEmpty();
}
public void add(ROD record) {
/**
 * Appends a record, requiring its name to match this track's name.
 * Equivalent to {@code add(record, false)}.
 *
 * @param record the record to append
 */
public void add(ROD record) {
    add(record, false);
}
/**
 * Appends a record to this list, optionally enforcing that its name matches the track name.
 * <p>
 * A non-null record whose name differs from {@code name} is rejected with a
 * {@code StingException} unless {@code allowNameMismatch} is true. A null record
 * bypasses the name check and is still appended.
 *
 * @param record            the record to append; may be null
 * @param allowNameMismatch when true, the record's name is not required to equal this track's name
 */
public void add(ROD record, boolean allowNameMismatch) {
if ( record != null ) {
if ( ! name.equals(record.getName() ) )
if ( ! allowNameMismatch && ! name.equals(record.getName() ) )
throw new StingException("Attempt to add ROD with non-matching name "+record.getName()+" to the track "+name);
}
// Reached for null records as well: the append is unconditional.
records.add(record);
}
/**
 * Appends every record of another list, requiring each record's name to match this track's name.
 * Equivalent to {@code add(records, false)}.
 *
 * @param records the list whose records are appended
 */
public void add(RODRecordList<ROD> records) {
    add(records, false);
}
/**
 * Appends every record of another list to this one, one record at a time.
 *
 * @param records           the list whose records are appended (shadows the field of the same name)
 * @param allowNameMismatch forwarded unchanged to {@code add(ROD, boolean)} for each record
 */
public void add(RODRecordList<ROD> records, boolean allowNameMismatch) {
    final Iterator<ROD> source = records.iterator();
    while ( source.hasNext() ) {
        add(source.next(), allowNameMismatch);
    }
}
/**
 * Reports how many records this list holds.
 *
 * @return the result of {@code records.size()}
 */
public int size() {
    return records.size();
}
/**
@ -104,7 +115,7 @@ public class RODRecordList<ROD extends ReferenceOrderedDatum> implements Iterabl
* <tt>0</tt>, or <tt>1</tt> according to whether the value of
* <i>expression</i> is negative, zero or positive.
*
* @param o the object to be compared.
* @param that the object to be compared.
* @return a negative integer, zero, or a positive integer as this object
* is less than, equal to, or greater than the specified object.
* @throws ClassCastException if the specified object's type prevents it

View File

@ -60,20 +60,41 @@ public class RefMetaDataTracker {
* @param defaultValue
* @return
*/
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue) {
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue, boolean requireExactMatch) {
//logger.debug(String.format("Lookup %s%n", name));
final String luName = canonicalName(name);
if ( map.containsKey(luName) )
return map.get(luName);
else {
if ( defaultValue == null )
return null;
return new RODRecordList<ReferenceOrderedDatum>(defaultValue.getName(),
Collections.singletonList(defaultValue),
defaultValue.getLocation());
final String luName = canonicalName(name);
RODRecordList<ReferenceOrderedDatum> trackData = null;
if ( requireExactMatch ) {
if ( map.containsKey(luName) )
trackData = map.get(luName);
} else {
for ( Map.Entry<String, RODRecordList<ReferenceOrderedDatum>> datum : map.entrySet() ) {
final String rodName = datum.getKey();
if ( rodName.startsWith(luName) ) {
if ( trackData == null ) trackData = new RODRecordList<ReferenceOrderedDatum>(name);
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
trackData.add(datum.getValue(), true);
}
}
}
if ( trackData != null )
return trackData;
else if ( defaultValue == null )
return null;
else
return new RODRecordList<ReferenceOrderedDatum>(defaultValue.getName(),
Collections.singletonList(defaultValue),
defaultValue.getLocation());
}
/**
 * Looks up the track bound under {@code name}, requiring an exact match.
 * Delegates to the three-argument overload with {@code requireExactMatch} set to true.
 *
 * @param name         the track name to look up
 * @param defaultValue passed through unchanged to the three-argument overload
 * @return whatever the three-argument overload returns for an exact-match lookup
 */
public RODRecordList<ReferenceOrderedDatum> getTrackData(final String name, ReferenceOrderedDatum defaultValue) {
    final boolean exactMatchOnly = true;
    return getTrackData(name, defaultValue, exactMatchOnly);
}
/**
* @see this.lookup
* @param name

View File

@ -58,11 +58,26 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return nConcordant() / (1.0 * nSNPsAtdbSNPs());
}
/**
 * Finds the first record in the given list that is a SNP.
 * <p>
 * Elements that are null or that do not implement {@code Variation} are skipped
 * rather than cast blindly, so a list holding such records no longer fails with
 * a NullPointerException or ClassCastException.
 *
 * @param dbsnpList the records to scan; may be null
 * @return the first element that is a {@code Variation} reporting {@code isSNP()},
 *         or null when the list is null or contains no such element
 */
public static Variation getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {
    if (dbsnpList == null)
        return null;

    for (ReferenceOrderedDatum d : dbsnpList) {
        // instanceof is false for null, so this guards both null and foreign record types.
        if (d instanceof Variation) {
            final Variation candidate = (Variation) d;
            if (candidate.isSNP())
                return candidate;
        }
    }
    return null;
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
rodDbSNP dbSNP = rodDbSNP.getFirstRealSNP(tracker.getTrackData( dbName, null ));
Variation dbSNP = getFirstRealSNP(tracker.getTrackData( dbName, null, false ));
String result = null;
if (dbSNP != null) nDBSNPs++; // count the number of real dbSNP events
if ( dbSNP != null ) nDBSNPs++; // count the number of real dbSNP events
if ( eval != null && eval.isSNP() ) { // ignore indels right now
nEvalObs++; // count the number of eval snps we've seen
@ -76,13 +91,13 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
}
}
if ( dbSNP != null && dbSNP.isSNP() ) {
BrokenRODSimulator.attach("dbSNP");
rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker);
if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) {
System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp);
}
}
// if ( dbSNP != null && dbSNP.isSNP() ) {
// BrokenRODSimulator.attach("dbSNP");
// rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker);
// if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) {
// System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp);
// }
// }
return result;
}
@ -97,7 +112,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
s.add(String.format("n_concordant %d", nConcordant()));
s.add(String.format("n_novel_sites %d", nNovelSites()));
s.add(String.format("dbsnp_rate %.2f # percent eval snps at dbsnp snps", 100 * dbSNPRate()));
s.add(String.format("%s_rate %.2f # percent eval snps at dbsnp snps", dbName.toLowerCase(), 100 * dbSNPRate()));
s.add(String.format("concordance_rate %.2f", 100 * concordanceRate()));
return s;

View File

@ -71,6 +71,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
String analysisFilenameBase = null;
final String knownSNPDBName = "dbSNP";
final String One1KGSNPNames = "1kg";
final String genotypeChipName = "hapmap-chip";
HashMap<ANALYSIS_TYPE, ArrayList<VariantAnalysis>> analysisSets;
@ -164,6 +165,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
analyses.add(new VariantCounter());
analyses.add(new VariantDBCoverage(knownSNPDBName));
analyses.add(new VariantDBCoverage(One1KGSNPNames));
if ( samplesFile != null ) {
//if ( numPeopleInPool < 1 )

View File

@ -18,8 +18,8 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantROD() {
HashMap<String, String> md5 = new HashMap<String, String>();
md5.put("", "d6b8c2d6c37d42d1ca2288799a8bd8e4");
md5.put("-A", "0294b2e3915e88dfe2547e9db64ed1b3");
md5.put("", "4b5a43cb4fa1b82e1bb361632754eaa1");
md5.put("-A", "7dd6aa5379ea5422fe7db4e79baad1f7");
/**
* the above MD5 was calculated from running the following command:
@ -52,7 +52,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODConfSix() {
List<String> md5 = new ArrayList<String>();
md5.add("85cfefcac2dfb06545792605a3043a52");
md5.add("cfcc2a8e0587cb288f0472118a9bdc1d");
/**
* the above MD5 was calculated from running the following command:
@ -84,7 +84,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODOutputViolations() {
List<String> md5 = new ArrayList<String>();
md5.add("e24732ffd95a78385a2c6986d1d3a359");
md5.add("ce708258676bbd4e71e8714828e7d695");
/**
* the above MD5 was calculated from running the following command:
@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalGenotypeROD() {
List<String> md5 = new ArrayList<String>();
md5.add("010d1c7ce773b39f3de1355eb9682e4d");
md5.add("4df6a9c9d09a7bf034f29db24fb7a923");
/**
* the above MD5 was calculated after running the following command:
*
@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalMarksGenotypingExample() {
List<String> md5 = new ArrayList<String>();
md5.add("7d5a98c01051f96a684a383786da3d76");
md5.add("f79448f380cfb56be3585547b794a48c");
/**
* Run with the following commands:
*
@ -177,7 +177,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalRuntimeWithLotsOfIntervals() {
List<String> md5 = new ArrayList<String>();
md5.add("d11ea079fc1835514d392056a2c2a28d");
md5.add("4d01485ed2d3cb5edbff24cc0c05b205");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " +
"-B eval,Variants," + validationDataLocation + "NA12878.pilot_3.all.geli.calls " +
@ -193,11 +193,11 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testVCFVariantEvals() {
HashMap<String, String> md5 = new HashMap<String, String>();
md5.put("", "3dda57ac7a9c8f3800726c9affb9d9bd");
md5.put("-A", "d985e61fd0d7fc34c9c1a553e2881c67");
md5.put("-A --includeFilteredRecords", "434c60986aa54c5fd07c22df1910ec44");
md5.put("-A --sampleName NA12878", "aff844b88f71824a6cd3cce553325b17");
md5.put("-A -vcfInfoSelector AF=0.50", "9ab9fa5d89cd6e3278d0d2b13cabbd51");
md5.put("", "9702cba801cf216324607c8ecb7871e4");
md5.put("-A", "e9e02eba2e3423bde18ddb0a33a50e34");
md5.put("-A --includeFilteredRecords", "f0cb51e2c7917e502ca510cc9a3382a9");
md5.put("-A --sampleName NA12878", "b936723b0de69ed2698ab2af4b70a1e8");
md5.put("-A -vcfInfoSelector AF=0.50", "a8299c6ba370bf9334327ec03e8885b7");
for ( Map.Entry<String, String> e : md5.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(