Added a check to ensure the eval track variation is bi-allelic. Also changed some string constants over to enums. For some reason my check-ins from home wouldn't work last night, so these are the actual changes for 1884.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1886 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-20 14:15:33 +00:00
parent 449a6ba75a
commit 4be6bb8e92
1 changed files with 72 additions and 50 deletions

View File

@ -63,21 +63,33 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
final String knownSNPDBName = "dbSNP"; final String knownSNPDBName = "dbSNP";
final String genotypeChipName = "hapmap-chip"; final String genotypeChipName = "hapmap-chip";
HashMap<String, ArrayList<VariantAnalysis>> analysisSets; HashMap<ANALYSIS_TYPE, ArrayList<VariantAnalysis>> analysisSets;
PrintStream perLocusStream = null; PrintStream perLocusStream = null;
long nMappedSites = 0; long nMappedSites = 0;
final String ALL_SNPS = "all"; // the types of analysis we support, and the string tags we associate with the enumerated value
final String SINGLETON_SNPS = "singletons"; enum ANALYSIS_TYPE {
final String TWOHIT_SNPS = "2plus_hit"; ALL_SNPS("all"), SINGLETON_SNPS("singletons"), TWOHIT_SNPS("2plus_hit"), KNOWN_SNPS("known"), NOVEL_SNPS("novel");
final String KNOWN_SNPS = "known";
final String NOVEL_SNPS = "novel"; private final String value;
final String[] POPULATION_ANALYSIS_NAMES = { ALL_SNPS, SINGLETON_SNPS, TWOHIT_SNPS, KNOWN_SNPS, NOVEL_SNPS }; ANALYSIS_TYPE(String value) { this.value = value;}
final String[] GENOTYPE_ANALYSIS_NAMES = { ALL_SNPS, KNOWN_SNPS, NOVEL_SNPS };
final String[] SIMPLE_ANALYSIS_NAMES = { ALL_SNPS }; public String toString() { return value; }
String[] ALL_ANALYSIS_NAMES = null;
}
final ANALYSIS_TYPE[] POPULATION_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS,
ANALYSIS_TYPE.SINGLETON_SNPS,
ANALYSIS_TYPE.TWOHIT_SNPS,
ANALYSIS_TYPE.KNOWN_SNPS,
ANALYSIS_TYPE.NOVEL_SNPS};
final ANALYSIS_TYPE[] GENOTYPE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS,
ANALYSIS_TYPE.KNOWN_SNPS,
ANALYSIS_TYPE.NOVEL_SNPS};
final ANALYSIS_TYPE[] SIMPLE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS};
ANALYSIS_TYPE[] ALL_ANALYSIS_NAMES = null;
public void initialize() { public void initialize() {
ALL_ANALYSIS_NAMES = SIMPLE_ANALYSIS_NAMES; ALL_ANALYSIS_NAMES = SIMPLE_ANALYSIS_NAMES;
@ -89,8 +101,8 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
analysisFilenameBase = this.getToolkit().getArguments().outFileName + "."; // + ".analysis."; analysisFilenameBase = this.getToolkit().getArguments().outFileName + "."; // + ".analysis.";
} }
analysisSets = new HashMap<String, ArrayList<VariantAnalysis>>(); analysisSets = new HashMap<ANALYSIS_TYPE, ArrayList<VariantAnalysis>>();
for ( String setName : ALL_ANALYSIS_NAMES ) { for (ANALYSIS_TYPE setName : ALL_ANALYSIS_NAMES) {
analysisSets.put(setName, initializeAnalysisSet(setName)); analysisSets.put(setName, initializeAnalysisSet(setName));
} }
// THIS IS A HACK required in order to reproduce the behavior of old (and imperfect) RODIterator and // THIS IS A HACK required in order to reproduce the behavior of old (and imperfect) RODIterator and
@ -105,11 +117,11 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
return nMappedSites; return nMappedSites;
} }
private ArrayList<VariantAnalysis> getAnalysisSet(final String name) { private ArrayList<VariantAnalysis> getAnalysisSet(final ANALYSIS_TYPE name) {
return analysisSets.containsKey(name) ? analysisSets.get(name) : null; return analysisSets.containsKey(name) ? analysisSets.get(name) : null;
} }
private ArrayList<VariantAnalysis> initializeAnalysisSet(final String setName) { private ArrayList<VariantAnalysis> initializeAnalysisSet(final ANALYSIS_TYPE setName) {
ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>(); ArrayList<VariantAnalysis> analyses = new ArrayList<VariantAnalysis>();
// //
@ -156,8 +168,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
/** /**
* Returns the filename of the analysis output file where output for an analysis with * Returns the filename of the analysis output file where output for an analysis with
*
* @param name * @param name
* @param params * @param params
*
* @return * @return
*/ */
public String getAnalysisFilename(final String name, final List<String> params) { public String getAnalysisFilename(final String name, final List<String> params) {
@ -167,7 +181,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
return analysisFilenameBase + Utils.join(".", Utils.cons(name, params)); return analysisFilenameBase + Utils.join(".", Utils.cons(name, params));
} }
public void initializeAnalysisOutputStream(final String setName, VariantAnalysis analysis) { public void initializeAnalysisOutputStream(final ANALYSIS_TYPE setName, VariantAnalysis analysis) {
final String filename = getAnalysisFilename(setName + "." + analysis.getName(), analysis.getParams()); final String filename = getAnalysisFilename(setName + "." + analysis.getName(), analysis.getParams());
try { try {
@ -199,17 +213,21 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
// Iterate over each analysis, and update it // Iterate over each analysis, and update it
Variation eval = (Variation) tracker.lookup("eval", null); Variation eval = (Variation) tracker.lookup("eval", null);
// ensure that the variation we're looking at is bi-allelic
if (eval != null && !eval.isBiallelic())
eval = null;
if (eval != null) if (eval != null)
if (eval.getNegLog10PError() < minConfidenceScore) eval = null; if (eval.getNegLog10PError() < minConfidenceScore) eval = null;
// update stats about all of the SNPs // update stats about all of the SNPs
updateAnalysisSet(ALL_SNPS, eval, tracker, ref.getBase(), context); updateAnalysisSet(ANALYSIS_TYPE.ALL_SNPS, eval, tracker, ref.getBase(), context);
// update the known / novel set by checking whether the knownSNPDBName track has an entry here // update the known / novel set by checking whether the knownSNPDBName track has an entry here
if (eval != null) { if (eval != null) {
Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker); Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker);
String noveltySet = dbsnp == null ? NOVEL_SNPS : KNOWN_SNPS; ANALYSIS_TYPE noveltySet = dbsnp == null ? ANALYSIS_TYPE.NOVEL_SNPS : ANALYSIS_TYPE.KNOWN_SNPS;
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context); updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
} }
@ -218,7 +236,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
SNPCallFromGenotypes call = (SNPCallFromGenotypes) eval; SNPCallFromGenotypes call = (SNPCallFromGenotypes) eval;
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes(); int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls); //System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS; final ANALYSIS_TYPE s = nVarGenotypes == 1 ? ANALYSIS_TYPE.SINGLETON_SNPS : ANALYSIS_TYPE.TWOHIT_SNPS;
updateAnalysisSet(s, eval, tracker, ref.getBase(), context); updateAnalysisSet(s, eval, tracker, ref.getBase(), context);
} }
} }
@ -227,7 +245,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
} }
public void updateAnalysisSet(final String analysisSetName, Variation eval, public void updateAnalysisSet(final ANALYSIS_TYPE analysisSetName, Variation eval,
RefMetaDataTracker tracker, char ref, AlignmentContext context) { RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// Iterate over each analysis, and update it // Iterate over each analysis, and update it
if (getAnalysisSet(analysisSetName) != null) { if (getAnalysisSet(analysisSetName) != null) {
@ -241,26 +259,30 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
} }
// Given result of map function // Given result of map function
public Integer reduceInit() { return 0; } public Integer reduceInit() {
return 0;
}
public Integer reduce(Integer value, Integer sum) { public Integer reduce(Integer value, Integer sum) {
return treeReduce(sum, value); return treeReduce(sum, value);
} }
public Integer treeReduce(Integer lhs, Integer rhs) { public Integer treeReduce(Integer lhs, Integer rhs) {
return lhs + rhs; return lhs + rhs;
} }
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
for ( String analysisSetName : ALL_ANALYSIS_NAMES ) { for (ANALYSIS_TYPE analysisSetName : ALL_ANALYSIS_NAMES) {
printAnalysisSet(analysisSetName); printAnalysisSet(analysisSetName);
} }
} }
private String getLineHeader( final String analysisSetName, final String keyword, final String analysis) { private String getLineHeader(final ANALYSIS_TYPE analysisSetName, final String keyword, final String analysis) {
String s = Utils.join(",", Arrays.asList(analysisSetName, keyword, analysis)); String s = Utils.join(",", Arrays.asList(analysisSetName, keyword, analysis));
return s + Utils.dupString(' ', 50 - s.length()); return s + Utils.dupString(' ', 50 - s.length());
} }
private void printAnalysisSet( final String analysisSetName ) { private void printAnalysisSet(final ANALYSIS_TYPE analysisSetName) {
//out.printf("Writing analysis set %s", analysisSetName); //out.printf("Writing analysis set %s", analysisSetName);
Date now = new Date(); Date now = new Date();
for (VariantAnalysis analysis : getAnalysisSet(analysisSetName)) { for (VariantAnalysis analysis : getAnalysisSet(analysisSetName)) {