Fixing odd merge problem with VariantEval -- better cluster analysis (no cumsum), rodVariant is now an AllelicVariant

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1239 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-07-14 18:53:27 +00:00
parent 76b09a879b
commit 84d407ff3f
4 changed files with 43 additions and 25 deletions

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Arrays;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -30,7 +32,7 @@ import java.io.IOException;
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
public class rodVariants extends BasicReferenceOrderedDatum { public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant {
private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
private GenomeLoc loc; private GenomeLoc loc;
@ -140,4 +142,29 @@ public class rodVariants extends BasicReferenceOrderedDatum {
this.lodBtr = (float) (bestLikelihood - refLikelihood); this.lodBtr = (float) (bestLikelihood - refLikelihood);
this.lodBtnb = (float) (bestLikelihood - nextBestLikelihood); this.lodBtnb = (float) (bestLikelihood - nextBestLikelihood);
} }
public String getRefBasesFWD() {
char[] b = { getReferenceBase() };
return new String( b );
}
public char getRefSnpFWD() throws IllegalStateException { return 0; }
public String getAltBasesFWD() { return null; }
public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); }
public boolean isSNP() { return getLodBtr() > 5; }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
public double getMAF() { return 0; }
public double getHeterozygosity() { return 0; }
public boolean isGenotype() { return true; }
public double getVariationConfidence() { return getLodBtr(); }
public double getConsensusConfidence() { return getLodBtnb(); }
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(getBestGenotype());
}
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
} }

View File

@ -74,13 +74,11 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis {
public List<String> done() { public List<String> done() {
List<String> s = new ArrayList<String>(); List<String> s = new ArrayList<String>();
s.add(String.format("snps_counted_for_neighbor_distances %d", nSeen)); s.add(String.format("snps_counted_for_neighbor_distances %d", nSeen));
int cumulative = 0; s.add(String.format("description maxDist count"));
s.add(String.format("description maxDist count cumulative"));
for ( int i = 0; i < neighborWiseBoundries.length; i++ ) { for ( int i = 0; i < neighborWiseBoundries.length; i++ ) {
int maxDist = neighborWiseBoundries[i]; int maxDist = neighborWiseBoundries[i];
int count = variantsWithClusters.get(i).size(); int count = variantsWithClusters.get(i).size();
cumulative += count; s.add(String.format("snps_within_clusters_of_size %10d %10d", maxDist, count));
s.add(String.format("snps_within_clusters_of_size %10d %10d %10d", maxDist, count, cumulative));
} }
return s; return s;

View File

@ -133,14 +133,6 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
updateAnalysisSet(s, eval, tracker, ref, context); updateAnalysisSet(s, eval, tracker, ref, context);
} }
if ( eval instanceof SNPCallFromGenotypes ) {
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
updateAnalysisSet(s, eval, tracker, ref, context);
}
return 1; return 1;
} }

View File

@ -150,19 +150,20 @@ def main():
sortedCallFile = 'all.sorted.calls' sortedCallFile = 'all.sorted.calls'
cmd = ("~/dev/GenomeAnalysisTK/trunk/perl/sortByRef.pl -k 1 tmp.calls ~/work/humanref/Homo_sapiens_assembly18.fasta.fai > %s" % ( sortedCallFile ) ) cmd = ("~/dev/GenomeAnalysisTK/trunk/perl/sortByRef.pl -k 1 tmp.calls ~/work/humanref/Homo_sapiens_assembly18.fasta.fai > %s" % ( sortedCallFile ) )
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry, waitID = jobid) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry, waitID = jobid)
sortedCalls = [line.split() for line in open(sortedCallFile)]
aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls)
print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom'
for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ): if not OPTIONS.dry:
if snp == None: continue # ignore bad calls sortedCalls = [line.split() for line in open(sortedCallFile)]
#print snp aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls)
#sharedCalls = list(sharedCallsGroup)
#genotype = list(sharedCalls[0][5]) print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom'
print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes())
outputFile.close() for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ):
if snp == None: continue # ignore bad calls
#print snp
#sharedCalls = list(sharedCallsGroup)
#genotype = list(sharedCalls[0][5])
print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes())
outputFile.close()
if __name__ == "__main__": if __name__ == "__main__":