Fixing odd merge problem with VariantEval -- better cluster analysis (no cumsum), rodVariant is now an AllelicVariant

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1239 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-07-14 18:53:27 +00:00
parent 76b09a879b
commit 84d407ff3f
4 changed files with 43 additions and 25 deletions

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
/*
* Copyright (c) 2009 The Broad Institute
@ -30,7 +32,7 @@ import java.io.IOException;
* OTHER DEALINGS IN THE SOFTWARE.
*/
public class rodVariants extends BasicReferenceOrderedDatum {
public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant {
private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
private GenomeLoc loc;
@ -140,4 +142,29 @@ public class rodVariants extends BasicReferenceOrderedDatum {
this.lodBtr = (float) (bestLikelihood - refLikelihood);
this.lodBtnb = (float) (bestLikelihood - nextBestLikelihood);
}
public String getRefBasesFWD() {
char[] b = { getReferenceBase() };
return new String( b );
}
public char getRefSnpFWD() throws IllegalStateException { return 0; }
public String getAltBasesFWD() { return null; }
public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); }
public boolean isSNP() { return getLodBtr() > 5; }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
public double getMAF() { return 0; }
public double getHeterozygosity() { return 0; }
public boolean isGenotype() { return true; }
public double getVariationConfidence() { return getLodBtr(); }
public double getConsensusConfidence() { return getLodBtnb(); }
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(getBestGenotype());
}
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
}

View File

@ -74,13 +74,11 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis {
public List<String> done() {
List<String> s = new ArrayList<String>();
s.add(String.format("snps_counted_for_neighbor_distances %d", nSeen));
int cumulative = 0;
s.add(String.format("description maxDist count cumulative"));
s.add(String.format("description maxDist count"));
for ( int i = 0; i < neighborWiseBoundries.length; i++ ) {
int maxDist = neighborWiseBoundries[i];
int count = variantsWithClusters.get(i).size();
cumulative += count;
s.add(String.format("snps_within_clusters_of_size %10d %10d %10d", maxDist, count, cumulative));
s.add(String.format("snps_within_clusters_of_size %10d %10d", maxDist, count));
}
return s;

View File

@ -133,14 +133,6 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
updateAnalysisSet(s, eval, tracker, ref, context);
}
if ( eval instanceof SNPCallFromGenotypes ) {
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
updateAnalysisSet(s, eval, tracker, ref, context);
}
return 1;
}

View File

@ -150,19 +150,20 @@ def main():
sortedCallFile = 'all.sorted.calls'
cmd = ("~/dev/GenomeAnalysisTK/trunk/perl/sortByRef.pl -k 1 tmp.calls ~/work/humanref/Homo_sapiens_assembly18.fasta.fai > %s" % ( sortedCallFile ) )
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry, waitID = jobid)
sortedCalls = [line.split() for line in open(sortedCallFile)]
aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls)
print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom'
for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ):
if snp == None: continue # ignore bad calls
#print snp
#sharedCalls = list(sharedCallsGroup)
#genotype = list(sharedCalls[0][5])
print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes())
outputFile.close()
if not OPTIONS.dry:
sortedCalls = [line.split() for line in open(sortedCallFile)]
aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls)
print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom'
for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ):
if snp == None: continue # ignore bad calls
#print snp
#sharedCalls = list(sharedCallsGroup)
#genotype = list(sharedCalls[0][5])
print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes())
outputFile.close()
if __name__ == "__main__":