Fixing odd merge problem with VariantEval -- better cluster analysis (no cumsum), rodVariant is now an AllelicVariant
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1239 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
76b09a879b
commit
84d407ff3f
|
|
@ -4,6 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -30,7 +32,7 @@ import java.io.IOException;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
public class rodVariants extends BasicReferenceOrderedDatum {
|
||||
public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant {
|
||||
private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
|
||||
private GenomeLoc loc;
|
||||
|
|
@ -140,4 +142,29 @@ public class rodVariants extends BasicReferenceOrderedDatum {
|
|||
this.lodBtr = (float) (bestLikelihood - refLikelihood);
|
||||
this.lodBtnb = (float) (bestLikelihood - nextBestLikelihood);
|
||||
}
|
||||
|
||||
public String getRefBasesFWD() {
|
||||
char[] b = { getReferenceBase() };
|
||||
return new String( b );
|
||||
}
|
||||
|
||||
public char getRefSnpFWD() throws IllegalStateException { return 0; }
|
||||
public String getAltBasesFWD() { return null; }
|
||||
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
||||
public boolean isReference() { return ! isSNP(); }
|
||||
public boolean isSNP() { return getLodBtr() > 5; }
|
||||
public boolean isInsertion() { return false; }
|
||||
public boolean isDeletion() { return false; }
|
||||
public boolean isIndel() { return false; }
|
||||
public double getMAF() { return 0; }
|
||||
public double getHeterozygosity() { return 0; }
|
||||
public boolean isGenotype() { return true; }
|
||||
public double getVariationConfidence() { return getLodBtr(); }
|
||||
public double getConsensusConfidence() { return getLodBtnb(); }
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
return Arrays.asList(getBestGenotype());
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,13 +74,11 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis {
|
|||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
s.add(String.format("snps_counted_for_neighbor_distances %d", nSeen));
|
||||
int cumulative = 0;
|
||||
s.add(String.format("description maxDist count cumulative"));
|
||||
s.add(String.format("description maxDist count"));
|
||||
for ( int i = 0; i < neighborWiseBoundries.length; i++ ) {
|
||||
int maxDist = neighborWiseBoundries[i];
|
||||
int count = variantsWithClusters.get(i).size();
|
||||
cumulative += count;
|
||||
s.add(String.format("snps_within_clusters_of_size %10d %10d %10d", maxDist, count, cumulative));
|
||||
s.add(String.format("snps_within_clusters_of_size %10d %10d", maxDist, count));
|
||||
}
|
||||
|
||||
return s;
|
||||
|
|
|
|||
|
|
@ -133,14 +133,6 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
|||
updateAnalysisSet(s, eval, tracker, ref, context);
|
||||
}
|
||||
|
||||
if ( eval instanceof SNPCallFromGenotypes ) {
|
||||
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
|
||||
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
|
||||
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
|
||||
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
|
||||
updateAnalysisSet(s, eval, tracker, ref, context);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -150,19 +150,20 @@ def main():
|
|||
sortedCallFile = 'all.sorted.calls'
|
||||
cmd = ("~/dev/GenomeAnalysisTK/trunk/perl/sortByRef.pl -k 1 tmp.calls ~/work/humanref/Homo_sapiens_assembly18.fasta.fai > %s" % ( sortedCallFile ) )
|
||||
jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry, waitID = jobid)
|
||||
|
||||
sortedCalls = [line.split() for line in open(sortedCallFile)]
|
||||
aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls)
|
||||
|
||||
print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom'
|
||||
|
||||
for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ):
|
||||
if snp == None: continue # ignore bad calls
|
||||
#print snp
|
||||
#sharedCalls = list(sharedCallsGroup)
|
||||
#genotype = list(sharedCalls[0][5])
|
||||
print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes())
|
||||
outputFile.close()
|
||||
if not OPTIONS.dry:
|
||||
sortedCalls = [line.split() for line in open(sortedCallFile)]
|
||||
aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls)
|
||||
|
||||
print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom'
|
||||
|
||||
for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ):
|
||||
if snp == None: continue # ignore bad calls
|
||||
#print snp
|
||||
#sharedCalls = list(sharedCallsGroup)
|
||||
#genotype = list(sharedCalls[0][5])
|
||||
print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes())
|
||||
outputFile.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Reference in New Issue