moving some code around for better organization, some fixes to the fields out of SSG

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1340 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-30 15:09:43 +00:00
parent 4366ce16e0
commit 4033c718d2
15 changed files with 111 additions and 61 deletions

View File

@ -8,9 +8,8 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ListUtils; import org.broadinstitute.sting.utils.ListUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.GenotypeCall; import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;

View File

@ -19,7 +19,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import java.io.File; import java.io.File;
@ReadFilters(ZeroMappingQualityReadFilter.class) @ReadFilters(ZeroMappingQualityReadFilter.class)
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> { public class SingleSampleGenotyper extends LocusWalker<org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall, GenotypeWriter> {
// Control output settings // Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE; @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr"); @Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
@ -123,14 +123,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* *
* @return an AlleleFrequencyEstimate object * @return an AlleleFrequencyEstimate object
*/ */
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) { public org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
rationalizeSampleName(context.getReads().get(0)); rationalizeSampleName(context.getReads().get(0));
if (context.getLocation().getStart() == 73) { if (context.getLocation().getStart() == 73) {
int stop = 1; int stop = 1;
} }
ReadBackedPileup pileup = new ReadBackedPileup(ref, context); ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
GenotypeLikelihoods G = callGenotype(tracker); GenotypeLikelihoods G = callGenotype(tracker);
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup); org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall geno = (org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
if (geno != null) { if (geno != null) {
metricsOut.nextPosition(geno, tracker); metricsOut.nextPosition(geno, tracker);
} }
@ -228,7 +228,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* *
* @return an empty string * @return an empty string
*/ */
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) { public GenotypeWriter reduce(org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall call, GenotypeWriter sum) {
if (call != null && call.isVariation()) { if (call != null && call.isVariation()) {
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) if (call.getConfidenceScore().getScore() > LOD_THRESHOLD)
sum.addGenotypeCall(call); sum.addGenotypeCall(call);

View File

@ -4,11 +4,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.rodGFF; import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;

View File

@ -2,15 +2,21 @@ package org.broadinstitute.sting.playground.utils;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeGenerator;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10; import static java.lang.Math.log10;
import static java.lang.Math.pow; import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.ArrayList;
public class GenotypeLikelihoods implements GenotypeGenerator { public class GenotypeLikelihoods implements GenotypeGenerator {
// precalculate these for performance (pow/log10 is expensive!) // precalculate these for performance (pow/log10 is expensive!)

View File

@ -6,11 +6,10 @@ import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
/** /**
*
* @author aaron * @author aaron
* * <p/>
* Class BasicGenotype * Class BasicGenotype
* * <p/>
* A basic implementation of the genotype interface * A basic implementation of the genotype interface
*/ */
public class BasicGenotype implements Genotype { public class BasicGenotype implements Genotype {
@ -64,6 +63,7 @@ public class BasicGenotype implements Genotype {
/** /**
* get the confidence score * get the confidence score
*
* @return get the confidence score that we're based on * @return get the confidence score that we're based on
*/ */
public ConfidenceScore getConfidenceScore() { public ConfidenceScore getConfidenceScore() {
@ -81,6 +81,7 @@ public class BasicGenotype implements Genotype {
/** /**
* get the ploidy * get the ploidy
*
* @return the ploidy value * @return the ploidy value
*/ */
public int getPloidy() { public int getPloidy() {
@ -117,6 +118,7 @@ public class BasicGenotype implements Genotype {
/** /**
* get the genotype's location * get the genotype's location
*
* @return a GenomeLoc representing the location * @return a GenomeLoc representing the location
*/ */
public GenomeLoc getLocation() { public GenomeLoc getLocation() {
@ -142,7 +144,7 @@ public class BasicGenotype implements Genotype {
*/ */
@Override @Override
public boolean isVariant(char ref) { public boolean isVariant(char ref) {
String ret = Utils.dupString(ref,this.getPloidy()); String ret = Utils.dupString(ref, this.getPloidy());
return !this.getBases().equals(ret); return !this.getBases().equals(ret);
} }
@ -155,4 +157,13 @@ public class BasicGenotype implements Genotype {
public Variant toVariant() { public Variant toVariant() {
return null; return null;
} }
/**
 * Replaces the confidence score stored on this genotype.
 *
 * @param confidenceScore the new confidence score; stored as-is, with no validation or copying
 */
public void setConfidenceScore(ConfidenceScore confidenceScore) {
    mConfidenceScore = confidenceScore;
}
} }

View File

@ -0,0 +1,20 @@
package org.broadinstitute.sting.utils.genotype;
import java.util.Comparator;
/**
 * @author aaron
 * <p/>
 * Class ConfidenceScoreComparator
 * <p/>
 * Orders genotypes by their confidence scores. The comparison is delegated to the
 * compareTo method of the first genotype's confidence score.
 */
public class ConfidenceScoreComparator implements Comparator<Genotype> {

    /**
     * Compares two genotypes by confidence score.
     *
     * @param genotype  the genotype whose confidence score drives the comparison
     * @param genotype1 the genotype whose confidence score is compared against
     * @return whatever the first score's compareTo reports for the second score
     */
    @Override
    public int compare(Genotype genotype, Genotype genotype1) {
        // the first genotype's score is fetched and used as the receiver; a null
        // genotype (or a null first score) therefore fails with a NullPointerException
        final int ordering =
                genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore());
        return ordering;
    }
}

View File

@ -22,5 +22,5 @@ public interface GenotypeGenerator {
* @param pileup a pileup of the reads, containing the reads and their offsets * @param pileup a pileup of the reads, containing the reads and their offsets
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/ */
public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup); public org.broadinstitute.sting.utils.genotype.calls.GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup);
} }

View File

@ -38,7 +38,7 @@ public interface GenotypeWriter {
* Add a genotype, given a genotype locus * Add a genotype, given a genotype locus
* @param locus the locus to add * @param locus the locus to add
*/ */
public void addGenotypeCall(GenotypeCall locus); public void addGenotypeCall(org.broadinstitute.sting.utils.genotype.calls.GenotypeCall locus);
/** /**
* add a no call to the genotype file, if supported. * add a no call to the genotype file, if supported.

View File

@ -0,0 +1,22 @@
package org.broadinstitute.sting.utils.genotype;
import java.util.Comparator;
/**
 * @author aaron
 * <p/>
 * Class LexigraphicalComparator
 * <p/>
 * Orders genotypes by the value returned from getBases(), using that value's own
 * compareTo (presumably a String, giving lexicographic order -- confirm against
 * the Genotype interface).
 */
public class LexigraphicalComparator implements Comparator<Genotype> {

    // NOTE: the previous per-instance boxed Double EPSILON field was never read
    // by this class and has been removed.

    /**
     * Compares two genotypes by their bases.
     *
     * @param genotype  the genotype whose bases drive the comparison
     * @param genotype1 the genotype whose bases are compared against
     * @return the result of comparing the first genotype's bases to the second's
     */
    @Override
    public int compare(Genotype genotype, Genotype genotype1) {
        return genotype.getBases().compareTo(genotype1.getBases());
    }
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype.calls;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.Comparator;
import java.util.List; import java.util.List;
/** /**
@ -51,19 +51,3 @@ public interface GenotypeCall extends Genotype {
public List<Genotype> getLexigraphicallySortedGenotypes(); public List<Genotype> getLexigraphicallySortedGenotypes();
} }
/**
 * Orders genotypes by the value returned from getBases(), using that value's own
 * compareTo (presumably a String, giving lexicographic order -- confirm against
 * the Genotype interface).
 */
class LexigraphicalComparator implements Comparator<Genotype> {

    // NOTE: the previous per-instance boxed Double EPSILON field was never read
    // by this class and has been removed.

    /**
     * Compares two genotypes by their bases.
     *
     * @param genotype  the genotype whose bases drive the comparison
     * @param genotype1 the genotype whose bases are compared against
     * @return the result of comparing the first genotype's bases to the second's
     */
    @Override
    public int compare(Genotype genotype, Genotype genotype1) {
        return genotype.getBases().compareTo(genotype1.getBases());
    }
}
/**
 * Orders genotypes by their confidence scores. The comparison is delegated to the
 * compareTo method of the first genotype's confidence score.
 */
class ConfidenceScoreSort implements Comparator<Genotype> {

    /**
     * Compares two genotypes by confidence score.
     *
     * @param genotype  the genotype whose confidence score drives the comparison
     * @param genotype1 the genotype whose confidence score is compared against
     * @return whatever the first score's compareTo reports for the second score
     */
    @Override
    public int compare(Genotype genotype, Genotype genotype1) {
        final int ordering =
                genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore());
        return ordering;
    }
}

View File

@ -1,14 +1,20 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype.calls;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.LexigraphicalComparator;
import org.broadinstitute.sting.utils.genotype.Variant;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import java.util.*; import java.util.ArrayList;
import java.util.Collections;
import net.sf.samtools.SAMRecord; import java.util.List;
import java.util.TreeMap;
/** /**
@ -22,7 +28,7 @@ import net.sf.samtools.SAMRecord;
public class SSGGenotypeCall implements GenotypeCall { public class SSGGenotypeCall implements GenotypeCall {
// TODO: make SSG into a more robust Genotype call interface // TODO: make SSG into a more robust Genotype call interface
// our stored genotype locus // our stored genotype locus
private final char mRefBase; private final String mRefBase;
private final int mPloidy; private final int mPloidy;
private final GenomeLoc mLoc; private final GenomeLoc mLoc;
private TreeMap<Double, Genotype> mGenotypes = new TreeMap(); private TreeMap<Double, Genotype> mGenotypes = new TreeMap();
@ -34,7 +40,7 @@ public class SSGGenotypeCall implements GenotypeCall {
private double rmsMapping; private double rmsMapping;
public SSGGenotypeCall(char mRefBase, int mPloidy, GenomeLoc mLoc, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) { public SSGGenotypeCall(char mRefBase, int mPloidy, GenomeLoc mLoc, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
this.mRefBase = mRefBase; this.mRefBase = String.valueOf(mRefBase).toUpperCase();
this.mPloidy = mPloidy; this.mPloidy = mPloidy;
this.mLoc = mLoc; this.mLoc = mLoc;
if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0"); if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0");
@ -56,7 +62,7 @@ public class SSGGenotypeCall implements GenotypeCall {
mLikelihoods = likelihoods; mLikelihoods = likelihoods;
index = 0; index = 0;
for (Genotype g : genotypes) { for (Genotype g : genotypes) {
((BasicGenotype)g).mConfidenceScore = new BayesianConfidenceScore(Math.abs(likelihoods[index] - ref)); ((BasicGenotype)g).setConfidenceScore( new BayesianConfidenceScore(Math.abs(likelihoods[index] - ref)));
mGenotypes.put(likelihoods[index],g); mGenotypes.put(likelihoods[index],g);
index++; index++;
} }
@ -80,7 +86,7 @@ public class SSGGenotypeCall implements GenotypeCall {
*/ */
@Override @Override
public char getReferencebase() { public char getReferencebase() {
return mRefBase; return mRefBase.charAt(0);
} }
/** /**
@ -90,7 +96,7 @@ public class SSGGenotypeCall implements GenotypeCall {
*/ */
@Override @Override
public boolean isVariation() { public boolean isVariation() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(mRefBase); return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(mRefBase.charAt(0));
} }
/** /**

View File

@ -3,10 +3,10 @@ package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import java.io.File; import java.io.File;
@ -101,7 +101,7 @@ public class GeliAdapter implements GenotypeWriter {
*/ */
@Override @Override
public void addGenotypeCall(GenotypeCall locus) { public void addGenotypeCall(GenotypeCall locus) {
//To change body of implemented methods use File | Settings | File Templates. // TODO: add code here
} }
/** /**

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.utils.genotype.geli; package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.GenotypeCall; import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall; import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -50,8 +50,8 @@ public class GeliTextWriter implements GenotypeWriter {
call.getReadDepth(), call.getReadDepth(),
-1, -1,
locus.getBases(), locus.getBases(),
call.getConfidenceScore().getScore(), call.getBestRef(),
locus.getConfidenceScore().getScore(), call.getBestNext(),
call.getLikelihoods()[0], call.getLikelihoods()[0],
call.getLikelihoods()[1], call.getLikelihoods()[1],
call.getLikelihoods()[2], call.getLikelihoods()[2],

View File

@ -4,7 +4,11 @@ import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream; import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.io.File; import java.io.File;