cleanup before moving files

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1365 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-03 19:08:24 +00:00
parent e3b08f245f
commit 20986a03de
3 changed files with 71 additions and 61 deletions

View File

@@ -23,38 +23,37 @@ import java.io.File;
@ReadFilters(ZeroMappingQualityReadFilter.class)
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
public File VARIANTS_FILE;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
// Control what goes into the variants file and what format that file should have
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)public Double LOD_THRESHOLD = Double.MIN_VALUE;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false) public boolean GENOTYPE = false;
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)
public Double LOD_THRESHOLD = Double.MIN_VALUE;
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false) public boolean THREE_BASE_ERRORS = false;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false)
public boolean GENOTYPE = false;
// Control periodic reporting features
@Argument(fullName = "metrics_interval", shortName = "metint", doc = "Number of loci to process between metrics reports", required = false) public Integer METRICS_INTERVAL = 50000;
@Argument(fullName = "suppress_metrics", shortName = "printmets", doc = "If specified, don't display metrics", required = false) public Boolean SUPPRESS_METRICS = false;
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
public boolean THREE_BASE_ERRORS = false;
// Control what features we use in calling variants
//@Argument(fullName = "ignore_secondary_bases", shortName = "nosb", doc = "Ignore secondary base examination", required = false) public Boolean IGNORE_SECONDARY_BASES = false;
@Argument(fullName = "call_indels", shortName = "indels", doc = "Call indels as well as point mutations", required = false) public Boolean CALL_INDELS = false;
public enum Caller {
OLD_AND_BUSTED,
NEW_HOTNESS
}
@Argument(fullName = "caller", doc = "", required = false)
public Caller caller = Caller.OLD_AND_BUSTED;
// Control how the genotype hypotheses are weighed
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
// Control various sample-level settings
@Argument(fullName = "sample_name_regex", shortName = "sample_name_regex", doc = "Replaces the sample name specified in the BAM read group with the value supplied here", required = false) public String SAMPLE_NAME_REGEX = null;
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
public boolean keepQ0Bases = false;
public AlleleMetrics metricsOut;
public String sampleName;
public double[] plocus;
public double[] phapmap;
public double[] pdbsnp;
@@ -103,8 +102,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
/** Initialize the walker with some sensible defaults */
public void initialize() {
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
plocus = GenotypeLikelihoods.computePriors(heterozygosity);
phapmap = priorsArray(PRIORS_HAPMAP);
pdbsnp = priorsArray(PRIORS_DBSNP);
@@ -120,51 +117,31 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* @return an AlleleFrequencyEstimate object
*/
/**
 * Calls the genotype at the current locus with both the legacy and the new caller,
 * reports any disagreement between the two on stdout, and returns whichever call
 * was selected via the 'caller' argument.
 *
 * @param tracker reference metadata (dbSNP/HapMap) overlapping this locus
 * @param ref     the reference base at this locus
 * @param context the reads overlapping this locus
 * @return the genotype call from the selected caller; may be null if no call was made
 */
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
    //rationalizeSampleName(context.getReads().get(0));
    SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
    SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);

    // todo -- totally convoluted code!
    // Null-safe comparison: either caller may return null at a locus (mapNewHotness
    // explicitly guards for a null call); calling equals() on a null old call would NPE.
    boolean callsAgree = (oldAndBusted == null) ? (newHotness == null) : oldAndBusted.equals(newHotness);
    if ( ! callsAgree ) {
        System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
    }

    return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
}
/**
 * Computes the genotype call at this locus using the "new hotness" code path,
 * feeds the resulting call into the running allele metrics, and (unless metrics
 * are suppressed) prints metrics at the configured locus interval.
 *
 * @param tracker reference metadata overlapping this locus
 * @param ref     the reference base at this locus
 * @param context the reads overlapping this locus
 * @return the genotype call, or null if no call was produced
 */
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) {
    final ReadBackedPileup locusPileup = new ReadBackedPileup(ref, context);
    final GenotypeLikelihoods likelihoods = makeGenotypeLikelihood(tracker);
    likelihoods.setDiscovery(GENOTYPE); // discovery mode vs. variant-detection mode

    final SSGGenotypeCall call = (SSGGenotypeCall) likelihoods.callGenotypes(tracker, ref, locusPileup);
    if (call != null) {
        metricsOut.nextPosition(call, tracker);
    }
    if (!SUPPRESS_METRICS) {
        metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL);
    }
    return call;
}
/**
* Sometimes the sample names in the BAM files get screwed up. Fix it here if we can.
*
* @param read a read from the pileup (assuming all the reads have the same sample name)
*
* @return a repaired sample name
*/
/**
 * Sometimes the sample names in the BAM files get screwed up. Fix it here if we can.
 *
 * Looks up the sample from the read's RG read group, optionally rewrites it with
 * SAMPLE_NAME_REGEX (keeping capture group 1), caches it in sampleName, and checks
 * that every subsequent read carries the same sample.
 *
 * @param read a read from the pileup (assuming all the reads have the same sample name)
 * @return a repaired sample name (the cached sampleName; may be null if the read has no read group)
 * @throws StingException if a read's sample name disagrees with the cached one
 */
private String rationalizeSampleName(SAMRecord read) {
    String RG = (String) (read.getAttribute("RG"));
    SAMReadGroupRecord read_group_record = read.getHeader().getReadGroup(RG);
    if (read_group_record != null) {
        // Use the read group we already looked up instead of fetching it a second time.
        String localSampleName = read_group_record.getSample();
        if (SAMPLE_NAME_REGEX != null) {
            localSampleName = localSampleName.replaceAll(SAMPLE_NAME_REGEX, "$1");
        }
        if (sampleName == null) {
            sampleName = localSampleName;
        } else if (!sampleName.equals(localSampleName)) {
            throw new StingException(String.format("Samples appear to have been mixed up: expected '%s' but found '%s'.", sampleName, localSampleName));
        }
    }
    return sampleName;
}
/**
 * Computes the genotype call at this locus using the legacy ("old and busted")
 * code path. Unlike mapNewHotness, this path does not touch the metrics output.
 *
 * @param tracker reference metadata overlapping this locus
 * @param ref     the reference base at this locus
 * @param context the reads overlapping this locus
 * @return the genotype call, or null if no call was produced
 */
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
    final GenotypeLikelihoods likelihoods = makeGenotypeLikelihood(tracker);
    likelihoods.setDiscovery(GENOTYPE); // discovery mode vs. variant-detection mode
    return (SSGGenotypeCall) likelihoods.callGenotypes(tracker, ref, new ReadBackedPileup(ref, context));
}
/**
* Calls the underlying, single locus genotype of the sample

View File

@@ -76,4 +76,11 @@ public interface Genotype {
* @return
*/
public Variant toVariant();
/**
 * return a readable string representation of this genotype
 *
 * @return a readable string representation of this genotype
 */
public String toString();
}

View File

@@ -13,10 +13,7 @@ import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScor
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import org.broadinstitute.sting.utils.genotype.variant.Variant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;
import java.util.*;
/**
@@ -96,6 +93,35 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
}
}
@Override
// NOTE(review): equals() is overridden without a matching hashCode(); these objects
// must not be used as hash-map/set keys until that is addressed.
public boolean equals(Object other) {
    if (other == null)
        return false;
    if (other instanceof SSGGenotypeCall) {
        SSGGenotypeCall otherCall = (SSGGenotypeCall) other;

        // Compare likelihood arrays defensively: previously a shorter array in
        // otherCall caused an ArrayIndexOutOfBoundsException instead of returning
        // false. Also short-circuits the loop once a mismatch is found.
        boolean likelihoodsMatch = mLikelihoods.length == otherCall.mLikelihoods.length;
        for (int i = 0; likelihoodsMatch && i < mLikelihoods.length; i++) {
            likelihoodsMatch = MathUtils.compareDoubles(mLikelihoods[i], otherCall.mLikelihoods[i]) == 0;
        }

        //System.out.printf("likelihoodsMatch = %b%n", likelihoodsMatch);
        return this.mRefBase.equals(otherCall.mRefBase) &&
               this.mPloidy == otherCall.mPloidy &&
               MathUtils.compareDoubles(this.bestNext, otherCall.bestNext) == 0 &&
               MathUtils.compareDoubles(this.bestRef, otherCall.bestRef) == 0 &&
               this.readDepth == otherCall.readDepth &&
               MathUtils.compareDoubles(this.rmsMapping, otherCall.rmsMapping) == 0 &&
               likelihoodsMatch;
    }
    return false;
}
/**
 * @return a one-line, human-readable summary of this call: reference base,
 *         read depth, RMS mapping quality, best-vs-ref and best-vs-next scores,
 *         and the raw likelihood array
 */
public String toString() {
    final Object[] parts = { mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods) };
    return String.format("ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s", parts);
}
/**
* calculate the rms , given the read pileup
*
@@ -128,7 +154,7 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
*/
/**
 * @return true if the best-scoring genotype (the one stored under the largest
 *         key in mGenotypes) differs from the reference base
 */
@Override
public boolean isVariation() {
    // The diff rendering left both the old and the new return statement in the
    // body; only the post-change line (using getReferencebase()) is kept, since
    // the old line (mRefBase.charAt(0)) is the removed side of the hunk.
    return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(getReferencebase());
}
/**