cleanup before moving files
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1365 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e3b08f245f
commit
20986a03de
|
|
@ -23,38 +23,37 @@ import java.io.File;
|
|||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
|
||||
// Control output settings
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
|
||||
public File VARIANTS_FILE;
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
|
||||
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
|
||||
// Control what goes into the variants file and what format that file should have
|
||||
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)public Double LOD_THRESHOLD = Double.MIN_VALUE;
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false) public boolean GENOTYPE = false;
|
||||
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)
|
||||
public Double LOD_THRESHOLD = Double.MIN_VALUE;
|
||||
|
||||
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false) public boolean THREE_BASE_ERRORS = false;
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false)
|
||||
public boolean GENOTYPE = false;
|
||||
|
||||
// Control periodic reporting features
|
||||
@Argument(fullName = "metrics_interval", shortName = "metint", doc = "Number of loci to process between metrics reports", required = false) public Integer METRICS_INTERVAL = 50000;
|
||||
@Argument(fullName = "suppress_metrics", shortName = "printmets", doc = "If specified, don't display metrics", required = false) public Boolean SUPPRESS_METRICS = false;
|
||||
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
|
||||
public boolean THREE_BASE_ERRORS = false;
|
||||
|
||||
// Control what features we use in calling variants
|
||||
//@Argument(fullName = "ignore_secondary_bases", shortName = "nosb", doc = "Ignore secondary base examination", required = false) public Boolean IGNORE_SECONDARY_BASES = false;
|
||||
@Argument(fullName = "call_indels", shortName = "indels", doc = "Call indels as well as point mutations", required = false) public Boolean CALL_INDELS = false;
|
||||
public enum Caller {
|
||||
OLD_AND_BUSTED,
|
||||
NEW_HOTNESS
|
||||
}
|
||||
|
||||
@Argument(fullName = "caller", doc = "", required = false)
|
||||
public Caller caller = Caller.OLD_AND_BUSTED;
|
||||
|
||||
// Control how the genotype hypotheses are weighed
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
|
||||
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
|
||||
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
||||
|
||||
// Control various sample-level settings
|
||||
@Argument(fullName = "sample_name_regex", shortName = "sample_name_regex", doc = "Replaces the sample name specified in the BAM read group with the value supplied here", required = false) public String SAMPLE_NAME_REGEX = null;
|
||||
|
||||
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
|
||||
public boolean keepQ0Bases = false;
|
||||
|
||||
public AlleleMetrics metricsOut;
|
||||
public String sampleName;
|
||||
|
||||
public double[] plocus;
|
||||
public double[] phapmap;
|
||||
public double[] pdbsnp;
|
||||
|
|
@ -103,8 +102,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
|
||||
/** Initialize the walker with some sensible defaults */
|
||||
public void initialize() {
|
||||
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
||||
|
||||
plocus = GenotypeLikelihoods.computePriors(heterozygosity);
|
||||
phapmap = priorsArray(PRIORS_HAPMAP);
|
||||
pdbsnp = priorsArray(PRIORS_DBSNP);
|
||||
|
|
@ -120,51 +117,31 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
* @return an AlleleFrequencyEstimate object
|
||||
*/
|
||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
//rationalizeSampleName(context.getReads().get(0));
|
||||
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
|
||||
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);
|
||||
|
||||
// todo -- totally convoluted code!
|
||||
if ( ! oldAndBusted.equals(newHotness) ) {
|
||||
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
|
||||
}
|
||||
|
||||
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
|
||||
}
|
||||
|
||||
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
|
||||
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
if (geno != null) {
|
||||
metricsOut.nextPosition(geno, tracker);
|
||||
}
|
||||
if (!SUPPRESS_METRICS) {
|
||||
metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL);
|
||||
}
|
||||
return geno;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sometimes the sample names in the BAM files get screwed up. Fix it here if we can.
|
||||
*
|
||||
* @param read a read from the pileup (assuming all the reads have the same sample name)
|
||||
*
|
||||
* @return a repaired sample name
|
||||
*/
|
||||
private String rationalizeSampleName(SAMRecord read) {
|
||||
String RG = (String) (read.getAttribute("RG"));
|
||||
SAMReadGroupRecord read_group_record = read.getHeader().getReadGroup(RG);
|
||||
|
||||
if (read_group_record != null) {
|
||||
String localSampleName = read.getHeader().getReadGroup(RG).getSample();
|
||||
if (SAMPLE_NAME_REGEX != null) {
|
||||
localSampleName = localSampleName.replaceAll(SAMPLE_NAME_REGEX, "$1");
|
||||
}
|
||||
if (sampleName == null) {
|
||||
sampleName = localSampleName;
|
||||
} else {
|
||||
if (!sampleName.equals(localSampleName)) {
|
||||
throw new StingException(String.format("Samples appear to have been mixed up: expected '%s' but found '%s'.", sampleName, localSampleName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sampleName;
|
||||
}
|
||||
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
|
||||
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
return geno;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls the underlying, single locus genotype of the sample
|
||||
|
|
|
|||
|
|
@ -76,4 +76,11 @@ public interface Genotype {
|
|||
* @return
|
||||
*/
|
||||
public Variant toVariant();
|
||||
|
||||
/**
|
||||
* return a readable string representation of this genotype
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String toString();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,10 +13,7 @@ import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScor
|
|||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.variant.Variant;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -96,6 +93,35 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null)
|
||||
return false;
|
||||
if(other instanceof SSGGenotypeCall) {
|
||||
SSGGenotypeCall otherCall = (SSGGenotypeCall)other;
|
||||
|
||||
boolean likelihoodsMatch = true;
|
||||
for ( int i = 0; i < mLikelihoods.length; i++ ) {
|
||||
likelihoodsMatch = likelihoodsMatch && MathUtils.compareDoubles(mLikelihoods[i], otherCall.mLikelihoods[i]) == 0;
|
||||
}
|
||||
//System.out.printf("likelihoodsMatch = %b%n", likelihoodsMatch);
|
||||
|
||||
return this.mRefBase.equals(otherCall.mRefBase) &&
|
||||
this.mPloidy == otherCall.mPloidy &&
|
||||
MathUtils.compareDoubles(this.bestNext, otherCall.bestNext) == 0 &&
|
||||
MathUtils.compareDoubles(this.bestRef, otherCall.bestRef) == 0 &&
|
||||
this.readDepth == otherCall.readDepth &&
|
||||
MathUtils.compareDoubles(this.rmsMapping, otherCall.rmsMapping) == 0 &&
|
||||
likelihoodsMatch;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
|
||||
mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate the rms , given the read pileup
|
||||
*
|
||||
|
|
@ -128,7 +154,7 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
|
|||
*/
|
||||
@Override
|
||||
public boolean isVariation() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(mRefBase.charAt(0));
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(getReferencebase());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue