Added geli compatibility mode to SingleSampleGenotyper, to enable easy linking to the geli2popsnps.py script

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@866 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-06-01 14:32:12 +00:00
parent 543c68cdd8
commit f19d7abba9
2 changed files with 43 additions and 5 deletions

View File

@ -36,6 +36,18 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
@Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97;
@Argument(fullName="sample_name_regex", shortName="sample_name_regex", required=false, doc="sample_name_regex") public String SAMPLE_NAME_REGEX = null;
@Argument(fullName="geli", shortName="geli", required=false, doc="If true, output will be in Geli/Picard format")
public boolean GeliOutputFormat = false;
// todo: enable argument after fixing all of the hard-codings in GenotypeLikelihoods and other places
//@Argument(fullName="refPrior", shortName="refPrior", required=false, doc="Prior likelihood of the reference theory")
public double REF_PRIOR = 0.999;
//@Argument(fullName="hetVarPrior", shortName="hetVarPrior", required=false, doc="Prior likelihood of a heterozygous variant theory")
public double HETVAR_PRIOR = 1e-3;
//@Argument(fullName="homVarPrior", shortName="homVarPrior", required=false, doc="Prior likelihood of the homozygous variant theory")
public double HOMVAR_PRIOR = 1e-5;
public AlleleMetrics metrics;
public PrintStream calls_file;
@ -51,8 +63,9 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
if (metricsFileName != null) { metrics = new AlleleMetrics(metricsFileName, lodThreshold); }
if (callsFileName != null)
{
calls_file = new PrintStream(callsFileName);
calls_file.println(AlleleFrequencyEstimate.asTabularStringHeader());
calls_file = new PrintStream(callsFileName);
String header = GeliOutputFormat ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader();
calls_file.println(header);
}
}
catch (Exception e)
@ -104,7 +117,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
if (refBaseIndex >= 0 && altBaseIndex >= 0) {
// Set the priors for the hom-ref, het, and non-hom ref (we don't evaluate the
// possibility of a non-ref het).
double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
//double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
double[] genotypePriors = { REF_PRIOR, HETVAR_PRIOR, HOMVAR_PRIOR };
// I'm not sure what the difference between qhat and qstar is in AlleleMetrics, but
// to make my life simpler, I evaluate the non-ref balances in genotypeBalances and
@ -373,8 +387,11 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
{
calls_file.println(alleleFreq.asTabularString());
return "";
if ( alleleFreq.lodVsRef >= lodThreshold ) {
String line = GeliOutputFormat ? alleleFreq.asGeliString() : alleleFreq.asTabularString();
calls_file.println(line);
}
return "";
}
public void onTraversalDone()

View File

@ -152,6 +152,27 @@ public class AlleleFrequencyEstimate {
return "location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases";
}
public static String geliHeaderString() {
return "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT";
}
public String asGeliString()
{
// #Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT
// chr1 7764136 A 48 99 CC 83.650421 9.18159 -92.83638 -18.367548 -96.91729 -96.614204 -9.185958 -23.33643 -23.033337 -101.282059 -101.583092 -101.279999
// chr pos ref Nreads maxMapQ genotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT
return String.format("%s %16d %c %8d %d %s %.6f %.6f 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0",
location.getContig(),
location.getStart(),
ref,
depth,
-1,
genotype(),
lodVsRef,
lodVsNextBest);
}
public String asTabularString()
{
return String.format("%s %s %c %c %s %f %f %f %f %d %s",