Added geli compatibility mode to SingleSampleGenotyper, to enable easy linking to the geli2popsnps.py script
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@866 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
543c68cdd8
commit
f19d7abba9
|
|
@ -36,6 +36,18 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
@Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97;
|
||||
@Argument(fullName="sample_name_regex", shortName="sample_name_regex", required=false, doc="sample_name_regex") public String SAMPLE_NAME_REGEX = null;
|
||||
|
||||
@Argument(fullName="geli", shortName="geli", required=false, doc="If true, output will be in Geli/Picard format")
|
||||
public boolean GeliOutputFormat = false;
|
||||
|
||||
// todo: enable argument after fixing all of the hard-codings in GenotypeLikelihoods and other places
|
||||
|
||||
//@Argument(fullName="refPrior", shortName="refPrior", required=false, doc="Prior likelihood of the reference theory")
|
||||
public double REF_PRIOR = 0.999;
|
||||
//@Argument(fullName="hetVarPrior", shortName="hetVarPrior", required=false, doc="Prior likelihood of a heterozygous variant theory")
|
||||
public double HETVAR_PRIOR = 1e-3;
|
||||
//@Argument(fullName="homVarPrior", shortName="homVarPrior", required=false, doc="Prior likelihood of the homozygous variant theory")
|
||||
public double HOMVAR_PRIOR = 1e-5;
|
||||
|
||||
public AlleleMetrics metrics;
|
||||
public PrintStream calls_file;
|
||||
|
||||
|
|
@ -51,8 +63,9 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
if (metricsFileName != null) { metrics = new AlleleMetrics(metricsFileName, lodThreshold); }
|
||||
if (callsFileName != null)
|
||||
{
|
||||
calls_file = new PrintStream(callsFileName);
|
||||
calls_file.println(AlleleFrequencyEstimate.asTabularStringHeader());
|
||||
calls_file = new PrintStream(callsFileName);
|
||||
String header = GeliOutputFormat ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader();
|
||||
calls_file.println(header);
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
|
|
@ -104,7 +117,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
if (refBaseIndex >= 0 && altBaseIndex >= 0) {
|
||||
// Set the priors for the hom-ref, het, and non-hom ref (we don't evaluate the
|
||||
// possibility of a non-ref het).
|
||||
double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
|
||||
//double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
|
||||
double[] genotypePriors = { REF_PRIOR, HETVAR_PRIOR, HOMVAR_PRIOR };
|
||||
|
||||
// I'm not sure what the difference between qhat and qstar is in AlleleMetrics, but
|
||||
// to make my life simpler, I evaluate the non-ref balances in genotypeBalances and
|
||||
|
|
@ -373,8 +387,11 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||
{
|
||||
calls_file.println(alleleFreq.asTabularString());
|
||||
return "";
|
||||
if ( alleleFreq.lodVsRef >= lodThreshold ) {
|
||||
String line = GeliOutputFormat ? alleleFreq.asGeliString() : alleleFreq.asTabularString();
|
||||
calls_file.println(line);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public void onTraversalDone()
|
||||
|
|
|
|||
|
|
@ -152,6 +152,27 @@ public class AlleleFrequencyEstimate {
|
|||
return "location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases";
|
||||
}
|
||||
|
||||
public static String geliHeaderString() {
|
||||
return "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT";
|
||||
}
|
||||
|
||||
public String asGeliString()
|
||||
{
|
||||
// #Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT
|
||||
// chr1 7764136 A 48 99 CC 83.650421 9.18159 -92.83638 -18.367548 -96.91729 -96.614204 -9.185958 -23.33643 -23.033337 -101.282059 -101.583092 -101.279999
|
||||
|
||||
// chr pos ref Nreads maxMapQ genotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT
|
||||
return String.format("%s %16d %c %8d %d %s %.6f %.6f 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0",
|
||||
location.getContig(),
|
||||
location.getStart(),
|
||||
ref,
|
||||
depth,
|
||||
-1,
|
||||
genotype(),
|
||||
lodVsRef,
|
||||
lodVsNextBest);
|
||||
}
|
||||
|
||||
public String asTabularString()
|
||||
{
|
||||
return String.format("%s %s %c %c %s %f %f %f %f %d %s",
|
||||
|
|
|
|||
Loading…
Reference in New Issue