Added geli compatibility mode to SingleSampleGenotyper, to enable easy linking to the geli2popsnps.py script
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@866 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
543c68cdd8
commit
f19d7abba9
|
|
@ -36,6 +36,18 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
@Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97;
|
@Argument(fullName="qHomNonRef", shortName="qHomNonRef", doc="qHomNonRef", required=false) public Double qHomNonRef = 0.97;
|
||||||
@Argument(fullName="sample_name_regex", shortName="sample_name_regex", required=false, doc="sample_name_regex") public String SAMPLE_NAME_REGEX = null;
|
@Argument(fullName="sample_name_regex", shortName="sample_name_regex", required=false, doc="sample_name_regex") public String SAMPLE_NAME_REGEX = null;
|
||||||
|
|
||||||
|
@Argument(fullName="geli", shortName="geli", required=false, doc="If true, output will be in Geli/Picard format")
|
||||||
|
public boolean GeliOutputFormat = false;
|
||||||
|
|
||||||
|
// todo: enable argument after fixing all of the hard-codings in GenotypeLikelihoods and other places
|
||||||
|
|
||||||
|
//@Argument(fullName="refPrior", shortName="refPrior", required=false, doc="Prior likelihood of the reference theory")
|
||||||
|
public double REF_PRIOR = 0.999;
|
||||||
|
//@Argument(fullName="hetVarPrior", shortName="hetVarPrior", required=false, doc="Prior likelihood of a heterozygous variant theory")
|
||||||
|
public double HETVAR_PRIOR = 1e-3;
|
||||||
|
//@Argument(fullName="homVarPrior", shortName="homVarPrior", required=false, doc="Prior likelihood of the homozygous variant theory")
|
||||||
|
public double HOMVAR_PRIOR = 1e-5;
|
||||||
|
|
||||||
public AlleleMetrics metrics;
|
public AlleleMetrics metrics;
|
||||||
public PrintStream calls_file;
|
public PrintStream calls_file;
|
||||||
|
|
||||||
|
|
@ -52,7 +64,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
if (callsFileName != null)
|
if (callsFileName != null)
|
||||||
{
|
{
|
||||||
calls_file = new PrintStream(callsFileName);
|
calls_file = new PrintStream(callsFileName);
|
||||||
calls_file.println(AlleleFrequencyEstimate.asTabularStringHeader());
|
String header = GeliOutputFormat ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader();
|
||||||
|
calls_file.println(header);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e)
|
catch (Exception e)
|
||||||
|
|
@ -104,7 +117,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
if (refBaseIndex >= 0 && altBaseIndex >= 0) {
|
if (refBaseIndex >= 0 && altBaseIndex >= 0) {
|
||||||
// Set the priors for the hom-ref, het, and non-hom ref (we don't evaluate the
|
// Set the priors for the hom-ref, het, and non-hom ref (we don't evaluate the
|
||||||
// possibility of a non-ref het).
|
// possibility of a non-ref het).
|
||||||
double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
|
//double[] genotypePriors = { 0.999, 1e-3, 1e-5 };
|
||||||
|
double[] genotypePriors = { REF_PRIOR, HETVAR_PRIOR, HOMVAR_PRIOR };
|
||||||
|
|
||||||
// I'm not sure what the difference between qhat and qstar is in AlleleMetrics, but
|
// I'm not sure what the difference between qhat and qstar is in AlleleMetrics, but
|
||||||
// to make my life simpler, I evaluate the non-ref balances in genotypeBalances and
|
// to make my life simpler, I evaluate the non-ref balances in genotypeBalances and
|
||||||
|
|
@ -373,7 +387,10 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
|
|
||||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||||
{
|
{
|
||||||
calls_file.println(alleleFreq.asTabularString());
|
if ( alleleFreq.lodVsRef >= lodThreshold ) {
|
||||||
|
String line = GeliOutputFormat ? alleleFreq.asGeliString() : alleleFreq.asTabularString();
|
||||||
|
calls_file.println(line);
|
||||||
|
}
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,27 @@ public class AlleleFrequencyEstimate {
|
||||||
return "location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases";
|
return "location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String geliHeaderString() {
|
||||||
|
return "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String asGeliString()
|
||||||
|
{
|
||||||
|
// #Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT
|
||||||
|
// chr1 7764136 A 48 99 CC 83.650421 9.18159 -92.83638 -18.367548 -96.91729 -96.614204 -9.185958 -23.33643 -23.033337 -101.282059 -101.583092 -101.279999
|
||||||
|
|
||||||
|
// chr pos ref Nreads maxMapQ genotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT
|
||||||
|
return String.format("%s %16d %c %8d %d %s %.6f %.6f 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0",
|
||||||
|
location.getContig(),
|
||||||
|
location.getStart(),
|
||||||
|
ref,
|
||||||
|
depth,
|
||||||
|
-1,
|
||||||
|
genotype(),
|
||||||
|
lodVsRef,
|
||||||
|
lodVsNextBest);
|
||||||
|
}
|
||||||
|
|
||||||
public String asTabularString()
|
public String asTabularString()
|
||||||
{
|
{
|
||||||
return String.format("%s %s %c %c %s %f %f %f %f %d %s",
|
return String.format("%s %s %c %c %s %f %f %f %f %d %s",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue