New:
Minor changes to CoverageAndPowerWalker bootstrapping (faster selection of indeces). Entirely new Aritifical Pool Walker (ArtificialPoolWalkerMk2), will likely replace ArtificialPoolWalker on the next commit. Adapted the method of sampling, and added a helper context class: ArtificialPoolContext which carries much of the burden of calculation and data handling for the walker. The walker itself maps and reduces ArtificialPoolContexts. Cheers! git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1461 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
92ea947c33
commit
1da45cffb3
|
|
@ -29,6 +29,8 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
||||||
String pathToSyzygyFile = null;
|
String pathToSyzygyFile = null;
|
||||||
@Argument(fullName = "ColumnOffset", shortName = "co", doc = "Offset of column containing the power in the pf", required = true)
|
@Argument(fullName = "ColumnOffset", shortName = "co", doc = "Offset of column containing the power in the pf", required = true)
|
||||||
int colOffset = 0;
|
int colOffset = 0;
|
||||||
|
@Argument(fullName = "linesToClear", shortName="clr", doc = "Clear so many lines from the read file before starting (default - just the header line)", required = false)
|
||||||
|
int clrLines = 1;
|
||||||
|
|
||||||
BufferedReader syzyFileReader;
|
BufferedReader syzyFileReader;
|
||||||
final String pfFileDelimiter = " ";
|
final String pfFileDelimiter = " ";
|
||||||
|
|
@ -40,7 +42,9 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
||||||
super.initialize();
|
super.initialize();
|
||||||
try {
|
try {
|
||||||
syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile));
|
syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile));
|
||||||
|
for(int clear = 0; clear < clrLines; clear++) {
|
||||||
syzyFileReader.readLine();
|
syzyFileReader.readLine();
|
||||||
|
}
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found.";
|
String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found.";
|
||||||
throw new StingException(newErrMsg,e);
|
throw new StingException(newErrMsg,e);
|
||||||
|
|
@ -69,8 +73,8 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
||||||
if(!syzyFileIsReady) {
|
if(!syzyFileIsReady) {
|
||||||
throw new StingException("Input file reader was not ready before an attempt to read from it.");
|
throw new StingException("Input file reader was not ready before an attempt to read from it.");
|
||||||
} else if(!outOfLinesInSyzyFile) {
|
} else if(!outOfLinesInSyzyFile) {
|
||||||
double syzyPow = getSyzyPowFromFile();
|
Pair<Double,String> syzyPow = getSyzyPowFromFile();
|
||||||
out.printf("%s: %d %d %f %f%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first,syzyPow);
|
out.printf("%s: %d %d %f %f (%s)%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first,syzyPow.first,syzyPow.second);
|
||||||
} else {
|
} else {
|
||||||
out.printf("%s: %d %d %f%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first);
|
out.printf("%s: %d %d %f%n", context.getLocation(), context.getReads().size(),powpair.second,powpair.first);
|
||||||
}
|
}
|
||||||
|
|
@ -79,7 +83,7 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
||||||
return context.getReads().size();
|
return context.getReads().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getSyzyPowFromFile() {
|
public Pair<Double,String> getSyzyPowFromFile() {
|
||||||
String thisLine = null;
|
String thisLine = null;
|
||||||
try {
|
try {
|
||||||
thisLine = syzyFileReader.readLine();
|
thisLine = syzyFileReader.readLine();
|
||||||
|
|
@ -87,15 +91,17 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{
|
||||||
String newErrMsg = "Ran out of lines in the syzyfile; further output of Syzygy power will be suppressed.";
|
String newErrMsg = "Ran out of lines in the syzyfile; further output of Syzygy power will be suppressed.";
|
||||||
outOfLinesInSyzyFile=true;
|
outOfLinesInSyzyFile=true;
|
||||||
logger.warn(newErrMsg + " " + e.toString());
|
logger.warn(newErrMsg + " " + e.toString());
|
||||||
return -1.1;
|
return new Pair(-1.1, "Printing Stops Here");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String chromPos = null;
|
||||||
StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter);
|
StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter);
|
||||||
try {
|
try {
|
||||||
for(int j = 0; j < colOffset; j++) {
|
chromPos = lineTokenizer.nextToken();
|
||||||
|
for(int j = 1; j < colOffset; j++) {
|
||||||
lineTokenizer.nextToken();
|
lineTokenizer.nextToken();
|
||||||
}
|
}
|
||||||
return (Double.valueOf(lineTokenizer.nextToken())/100.0);
|
return new Pair((Double.valueOf(lineTokenizer.nextToken())/100.0),chromPos);
|
||||||
} catch (NoSuchElementException e) {
|
} catch (NoSuchElementException e) {
|
||||||
String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile;
|
String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile;
|
||||||
throw new StingException(errMsg);
|
throw new StingException(errMsg);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,146 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.playground.utils.ArtificialPoolContext;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ListIterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Aug 26, 2009
|
||||||
|
* Time: 11:28:26 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class ArtificalPoolWalkerMk2 extends LocusWalker<ArtificialPoolContext, ArtificialPoolContext> {
|
||||||
|
|
||||||
|
@Argument(fullName = "AuxOutputFile", shortName = "af", doc = "Auxiliary file for genotyp & coverage output", required = true)
|
||||||
|
String auxFilePath = null;
|
||||||
|
@Argument(fullName = "OutputBamFile", shortName = "of", doc = "Output to this file rather than standard output", required = false)
|
||||||
|
SAMFileWriter outputBamFile = null;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArtificialPoolContext reduceInit() { // try to initialize the file writer
|
||||||
|
ArtificialPoolContext apContext = new ArtificialPoolContext();
|
||||||
|
apContext.setSingleSampleGenotyper(new SingleSampleGenotyper());
|
||||||
|
apContext.setReadGroupSets(getToolkit().getMergedReadGroupsByReaders());
|
||||||
|
apContext.setAuxWriter(initializeAuxFileWriter(apContext.getTotalNumberOfPeople()));
|
||||||
|
apContext.setSAMFileWriter(outputBamFile);
|
||||||
|
apContext.initializeSSG();
|
||||||
|
|
||||||
|
return apContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArtificialPoolContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
return new ArtificialPoolContext(tracker,ref,context);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArtificialPoolContext reduce(ArtificialPoolContext mapCon, ArtificialPoolContext redCon){
|
||||||
|
ArtificialPoolContext updatedContext = ArtificialPoolContext.mapReduceMerge(mapCon,redCon);
|
||||||
|
List<SAMRecord>[] newReads = updatedContext.splitReadsByGroup(updatedContext.getNewReads());
|
||||||
|
long[] newCvg = updateRunningCoverage(updatedContext.getRunningCoverage(), getCoverageByGroup(newReads));
|
||||||
|
updatedContext.setRunningCoverage(newCvg);
|
||||||
|
List<SAMRecord>[] sampledReads = ArtificialPoolContext.sampleReads(newReads,runningCoverageToDouble(newCvg));
|
||||||
|
printToFiles(sampledReads,updatedContext);
|
||||||
|
return updatedContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper methods follow
|
||||||
|
|
||||||
|
private PrintWriter initializeAuxFileWriter(int nFiles) {
|
||||||
|
PrintWriter auxFileWriter;
|
||||||
|
try {
|
||||||
|
auxFileWriter = new PrintWriter(new FileOutputStream(auxFilePath));
|
||||||
|
auxFileWriter.println(createAuxFileHeader(nFiles));
|
||||||
|
} catch(FileNotFoundException e) {
|
||||||
|
String errmsg = "The filepath you entered "+auxFilePath+" could not be opened. Please double-check that the input is correct.";
|
||||||
|
throw new StingException(errmsg, e);
|
||||||
|
} catch(IOException e) {
|
||||||
|
String errmsg = "The file you entered "+auxFilePath+" could not be written to. Please check your permissions to write to this file.";
|
||||||
|
throw new StingException(errmsg,e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return auxFileWriter;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String createAuxFileHeader(int nFiles) {
|
||||||
|
String sp = " ";
|
||||||
|
String st1 = "Chrom:Pos" + sp;
|
||||||
|
String st2 = "";
|
||||||
|
for(int j = 0; j < nFiles; j++) {
|
||||||
|
st2 = st2 + "Pers " + j + " Gen" + sp; // short for "genotype of person j at this location"
|
||||||
|
st2 = st2 + "Pers " + j + " Conf" + sp; // short for "confidence in genotype call of ..."
|
||||||
|
st2 = st2 + "Pers " + j + " NewCvg" + sp; // short for "coverage of person j at this location"
|
||||||
|
}
|
||||||
|
String st3 = "TotalCvg";
|
||||||
|
|
||||||
|
return st1+st2+st3;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int[] getCoverageByGroup(List<SAMRecord>[] readsByGroup) {
|
||||||
|
int[] coverage = new int[readsByGroup.length];
|
||||||
|
for(int iterator = 0; iterator < readsByGroup.length; iterator ++) {
|
||||||
|
coverage[iterator] = readsByGroup[iterator].size();
|
||||||
|
}
|
||||||
|
|
||||||
|
return coverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long[] updateRunningCoverage(long[] cvgUpToNow, int[] newCvgByGroup) {
|
||||||
|
long[] newCvg = new long[cvgUpToNow.length];
|
||||||
|
for(int iter = 0; iter < cvgUpToNow.length; iter++) {
|
||||||
|
newCvg[iter] = cvgUpToNow[iter] + newCvgByGroup[iter];
|
||||||
|
}
|
||||||
|
|
||||||
|
return newCvg;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double[] runningCoverageToDouble(long[] cvg) {
|
||||||
|
double[] avgProp = new double[cvg.length];
|
||||||
|
long sum = 0;
|
||||||
|
for(long elem : cvg) {
|
||||||
|
sum += elem;
|
||||||
|
}
|
||||||
|
for(int iter = 0; iter < cvg.length; iter++) {
|
||||||
|
avgProp[iter] = cvg[iter]/sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
return avgProp;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printToFiles(List<SAMRecord>[] sampledNewReads, ArtificialPoolContext context) {
|
||||||
|
SAMFileWriter samWrite = context.getSAMFileWriter();
|
||||||
|
String sp = " ";
|
||||||
|
PrintWriter auxWrite = context.getWriterToAuxiliaryFile();
|
||||||
|
int readGroupInt = 0;
|
||||||
|
for(List<SAMRecord> readGroup : sampledNewReads) {
|
||||||
|
for(SAMRecord read : readGroup) {
|
||||||
|
samWrite.addAlignment(read);
|
||||||
|
}
|
||||||
|
auxWrite.print(context.getAlignmentContext().getLocation().toString() + sp);
|
||||||
|
auxWrite.print(context.genotypeAndConfidenceToString(readGroupInt,sp));
|
||||||
|
readGroupInt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -54,6 +54,7 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
||||||
private LinkedList<Integer>[] living_reads;
|
private LinkedList<Integer>[] living_reads;
|
||||||
private SingleSampleGenotyper ssg;
|
private SingleSampleGenotyper ssg;
|
||||||
private int npeople;
|
private int npeople;
|
||||||
|
//TODO: LOCAL CLASS FOR ALL THIS
|
||||||
//@param local_genotypes - holds the genotype (A A/ A C/ etc) for each individual. Updates at each locus.
|
//@param local_genotypes - holds the genotype (A A/ A C/ etc) for each individual. Updates at each locus.
|
||||||
//@param auxWrite - the writer to the auxiliary file
|
//@param auxWrite - the writer to the auxiliary file
|
||||||
//@param readGroupSets : holds the readgroups (identifiers for individuals from each read)
|
//@param readGroupSets : holds the readgroups (identifiers for individuals from each read)
|
||||||
|
|
@ -86,8 +87,6 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
||||||
|
|
||||||
if(red_prop <= 0) {
|
if(red_prop <= 0) {
|
||||||
red_prop = 1.0/npeople;
|
red_prop = 1.0/npeople;
|
||||||
} else {
|
|
||||||
// do nothing muhahaha
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize the local genotype array
|
// initialize the local genotype array
|
||||||
|
|
@ -112,7 +111,7 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
||||||
|
|
||||||
updateLiving();
|
updateLiving();
|
||||||
// each time we move to the next locus, remove from the coverage count those reads that ended
|
// each time we move to the next locus, remove from the coverage count those reads that ended
|
||||||
auxWrite.printf("%s:%s",context.getContig(),context.getPosition());
|
auxWrite.printf("%s:%s",context.getContig(),context.getPosition()); // TODO: PUT IN REDUCE
|
||||||
|
|
||||||
return getNewReadsAndGenotypesByGroup(tracker, ref, context);
|
return getNewReadsAndGenotypesByGroup(tracker, ref, context);
|
||||||
}
|
}
|
||||||
|
|
@ -134,7 +133,6 @@ public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFile
|
||||||
List<SAMRecord>[] randomReadsByGroup = drawReadsRandomlyFromReadsByGroup(readsByReadGroup,sought_coverage);
|
List<SAMRecord>[] randomReadsByGroup = drawReadsRandomlyFromReadsByGroup(readsByReadGroup,sought_coverage);
|
||||||
printToFileAndAuxFile(randomReadsByGroup,sought_coverage,outFile);
|
printToFileAndAuxFile(randomReadsByGroup,sought_coverage,outFile);
|
||||||
return outFile;
|
return outFile;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,14 +33,9 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
||||||
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls")
|
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls")
|
||||||
public double threshold = 3.0;
|
public double threshold = 3.0;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private static final int BOOTSTRAP_ITERATIONS = 300;
|
private static final int BOOTSTRAP_ITERATIONS = 300;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
@Override
|
|
||||||
public void initialize()
|
|
||||||
{
|
|
||||||
|
|
||||||
if(num_individuals <= 0)
|
if(num_individuals <= 0)
|
||||||
throw new IllegalArgumentException("Positive nonzero parameter expected for poolSize");
|
throw new IllegalArgumentException("Positive nonzero parameter expected for poolSize");
|
||||||
|
|
@ -117,9 +112,7 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
||||||
double pow = 0;
|
double pow = 0;
|
||||||
|
|
||||||
if(depth - kaccept < kaccept) {// kaccept > depth/2 - calculate power as P[hits between k and depth]
|
if(depth - kaccept < kaccept) {// kaccept > depth/2 - calculate power as P[hits between k and depth]
|
||||||
|
|
||||||
for(int k = kaccept; k < depth; k++) {
|
for(int k = kaccept; k < depth; k++) {
|
||||||
|
|
||||||
pow += MathUtils.binomialProbabilityLog(k, depth, snp_prop);
|
pow += MathUtils.binomialProbabilityLog(k, depth, snp_prop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -153,14 +146,8 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
||||||
if (depth <= 0) {
|
if (depth <= 0) {
|
||||||
result = new Pair(-1,-1);
|
result = new Pair(-1,-1);
|
||||||
} else if (!use_bootstrap) { // object data from command line
|
} else if (!use_bootstrap) { // object data from command line
|
||||||
|
|
||||||
result = powerTheoretical(depth, reads, offsets, single_snip_proportion);
|
result = powerTheoretical(depth, reads, offsets, single_snip_proportion);
|
||||||
} else {
|
} else { // otherwise, bootstrapping occurs below
|
||||||
|
|
||||||
//
|
|
||||||
// otherwise, bootstrapping occurs below
|
|
||||||
//
|
|
||||||
|
|
||||||
int hypothesis_rejections=0;
|
int hypothesis_rejections=0;
|
||||||
|
|
||||||
for(int boot = 0; boot < BOOTSTRAP_ITERATIONS; boot++)
|
for(int boot = 0; boot < BOOTSTRAP_ITERATIONS; boot++)
|
||||||
|
|
@ -196,15 +183,7 @@ public class CoverageAndPowerWalker extends LocusWalker<Integer, Pair<Long, Long
|
||||||
|
|
||||||
public int randomlySelectRead(int depth)
|
public int randomlySelectRead(int depth)
|
||||||
{
|
{
|
||||||
double qscore_selector = Math.random();
|
return (int) Math.floor((double)depth * Math.random());
|
||||||
int readspositionrandom;
|
|
||||||
for(readspositionrandom = 1; readspositionrandom < ((double)depth * qscore_selector); readspositionrandom ++) {
|
|
||||||
if(readspositionrandom > depth + 1) {
|
|
||||||
throw new RuntimeException("qscore iterator exceeding possible thresholds");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return readspositionrandom - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,286 @@
|
||||||
|
package org.broadinstitute.sting.playground.utils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||||
|
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Aug 26, 2009
|
||||||
|
* Time: 11:37:42 AM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class ArtificialPoolContext {
|
||||||
|
private PrintWriter writerToAuxiliaryFile;
|
||||||
|
private SAMFileWriter writerToSamFile;
|
||||||
|
private SingleSampleGenotyper ssg;
|
||||||
|
private List<Set<String>> readGroupSets;
|
||||||
|
private long[] runningCoverage;
|
||||||
|
private RefMetaDataTracker refTracker;
|
||||||
|
private ReferenceContext refContext;
|
||||||
|
private AlignmentContext aliContext;
|
||||||
|
|
||||||
|
public ArtificialPoolContext() {
|
||||||
|
readGroupSets = null;
|
||||||
|
writerToAuxiliaryFile = null;
|
||||||
|
writerToSamFile = null;
|
||||||
|
ssg = null;
|
||||||
|
refTracker = null;
|
||||||
|
aliContext = null;
|
||||||
|
refContext = null;
|
||||||
|
runningCoverage = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArtificialPoolContext(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
refTracker = tracker;
|
||||||
|
refContext = ref;
|
||||||
|
aliContext = context;
|
||||||
|
readGroupSets = null;
|
||||||
|
writerToAuxiliaryFile = null;
|
||||||
|
writerToSamFile=null;
|
||||||
|
ssg = null;
|
||||||
|
runningCoverage = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArtificialPoolContext(PrintWriter pw, SAMFileWriter sw, SingleSampleGenotyper g, List<Set<String>> rgs, long [] runcvg, RefMetaDataTracker rt, ReferenceContext rc, AlignmentContext ac) {
|
||||||
|
writerToAuxiliaryFile = pw;
|
||||||
|
writerToSamFile = sw;
|
||||||
|
ssg = g;
|
||||||
|
readGroupSets = rgs;
|
||||||
|
runningCoverage = runcvg;
|
||||||
|
refTracker = rt;
|
||||||
|
refContext = rc;
|
||||||
|
aliContext = ac;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAuxWriter(PrintWriter writer) {
|
||||||
|
writerToAuxiliaryFile = writer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSingleSampleGenotyper(SingleSampleGenotyper typer) {
|
||||||
|
ssg = typer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initializeSSG() {
|
||||||
|
ssg.initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReadGroupSets(List<Set<String>> rgSets) {
|
||||||
|
readGroupSets = rgSets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRefMetaDataTracker(RefMetaDataTracker tracker) {
|
||||||
|
refTracker = tracker;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReferenceContext(ReferenceContext ref) {
|
||||||
|
refContext = ref;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAlignmentContext(AlignmentContext context) {
|
||||||
|
aliContext = context;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRunningCoverage(long[] estimate) {
|
||||||
|
runningCoverage = estimate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSAMFileWriter(SAMFileWriter writer) {
|
||||||
|
writerToSamFile = writer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getTotalNumberOfPeople() {
|
||||||
|
return readGroupSets.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public RefMetaDataTracker getRefMetaDataTracker() {
|
||||||
|
return refTracker;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReferenceContext getReferenceContext() {
|
||||||
|
return refContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AlignmentContext getAlignmentContext() {
|
||||||
|
return aliContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PrintWriter getWriterToAuxiliaryFile() {
|
||||||
|
return writerToAuxiliaryFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SingleSampleGenotyper getSingleSampleGenotyper() {
|
||||||
|
return ssg;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Set<String>> getReadGroupSets() {
|
||||||
|
return readGroupSets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long[] getRunningCoverage() {
|
||||||
|
return runningCoverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMFileWriter getSAMFileWriter() {
|
||||||
|
return writerToSamFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<SAMRecord> getReads() {
|
||||||
|
List<SAMRecord> reads;
|
||||||
|
if(aliContext == null) {
|
||||||
|
reads=null;
|
||||||
|
} else {
|
||||||
|
reads = aliContext.getReads();
|
||||||
|
}
|
||||||
|
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Integer> getOffsets() {
|
||||||
|
List<Integer> offsets;
|
||||||
|
if(aliContext == null) {
|
||||||
|
offsets = null;
|
||||||
|
} else {
|
||||||
|
offsets = aliContext.getOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
return offsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<SAMRecord> getNewReads() {
|
||||||
|
List<SAMRecord> newReads;
|
||||||
|
|
||||||
|
if(aliContext == null) {
|
||||||
|
newReads = null;
|
||||||
|
} else {
|
||||||
|
newReads = new LinkedList<SAMRecord>();
|
||||||
|
List<SAMRecord> allReads = aliContext.getReads();
|
||||||
|
List<Integer> allOffsets = aliContext.getOffsets();
|
||||||
|
|
||||||
|
for(int iter = 0; iter < allReads.size(); iter++) {
|
||||||
|
if(allOffsets.get(iter) == 0) {
|
||||||
|
newReads.add(allReads.get(iter));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return newReads;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<List<SAMRecord>[],List<Integer>[]> splitByGroup(List<SAMRecord> unsplitReads, List<Integer> unsplitOffsets) {
|
||||||
|
List<SAMRecord>[] readsSplitByGroup;
|
||||||
|
List<Integer> [] offsetsSplitByGroup;
|
||||||
|
|
||||||
|
if(unsplitReads != null && readGroupSets != null) {
|
||||||
|
readsSplitByGroup = new ArrayList[this.getTotalNumberOfPeople()];
|
||||||
|
if(unsplitOffsets != null) {
|
||||||
|
offsetsSplitByGroup = new ArrayList[this.getTotalNumberOfPeople()];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
offsetsSplitByGroup = null;
|
||||||
|
}
|
||||||
|
int listSize = unsplitReads.size();
|
||||||
|
for(int element = 0; element < listSize; element++) {
|
||||||
|
SAMRecord read = unsplitReads.get(element);
|
||||||
|
for(int groupNumber = 0; groupNumber < this.getTotalNumberOfPeople(); groupNumber++) {
|
||||||
|
if(readGroupSets.get(groupNumber).contains((String) read.getAttribute("RG"))) {
|
||||||
|
readsSplitByGroup[groupNumber].add(read);
|
||||||
|
if(offsetsSplitByGroup != null) {
|
||||||
|
offsetsSplitByGroup[groupNumber].add(unsplitOffsets.get(element));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
readsSplitByGroup = null;
|
||||||
|
offsetsSplitByGroup = null; // compiler complains without these lines
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair(readsSplitByGroup,offsetsSplitByGroup);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<SAMRecord>[] splitReadsByGroup(List<SAMRecord> unsplitReads) {
|
||||||
|
return (this.splitByGroup(unsplitReads,null)).first;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Static methods follow
|
||||||
|
|
||||||
|
public static ArtificialPoolContext mapReduceMerge(ArtificialPoolContext mapContext, ArtificialPoolContext reduceContext) {
|
||||||
|
return new ArtificialPoolContext(reduceContext.getWriterToAuxiliaryFile(),reduceContext.getSAMFileWriter(),
|
||||||
|
reduceContext.getSingleSampleGenotyper(), reduceContext.getReadGroupSets(), reduceContext.getRunningCoverage(),
|
||||||
|
mapContext.getRefMetaDataTracker(),mapContext.getReferenceContext(),mapContext.getAlignmentContext());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Pair<List<SAMRecord>[],List<Integer>> sampleReadsAndOffsets(List<SAMRecord>[] reads, List<Integer>[] offsets, double[] propEstGlobal) {
|
||||||
|
double[] samplingRate = calculateSamplingRateFromGlobalEstimate(propEstGlobal);
|
||||||
|
List<SAMRecord>[] sampledReads = new ArrayList[reads.length];
|
||||||
|
List<Integer>[] sampledOffsets;
|
||||||
|
if(offsets != null){
|
||||||
|
sampledOffsets = new ArrayList[offsets.length];
|
||||||
|
} else {
|
||||||
|
sampledOffsets = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int group = 0; group < reads.length; group++) {
|
||||||
|
for(int readNumber = 0; readNumber < reads[group].size(); readNumber++) {
|
||||||
|
if(Math.random() < samplingRate[group]) {
|
||||||
|
sampledReads[group].add(reads[group].get(readNumber));
|
||||||
|
if(sampledOffsets != null) {
|
||||||
|
sampledOffsets[group].add(offsets[group].get(readNumber));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair(sampledReads,sampledOffsets);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String genotypeAndConfidenceToString(int group, String spacer) {
|
||||||
|
GenotypeCall call = this.getGenotypeCall(group);
|
||||||
|
return (call.getGenotypes() + spacer + call.getConfidenceScore().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenotypeCall getGenotypeCall(int group) {
|
||||||
|
AlignmentContext alicon = this.getAlignmentContext();
|
||||||
|
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
|
||||||
|
return ssg.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
|
||||||
|
new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group]));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<SAMRecord>[] sampleReads(List<SAMRecord>[] reads, double[] propEstGlobal) {
|
||||||
|
return (sampleReadsAndOffsets(reads, null, propEstGlobal)).first;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static double[] calculateSamplingRateFromGlobalEstimate(double[] ratios) {
|
||||||
|
double min = ratios[0];
|
||||||
|
for(double ratio : ratios) {
|
||||||
|
if(ratio < min) {
|
||||||
|
min = ratio;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double[] samplingRate = new double[ratios.length];
|
||||||
|
// now divide by minimum
|
||||||
|
for(int j = 0; j < ratios.length; j++) {
|
||||||
|
samplingRate[j] = ratios[j]/min;
|
||||||
|
}
|
||||||
|
|
||||||
|
return samplingRate;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue