git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1516 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3ac5ac066f
commit
5130ca9b94
|
|
@ -1,356 +1,154 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* TERMS OF USE: WE ARE NOT LIABLE FOR ANYTHING
|
||||
*/
|
||||
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.playground.utils.ArtificialPoolContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
// Display the depth of coverage at a given locus and output the power
|
||||
|
||||
public class ArtificialPoolWalker extends LocusWalker<List<SAMRecord>[], SAMFileWriter> {
|
||||
@Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
|
||||
public boolean suppressPrinting = false;
|
||||
@Argument(fullName="reductionProportion", shortName="rp", doc="Proportion of total coverage reflected in pool file",required=false)
|
||||
public double red_prop = -1;
|
||||
@Argument(fullName="auxOutputFile", shortName="af", doc="Filepath for Auxiliary pool information (true genotypes, etc.)", required=true)
|
||||
public String auxFilepath = null;
|
||||
@Argument(fullName="outputBamFile",shortName="of",doc="Output to this file rather than to standard output")
|
||||
SAMFileWriter outputBamFile = null;
|
||||
|
||||
private List<Set<String>> readGroupSets;
|
||||
private PrintWriter auxWrite;
|
||||
private Pair<String,Double>[] local_genotypes;
|
||||
private LinkedList<Integer>[] living_reads;
|
||||
private SingleSampleGenotyper ssg;
|
||||
private int npeople;
|
||||
//TODO: LOCAL CLASS FOR ALL THIS
|
||||
//@param local_genotypes - holds the genotype (A A/ A C/ etc) for each individual. Updates at each locus.
|
||||
//@param auxWrite - the writer to the auxiliary file
|
||||
//@param readGroupSets : holds the readgroups (identifiers for individuals from each read)
|
||||
//@param ssg - the instance of the single sample genotyper
|
||||
//@param living_reads - holds on to the selected reads from each person until the location passes the read endpoint
|
||||
// Updates at each locus.
|
||||
//@param npeople - number of people represented in this pool (size of readGroupSets)
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Aug 26, 2009
|
||||
* Time: 11:28:26 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ArtificialPoolWalker extends LocusWalker<ArtificialPoolContext, ArtificialPoolContext> {
|
||||
|
||||
@Argument(fullName = "AuxOutputFile", shortName = "af", doc = "Auxiliary file for genotyp & coverage output", required = true)
|
||||
String auxFilePath = null;
|
||||
@Argument(fullName = "OutputBamFile", shortName = "of", doc = "Output to this file rather than standard output", required = false)
|
||||
SAMFileWriter outputBamFile = null;
|
||||
|
||||
public void initialize() {
|
||||
// initialize the output writer
|
||||
}
|
||||
|
||||
public ArtificialPoolContext reduceInit() { // try to initialize the file writer
|
||||
ArtificialPoolContext apContext = new ArtificialPoolContext();
|
||||
apContext.setSingleSampleGenotyper(new SingleSampleGenotyper());
|
||||
apContext.setReadGroupSets(getToolkit().getMergedReadGroupsByReaders());
|
||||
apContext.setAuxWriter(initializeAuxFileWriter(apContext.getTotalNumberOfPeople()));
|
||||
apContext.setSAMFileWriter(outputBamFile);
|
||||
apContext.initializeSSG();
|
||||
|
||||
return apContext;
|
||||
}
|
||||
|
||||
public ArtificialPoolContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new ArtificialPoolContext(tracker,ref,context);
|
||||
}
|
||||
|
||||
public ArtificialPoolContext reduce(ArtificialPoolContext mapCon, ArtificialPoolContext redCon){
|
||||
/* ArtificialPoolContext updatedContext = ArtificialPoolContext.mapReduceMerge(mapCon,redCon);
|
||||
List<SAMRecord>[] newReads = updatedContext.splitReadsByGroup(updatedContext.getNewReads());
|
||||
long[] newCvg = updateRunningCoverage(updatedContext.getRunningCoverage(), getCoverageByGroup(newReads));
|
||||
updatedContext.setRunningCoverage(newCvg);
|
||||
List<SAMRecord>[] sampledReads = ArtificialPoolContext.sampleReads(newReads,runningCoverageToDouble(newCvg));
|
||||
printToFiles(sampledReads,updatedContext);*/
|
||||
AlignmentContext context = redCon.getAlignmentContext();
|
||||
SAMFileWriter writer = redCon.getSAMFileWriter();
|
||||
for(SAMRecord read : context.getReads()) {
|
||||
writer.addAlignment(read);
|
||||
}
|
||||
PrintWriter auxWrite = redCon.getWriterToAuxiliaryFile();
|
||||
auxWrite.print("This is a test.");
|
||||
ArtificialPoolContext updatedContext = redCon;
|
||||
return updatedContext;
|
||||
}
|
||||
|
||||
// Helper methods follow
|
||||
|
||||
private PrintWriter initializeAuxFileWriter(int nFiles) {
|
||||
PrintWriter auxFileWriter;
|
||||
try {
|
||||
auxWrite = new PrintWriter(new FileOutputStream(auxFilepath));
|
||||
}
|
||||
catch(FileNotFoundException e) {
|
||||
String ermsg = "auxiliary file for Artificial Pool Walker was either a folder or could not be created";
|
||||
throw new StingException(ermsg, e);
|
||||
auxFileWriter = new PrintWriter(new FileOutputStream(auxFilePath));
|
||||
auxFileWriter.println(createAuxFileHeader(nFiles));
|
||||
} catch(FileNotFoundException e) {
|
||||
String errmsg = "The filepath you entered "+auxFilePath+" could not be opened. Please double-check that the input is correct.";
|
||||
throw new StingException(errmsg, e);
|
||||
} catch(IOException e) {
|
||||
String errmsg = "The file you entered "+auxFilePath+" could not be written to. Please check your permissions to write to this file.";
|
||||
throw new StingException(errmsg,e);
|
||||
}
|
||||
|
||||
// create the file header and write to file
|
||||
|
||||
auxWrite.printf("%s%n",createFileHeader());
|
||||
|
||||
// obtain the readgroup sets
|
||||
|
||||
readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
||||
npeople = readGroupSets.size();
|
||||
|
||||
// if a reduction proportion was unspecified, set it to 1/num_files
|
||||
|
||||
if(red_prop <= 0) {
|
||||
red_prop = 1.0/npeople;
|
||||
}
|
||||
|
||||
// initialize the local genotype array
|
||||
|
||||
local_genotypes = new Pair[npeople];
|
||||
|
||||
// initilize the single sample genotyper
|
||||
|
||||
ssg = new SingleSampleGenotyper();
|
||||
ssg.initialize();
|
||||
|
||||
// initialize the living reads array
|
||||
|
||||
living_reads = new LinkedList[npeople];
|
||||
|
||||
for(int iter=0; iter < readGroupSets.size(); iter++) {
|
||||
living_reads[iter] = new LinkedList<Integer>();
|
||||
}
|
||||
return auxFileWriter;
|
||||
}
|
||||
|
||||
public List<SAMRecord>[] map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
updateLiving();
|
||||
// each time we move to the next locus, remove from the coverage count those reads that ended
|
||||
auxWrite.printf("%s:%s",context.getContig(),context.getPosition()); // TODO: PUT IN REDUCE
|
||||
|
||||
return getNewReadsAndGenotypesByGroup(tracker, ref, context);
|
||||
}
|
||||
|
||||
public SAMFileWriter reduceInit() {
|
||||
|
||||
return outputBamFile;
|
||||
}
|
||||
|
||||
public SAMFileWriter reduce(List<SAMRecord>[] readsByReadGroup, SAMFileWriter outFile) {
|
||||
// want to generate a sub sample of the new reads
|
||||
int total_new_coverage = 0;
|
||||
|
||||
for(int pers = 0; pers < npeople; pers++) {
|
||||
total_new_coverage += readsByReadGroup[pers].size();
|
||||
}
|
||||
|
||||
int sought_coverage = (int) Math.ceil(red_prop * (double)total_new_coverage);
|
||||
List<SAMRecord>[] randomReadsByGroup = drawReadsRandomlyFromReadsByGroup(readsByReadGroup,sought_coverage);
|
||||
printToFileAndAuxFile(randomReadsByGroup,sought_coverage,outFile);
|
||||
return outFile;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ArtificalPoolWalker helper methods begin below
|
||||
|
||||
private List<SAMRecord>[] drawReadsRandomlyFromReadsByGroup(List<SAMRecord>[] readsByReadGroup, int numToDraw){
|
||||
|
||||
int[] simulatedCoverage = simulateRandomUniformDraws(numToDraw, readsByReadGroup);
|
||||
|
||||
return removeReadsToCoverage(readsByReadGroup,simulatedCoverage);
|
||||
}
|
||||
|
||||
private List<SAMRecord>[] removeReadsToCoverage(List<SAMRecord>[] readsByReadGroup, int[] simulatedCoverage) {
|
||||
for(int group = 0; group < npeople; group++) {
|
||||
int cvgAtGroup = readsByReadGroup[group].size();
|
||||
for(int drawDown = simulatedCoverage[group]; drawDown > 0; drawDown --) {
|
||||
readsByReadGroup[group].remove((int)Math.floor((double) cvgAtGroup*Math.random()));
|
||||
cvgAtGroup--;
|
||||
}
|
||||
}
|
||||
|
||||
return readsByReadGroup;
|
||||
}
|
||||
|
||||
private int[] simulateRandomUniformDraws(int numToDraw, List<SAMRecord>[] readsByReadGroup) {
|
||||
int[] drawsByPerson = new int[npeople];
|
||||
|
||||
for(int pers = 0; pers < npeople; pers++) { //array instantiation
|
||||
drawsByPerson[pers] = 0;
|
||||
}
|
||||
|
||||
for(int draw = 0; draw < numToDraw; draw++) {
|
||||
int personToDrawFrom = (int) Math.floor((double) npeople * Math.random());
|
||||
drawsByPerson[personToDrawFrom]++;
|
||||
}
|
||||
|
||||
return redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,0, 0, null);
|
||||
}
|
||||
|
||||
private int[] redistributeSimulatedCoverage(int[] drawsByPerson, List<SAMRecord>[] readsByReadGroup, int excess, int nOffLimits, boolean[] offLimits) {
|
||||
int [] result;
|
||||
if(offLimits == null) { // first time calling
|
||||
offLimits = new boolean[npeople];
|
||||
for(int i = 0; i < npeople; i++) {
|
||||
offLimits[i] = false;
|
||||
}
|
||||
result = redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,excess,0, offLimits);
|
||||
} else if (excess == 0) { // no overt excess, need to check individual persons
|
||||
// get excess coverage
|
||||
int newExcess = 0;
|
||||
|
||||
for(int pers = 0; pers < npeople; pers++) {
|
||||
if(!offLimits[pers] && drawsByPerson[pers] > readsByReadGroup[pers].size()) {
|
||||
newExcess += drawsByPerson[pers] - readsByReadGroup[pers].size();
|
||||
drawsByPerson[pers] = readsByReadGroup[pers].size();
|
||||
offLimits[pers] = true;
|
||||
nOffLimits++;
|
||||
} else {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
|
||||
if(newExcess == 0) { // we are done!
|
||||
result = drawsByPerson;
|
||||
} else { // there is now overt excess
|
||||
result = redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,newExcess,nOffLimits,offLimits);
|
||||
}
|
||||
} else { // there are overt excess reads to distribute
|
||||
int nRemaining = npeople - nOffLimits;
|
||||
int[] drawsToAdd = new int[nRemaining];
|
||||
|
||||
for(int j = 0; j < nRemaining; j++) {
|
||||
drawsToAdd[j] = 0;
|
||||
}
|
||||
|
||||
for(int draw = excess; draw > 0; draw--) {
|
||||
drawsToAdd[(int) Math.floor((double)nRemaining * Math.random())]++;
|
||||
}
|
||||
|
||||
int notOffLimitsCounter = 0;
|
||||
|
||||
for(int pers = 0; pers < npeople; pers++) {
|
||||
if(! offLimits[pers]) {
|
||||
drawsByPerson[pers] += drawsToAdd[notOffLimitsCounter];
|
||||
notOffLimitsCounter++;
|
||||
} else {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
result = redistributeSimulatedCoverage(drawsByPerson,readsByReadGroup,0,nOffLimits,offLimits);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private int[] createNewPersonIndex(int peopleWithReadsLeft, int[] indexLeft, int indexToRemoveFrom) {
|
||||
int[] newPersonIndex = new int[peopleWithReadsLeft-1];
|
||||
for(int i = 0; i < peopleWithReadsLeft; i++) {
|
||||
if(i < indexToRemoveFrom) {
|
||||
newPersonIndex[i] = indexLeft[i];
|
||||
} else {
|
||||
newPersonIndex[i] = indexLeft[i+1];
|
||||
}
|
||||
}
|
||||
return newPersonIndex;
|
||||
}
|
||||
|
||||
|
||||
private List<SAMRecord>[] getNewReadsAndGenotypesByGroup(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
|
||||
{
|
||||
// at the same time we will:
|
||||
// 1) split reads up by read group
|
||||
// 2) get new reads
|
||||
// 3) get genotypes for each individual
|
||||
// then we will:
|
||||
// return the new reads (a subsample), stratified by read group
|
||||
|
||||
List<SAMRecord> allReads = context.getReads();
|
||||
int n_reads_total = allReads.size();
|
||||
List<Integer> allOffsets = context.getOffsets();
|
||||
ListIterator readIterator = allReads.listIterator();
|
||||
ListIterator offsetIterator = allOffsets.listIterator();
|
||||
LinkedList<SAMRecord>[] reads_by_group = new LinkedList[npeople];
|
||||
LinkedList<Integer>[] offsets_by_group = new LinkedList[npeople];
|
||||
LinkedList<SAMRecord>[] new_reads_by_group = new LinkedList[npeople];
|
||||
|
||||
for(int j = 0; j < npeople; j++) {
|
||||
reads_by_group[j] = new LinkedList<SAMRecord>();
|
||||
offsets_by_group[j] = new LinkedList<Integer>();
|
||||
new_reads_by_group[j] = new LinkedList<SAMRecord>();
|
||||
}
|
||||
|
||||
while(readIterator.hasNext()) {
|
||||
int iter_person = 0;
|
||||
SAMRecord read = (SAMRecord) readIterator.next();
|
||||
int offset = (Integer) offsetIterator.next();
|
||||
while(!readGroupSets.get(iter_person).contains((String) read.getAttribute("RG"))) {
|
||||
iter_person++;
|
||||
// for debug purposes only:
|
||||
if(iter_person > npeople) {
|
||||
throw new StingException("Read not found in any read group in ArtificialPoolWalker. Life sucks.");
|
||||
}
|
||||
}
|
||||
// here, iter_person is the person from whom the read came so:
|
||||
reads_by_group[iter_person].add(read);
|
||||
offsets_by_group[iter_person].add(offset);
|
||||
|
||||
// check to see if the read is new
|
||||
if(offset == 0) {
|
||||
new_reads_by_group[iter_person].add(read);
|
||||
// add this to the new reads set to be randomly selected and printed
|
||||
}
|
||||
}
|
||||
// now we obtain the genotypes
|
||||
for(int iter=0; iter<npeople; iter++) {
|
||||
AlignmentContext subContext = new AlignmentContext(context.getLocation(), reads_by_group[iter], offsets_by_group[iter]);
|
||||
GenotypeCall call = ssg.map(tracker, ref, subContext);
|
||||
local_genotypes[iter].set(call.getGenotypes().get(0).getBases(),call.getGenotypes().get(0).getConfidenceScore().getScore());
|
||||
}
|
||||
|
||||
return new_reads_by_group;
|
||||
}
|
||||
|
||||
private String createFileHeader() {
|
||||
private String createAuxFileHeader(int nFiles) {
|
||||
String sp = " ";
|
||||
String st1 = "Chrom:Pos" + sp;
|
||||
String st2 = "";
|
||||
for(int j = 0; j < npeople; j++) {
|
||||
for(int j = 0; j < nFiles; j++) {
|
||||
st2 = st2 + "Pers " + j + " Gen" + sp; // short for "genotype of person j at this location"
|
||||
st2 = st2 + "Pers " + j + " Conf" + sp; // short for "confidence in genotype call of ..."
|
||||
st2 = st2 + "Pers " + j + " Cvg" + sp; // short for "coverage of person j at this location"
|
||||
st2 = st2 + "Pers " + j + " NewCvg" + sp; // short for "coverage of person j at this location"
|
||||
}
|
||||
String st3 = "TotalCvg";
|
||||
|
||||
return st1+st2+st3;
|
||||
}
|
||||
|
||||
private void updateLiving()
|
||||
{
|
||||
// this function updates the living reads, removing them if the new location goes beyond the end
|
||||
private int[] getCoverageByGroup(List<SAMRecord>[] readsByGroup) {
|
||||
int[] coverage = new int[readsByGroup.length];
|
||||
for(int iterator = 0; iterator < readsByGroup.length; iterator ++) {
|
||||
coverage[iterator] = readsByGroup[iterator].size();
|
||||
}
|
||||
|
||||
int readLengthLeft;
|
||||
ListIterator updater;
|
||||
return coverage;
|
||||
}
|
||||
|
||||
for(int j = 0; j < npeople; j++) {
|
||||
updater = living_reads[j].listIterator();
|
||||
private long[] updateRunningCoverage(long[] cvgUpToNow, int[] newCvgByGroup) {
|
||||
long[] newCvg = new long[cvgUpToNow.length];
|
||||
for(int iter = 0; iter < cvgUpToNow.length; iter++) {
|
||||
newCvg[iter] = cvgUpToNow[iter] + newCvgByGroup[iter];
|
||||
}
|
||||
|
||||
while(updater.hasNext()) {
|
||||
readLengthLeft = (Integer) updater.next();
|
||||
if(readLengthLeft <= 1) {
|
||||
updater.remove();
|
||||
} else {
|
||||
updater.set(readLengthLeft-1);
|
||||
}
|
||||
return newCvg;
|
||||
}
|
||||
|
||||
} // end while(updater.hasNext())
|
||||
} // end for(int j=0; j < npeople; j++)
|
||||
}
|
||||
private double[] runningCoverageToDouble(long[] cvg) {
|
||||
double[] avgProp = new double[cvg.length];
|
||||
long sum = 0;
|
||||
for(long elem : cvg) {
|
||||
sum += elem;
|
||||
}
|
||||
for(int iter = 0; iter < cvg.length; iter++) {
|
||||
avgProp[iter] = cvg[iter]/sum;
|
||||
}
|
||||
|
||||
private void printToFileAndAuxFile(List<SAMRecord>[] downSampledReadsByGroup, int cvg, SAMFileWriter outFile)
|
||||
{
|
||||
// NOTE: the chromosome and position were already printed in the auxFile during map.
|
||||
for(int pers = 0; pers < npeople; pers++) {
|
||||
List<SAMRecord> personReads = downSampledReadsByGroup[pers];
|
||||
for(SAMRecord read : personReads) {
|
||||
outFile.addAlignment(read);
|
||||
living_reads[pers].add(read.getReadLength());
|
||||
return avgProp;
|
||||
}
|
||||
|
||||
private void printToFiles(List<SAMRecord>[] sampledNewReads, ArtificialPoolContext context) {
|
||||
SAMFileWriter samWrite = context.getSAMFileWriter();
|
||||
String sp = " ";
|
||||
PrintWriter auxWrite = context.getWriterToAuxiliaryFile();
|
||||
int readGroupInt = 0;
|
||||
for(List<SAMRecord> readGroup : sampledNewReads) {
|
||||
for(SAMRecord read : readGroup) {
|
||||
samWrite.addAlignment(read);
|
||||
}
|
||||
auxWrite.print(context.getAlignmentContext().getLocation().toString() + sp);
|
||||
auxWrite.print(context.genotypeAndConfidenceToString(readGroupInt,sp));
|
||||
readGroupInt++;
|
||||
}
|
||||
String auxformstring = " %s %f %d";
|
||||
// now print to the aux file
|
||||
int totalcvg = 0;
|
||||
for(int pers = 0; pers < npeople; pers++) {
|
||||
auxWrite.printf(auxformstring, local_genotypes[pers].first, local_genotypes[pers].second,living_reads[pers].size());
|
||||
totalcvg += living_reads[pers].size();
|
||||
}
|
||||
auxWrite.printf(" %d%n", totalcvg);
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,8 +35,11 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
|
|||
@Argument(fullName="bootStrap", shortName="bs", doc="Use a bootstrap method", required=false)
|
||||
public boolean useBootstrap = false;
|
||||
|
||||
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls")
|
||||
public double threshold = 3.0;
|
||||
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls", required=false)
|
||||
public double lodThreshold = 3.0;
|
||||
|
||||
@Argument(fullName="minQScore", shortName="qm", doc="Threshold for the minimum quality score, filter out bases below",required=false)
|
||||
public byte minQScore = 0;
|
||||
|
||||
private static final int BOOTSTRAP_ITERATIONS = 300;
|
||||
|
||||
|
|
@ -58,11 +61,18 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
|
|||
}
|
||||
|
||||
public Pair<Integer,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
|
||||
AlignmentContext filteredContext;
|
||||
if (this.getMinQualityScore() <= 0) {
|
||||
filteredContext = context;
|
||||
} else {
|
||||
Pair<List<SAMRecord>,List<Integer>> readsFilteredByQuality = filterByQuality(context.getReads(),context.getOffsets(), this.getMinQualityScore());
|
||||
filteredContext = new AlignmentContext(context.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond());
|
||||
}
|
||||
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = PoolUtils.splitReadsByReadDirection(filteredContext.getReads(),filteredContext.getOffsets());
|
||||
if ( ! suppress_printing) {
|
||||
Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, context);
|
||||
out.printf("%s: %d %d %d %d %d %d %f %f %f%n", context.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
|
||||
context.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
|
||||
Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
|
||||
out.printf("%s: %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
|
||||
filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
|
||||
powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
|
||||
}
|
||||
return new Pair(readsByDirection.getFirst().getFirst().size(),readsByDirection.getFirst().getSecond().size());
|
||||
|
|
@ -73,9 +83,9 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
|
|||
public Pair<double[],byte[]> calculatePower(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, boolean bootStrap, AlignmentContext context) {
|
||||
Pair<double[],byte[]> powerAndMedianQ;
|
||||
if( bootStrap ) {
|
||||
powerAndMedianQ = calculateDirectionalPowersByBootstrap(dirReads,threshold,context);
|
||||
powerAndMedianQ = calculateDirectionalPowersByBootstrap(dirReads,lodThreshold,context);
|
||||
} else {
|
||||
powerAndMedianQ = calculateDirectionalPowersTheoretical(dirReads,threshold,context);
|
||||
powerAndMedianQ = calculateDirectionalPowersTheoretical(dirReads,lodThreshold,context);
|
||||
}
|
||||
|
||||
return powerAndMedianQ;
|
||||
|
|
@ -162,7 +172,15 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
|
|||
}
|
||||
|
||||
public String createHeaderString() {
|
||||
return "Chrom:Pos CvgF CvgR CvgC QmedF QmedR QmedC PowF PowR PowC";
|
||||
return "Chrom:Pos ForwardCoverage ReverseCoverage CombinedCoverage ForwardMedianQ ReverseMedianQ CombinedMedianQ PowerForward PowerReverse PowerCombined";
|
||||
}
|
||||
|
||||
public byte getMinQualityScore() {
|
||||
return minQScore;
|
||||
}
|
||||
|
||||
public Pair<List<SAMRecord>,List<Integer>> filterByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qMin) {
|
||||
return PoolUtils.thresholdReadsByQuality(reads,offsets,qMin);
|
||||
}
|
||||
|
||||
// class methods
|
||||
|
|
|
|||
|
|
@ -18,6 +18,12 @@ public class PoolUtils {
|
|||
|
||||
private PoolUtils () {}
|
||||
|
||||
public static final int BASE_A_OFFSET = 0;
|
||||
public static final int BASE_C_OFFSET = 1;
|
||||
public static final int BASE_G_OFFSET = 2;
|
||||
public static final int BASE_T_OFFSET = 3;
|
||||
public static final int BASE_INDEXED_ARRAY_SIZE = 4;
|
||||
|
||||
public static Pair<Pair<List<SAMRecord>, List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> splitReadsByReadDirection(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
ArrayList<SAMRecord> forwardReads;
|
||||
ArrayList<SAMRecord> reverseReads;
|
||||
|
|
@ -48,5 +54,108 @@ public class PoolUtils {
|
|||
|
||||
return new Pair(new Pair(forwardReads,reverseReads), new Pair(forwardOffsets,reverseOffsets));
|
||||
}
|
||||
|
||||
|
||||
public static Pair<List<SAMRecord>[], List<Integer>[]> splitReadsByBase(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
ArrayList<SAMRecord>[] readsByBase;
|
||||
ArrayList<Integer>[] offsetsByBase;
|
||||
if ( reads == null ) {
|
||||
readsByBase = null;
|
||||
offsetsByBase = null;
|
||||
} else {
|
||||
readsByBase = new ArrayList[4];
|
||||
offsetsByBase = new ArrayList[4];
|
||||
for(int readNum = 0; readNum < reads.size(); readNum++) {
|
||||
switch (reads.get(readNum).getReadBases()[offsets.get(readNum)]) {
|
||||
case 'A':
|
||||
case 'a': readsByBase[0].add(reads.get(readNum));
|
||||
offsetsByBase[0].add(offsets.get(readNum));
|
||||
break;
|
||||
case 'C':
|
||||
case 'c': readsByBase[1].add(reads.get(readNum));
|
||||
offsetsByBase[1].add(offsets.get(readNum));
|
||||
break;
|
||||
case 'G':
|
||||
case 'g': readsByBase[2].add(reads.get(readNum));
|
||||
offsetsByBase[2].add(offsets.get(readNum));
|
||||
break;
|
||||
case 'T':
|
||||
case 't': readsByBase[3].add(reads.get(readNum));
|
||||
offsetsByBase[3].add(offsets.get(readNum));
|
||||
break;
|
||||
default: break; // TODO: INDEL AWARENESS
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Pair(readsByBase,offsetsByBase);
|
||||
}
|
||||
|
||||
public static Pair<List<SAMRecord>, List<Integer>> thresholdReadsByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qThresh) {
|
||||
List<SAMRecord> threshReads;
|
||||
List<Integer> threshOffsets;
|
||||
if(reads == null) {
|
||||
threshReads=null;
|
||||
threshOffsets = null;
|
||||
} else if (qThresh <= 0) {
|
||||
threshReads = reads;
|
||||
threshOffsets = offsets;
|
||||
} else {
|
||||
threshReads = new ArrayList();
|
||||
threshOffsets = new ArrayList();
|
||||
|
||||
for ( int readNo = 0; readNo < reads.size(); readNo ++) {
|
||||
if ( reads.get(readNo).getBaseQualities()[offsets.get(readNo)] >= qThresh) {
|
||||
threshReads.add(reads.get(readNo));
|
||||
threshOffsets.add(offsets.get(readNo));
|
||||
} // else do nothing
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair(threshReads,threshOffsets);
|
||||
}
|
||||
|
||||
public static int getBaseOffset(char base) {
|
||||
switch(base) {
|
||||
case 'A':
|
||||
case 'a':
|
||||
return getBaseAOffset();
|
||||
case 'C':
|
||||
case 'c':
|
||||
return getBaseCOffset();
|
||||
case 'G':
|
||||
case 'g':
|
||||
return getBaseGOffset();
|
||||
case 'T':
|
||||
case 't':
|
||||
return getBaseTOffset();
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
//TODO: indel offsets
|
||||
}
|
||||
|
||||
public static int getBaseAOffset() {
|
||||
return BASE_A_OFFSET;
|
||||
}
|
||||
|
||||
public static int getBaseCOffset() {
|
||||
return BASE_C_OFFSET;
|
||||
}
|
||||
|
||||
public static int getBaseGOffset() {
|
||||
return BASE_G_OFFSET;
|
||||
}
|
||||
|
||||
public static int getBaseTOffset() {
|
||||
return BASE_T_OFFSET;
|
||||
}
|
||||
|
||||
public static List<Byte> getListOfBaseQualities(List<SAMRecord> reads,List<Integer> offsets) {
|
||||
//TODO: this is a terrible method name. Change it to something better.
|
||||
List<Byte> qualities = new ArrayList<Byte>(reads.size());
|
||||
for (int readNo = 0; readNo < reads.size(); readNo ++) {
|
||||
qualities.add(reads.get(readNo).getBaseQualities()[offsets.get(readNo)]);
|
||||
}
|
||||
|
||||
return qualities;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue