diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java index 191327e49..5f51c6b80 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/AnalyzePowerWalker.java @@ -40,14 +40,19 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ BufferedReader syzyFileReader; final String pfFileDelimiter = " "; boolean outOfLinesInSyzyFile = false; + private double[][] syzyPowerTable; + private String[][] syzySanityCheckTable; @Override public void initialize() { super.initialize(); + syzyPowerTable = new double[8][1500000]; + syzySanityCheckTable = new String[8][1500000]; try { syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile)); - System.out.println(syzyFileReader.readLine()); + logger.error("generatePowerTable called"); + generatePowerTable(syzyFileReader); } catch (FileNotFoundException e) { String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found."; throw new StingException(newErrMsg,e); @@ -59,34 +64,36 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ } @Override - public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) + public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext rawContext) { + AlignmentContext context; + if (super.getMinQualityScore() <= 0) { + context = rawContext; + } else { + Pair,List> readsFilteredByQuality = filterByQuality(rawContext.getReads(),rawContext.getOffsets(), super.getMinQualityScore()); + context = new AlignmentContext(rawContext.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond()); + } Pair, List>,Pair,List>> splitReads = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets()); if ( !super.suppress_printing ) { Pair powPair = super.calculatePower(splitReads,false,context); - boolean syzyFileIsReady=true; - try { - syzyFileIsReady = syzyFileReader.ready(); - } - catch(IOException e) { - throw new StingException("Input file reader was not ready before an attempt to read from it", e); - } - if(!syzyFileIsReady) { - throw new StingException("Input file reader was not ready before an attempt to read from it, but there was no IOException"); - } else if(!outOfLinesInSyzyFile) { - Pair syzyPow = getSyzyPowFromFile(); - out.printf("%s: %d %d %d %d %d %d %f %f %f %f |%s%n", context.getLocation(), splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(), - context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2], - powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2], syzyPow.getFirst(), syzyPow.getSecond()); - } else { - out.printf("%s: $d %d %d %d %d %d %d %f %f %f%n", context.getLocation(), splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(), - context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2], - powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2]); + + int tabIndexChrom = getTableChromIndex(context.getLocation().toString()); + int tabIndexLoc = getTablePosIndex(context.getLocation().toString()); + double syzyPow = 0; + String syzySanity = "NoLoc"; + if(tabIndexChrom >= 0 && tabIndexLoc >= 0) { + syzyPow += syzyPowerTable[tabIndexChrom][tabIndexLoc]; + syzySanity = "s " + syzySanityCheckTable[tabIndexChrom][tabIndexLoc]; + } + out.printf("%s|%s: %d %d %d %d %d %d %f %f %f %f %n", context.getLocation(), syzySanity, splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(), + context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2], + powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2], syzyPow); + } return new Pair(splitReads.getFirst().getFirst().size(), splitReads.getFirst().getFirst().size()); @@ -103,13 +110,14 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ return new Pair(-1.1, "Printing Stops Here"); } - String chromPos = null; + String chromPosTar = null; StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter); try { - chromPos = lineTokenizer.nextToken(); + chromPosTar = lineTokenizer.nextToken(); for(int j = 1; j < colOffset; j++) { lineTokenizer.nextToken(); } + String chromPos = (new StringTokenizer(chromPosTar, "\t")).nextToken(); return new Pair((Double.valueOf(lineTokenizer.nextToken())/100.0),chromPos); } catch (NoSuchElementException e) { String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile; @@ -120,4 +128,90 @@ public class AnalyzePowerWalker extends CoverageAndPowerWalker{ public String createHeaderString() { return (super.createHeaderString() + " PowSyz"); } + + //TODO: rewrite this so it's not a complete hack!!! + + public void generatePowerTable(BufferedReader syzFile) { + + System.out.println("generatePowerTable entered"); + int numreads = 0; + + try{ + while(syzFile.ready()) { + String line = syzFile.readLine(); + if(line == null || line.equals("")) { + break; + } + StringTokenizer tok = new StringTokenizer(line, " :\t"); + String chrmNoStrWithChr = tok.nextToken(); + String chrmNoStr = chrmNoStrWithChr.substring(3); + String locStr = tok.nextToken(); + for(int j = colOffset - 2; j > 0; j --) { + tok.nextToken(); + } + numreads++; + String syzyPowStr = tok.nextToken(); + if ( chrmNoStr == null || locStr == null || chrmNoStr.equals("") || locStr.equals("")) { + break; + } + int chrIndex = getTableChromIndex(chrmNoStr,locStr); + int posIndex = getTablePosIndex(chrmNoStr,locStr); + syzyPowerTable[chrIndex][posIndex] = Double.valueOf(syzyPowStr)/100.0; + syzySanityCheckTable[chrIndex][posIndex] = "chrm" + chrmNoStr +":" + locStr; + if( (numreads % 1000) == 0){ + System.out.println(numreads + " reads from Syzygy file"); + } + } + } catch (IOException e) { + // do nothing + System.out.println("IOException caught"); + } + System.out.println("Table generated."); + + } + + public int getTableChromIndex(String chromNo, String locNo) { + switch (Integer.valueOf(chromNo)) { + case 1: return 0; + case 2: return 1; + case 3: + if ( Integer.valueOf(locNo) < 63000000 ) + return 2; + else + return 3; + case 7: return 4; + case 10: return 5; + case 11: return 6; + case 12: return 7; + default: + System.out.println(chromNo + " " + locNo); + return -1; + } + } + + public int getTableChromIndex(String chromAndPos) { + StringTokenizer tok = new StringTokenizer(chromAndPos,":"); + return getTableChromIndex(tok.nextToken().substring(3),tok.nextToken()); + } + + public int getTablePosIndex(String chromAndPos) { + StringTokenizer tok = new StringTokenizer(chromAndPos,":"); + return getTablePosIndex(tok.nextToken().substring(3),tok.nextToken()); + } + + public int getTablePosIndex(String chromNo, String locNo) { + switch ( getTableChromIndex(chromNo, locNo) ) { + case 0: return Integer.valueOf(locNo) - 120065274; + case 1: return Integer.valueOf(locNo) - 43311321; + case 2: return Integer.valueOf(locNo) - 12157091; + case 3: return Integer.valueOf(locNo) - 64000000; + case 4: return Integer.valueOf(locNo) - 27838907; + case 5: return Integer.valueOf(locNo) - 12003199; + case 6: return Integer.valueOf(locNo) - 92342389; + case 7: return Integer.valueOf(locNo) - 69809219; + default: + System.out.println(chromNo + " " + locNo); + return -1; + } + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java deleted file mode 100755 index cf6c67a09..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/ArtificalPoolWalkerMk2.java +++ /dev/null @@ -1,146 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.poolseq; - -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.playground.utils.ArtificialPoolContext; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.StingException; - -import java.io.FileOutputStream; -import java.io.PrintWriter; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Set; -import java.util.List; -import java.util.ListIterator; -import java.util.LinkedList; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Aug 26, 2009 - * Time: 11:28:26 AM - * To change this template use File | Settings | File Templates. - */ -public class ArtificalPoolWalkerMk2 extends LocusWalker { - - @Argument(fullName = "AuxOutputFile", shortName = "af", doc = "Auxiliary file for genotyp & coverage output", required = true) - String auxFilePath = null; - @Argument(fullName = "OutputBamFile", shortName = "of", doc = "Output to this file rather than standard output", required = false) - SAMFileWriter outputBamFile = null; - - public void initialize() { - } - - public ArtificialPoolContext reduceInit() { // try to initialize the file writer - ArtificialPoolContext apContext = new ArtificialPoolContext(); - apContext.setSingleSampleGenotyper(new SingleSampleGenotyper()); - apContext.setReadGroupSets(getToolkit().getMergedReadGroupsByReaders()); - apContext.setAuxWriter(initializeAuxFileWriter(apContext.getTotalNumberOfPeople())); - apContext.setSAMFileWriter(outputBamFile); - apContext.initializeSSG(); - - return apContext; - } - - public ArtificialPoolContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return new ArtificialPoolContext(tracker,ref,context); - } - - public ArtificialPoolContext reduce(ArtificialPoolContext mapCon, ArtificialPoolContext redCon){ - ArtificialPoolContext updatedContext = ArtificialPoolContext.mapReduceMerge(mapCon,redCon); - List[] newReads = updatedContext.splitReadsByGroup(updatedContext.getNewReads()); - long[] newCvg = updateRunningCoverage(updatedContext.getRunningCoverage(), getCoverageByGroup(newReads)); - updatedContext.setRunningCoverage(newCvg); - List[] sampledReads = ArtificialPoolContext.sampleReads(newReads,runningCoverageToDouble(newCvg)); - printToFiles(sampledReads,updatedContext); - return updatedContext; - } - - // Helper methods follow - - private PrintWriter initializeAuxFileWriter(int nFiles) { - PrintWriter auxFileWriter; - try { - auxFileWriter = new PrintWriter(new FileOutputStream(auxFilePath)); - auxFileWriter.println(createAuxFileHeader(nFiles)); - } catch(FileNotFoundException e) { - String errmsg = "The filepath you entered "+auxFilePath+" could not be opened. Please double-check that the input is correct."; - throw new StingException(errmsg, e); - } catch(IOException e) { - String errmsg = "The file you entered "+auxFilePath+" could not be written to. Please check your permissions to write to this file."; - throw new StingException(errmsg,e); - } - - return auxFileWriter; - } - - private String createAuxFileHeader(int nFiles) { - String sp = " "; - String st1 = "Chrom:Pos" + sp; - String st2 = ""; - for(int j = 0; j < nFiles; j++) { - st2 = st2 + "Pers " + j + " Gen" + sp; // short for "genotype of person j at this location" - st2 = st2 + "Pers " + j + " Conf" + sp; // short for "confidence in genotype call of ..." - st2 = st2 + "Pers " + j + " NewCvg" + sp; // short for "coverage of person j at this location" - } - String st3 = "TotalCvg"; - - return st1+st2+st3; - } - - private int[] getCoverageByGroup(List[] readsByGroup) { - int[] coverage = new int[readsByGroup.length]; - for(int iterator = 0; iterator < readsByGroup.length; iterator ++) { - coverage[iterator] = readsByGroup[iterator].size(); - } - - return coverage; - } - - private long[] updateRunningCoverage(long[] cvgUpToNow, int[] newCvgByGroup) { - long[] newCvg = new long[cvgUpToNow.length]; - for(int iter = 0; iter < cvgUpToNow.length; iter++) { - newCvg[iter] = cvgUpToNow[iter] + newCvgByGroup[iter]; - } - - return newCvg; - } - - private double[] runningCoverageToDouble(long[] cvg) { - double[] avgProp = new double[cvg.length]; - long sum = 0; - for(long elem : cvg) { - sum += elem; - } - for(int iter = 0; iter < cvg.length; iter++) { - avgProp[iter] = cvg[iter]/sum; - } - - return avgProp; - } - - private void printToFiles(List[] sampledNewReads, ArtificialPoolContext context) { - SAMFileWriter samWrite = context.getSAMFileWriter(); - String sp = " "; - PrintWriter auxWrite = context.getWriterToAuxiliaryFile(); - int readGroupInt = 0; - for(List readGroup : sampledNewReads) { - for(SAMRecord read : readGroup) { - samWrite.addAlignment(read); - } - auxWrite.print(context.getAlignmentContext().getLocation().toString() + sp); - auxWrite.print(context.genotypeAndConfidenceToString(readGroupInt,sp)); - readGroupInt++; - } - - } - - -} - diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java new file mode 100755 index 000000000..1fd77bd3d --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.gatk.refdata.AllelicVariant; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.io.PrintStream; +import java.util.List; +import java.util.ArrayList; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Sep 3, 2009 + * Time: 5:04:40 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{ + protected int numIndividualsInPool; + protected String name; + protected String[] filenames; + protected PrintStream out, callOut; + private VariantEvalWalker master; + + public BasicPoolVariantAnalysis(String name, int nIndividuals) { + this.name = name; + this.numIndividualsInPool = nIndividuals; + } + + public String getName() { return this.name; } + + public int getNumberOfIndividualsInPool() { return this.numIndividualsInPool; } + + public PrintStream getSummaryPrintStream() { return this.out; } + + public PrintStream getCallPrintStream() { return this.callOut; } + + public VariantEvalWalker getMaster() { return this.master; } + + public List getParams() { return new ArrayList(); } + + public List done() { return new ArrayList(); } + + public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String[] filenames) { + this.master = master; + this.out = out; + this.callOut = callOut; + this.filenames = filenames; + } + + public void finalize(long nSites) { + // no need to finalize data in general + } + + public abstract String update(AllelicVariant variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context); + +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java new file mode 100755 index 000000000..71084672b --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java @@ -0,0 +1,12 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Sep 3, 2009 + * Time: 4:39:12 PM + * To change this template use File | Settings | File Templates. + */ +public interface PoolAnalysis { + +}