diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/filters/ContaminatedSampleFilter.java b/java/src/org/broadinstitute/sting/oneoffprojects/filters/ContaminatedSampleFilter.java new file mode 100644 index 000000000..bde03208a --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/filters/ContaminatedSampleFilter.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.oneoffprojects.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; + +import java.util.Arrays; +import java.util.HashSet; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date Feb 8, 2010 + */ +public class ContaminatedSampleFilter implements SamRecordFilter { + + private final String[] filteredNames = {"NA19562","NA19006","NA19554","NA18985","NA18988","NA19062","NA19559"}; + + private HashSet contaminatedSamples = new HashSet( Arrays.asList(filteredNames) ); + + public boolean filterOut(SAMRecord read) { + return contaminatedSamples.contains(read.getReadGroup().getSample()); + } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java new file mode 100644 index 000000000..f268c6f46 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/DepthStatisticsCalculator.java @@ -0,0 +1,128 @@ +package org.broadinstitute.sting.oneoffprojects.firehosesummary; + +import java.util.ArrayList; +import java.util.Arrays; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date Feb 12, 2010 + */ +public class DepthStatisticsCalculator extends SummaryStatisticsCalculator { + private int numBasesAbove0x; // % of bases with ANY coverage + private int numBasesAbove3x; // % of bases with 4x+ + private int numBasesAbove9x; // % of bases with 10x+ + private int numBasesAbove24x; // % of bases with 25x+ + 
private int numBasesAbove49x; // % of bases with 50x+ + private int numBasesAbove99x; // % of bases with 100x+ + + private int targetsAbove0x; + private int targetsAbove3x; + private int targetsAbove9x; + private int targetsAbove24x; + private int targetsAbove49x; + private int targetsAbove99x; + private int numTargets; + + public static double[] DEPTH_CUTOFFS = {1,4,10,25,50,100}; + + public DepthStatisticsCalculator(String name) { + super(name); + numBasesAbove0x = 0; + numBasesAbove3x = 0; + numBasesAbove9x = 0; + numBasesAbove24x = 0; + numBasesAbove49x = 0; + numBasesAbove99x = 0; + targetsAbove99x = 0; + targetsAbove49x = 0; + targetsAbove24x = 0; + targetsAbove9x = 0; + targetsAbove3x = 0; + targetsAbove0x = 0; + numTargets = 0; + } + + public void updateLocus(int depth) { + super.update(depth); + + if ( depth > 99 ) { + numBasesAbove99x++; + } + if ( depth > 49 ) { + numBasesAbove49x++; + } + if ( depth > 24 ) { + numBasesAbove24x++; + } + if ( depth > 9 ) { + numBasesAbove9x++; + } + if ( depth > 3 ) { + numBasesAbove3x++; + } + if ( depth > 0 ) { + numBasesAbove0x++; + } + } + + public void updateTargets(int targetLength, int totalCoverage) { + double avgCvg = ( (double) totalCoverage ) / ( (double) targetLength); + + if ( avgCvg >= 100 ) { + targetsAbove99x ++; + } + if ( avgCvg >= 50 ) { + targetsAbove49x++; + } + if ( avgCvg >= 25 ) { + targetsAbove24x++; + } + if ( avgCvg >= 10 ) { + targetsAbove9x++; + } + if ( avgCvg >= 4 ) { + targetsAbove3x++; + } + if ( avgCvg >= 1 ) { + targetsAbove0x++; + } + + numTargets++; + } + + public double[] getLocusProportions() { + double[] proportions = new double[6]; + + proportions[0] = ( (double) numBasesAbove0x )/( (double) getSampleSize() ); + proportions[2] = ( (double) numBasesAbove9x )/( (double) getSampleSize() ); + proportions[3] = ( (double) numBasesAbove24x )/( (double) getSampleSize() ); + proportions[4] = ( (double) numBasesAbove49x )/( (double) getSampleSize() ); + proportions[5] = ( (double) 
numBasesAbove99x )/( (double) getSampleSize() ); + proportions[1] = ( (double) numBasesAbove3x )/( (double) getSampleSize() ); + + return proportions; + } + + public double[] getTargetProportions() { + double[] proportions = new double[6]; + + proportions[0] = ( (double) targetsAbove0x )/( (double) numTargets ); + proportions[2] = ( (double) targetsAbove9x )/( (double) numTargets ); + proportions[3] = ( (double) targetsAbove24x )/( (double) numTargets ); + proportions[4] = ( (double) targetsAbove49x )/( (double) numTargets ); + proportions[5] = ( (double) targetsAbove99x )/( (double) numTargets ); + proportions[1] = ( (double) targetsAbove3x )/( (double) numTargets ); + + return proportions; + } + + public double getPercentWellCoveredLoci() { + return 10*( (double) numBasesAbove9x )/( (double) getSampleSize() ); + } + + public double getPercentWellCoveredTargets() { + return 10*( (double) targetsAbove9x )/( (double) numTargets ); + } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java new file mode 100644 index 000000000..af9896123 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/GenerateFirehoseSummary.java @@ -0,0 +1,257 @@ +package org.broadinstitute.sting.oneoffprojects.firehosesummary; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; + +import java.io.*; +import java.util.*; + +/** + * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl + * + * @Author chartl + * @Date Feb 11, 2010 + */ + +class FirehoseSummaryCLP extends CommandLineProgram { + @Argument(fullName = "depthOfCoverageFile", shortName = "doc", doc="Path to the depth of coverage file", required=true) + private File depthOfCoverage = null; + @Argument(fullName = "contaminationFile", 
shortName = "con", doc="Path to the contamination file", required=true) + private File contamination = null; + @Argument(fullName = "errorRateFile", shortName = "err", doc="Path to the error rate file", required=true) + private File errorRate = null; + @Argument(fullName = "zipFiles", shortName = "zip", doc="List of paths to zip files which contain summary metrics files", required=false) + private String zipFiles = null; + + private static String R_SCRIPT = "plotFirehoseDataQCMetrics.R"; + private static String SCRIPT_DOC_FLAG = "DOC"; + + protected int execute() { + SummaryFileCollection metricsFiles = getFileHandles(); + List depthStats = calculateDepthStatistics(depthOfCoverage); + String docSummary = makeDOCPlots(depthStats); + return 1; + } + + private String makeDOCPlots(List calcs) { + StringBuilder summaryBuilder = new StringBuilder(); + int numSamplesWithGoodBaseCoverage=0; + int numSamplesWithGoodMeanCoverage = 0; + int numSamplesWithGoodTargetCoverage=0; + double aggregateMeanCoverage = -1d; + double aggregateStdDevCoverage = -1d; + double aggregateSkewCoverage = -1d; + PrintWriter locusOutput; + PrintWriter targetOutput; + File locusOutputFile; + File targetOutputFile; + try { + locusOutputFile = File.createTempFile("locus_output","txt"); + locusOutput = new PrintWriter(locusOutputFile); + targetOutputFile = File.createTempFile("target_output","txt"); + targetOutput = new PrintWriter(targetOutputFile); + } catch ( IOException e ) { + throw new StingException("Error creating temporary output files for R-plotting. 
Perhaps user permissions do not allow creation of files.",e); + } + + for ( DepthStatisticsCalculator calc : calcs ) { + + if ( calc.getName().equalsIgnoreCase("total_coverage") || calc.getName().equalsIgnoreCase("coverage_without_deletions") ) { + // update summary statistics + if ( calc.getName().equalsIgnoreCase("total_coverage") ) { + aggregateMeanCoverage = calc.getMean(); + aggregateStdDevCoverage = Math.sqrt(calc.getVar()); + aggregateSkewCoverage = calc.getSkew(); + } + } else { + // update sample-based summary statistics and print output to file + locusOutput.printf("%s\t%f\t%f\t%f\t%f\t%f\t%f%n",calc.getName(),calc.getLocusProportions()); + targetOutput.printf("%s\t%f\t%f\t%f\t%f\t%f\t%f%n", calc.getName(),calc.getTargetProportions()); + if ( calc.getPercentWellCoveredLoci() > 80 ) { + numSamplesWithGoodBaseCoverage++; + } + if ( calc.getPercentWellCoveredTargets() > 90 ) { + numSamplesWithGoodTargetCoverage++; + } + if ( calc.getMean() > 10 ) { + numSamplesWithGoodMeanCoverage++; + } + } + } + + //invokeRScript(R_SCRIPT,SCRIPT_DOC_FLAG,"PercentOfBasesCoveredBySample",locusOutputFile.getAbsolutePath()); + //invokeRScript(R_SCRIPT,SCRIPT_DOC_FLAG,"PercentOfTargetsCoveredBySample",targetOutputFile.getAbsolutePath()); + return "temporary"; + } + + private SummaryFileCollection getFileHandles() { + if ( zipFiles == null ) { + return null; + } + + SummaryFileCollection summaryFiles = new SummaryFileCollection(); + for ( String zipFile : zipFiles.split(",") ) { + summaryFiles.process(zipFile); + } + + return summaryFiles; + } + + private List calculateDepthStatistics(File docFile) { + BufferedReader docReader; + + try { + docReader = new BufferedReader( new FileReader(docFile) ); + } catch ( IOException e) { + throw new StingException("The file "+docFile.getAbsolutePath()+" could not be opened...",e); + } + + String locusHeader = getDOCSectionHeader(docReader); + List docCalculators = instantiateDOCCalculators(locusHeader); + + 
updateLocusInfo(docCalculators,docReader); + + String targetHeader = getDOCSectionHeader(docReader); + + updateTargetInfo(docCalculators,docReader); + + return docCalculators; + } + + private void updateLocusInfo(List calcs, BufferedReader reader) { + + String docLocLine; + try { + docLocLine = reader.readLine(); + while ( ! isEndOfSection(docLocLine) ) { + int offset = -1; + for ( String entry : docLocLine.split("\t") ) { + if ( offset > -1 ) { + calcs.get(offset).updateLocus(Integer.parseInt(entry)); + } + offset++; + } + } + } catch ( IOException e) { + throw new StingException("Error reading locus depth of coverage information",e); + } + + } + + private void updateTargetInfo(List calcs, BufferedReader reader) { + + String docLocLine; + try { + docLocLine = reader.readLine(); + while ( ! isEndOfSection(docLocLine) ) { + int offset = -1; + int targetSize = 0; + for ( String entry : docLocLine.split("\t") ) { + if ( offset == -1 ) { + targetSize = parseInterval(entry); + } else { + calcs.get(offset).updateTargets(targetSize,Integer.parseInt(entry)); + } + offset++; + } + } + } catch ( IOException e ) { + throw new StingException("Error reading target depth of coverage information",e); + } + + } + + private int parseInterval(String interval) { + String startstop = interval.split(":")[1]; + int start = Integer.parseInt(startstop.split("-")[0]); + int stop = Integer.parseInt(startstop.split("-")[1]); + return stop - start; + } + + private boolean isDOCSectionSeparator( String line ) { + return line.contains("_COVERAGE_SECTION"); + } + + private boolean isEndOfSection( String line ) { + // sections delimited by empty line + return line.equalsIgnoreCase(""); + } + + private String getDOCSectionHeader(BufferedReader reader) { + String header; + try { + do { + header = reader.readLine(); + } while ( ! 
isDOCSectionSeparator(header) ); + + header = reader.readLine(); + + } catch (IOException e) { + throw new StingException("Error reading depth of coverage file",e); + } + + return header; + } + + private List instantiateDOCCalculators(String header) { + List calcs = new ArrayList(); + + int offset = -1; + for ( String entry : header.split("\t") ) { + if ( offset > -1 ) { + calcs.add(new DepthStatisticsCalculator(entry)); + } + offset++; + } + + return calcs; + } + +} + +public class GenerateFirehoseSummary { + + public static void main(String[] args) { + FirehoseSummaryCLP clp = new FirehoseSummaryCLP(); + CommandLineProgram.start(clp, args); + System.exit(0); + } +} + +class SummaryFileCollection { + + // container class for files we'll be summarizing + + public Map fingerprintSummaryFiles; + public Map hybridSelectionMetricsFiles; + public Map insertSizeDistributionFiles; + public Map alignmentMetricsFiles; + + public SummaryFileCollection() { + fingerprintSummaryFiles = new HashMap(); + hybridSelectionMetricsFiles = new HashMap(); + insertSizeDistributionFiles = new HashMap(); + alignmentMetricsFiles = new HashMap(); + } + + public void process(String zipFilePath) { + String sampleName = zipFilePath.split("_sequencing_metrics.zip")[0].split("_")[1]; + File fingerprintSummaryFile = new File(sampleName+".summary_fingerprint_metrics"); + File hybridSelectionFile = new File(sampleName+".hybrid_selection_metrics"); + File insertSizeFile = new File(sampleName+".insert_size_metrics"); + File alignmentFile = new File(sampleName+".alignment_metrics"); + + String command = "unzip "+zipFilePath; + try { + Process p = Runtime.getRuntime().exec(command); + } catch (IOException e) { + throw new RuntimeException("Could not unzip the file "+zipFilePath); + } + + fingerprintSummaryFiles.put(sampleName,fingerprintSummaryFile); + hybridSelectionMetricsFiles.put(sampleName,hybridSelectionFile); + insertSizeDistributionFiles.put(sampleName,insertSizeFile); + 
// ===== File: java/src/org/broadinstitute/sting/oneoffprojects/firehosesummary/SummaryStatisticsCalculator.java =====
// package org.broadinstitute.sting.oneoffprojects.firehosesummary;

/**
 * Calculates the first three sample moments (mean, variance, skew) of a data
 * stream on-the-fly, without storing the data points. The variance and skew
 * use the author's deliberately approximate online recurrences: new mean and
 * variance estimates are NOT propagated back over earlier points, so they
 * converge slower than exact batch formulas but need O(1) memory.
 *
 * @author chartl
 * @since Feb 12, 2010
 */
public class SummaryStatisticsCalculator {
    // todo -- can median / quantiles be estimated on-the-fly?
    // todo -- can we divine a metric for modality?
    private double mean;    // running mean of the samples fed to it
    private double var;     // approximate running variance (see update())
    private double skew;    // approximate running skew (see update())
    private int sampleSize; // number of samples given to the calculator
    private String name;    // name to associate with this calculator, if any

    /** Creates an unnamed calculator (name defaults to "SUMMARY"). */
    public SummaryStatisticsCalculator() {
        this("SUMMARY");
    }

    /**
     * @param name identifier to associate with this calculator
     */
    public SummaryStatisticsCalculator(String name) {
        mean = 0;
        var = 0;
        skew = 0;
        sampleSize = 0;
        this.name = name;
    }

    /**
     * Folds one observation into the running moments.
     *
     * @param datum the observation
     */
    public void update(double datum) {
        mean = (sampleSize*mean + datum)/(sampleSize + 1);
        var = (sampleSize*var + Math.pow( mean - datum , 2 ) )/( sampleSize + 1);
        // note: the variance is not re-calculated over all data points each time the mean changes
        // this means the convergence will be slower than x^-(1/2); but it will still converge with the mean
        // whole exome is ~3.3 million datapoints, which will be plenty for this to converge to within a very
        // small proportion of the true sample variance
        if ( var > 0 ) {
            // Skew is normalized by stddev^3 == var^(3/2).
            // BUG FIX: the original wrote Math.pow(var, 2/3) -- integer
            // division makes the exponent 0, so the normalization was
            // silently 1.0 and the "skew" was just an unscaled third moment.
            double sdCubed = Math.pow(var, 1.5);
            skew = ( skew*sdCubed*sampleSize + Math.pow( datum - mean, 3) ) / ( ( sampleSize + 1 )*sdCubed );
            // again, new mean/variance estimates are not propagated over all previous data points, so
            // convergence is a bit slower -- the price one pays for on-the-fly calculation
        } else {
            // skew is undefined at zero variance; report 0 rather than NaN
            skew = 0;
        }
        sampleSize++;
    }

    /** Integer convenience overload of {@link #update(double)}. */
    public void update(int datum) {
        update( (double) datum);
    }

    /** @return the running mean */
    public double getMean() {
        return mean;
    }

    /** @return the approximate running variance */
    public double getVar() {
        return var;
    }

    /** @return the approximate running skew (0 until variance is positive) */
    public double getSkew() {
        return skew;
    }

    /** @return the name associated with this calculator */
    public String getName() {
        return name;
    }

    /** @return the number of observations seen so far */
    public int getSampleSize() {
        return sampleSize;
    }
}
concordantGenotypeReferenceCalls; + private int chipNoCalls; + private int ignoredDueToDepth; public VCFConcordanceCalculator(String sampleName, int minimumDepth, int minGenQual) { name = sampleName; - falseNegativeLoci = new HashSet(); - falseNegativeLociDueToNoCall = new HashSet(); - falsePositiveLoci = new HashSet(); - falseNegativeLociDueToFilters = new HashSet(); - hetsCalledHoms = new HashSet(); - homsCalledHets = new HashSet(); - nonConfidentGenotypeCalls = new HashSet(); - concordantHomCalls = new HashSet(); - concordantHetCalls = new HashSet(); - concordantGenotypeReferenceCalls = new HashSet(); - chipNoCalls = new HashSet(); - ignoredDueToDepth = new HashSet(); + falseNegativeLoci = 0; + falseNegativeLociDueToNoCall = 0; + falsePositiveLoci = 0; + falseNegativeLociDueToFilters = 0; + hetsCalledHoms = 0; + homsCalledHets = 0; + nonConfidentGenotypeCalls = 0; + concordantHomCalls = 0; + concordantHetCalls = 0; + concordantGenotypeReferenceCalls = 0; + chipNoCalls = 0; + ignoredDueToDepth = 0; minimumDepthForUpdate = minimumDepth; minimumGenotypeQuality = minGenQual; } @@ -57,37 +57,37 @@ class VCFConcordanceCalculator { public void updateTruthOnly(LocusConcordanceInfo info) { if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase() ) ) { - falseNegativeLoci.add(info.getLoc()); + falseNegativeLoci++; } else { - concordantGenotypeReferenceCalls.add(info.getLoc()); + concordantGenotypeReferenceCalls++; } } public void updateFilteredLocus(LocusConcordanceInfo info) { if ( info.getTruthGenotype(name).isVariant( (char) info.getReferenceBase()) ) { - falseNegativeLociDueToFilters.add(info.getLoc()); + falseNegativeLociDueToFilters++; } } public String toString() { - return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth.size(), - concordantGenotypeReferenceCalls.size(),concordantHomCalls.size(),concordantHetCalls.size(),nonConfidentGenotypeCalls.size(), - 
homsCalledHets.size(),hetsCalledHoms.size(),falsePositiveLoci.size(),falseNegativeLoci.size(), - falseNegativeLociDueToNoCall.size(),falseNegativeLociDueToFilters.size()); + return String.format("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",name,ignoredDueToDepth, + concordantGenotypeReferenceCalls,concordantHomCalls,concordantHetCalls,nonConfidentGenotypeCalls, + homsCalledHets,hetsCalledHoms,falsePositiveLoci,falseNegativeLoci, + falseNegativeLociDueToNoCall,falseNegativeLociDueToFilters); } private void compareGenotypes(VCFGenotypeRecord truth, VCFGenotypeRecord call, GenomeLoc loc, byte ref) { if ( minimumDepthForUpdate > 0 && call.getReadCount() < minimumDepthForUpdate ) { - ignoredDueToDepth.add(loc); + ignoredDueToDepth++; } else if ( truth.isNoCall() ) { - chipNoCalls.add(loc); + chipNoCalls++; } else if ( truth.isVariant(( char) ref) ) { if ( call.isNoCall() ) { - falseNegativeLociDueToNoCall.add(loc); + falseNegativeLociDueToNoCall++; } else if ( ! call.isVariant( (char) ref ) ) { - falseNegativeLoci.add(loc); + falseNegativeLoci++; } else if ( call.isVariant((char) ref) ) { // check het vs hom checkGenotypeCall(truth,call, loc); @@ -96,9 +96,9 @@ class VCFConcordanceCalculator { } else if ( ! truth.isVariant( (char) ref ) ) { if ( call.isVariant((char) ref) ) { - falsePositiveLoci.add(loc); + falsePositiveLoci++; } else { - concordantGenotypeReferenceCalls.add(loc); + concordantGenotypeReferenceCalls++; } } } @@ -107,18 +107,18 @@ class VCFConcordanceCalculator { if ( ! 
call.isFiltered() && 10*call.getNegLog10PError() > minimumGenotypeQuality) { if ( truth.isHet() && call.isHom() ) { - hetsCalledHoms.add(loc); + hetsCalledHoms++; } else if ( truth.isHom() && call.isHet() ) { - homsCalledHets.add(loc); + homsCalledHets++; } else if ( ( truth.isHet() && call.isHet() ) ) { - concordantHetCalls.add(loc); + concordantHetCalls++; } else if ( truth.isHom() && call.isHom() ) { // be extra careful - concordantHomCalls.add(loc); + concordantHomCalls++; } } else { - nonConfidentGenotypeCalls.add(loc); + nonConfidentGenotypeCalls++; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java index 8b26272ae..e1dc31165 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java @@ -14,6 +14,10 @@ import net.sf.samtools.SAMReadGroupRecord; import cern.jet.stat.Probability; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + /** * FindContaminatingReadGroupsWalker lists read groups in a single-sample BAM file that appear * to be contaminants (meaning a read group that's not actually associated with the sample) by searching @@ -28,8 +32,14 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker pvalByReadGroup = new HashMap(); for (String rg : altTable.getRowNames()) { String balances = ""; @@ -179,6 +190,8 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker> samplesToReadGroups = new HashMap>(); + for ( SAMReadGroupRecord rec : getToolkit().getSAMFileHeader().getReadGroups() ) { + if ( samplesToReadGroups.containsKey(rec.getSample()) ) { + samplesToReadGroups.get(rec.getSample()).add(rec.getReadGroupId()); + } else { + ArrayList 
newList = new ArrayList(); + newList.add(rec.getReadGroupId()); + samplesToReadGroups.put(rec.getSample(),newList); + } + } + + for ( String sample : samplesToReadGroups.keySet() ) { + double p_value = 1; + double limit = LIMIT; + boolean containsAberrantReads = false; + for ( String rg : samplesToReadGroups.get(sample) ) { + double rg_pval = ( pvalByReadGroup.get(rg) == null ? 1 : pvalByReadGroup.get(rg) ); + p_value = p_value*rg_pval; + containsAberrantReads = containsAberrantReads || rg_pval < LIMIT; + limit = limit*SCALE; + logger.debug(rg); + } + + out.printf("%s\t%-13s\t%s\t%s%n", sample, String.format("%e",p_value), ( p_value < limit ? "aberrant" : "nominal"), ( containsAberrantReads ? "contains_aberrant_RG" : "no_aberrant_RG")); } } }