diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index d4f29ba47..f56e14389 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -74,7 +74,11 @@ public abstract class AbstractGenomeAnalysisEngine { */ private SAMDataSource readsDataSource = null; - protected ReferenceDataSource getReferenceDataSource() { + /** + * Needs to be + * @return + */ + public ReferenceDataSource getReferenceDataSource() { return referenceDataSource; } diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 336cb3a65..e5e029d74 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -122,13 +122,14 @@ public class VariantContextUtils { attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts); attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs); } - // otherwise, remove them if present and requested - else if ( removeStaleValues ) { - if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) - attributes.remove(VCFConstants.ALLELE_COUNT_KEY); - if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) - attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); - } + +// // otherwise, remove them if present and requested +// else if ( removeStaleValues ) { +// if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) +// attributes.remove(VCFConstants.ALLELE_COUNT_KEY); +// if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) +// attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); +// } // otherwise, set them to 0 else { attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index e65ea8023..5e56ade8c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -46,8 +46,7 @@ public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - - if ( vc.getChromosomeCount() == 0 ) + if ( ! vc.hasGenotypes() ) return null; Map map = new HashMap(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 0af98de9b..7787c37d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -67,8 +67,8 @@ public class VariantsToTable extends RodWalker { out.println(Utils.join("\t", fieldsToTake)); } - private static abstract class Getter { public abstract String get(VariantContext vc); } - private static Map getters = new HashMap(); + public static abstract class Getter { public abstract String get(VariantContext vc); } + public static Map getters = new HashMap(); static { // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT @@ -79,6 +79,9 @@ public class VariantsToTable extends RodWalker { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); + + if ( n == 0 ) return "."; + for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); x.append(vc.getAlternateAllele(i).toString()); @@ -115,19 +118,7 @@ public class VariantsToTable extends RodWalker { Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); for ( VariantContext vc : vcs) { if ( ! ignoreMultiAllelic || vc.isBiallelic() || ( !showFiltered || !vc.isFiltered() ) ) { - List vals = new ArrayList(); - - for ( String field : fieldsToTake ) { - String val = "UNK"; - - if ( getters.containsKey(field) ) { - val = getters.get(field).get(vc); - } else if ( vc.hasAttribute(field) ) { - val = vc.getAttributeAsString(field); - } - - vals.add(val); - } + List vals = extractFields(vc, fieldsToTake); out.println(Utils.join("\t", vals)); } @@ -143,6 +134,24 @@ public class VariantsToTable extends RodWalker { } } + public static List extractFields(VariantContext vc, List fields) { + List vals = new ArrayList(); + + for ( String field : fields ) { + String val = "NA"; + + if ( getters.containsKey(field) ) { + val = getters.get(field).get(vc); + } else if ( vc.hasAttribute(field) ) { + val = vc.getAttributeAsString(field); + } + + vals.add(val); + } + + return vals; + } + public Integer reduceInit() { return 0; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSimulatedPerformance.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSimulatedPerformance.java new file mode 100755 index 000000000..a0d0a0f4d --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSimulatedPerformance.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.variantutils.VariantsToTable; +import org.broadinstitute.sting.utils.Utils; + +import java.io.PrintStream; +import java.util.*; + +/** + * Emits specific fields as dictated by the user from one or more VCF files. + */ +@Requires(value={}) +public class AssessSimulatedPerformance extends RodWalker { + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + @Argument(fullName="fields", shortName="F", doc="Fields to emit from the calls VCF", required=false) + public String FIELDS = "CHROM,POS,REF,ALT,QUAL,AC,AN,DP,Q,MODE"; + + @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false) + public int MAX_RECORDS = -1; + int nRecords = 0; + + private List fieldsToTake; + + public void initialize() { + fieldsToTake = Arrays.asList(FIELDS.split(",")); + + for ( String source : Arrays.asList("sim", "called")) { + out.print(source + "." + Utils.join("\t" + source + ".", fieldsToTake)); + out.print("\t"); + } + out.println(); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) // RodWalkers can make funky map calls + return 0; + + if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { + printVCFields("sim", tracker, ref, context); + printVCFields("called", tracker, ref, context); + out.println(); + return 1; + } else { + if ( nRecords >= MAX_RECORDS ) { + logger.warn("Calling sys exit to leave after " + nRecords + " records"); + System.exit(0); // todo -- what's the recommend way to abort like this? + } + return 0; + } + } + + private void printVCFields(String name, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + VariantContext vc = tracker.getVariantContext(ref, name, null, context.getLocation(), true); + out.print(Utils.join("\t", VariantsToTable.extractFields(vc, fieldsToTake))); + out.print("\t"); + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + public void onTraversalDone(Integer sum) {} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimulateReadsForVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimulateReadsForVariants.java new file mode 100755 index 000000000..bb058a6f1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/SimulateReadsForVariants.java @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.vcf.VCFWriter; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFConstants; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RefWalker; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.vcf.VCFUtils; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; + +import java.io.PrintStream; +import java.io.PrintWriter; +import java.util.*; + +import net.sf.samtools.*; +import cern.jet.math.Arithmetic; +import cern.jet.random.Poisson; +import cern.jet.random.engine.MersenneTwister; + +/** + * Generates simulated reads for variants + */ +@Requires(value={}) +@Reference(window=@Window(start=-20,stop=20)) +public class SimulateReadsForVariants extends RefWalker { + @Argument(fullName = "vcf", shortName = "vcf", doc="Variants underlying the reads",required=true) + protected VCFWriter variantsWriter; + + @Argument(fullName = "sites", shortName = "sites", doc="Variants sites",required=true) + protected PrintWriter sitesWriter; + + @Output(fullName = "read", shortName = "reads", doc="Reads corresponding to variants",required=true) + protected StingSAMFileWriter readWriter; + + @Argument(fullName="nSamples", shortName="NS", doc="Number of samples to simulate", required=false) + public int nSamples = 1; + + @Argument(fullName="readDepth", shortName="DP", doc="Read depths to simulate", required=false) + public List readDepths = Arrays.asList(1); + + @Argument(fullName="errorRate", shortName="ER", doc="Phred-scaled error rate", required=false) + public List errorRates = Arrays.asList(20); + + @Argument(fullName="readLengths", shortName="RL", doc="Read length, in bp", required=false) + public List readLengths = Arrays.asList(3); + + public enum ReadSamplingMode { CONSTANT, POISSON }; + @Argument(fullName="readSamplingMode", shortName="RSM", doc="Sampling mode", required=false) + public List samplingModes = Arrays.asList(ReadSamplingMode.CONSTANT); + + @Argument(fullName="variantsPerBin", shortName="VPB", doc="No. of variants to generate for each bin", required=false) + public int variantsPerBin = 1; + + @Argument(fullName="verbose", shortName="verbose", doc="Verbose", required=false) + public boolean verbose = false; + + private class ParameterSet { + int readDepth, readLength; + ReadSamplingMode mode; + byte[] readQuals; + double errorRate; // in abs fraction (0.01 not Q20) + int nVariants = 0; + ParameterSet next = null; + Poisson poissonRandom = null; + Iterator acs; + int nSites = 0; + + public ParameterSet(int readDepth, int readLength, ReadSamplingMode mode, int phredErrorRate, ParameterSet next, List ACs ) { + this.readDepth = readDepth; + this.readLength = readLength; + this.mode = mode; + this.readQuals = new byte[readLength]; + Arrays.fill(readQuals, (byte)phredErrorRate); + this.errorRate = QualityUtils.qualToErrorProb((byte)phredErrorRate); + this.next = next; + nSites = ACs.size(); + acs = ACs.iterator(); + + if ( mode == ReadSamplingMode.POISSON ) + poissonRandom = new Poisson(readDepth, new MersenneTwister((int)RANDOM_SEED)); + } + + public void incCount() { nVariants++; } + public boolean done() { return ! acs.hasNext(); } + public boolean hasNext() { return next != null; } + + public int combinations() { + return nSites + ( hasNext() ? next.combinations() : 0); + } + } + + List alleleCounts = new ArrayList(); + + ParameterSet parameters = null; + SAMFileHeader header = null; + + private static String SAMPLE_PREFIX = "SAMPLE"; + public static final String PROGRAM_RECORD_NAME = "GATK SimulateReadsForVariants"; + + List sampleNames = new ArrayList(); + Map sample2RG = new HashMap(); + + private String sampleName(int i) { return sampleNames.get(i); } + private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } + + private static final long RANDOM_SEED = 1252863495; + private static final Random ran = new Random(RANDOM_SEED); + + int SEPARATION_BETWEEN_SITES = 10; + + private SAMReadGroupRecord createRG(String name) { + SAMReadGroupRecord rg = new SAMReadGroupRecord(name); + rg.setPlatform("ILLUMINA"); + rg.setSample(name); + return rg; + } + + public void initialize() { + // initialize sample I -> sample info map + List sampleRGs = new ArrayList(); + + for ( int i = 0; i < nSamples; i++ ) { + sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i)); + SAMReadGroupRecord rg = createRG(sampleName(i)); + sampleRGs.add(rg); + sample2RG.put(sampleName(i), rg); + } + + for ( int i = 0; i <= (2 * nSamples); i++) { + int nCopies = (int)Math.round((2.0* nSamples) / (Math.max(i, 1))); + for ( int j = 0; j < (nCopies * variantsPerBin); j++ ) + alleleCounts.add(i); + } + + // initialize VCF headers + // todo -- fill out header + Set headerLines = new HashSet(); + headerLines.add(new VCFHeaderLine("source", "SimulateReadsForVariants")); + variantsWriter.writeHeader(new VCFHeader(headerLines, new HashSet(sampleNames))); + + // initialize BAM headers + header = new SAMFileHeader(); + header.setSequenceDictionary(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary()); + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + header.setReadGroups(sampleRGs); + + final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME); + final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText"); + programRecord.setProgramVersion(headerInfo.getString("org.broadinstitute.sting.gatk.version")); + programRecord.setCommandLine(getToolkit().createApproximateCommandLineArgumentString(getToolkit(), this)); + header.setProgramRecords(Arrays.asList(programRecord)); + + readWriter.writeHeader(header); + + // set up feature sets + for ( int readLength : readLengths ) { + if ( readLength % 2 == 0 ) throw new UserException.BadArgumentValue("readLength", "Read lengths must be odd"); + + for ( ReadSamplingMode mode : samplingModes ) { + for ( int errorRate : errorRates ) { + for ( int readDepth : readDepths ) { + parameters = new ParameterSet(readDepth, readLength, mode, errorRate, parameters, alleleCounts); + } + } + } + } + logger.info("Total number of combinations " + parameters.combinations()); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( parameters.done() ) { + if ( parameters.hasNext() ) + parameters = parameters.next; + else + return 0; // early abort, we're done generating + } + + if ( ref.getLocus().getStart() < parameters.readLength || ! BaseUtils.isRegularBase(ref.getBase()) ) + return 0; + + if ( ref.getLocus().getStart() % (parameters.readLength + SEPARATION_BETWEEN_SITES) != 0 ) + return 0; + + byte[] refBases = getBasesForReads(ref, parameters.readLength); + + // at this point, we want to generate variants and reads for the parameters in parameters + int AC = parameters.acs.next(); + VariantContext vc = generateVariant(context.getLocation(), ref.getBase(), AC, parameters); + if ( verbose ) logger.info(String.format("Generating reads for %s", vc)); + ReadBackedPileup rbp = generateRBPForVariant(context.getLocation(), vc, refBases, parameters); + + // BED is zero based + sitesWriter.printf("%s %d %d%n", ref.getLocus().getContig(), ref.getLocus().getStart()-1, ref.getLocus().getStart() ); + variantsWriter.add(vc, ref.getBase()); + for ( SAMRecord read : rbp.getReads() ) readWriter.addAlignment(read); + + parameters.incCount(); + + return 0; + } + + private byte[] getBasesForReads(ReferenceContext ref, int readLength) { + int center = (int)(ref.getLocus().getStart() - ref.getWindow().getStart()); + int start = center - ((readLength - 1) / 2); + byte[] bases = new byte[readLength]; + System.arraycopy(ref.getBases(), start, bases, 0, readLength); + return bases; + } + + private VariantContext generateVariant( GenomeLoc loc, byte refBase, int AC, ParameterSet params ) { + Allele ref = Allele.create(refBase, true); + Allele alt = Allele.create(BaseUtils.baseIndexToSimpleBase(BaseUtils.getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(refBase)))); + List alleles = AC == 0 ? Arrays.asList(ref) : Arrays.asList(ref, alt); + + List homRef = Arrays.asList(ref, ref); + List het = Arrays.asList(ref, alt); + List homAlt = Arrays.asList(alt, alt); + + List genotypes = new ArrayList(); + double p = AC / (2.0 * nSamples); + //double q = 1 - p; + int nHomAlt = (int) Math.round(p * p * nSamples); + int nHet = AC - nHomAlt * 2; + //int nHet = (int) Math.round(2 * p * q * nSamples); + for ( int i = 0; i < nSamples; i++ ) { + List genotype; + + if ( i < nHomAlt ) { genotype = homAlt; } + else if ( i < (nHet + nHomAlt) ) { genotype = het; } + else { genotype = homRef; } + + genotypes.add(new Genotype(sampleName(i), genotype)); + } + + Map attributes = new LinkedHashMap(); + attributes.put(VCFConstants.ALLELE_COUNT_KEY, AC); + attributes.put(VCFConstants.SAMPLE_NUMBER_KEY, nSamples); + attributes.put(VCFConstants.ALLELE_NUMBER_KEY, 2 * nSamples); + attributes.put("Q", params.readQuals[0]); + attributes.put("MODE", params.mode); + attributes.put("DP", params.readDepth); + + return new VariantContext("anonymous", loc.getContig(), loc.getStart(), loc.getStart(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, VariantContext.PASSES_FILTERS, attributes ); + } + + private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, byte[] refBases, ParameterSet params ) { + List reads = new ArrayList(); + int offset = (params.readLength - 1) / 2; + + int start = (int)(loc.getStart() - (params.readLength - 1) / 2); + byte altBase = vc.isVariant() ? vc.getAlternateAllele(0).getBases()[0] : 0; + byte[] refHaplotype = Arrays.copyOf(refBases, refBases.length); + byte[] altHaplotype = Arrays.copyOf(refBases, refBases.length); + altHaplotype[(params.readLength - 1) / 2] = altBase; + + int gi = 0; + for ( Genotype g : vc.getGenotypes().values() ) { + int myDepth = sampleDepth(params); + for ( int d = 0; d < myDepth; d++ ) { + byte[] readBases = trueHaplotype(g, refHaplotype, altHaplotype); + addMachineErrors(readBases, params.errorRate); + + SAMRecord read = new SAMRecord(header); + read.setBaseQualities(params.readQuals); + read.setReadBases(readBases); + read.setReadName("FOO"); + read.setCigarString(params.readLength + "M"); + read.setReadPairedFlag(false); + read.setAlignmentStart(start); + read.setMappingQuality(60); + read.setReferenceName(loc.getContig()); + read.setReadNegativeStrandFlag(gi++ % 2 == 0); + read.setAttribute("RG", sampleRG(g.getSampleName()).getReadGroupId()); + + reads.add(read); + } + } + + return new ReadBackedPileupImpl(loc, reads, offset); + } + + private int sampleDepth(ParameterSet params) { + switch ( params.mode ) { + case CONSTANT: return params.readDepth; + case POISSON: return params.poissonRandom.nextInt(); + default: + throw new IllegalStateException("Unexpected DepthSamplingType " + params.mode); + } + } + + private byte[] trueHaplotype(Genotype g, byte[] refHaplotype, byte[] altHaplotype) { + double refP = 0.0; + + if ( g.isHomRef() ) refP = 1; + else if ( g.isHet() ) refP = 0.5; + else refP = 0.0; + + return Arrays.copyOf(ran.nextDouble() < refP ? refHaplotype : altHaplotype, refHaplotype.length); + } + + private void addMachineErrors(byte[] readBases, double errorRate) { + for ( int i = 0; i < readBases.length; i++ ) { + double r = ran.nextDouble(); + if ( r < errorRate ) { + byte errorBase = BaseUtils.baseIndexToSimpleBase(BaseUtils.getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(readBases[i]))); + if ( errorBase == readBases[i] ) throw new IllegalStateException("Read and error bases are the same"); + readBases[i] = errorBase; + } + } + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + public void onTraversalDone(Integer sum) { + //variantsWriter.close(); + sitesWriter.close(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/MathUtils.java b/java/src/org/broadinstitute/sting/utils/MathUtils.java index c00146762..77efeecc3 100755 --- a/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -386,6 +386,15 @@ public class MathUtils { return minI; } + public static int arrayMax(List array) { + if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); + if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!"); + + int m = array.get(0); + for ( int e : array ) m = Math.max(m, e); + return m; + } + public static double average(List vals, int maxI) { long sum = 0L;