diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 29d47cf3c..e8b05b525 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -243,7 +243,7 @@ public class MathUtils { double maxValue = arrayMax(log10p, finish); if(maxValue == Double.NEGATIVE_INFINITY) - return sum; + return maxValue; for (int i = start; i < finish; i++) { sum += Math.pow(10.0, log10p[i] - maxValue); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java new file mode 100644 index 000000000..1c372aa82 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; + +import java.util.*; + + +public class ArtificialReadPileupTestProvider { + final int contigStart = 1; + final int contigStop = 10; + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1); +// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic"); + final String artificialContig = "chr1"; + // final int artificialContigIndex = 0; + final String artificialReadName = "synth"; + final int artificialRefStart = 1; + final int artificialMappingQuality = 60; + Map sample2RG = new HashMap(); + List sampleRGs; + + final String refBases = "AGGATACTGT"; + List sampleNames = new ArrayList(); + private String sampleName(int i) { return sampleNames.get(i); } + private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } + public final int offset = 5; + public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset); + public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10); + public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes()); + + + public ArtificialReadPileupTestProvider(int numSamples, final String SAMPLE_PREFIX) { + sampleRGs = new ArrayList(); + + for ( int i = 0; i < numSamples; i++ ) { + sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i)); + SAMReadGroupRecord rg = createRG(sampleName(i)); + sampleRGs.add(rg); + sample2RG.put(sampleName(i), rg); + } + + } + + public List getSampleNames() { + return sampleNames; + } + public byte getRefByte() { + return refBases.substring(offset,offset+1).getBytes()[0]; + } + + public Map getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) { + // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); + + + ArrayList vcAlleles = new ArrayList(); + Allele refAllele, altAllele; + if (eventLength == 0) {// SNP case + refAllele =Allele.create(refBases.substring(offset,offset+1),true); + altAllele = Allele.create(altBases.substring(0,1), false); + + } else if (eventLength>0){ + // insertion + refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); + altAllele = Allele.create(altBases.substring(0,eventLength), false); + } + else { + // deletion + refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); + altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + } + int stop = loc.getStart(); + vcAlleles.add(refAllele); + vcAlleles.add(altAllele); + + final VariantContextBuilder builder = new VariantContextBuilder().source(""); + builder.loc(loc.getContig(), loc.getStart(), stop); + builder.alleles(vcAlleles); + builder.referenceBaseForIndel(referenceContext.getBase()); + builder.noGenotypes(); + + final VariantContext vc = builder.make(); + + Map contexts = new HashMap(); + + for (String sample: sampleNames) { + AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample)); + contexts.put(sample,context); + + } + + return contexts; + } + + private SAMReadGroupRecord createRG(String name) { + SAMReadGroupRecord rg = new SAMReadGroupRecord(name); + rg.setPlatform("ILLUMINA"); + rg.setSample(name); + return rg; + } + private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases, + int[] numReadsPerAllele, String sample) { + List pileupElements = new ArrayList(); + int readStart = contigStart; + int offset = (contigStop-contigStart+1)/2; + int refAlleleLength = 0; + int readCounter = 0; + int alleleCounter = 0; + for (Allele allele: vc.getAlleles()) { + if (allele.isReference()) + refAlleleLength = allele.getBases().length; + + int alleleLength = allele.getBases().length; + + for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) { + byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); + byte[] readQuals = new byte[readBases.length]; + Arrays.fill(readQuals, (byte) 50); + + GATKSAMRecord read = new GATKSAMRecord(header); + read.setBaseQualities(readQuals); + read.setReadBases(readBases); + read.setReadName(artificialReadName+readCounter++); + + boolean isBeforeDeletion = false, isBeforeInsertion = false; + if (allele.isReference()) + read.setCigarString(readBases.length + "M"); + else { + isBeforeDeletion = alleleLengthrefAlleleLength; + if (isBeforeDeletion || isBeforeInsertion) + read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + + (readBases.length-offset)+"M"); + else // SNP case + read.setCigarString(readBases.length+"M"); + } + + int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); + read.setReadPairedFlag(false); + read.setAlignmentStart(readStart); + read.setMappingQuality(artificialMappingQuality); + read.setReferenceName(loc.getContig()); + read.setReadNegativeStrandFlag(false); + read.setAttribute("RG", sampleRG(sample).getReadGroupId()); + + + pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); + } + alleleCounter++; + } + + return new ReadBackedPileupImpl(loc,pileupElements); + } + + private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { + // create haplotype based on a particular allele + String prefix = refBases.substring(offset); + String alleleBases = new String(allele.getBases()); + String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); + + return (prefix+alleleBases+postfix).getBytes(); + + + + } + +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java index 5c75a9b29..e4c3b8dae 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java @@ -45,51 +45,23 @@ import org.testng.annotations.Test; */ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { - final int contigStart = 1; - final int contigStop = 10; - final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop-contigStart+1); - final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic"); - final String artificialContig = "chr1"; - final int artificialContigIndex = 0; - final String artificialReadName = "synth"; - final int artificialRefStart = 1; - final int artificialMappingQuality = 60; - Map sample2RG = new HashMap(); - final String refBases = "AGGATACTGT"; - final String SAMPLE_PREFIX = "sample"; - - List sampleNames = new ArrayList(); final int nSamples = 1; - final int numReadsPerAllele = 10; - - List sampleRGs; - - private String sampleName(int i) { return sampleNames.get(i); } - private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } + final int[] numReadsPerAllele = new int[]{10,10}; + final String SAMPLE_PREFIX = "sample"; final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); final Logger logger = Logger.getLogger(Walker.class); - final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger); - final int offset = 5; - final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset); - final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10); - final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes()); - @BeforeSuite + ArtificialReadPileupTestProvider pileupProvider; + + @BeforeSuite public void before() { - sampleRGs = new ArrayList(); - - for ( int i = 0; i < nSamples; i++ ) { - sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i)); - SAMReadGroupRecord rg = createRG(sampleName(i)); - sampleRGs.add(rg); - sample2RG.put(sampleName(i), rg); - } - + pileupProvider = new ArtificialReadPileupTestProvider(nSamples, SAMPLE_PREFIX); } + @Test public void testBasicConsensusCounts() { // 4 inserted bases, min cnt = 10 @@ -107,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { eventLength = 3; alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases); Assert.assertEquals(alleles.size(),2); - Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(offset,offset+eventLength)); + Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength)); // same with min Reads = 11 alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases); @@ -121,112 +93,10 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { } private List getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) { - final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(genomeLocParser, true, minCnt, minFraction); - return counter.computeConsensusAlleles(referenceContext,getContextFromAlleles(eventLength, isInsertion, altBases), AlignmentContextUtils.ReadOrientation.COMPLETE); - - } - private Map getContextFromAlleles(int eventLength, boolean isInsertion, String altBases) { - // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - - - ArrayList vcAlleles = new ArrayList(); - Allele refAllele, altAllele; - if (isInsertion) { - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(altBases.substring(0,eventLength), false); - } - else { - refAllele =Allele.create(refBases.substring(offset,offset+eventLength),true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); - } - - int stop = loc.getStart(); - vcAlleles.add(refAllele); - vcAlleles.add(altAllele); - - final VariantContextBuilder builder = new VariantContextBuilder().source(""); - builder.loc(loc.getContig(), loc.getStart(), stop); - builder.alleles(vcAlleles); - builder.referenceBaseForIndel(referenceContext.getBase()); - builder.noGenotypes(); - - final VariantContext vc = builder.make(); - - Map contexts = new HashMap(); - - for (String sample: sampleNames) { - AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample)); - contexts.put(sample,context); - - } - - return contexts; - } - - private SAMReadGroupRecord createRG(String name) { - SAMReadGroupRecord rg = new SAMReadGroupRecord(name); - rg.setPlatform("ILLUMINA"); - rg.setSample(name); - return rg; - } - private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases, - int numReads, String sample) { - List pileupElements = new ArrayList(); - int readStart = contigStart; - int offset = (contigStop-contigStart+1)/2; - int refAlleleLength = 0; - int readCounter = 0; - for (Allele allele: vc.getAlleles()) { - if (allele.isReference()) - refAlleleLength = allele.getBases().length; - - int alleleLength = allele.getBases().length; - - for ( int d = 0; d < numReads; d++ ) { - byte[] readBases = trueHaplotype(allele, offset, refAlleleLength); - byte[] readQuals = new byte[readBases.length]; - Arrays.fill(readQuals,(byte)50); - - GATKSAMRecord read = new GATKSAMRecord(header); - read.setBaseQualities(readQuals); - read.setReadBases(readBases); - read.setReadName(artificialReadName+readCounter++); - - boolean isBeforeDeletion = false, isBeforeInsertion = false; - if (allele.isReference()) - read.setCigarString(readBases.length + "M"); - else { - isBeforeDeletion = alleleLengthrefAlleleLength; - read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") + - (readBases.length-offset)+"M"); - } - - int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0)); - read.setReadPairedFlag(false); - read.setAlignmentStart(readStart); - read.setMappingQuality(artificialMappingQuality); - read.setReferenceName(loc.getContig()); - read.setReadNegativeStrandFlag(false); - read.setAttribute("RG", sampleRG(sample).getReadGroupId()); - - - pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength)); - } - } - - return new ReadBackedPileupImpl(loc,pileupElements); - } - - byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) { - // create haplotype based on a particular allele - String prefix = refBases.substring(offset); - String alleleBases = new String(allele.getBases()); - String postfix = refBases.substring(offset+refAlleleLength,refBases.length()); - - return (prefix+alleleBases+postfix).getBytes(); - - + final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction); + return counter.computeConsensusAlleles(pileupProvider.referenceContext, + pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele), + AlignmentContextUtils.ReadOrientation.COMPLETE); } }