Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e332aeaf70
|
|
@ -78,8 +78,10 @@ public class CycleCovariate implements StandardCovariate {
|
|||
increment = readOrderFactor;
|
||||
}
|
||||
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
cycles[i] = BitSetUtils.bitSetFrom(cycle);
|
||||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
|
||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,94 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* Number of reads ending in each category.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ReadEndIndels \
|
||||
* -o output.grp \
|
||||
* -I input.bam \
|
||||
* [-L input.intervals]
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountReadEventsWalker extends ReadWalker<Map<CigarOperator, ArrayList<Integer>> , Map<Integer, Map<CigarOperator, Long>>> {
|
||||
@Output (doc = "GATKReport table output")
|
||||
PrintStream out;
|
||||
|
||||
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
return ReadUtils.getCigarOperatorForAllBases(read);
|
||||
}
|
||||
|
||||
public Map<Integer, Map<CigarOperator, Long>> reduceInit() {
|
||||
return new HashMap<Integer, Map<CigarOperator, Long>>();
|
||||
}
|
||||
|
||||
public Map<Integer, Map<CigarOperator, Long>> reduce(Map<CigarOperator, ArrayList<Integer>> value, Map<Integer, Map<CigarOperator, Long>> sum) {
|
||||
for (Map.Entry<CigarOperator, ArrayList<Integer>> entry : value.entrySet()) {
|
||||
CigarOperator op = entry.getKey();
|
||||
ArrayList<Integer> positions = entry.getValue();
|
||||
|
||||
for (int p : positions) {
|
||||
Map<CigarOperator, Long> operatorCount = sum.get(p);
|
||||
if (operatorCount == null) {
|
||||
operatorCount = new HashMap<CigarOperator, Long>();
|
||||
sum.put(p, operatorCount);
|
||||
}
|
||||
|
||||
Long count = operatorCount.get(op);
|
||||
if (count == null)
|
||||
count = 0L;
|
||||
count++;
|
||||
operatorCount.put(op, count);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Map<Integer, Map<CigarOperator, Long>> result) {
|
||||
GATKReport report = GATKReport.newSimpleReport("Events", "Position", "Event", "Observations");
|
||||
for (Map.Entry<Integer, Map<CigarOperator, Long>> entry : result.entrySet()) {
|
||||
int position = entry.getKey();
|
||||
Map<CigarOperator, Long> operatorCount = entry.getValue();
|
||||
|
||||
for (Map.Entry<CigarOperator, Long> subEntry: operatorCount.entrySet()) {
|
||||
String operator = subEntry.getKey().name();
|
||||
Long observations = subEntry.getValue();
|
||||
report.addRow(position, operator, observations);
|
||||
}
|
||||
}
|
||||
report.print(out);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* Number of reads ending in each category.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ReadEndIndels \
|
||||
* -o output.txt \
|
||||
* -I input.bam \
|
||||
* [-L input.intervals]
|
||||
* </pre>
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountTerminusEventWalker extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
|
||||
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
List<CigarElement> cigarElements = read.getCigar().getCigarElements();
|
||||
|
||||
CigarElement lastElement = null;
|
||||
for (CigarElement element : cigarElements) {
|
||||
if (element.getOperator() != CigarOperator.HARD_CLIP)
|
||||
lastElement = element;
|
||||
}
|
||||
|
||||
if (lastElement == null)
|
||||
throw new UserException.MalformedBAM(read, "read does not have any bases, it's all hard clips");
|
||||
|
||||
long endsInIndel = lastElement.getOperator() == CigarOperator.INSERTION || lastElement.getOperator() == CigarOperator.DELETION? 1 : 0;
|
||||
long endsInSC = lastElement.getOperator() == CigarOperator.SOFT_CLIP ? 1 : 0;
|
||||
|
||||
return new Pair<Long, Long>(endsInIndel, endsInSC);
|
||||
}
|
||||
|
||||
public Pair<Long, Long> reduceInit() { return new Pair<Long, Long>(0L, 0L); }
|
||||
|
||||
public Pair<Long, Long> reduce(Pair<Long, Long> value, Pair<Long, Long> sum) {
|
||||
sum.set(sum.getFirst() + value.getFirst(), sum.getSecond() + value.getSecond());
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Pair<Long, Long> result) {
|
||||
System.out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
|
||||
}
|
||||
}
|
||||
|
|
@ -243,7 +243,7 @@ public class MathUtils {
|
|||
|
||||
double maxValue = arrayMax(log10p, finish);
|
||||
if(maxValue == Double.NEGATIVE_INFINITY)
|
||||
return sum;
|
||||
return maxValue;
|
||||
|
||||
for (int i = start; i < finish; i++) {
|
||||
sum += Math.pow(10.0, log10p[i] - maxValue);
|
||||
|
|
|
|||
|
|
@ -783,4 +783,43 @@ public class ReadUtils {
|
|||
return location;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a map with each event in the read (cigar operator) and the read coordinate where it happened.
|
||||
*
|
||||
* Example:
|
||||
* D -> 2, 34, 75
|
||||
* I -> 55
|
||||
* S -> 0, 101
|
||||
* H -> 101
|
||||
*
|
||||
* @param read the read
|
||||
* @return a map with the properties described above. See example
|
||||
*/
|
||||
public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases (GATKSAMRecord read) {
|
||||
Map<CigarOperator, ArrayList<Integer>> events = new HashMap<CigarOperator, ArrayList<Integer>>();
|
||||
|
||||
int position = 0;
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
CigarOperator op = cigarElement.getOperator();
|
||||
if (op.consumesReadBases()) {
|
||||
ArrayList<Integer> list = events.get(op);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
events.put(op, list);
|
||||
}
|
||||
for (int i = position; i < cigarElement.getLength(); i++)
|
||||
list.add(position++);
|
||||
}
|
||||
else {
|
||||
ArrayList<Integer> list = events.get(op);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
events.put(op, list);
|
||||
}
|
||||
list.add(position);
|
||||
}
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class ArtificialReadPileupTestProvider {
|
||||
final int contigStart = 1;
|
||||
final int contigStop = 10;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
|
||||
// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||
final String artificialContig = "chr1";
|
||||
// final int artificialContigIndex = 0;
|
||||
final String artificialReadName = "synth";
|
||||
final int artificialRefStart = 1;
|
||||
final int artificialMappingQuality = 60;
|
||||
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
||||
List<SAMReadGroupRecord> sampleRGs;
|
||||
|
||||
final String refBases = "AGGATACTGT";
|
||||
List<String> sampleNames = new ArrayList<String>();
|
||||
private String sampleName(int i) { return sampleNames.get(i); }
|
||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||
public final int offset = 5;
|
||||
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
||||
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||
|
||||
|
||||
public ArtificialReadPileupTestProvider(int numSamples, final String SAMPLE_PREFIX) {
|
||||
sampleRGs = new ArrayList<SAMReadGroupRecord>();
|
||||
|
||||
for ( int i = 0; i < numSamples; i++ ) {
|
||||
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
|
||||
SAMReadGroupRecord rg = createRG(sampleName(i));
|
||||
sampleRGs.add(rg);
|
||||
sample2RG.put(sampleName(i), rg);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public List<String> getSampleNames() {
|
||||
return sampleNames;
|
||||
}
|
||||
public byte getRefByte() {
|
||||
return refBases.substring(offset,offset+1).getBytes()[0];
|
||||
}
|
||||
|
||||
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
|
||||
|
||||
ArrayList vcAlleles = new ArrayList<Allele>();
|
||||
Allele refAllele, altAllele;
|
||||
if (eventLength == 0) {// SNP case
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
|
||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||
|
||||
} else if (eventLength>0){
|
||||
// insertion
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||
}
|
||||
else {
|
||||
// deletion
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
int stop = loc.getStart();
|
||||
vcAlleles.add(refAllele);
|
||||
vcAlleles.add(altAllele);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||
builder.noGenotypes();
|
||||
|
||||
final VariantContext vc = builder.make();
|
||||
|
||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||
|
||||
for (String sample: sampleNames) {
|
||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
|
||||
contexts.put(sample,context);
|
||||
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
private SAMReadGroupRecord createRG(String name) {
|
||||
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
|
||||
rg.setPlatform("ILLUMINA");
|
||||
rg.setSample(name);
|
||||
return rg;
|
||||
}
|
||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
||||
int[] numReadsPerAllele, String sample) {
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
int readStart = contigStart;
|
||||
int offset = (contigStop-contigStart+1)/2;
|
||||
int refAlleleLength = 0;
|
||||
int readCounter = 0;
|
||||
int alleleCounter = 0;
|
||||
for (Allele allele: vc.getAlleles()) {
|
||||
if (allele.isReference())
|
||||
refAlleleLength = allele.getBases().length;
|
||||
|
||||
int alleleLength = allele.getBases().length;
|
||||
|
||||
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
byte[] readQuals = new byte[readBases.length];
|
||||
Arrays.fill(readQuals, (byte) 50);
|
||||
|
||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||
read.setBaseQualities(readQuals);
|
||||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
if (allele.isReference())
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
if (isBeforeDeletion || isBeforeInsertion)
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
else // SNP case
|
||||
read.setCigarString(readBases.length+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
read.setReferenceName(loc.getContig());
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
}
|
||||
alleleCounter++;
|
||||
}
|
||||
|
||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||
}
|
||||
|
||||
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
||||
// create haplotype based on a particular allele
|
||||
String prefix = refBases.substring(offset);
|
||||
String alleleBases = new String(allele.getBases());
|
||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||
|
||||
return (prefix+alleleBases+postfix).getBytes();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -45,51 +45,23 @@ import org.testng.annotations.Test;
|
|||
*/
|
||||
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||
|
||||
final int contigStart = 1;
|
||||
final int contigStop = 10;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop-contigStart+1);
|
||||
final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||
final String artificialContig = "chr1";
|
||||
final int artificialContigIndex = 0;
|
||||
final String artificialReadName = "synth";
|
||||
final int artificialRefStart = 1;
|
||||
final int artificialMappingQuality = 60;
|
||||
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
||||
|
||||
final String refBases = "AGGATACTGT";
|
||||
final String SAMPLE_PREFIX = "sample";
|
||||
|
||||
List<String> sampleNames = new ArrayList<String>();
|
||||
final int nSamples = 1;
|
||||
final int numReadsPerAllele = 10;
|
||||
|
||||
List<SAMReadGroupRecord> sampleRGs;
|
||||
|
||||
private String sampleName(int i) { return sampleNames.get(i); }
|
||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||
final int[] numReadsPerAllele = new int[]{10,10};
|
||||
final String SAMPLE_PREFIX = "sample";
|
||||
|
||||
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
final Logger logger = Logger.getLogger(Walker.class);
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger);
|
||||
final int offset = 5;
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
||||
final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||
final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||
|
||||
@BeforeSuite
|
||||
ArtificialReadPileupTestProvider pileupProvider;
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
sampleRGs = new ArrayList<SAMReadGroupRecord>();
|
||||
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
|
||||
SAMReadGroupRecord rg = createRG(sampleName(i));
|
||||
sampleRGs.add(rg);
|
||||
sample2RG.put(sampleName(i), rg);
|
||||
}
|
||||
|
||||
pileupProvider = new ArtificialReadPileupTestProvider(nSamples, SAMPLE_PREFIX);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicConsensusCounts() {
|
||||
// 4 inserted bases, min cnt = 10
|
||||
|
|
@ -107,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
eventLength = 3;
|
||||
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(offset,offset+eventLength));
|
||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||
|
||||
// same with min Reads = 11
|
||||
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
||||
|
|
@ -121,112 +93,10 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
|
||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(genomeLocParser, true, minCnt, minFraction);
|
||||
return counter.computeConsensusAlleles(referenceContext,getContextFromAlleles(eventLength, isInsertion, altBases), AlignmentContextUtils.ReadOrientation.COMPLETE);
|
||||
|
||||
}
|
||||
private Map<String,AlignmentContext> getContextFromAlleles(int eventLength, boolean isInsertion, String altBases) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
|
||||
|
||||
ArrayList vcAlleles = new ArrayList<Allele>();
|
||||
Allele refAllele, altAllele;
|
||||
if (isInsertion) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||
}
|
||||
else {
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+eventLength),true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
|
||||
int stop = loc.getStart();
|
||||
vcAlleles.add(refAllele);
|
||||
vcAlleles.add(altAllele);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||
builder.noGenotypes();
|
||||
|
||||
final VariantContext vc = builder.make();
|
||||
|
||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||
|
||||
for (String sample: sampleNames) {
|
||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
|
||||
contexts.put(sample,context);
|
||||
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
private SAMReadGroupRecord createRG(String name) {
|
||||
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
|
||||
rg.setPlatform("ILLUMINA");
|
||||
rg.setSample(name);
|
||||
return rg;
|
||||
}
|
||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
||||
int numReads, String sample) {
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
int readStart = contigStart;
|
||||
int offset = (contigStop-contigStart+1)/2;
|
||||
int refAlleleLength = 0;
|
||||
int readCounter = 0;
|
||||
for (Allele allele: vc.getAlleles()) {
|
||||
if (allele.isReference())
|
||||
refAlleleLength = allele.getBases().length;
|
||||
|
||||
int alleleLength = allele.getBases().length;
|
||||
|
||||
for ( int d = 0; d < numReads; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
byte[] readQuals = new byte[readBases.length];
|
||||
Arrays.fill(readQuals,(byte)50);
|
||||
|
||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||
read.setBaseQualities(readQuals);
|
||||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
if (allele.isReference())
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
read.setReferenceName(loc.getContig());
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
}
|
||||
}
|
||||
|
||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||
}
|
||||
|
||||
byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
||||
// create haplotype based on a particular allele
|
||||
String prefix = refBases.substring(offset);
|
||||
String alleleBases = new String(allele.getBases());
|
||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||
|
||||
return (prefix+alleleBases+postfix).getBytes();
|
||||
|
||||
|
||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction);
|
||||
return counter.computeConsensusAlleles(pileupProvider.referenceContext,
|
||||
pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele),
|
||||
AlignmentContextUtils.ReadOrientation.COMPLETE);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue