Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e332aeaf70
|
|
@ -78,8 +78,10 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
increment = readOrderFactor;
|
increment = readOrderFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
final int CUSHION = 4;
|
||||||
cycles[i] = BitSetUtils.bitSetFrom(cycle);
|
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
|
||||||
|
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||||
|
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle);
|
||||||
cycle += increment;
|
cycle += increment;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,94 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* One or more BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* Number of reads ending in each category.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T ReadEndIndels \
|
||||||
|
* -o output.grp \
|
||||||
|
* -I input.bam \
|
||||||
|
* [-L input.intervals]
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||||
|
public class CountReadEventsWalker extends ReadWalker<Map<CigarOperator, ArrayList<Integer>> , Map<Integer, Map<CigarOperator, Long>>> {
|
||||||
|
@Output (doc = "GATKReport table output")
|
||||||
|
PrintStream out;
|
||||||
|
|
||||||
|
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
|
return ReadUtils.getCigarOperatorForAllBases(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Map<CigarOperator, Long>> reduceInit() {
|
||||||
|
return new HashMap<Integer, Map<CigarOperator, Long>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Integer, Map<CigarOperator, Long>> reduce(Map<CigarOperator, ArrayList<Integer>> value, Map<Integer, Map<CigarOperator, Long>> sum) {
|
||||||
|
for (Map.Entry<CigarOperator, ArrayList<Integer>> entry : value.entrySet()) {
|
||||||
|
CigarOperator op = entry.getKey();
|
||||||
|
ArrayList<Integer> positions = entry.getValue();
|
||||||
|
|
||||||
|
for (int p : positions) {
|
||||||
|
Map<CigarOperator, Long> operatorCount = sum.get(p);
|
||||||
|
if (operatorCount == null) {
|
||||||
|
operatorCount = new HashMap<CigarOperator, Long>();
|
||||||
|
sum.put(p, operatorCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
Long count = operatorCount.get(op);
|
||||||
|
if (count == null)
|
||||||
|
count = 0L;
|
||||||
|
count++;
|
||||||
|
operatorCount.put(op, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Map<Integer, Map<CigarOperator, Long>> result) {
|
||||||
|
GATKReport report = GATKReport.newSimpleReport("Events", "Position", "Event", "Observations");
|
||||||
|
for (Map.Entry<Integer, Map<CigarOperator, Long>> entry : result.entrySet()) {
|
||||||
|
int position = entry.getKey();
|
||||||
|
Map<CigarOperator, Long> operatorCount = entry.getValue();
|
||||||
|
|
||||||
|
for (Map.Entry<CigarOperator, Long> subEntry: operatorCount.entrySet()) {
|
||||||
|
String operator = subEntry.getKey().name();
|
||||||
|
Long observations = subEntry.getValue();
|
||||||
|
report.addRow(position, operator, observations);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report.print(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* One or more BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* Number of reads ending in each category.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T ReadEndIndels \
|
||||||
|
* -o output.txt \
|
||||||
|
* -I input.bam \
|
||||||
|
* [-L input.intervals]
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||||
|
public class CountTerminusEventWalker extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
|
||||||
|
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||||
|
List<CigarElement> cigarElements = read.getCigar().getCigarElements();
|
||||||
|
|
||||||
|
CigarElement lastElement = null;
|
||||||
|
for (CigarElement element : cigarElements) {
|
||||||
|
if (element.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
|
lastElement = element;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastElement == null)
|
||||||
|
throw new UserException.MalformedBAM(read, "read does not have any bases, it's all hard clips");
|
||||||
|
|
||||||
|
long endsInIndel = lastElement.getOperator() == CigarOperator.INSERTION || lastElement.getOperator() == CigarOperator.DELETION? 1 : 0;
|
||||||
|
long endsInSC = lastElement.getOperator() == CigarOperator.SOFT_CLIP ? 1 : 0;
|
||||||
|
|
||||||
|
return new Pair<Long, Long>(endsInIndel, endsInSC);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<Long, Long> reduceInit() { return new Pair<Long, Long>(0L, 0L); }
|
||||||
|
|
||||||
|
public Pair<Long, Long> reduce(Pair<Long, Long> value, Pair<Long, Long> sum) {
|
||||||
|
sum.set(sum.getFirst() + value.getFirst(), sum.getSecond() + value.getSecond());
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Pair<Long, Long> result) {
|
||||||
|
System.out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -243,7 +243,7 @@ public class MathUtils {
|
||||||
|
|
||||||
double maxValue = arrayMax(log10p, finish);
|
double maxValue = arrayMax(log10p, finish);
|
||||||
if(maxValue == Double.NEGATIVE_INFINITY)
|
if(maxValue == Double.NEGATIVE_INFINITY)
|
||||||
return sum;
|
return maxValue;
|
||||||
|
|
||||||
for (int i = start; i < finish; i++) {
|
for (int i = start; i < finish; i++) {
|
||||||
sum += Math.pow(10.0, log10p[i] - maxValue);
|
sum += Math.pow(10.0, log10p[i] - maxValue);
|
||||||
|
|
|
||||||
|
|
@ -783,4 +783,43 @@ public class ReadUtils {
|
||||||
return location;
|
return location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a map with each event in the read (cigar operator) and the read coordinate where it happened.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
* D -> 2, 34, 75
|
||||||
|
* I -> 55
|
||||||
|
* S -> 0, 101
|
||||||
|
* H -> 101
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @return a map with the properties described above. See example
|
||||||
|
*/
|
||||||
|
public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases (GATKSAMRecord read) {
|
||||||
|
Map<CigarOperator, ArrayList<Integer>> events = new HashMap<CigarOperator, ArrayList<Integer>>();
|
||||||
|
|
||||||
|
int position = 0;
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
CigarOperator op = cigarElement.getOperator();
|
||||||
|
if (op.consumesReadBases()) {
|
||||||
|
ArrayList<Integer> list = events.get(op);
|
||||||
|
if (list == null) {
|
||||||
|
list = new ArrayList<Integer>();
|
||||||
|
events.put(op, list);
|
||||||
|
}
|
||||||
|
for (int i = position; i < cigarElement.getLength(); i++)
|
||||||
|
list.add(position++);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ArrayList<Integer> list = events.get(op);
|
||||||
|
if (list == null) {
|
||||||
|
list = new ArrayList<Integer>();
|
||||||
|
events.put(op, list);
|
||||||
|
}
|
||||||
|
list.add(position);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return events;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,203 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class ArtificialReadPileupTestProvider {
|
||||||
|
final int contigStart = 1;
|
||||||
|
final int contigStop = 10;
|
||||||
|
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
|
||||||
|
// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||||
|
final String artificialContig = "chr1";
|
||||||
|
// final int artificialContigIndex = 0;
|
||||||
|
final String artificialReadName = "synth";
|
||||||
|
final int artificialRefStart = 1;
|
||||||
|
final int artificialMappingQuality = 60;
|
||||||
|
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
||||||
|
List<SAMReadGroupRecord> sampleRGs;
|
||||||
|
|
||||||
|
final String refBases = "AGGATACTGT";
|
||||||
|
List<String> sampleNames = new ArrayList<String>();
|
||||||
|
private String sampleName(int i) { return sampleNames.get(i); }
|
||||||
|
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||||
|
public final int offset = 5;
|
||||||
|
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
|
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
||||||
|
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||||
|
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||||
|
|
||||||
|
|
||||||
|
public ArtificialReadPileupTestProvider(int numSamples, final String SAMPLE_PREFIX) {
|
||||||
|
sampleRGs = new ArrayList<SAMReadGroupRecord>();
|
||||||
|
|
||||||
|
for ( int i = 0; i < numSamples; i++ ) {
|
||||||
|
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
|
||||||
|
SAMReadGroupRecord rg = createRG(sampleName(i));
|
||||||
|
sampleRGs.add(rg);
|
||||||
|
sample2RG.put(sampleName(i), rg);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getSampleNames() {
|
||||||
|
return sampleNames;
|
||||||
|
}
|
||||||
|
public byte getRefByte() {
|
||||||
|
return refBases.substring(offset,offset+1).getBytes()[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
|
||||||
|
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||||
|
|
||||||
|
|
||||||
|
ArrayList vcAlleles = new ArrayList<Allele>();
|
||||||
|
Allele refAllele, altAllele;
|
||||||
|
if (eventLength == 0) {// SNP case
|
||||||
|
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
|
||||||
|
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||||
|
|
||||||
|
} else if (eventLength>0){
|
||||||
|
// insertion
|
||||||
|
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||||
|
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// deletion
|
||||||
|
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
||||||
|
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||||
|
}
|
||||||
|
int stop = loc.getStart();
|
||||||
|
vcAlleles.add(refAllele);
|
||||||
|
vcAlleles.add(altAllele);
|
||||||
|
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||||
|
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||||
|
builder.alleles(vcAlleles);
|
||||||
|
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||||
|
builder.noGenotypes();
|
||||||
|
|
||||||
|
final VariantContext vc = builder.make();
|
||||||
|
|
||||||
|
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||||
|
|
||||||
|
for (String sample: sampleNames) {
|
||||||
|
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
|
||||||
|
contexts.put(sample,context);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return contexts;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SAMReadGroupRecord createRG(String name) {
|
||||||
|
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
|
||||||
|
rg.setPlatform("ILLUMINA");
|
||||||
|
rg.setSample(name);
|
||||||
|
return rg;
|
||||||
|
}
|
||||||
|
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
||||||
|
int[] numReadsPerAllele, String sample) {
|
||||||
|
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||||
|
int readStart = contigStart;
|
||||||
|
int offset = (contigStop-contigStart+1)/2;
|
||||||
|
int refAlleleLength = 0;
|
||||||
|
int readCounter = 0;
|
||||||
|
int alleleCounter = 0;
|
||||||
|
for (Allele allele: vc.getAlleles()) {
|
||||||
|
if (allele.isReference())
|
||||||
|
refAlleleLength = allele.getBases().length;
|
||||||
|
|
||||||
|
int alleleLength = allele.getBases().length;
|
||||||
|
|
||||||
|
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
||||||
|
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||||
|
byte[] readQuals = new byte[readBases.length];
|
||||||
|
Arrays.fill(readQuals, (byte) 50);
|
||||||
|
|
||||||
|
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||||
|
read.setBaseQualities(readQuals);
|
||||||
|
read.setReadBases(readBases);
|
||||||
|
read.setReadName(artificialReadName+readCounter++);
|
||||||
|
|
||||||
|
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||||
|
if (allele.isReference())
|
||||||
|
read.setCigarString(readBases.length + "M");
|
||||||
|
else {
|
||||||
|
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||||
|
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||||
|
if (isBeforeDeletion || isBeforeInsertion)
|
||||||
|
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||||
|
(readBases.length-offset)+"M");
|
||||||
|
else // SNP case
|
||||||
|
read.setCigarString(readBases.length+"M");
|
||||||
|
}
|
||||||
|
|
||||||
|
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||||
|
read.setReadPairedFlag(false);
|
||||||
|
read.setAlignmentStart(readStart);
|
||||||
|
read.setMappingQuality(artificialMappingQuality);
|
||||||
|
read.setReferenceName(loc.getContig());
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||||
|
|
||||||
|
|
||||||
|
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||||
|
}
|
||||||
|
alleleCounter++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
||||||
|
// create haplotype based on a particular allele
|
||||||
|
String prefix = refBases.substring(offset);
|
||||||
|
String alleleBases = new String(allele.getBases());
|
||||||
|
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||||
|
|
||||||
|
return (prefix+alleleBases+postfix).getBytes();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -45,51 +45,23 @@ import org.testng.annotations.Test;
|
||||||
*/
|
*/
|
||||||
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||||
|
|
||||||
final int contigStart = 1;
|
|
||||||
final int contigStop = 10;
|
|
||||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop-contigStart+1);
|
|
||||||
final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
|
||||||
final String artificialContig = "chr1";
|
|
||||||
final int artificialContigIndex = 0;
|
|
||||||
final String artificialReadName = "synth";
|
|
||||||
final int artificialRefStart = 1;
|
|
||||||
final int artificialMappingQuality = 60;
|
|
||||||
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
|
||||||
|
|
||||||
final String refBases = "AGGATACTGT";
|
final String refBases = "AGGATACTGT";
|
||||||
final String SAMPLE_PREFIX = "sample";
|
|
||||||
|
|
||||||
List<String> sampleNames = new ArrayList<String>();
|
|
||||||
final int nSamples = 1;
|
final int nSamples = 1;
|
||||||
final int numReadsPerAllele = 10;
|
final int[] numReadsPerAllele = new int[]{10,10};
|
||||||
|
final String SAMPLE_PREFIX = "sample";
|
||||||
List<SAMReadGroupRecord> sampleRGs;
|
|
||||||
|
|
||||||
private String sampleName(int i) { return sampleNames.get(i); }
|
|
||||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
|
||||||
|
|
||||||
|
|
||||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||||
final Logger logger = Logger.getLogger(Walker.class);
|
final Logger logger = Logger.getLogger(Walker.class);
|
||||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
|
||||||
final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger);
|
final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger);
|
||||||
final int offset = 5;
|
|
||||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
|
||||||
final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
|
||||||
final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
|
||||||
|
|
||||||
@BeforeSuite
|
ArtificialReadPileupTestProvider pileupProvider;
|
||||||
|
|
||||||
|
@BeforeSuite
|
||||||
public void before() {
|
public void before() {
|
||||||
sampleRGs = new ArrayList<SAMReadGroupRecord>();
|
pileupProvider = new ArtificialReadPileupTestProvider(nSamples, SAMPLE_PREFIX);
|
||||||
|
|
||||||
for ( int i = 0; i < nSamples; i++ ) {
|
|
||||||
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
|
|
||||||
SAMReadGroupRecord rg = createRG(sampleName(i));
|
|
||||||
sampleRGs.add(rg);
|
|
||||||
sample2RG.put(sampleName(i), rg);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBasicConsensusCounts() {
|
public void testBasicConsensusCounts() {
|
||||||
// 4 inserted bases, min cnt = 10
|
// 4 inserted bases, min cnt = 10
|
||||||
|
|
@ -107,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||||
eventLength = 3;
|
eventLength = 3;
|
||||||
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
||||||
Assert.assertEquals(alleles.size(),2);
|
Assert.assertEquals(alleles.size(),2);
|
||||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(offset,offset+eventLength));
|
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||||
|
|
||||||
// same with min Reads = 11
|
// same with min Reads = 11
|
||||||
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
||||||
|
|
@ -121,112 +93,10 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
|
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
|
||||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(genomeLocParser, true, minCnt, minFraction);
|
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction);
|
||||||
return counter.computeConsensusAlleles(referenceContext,getContextFromAlleles(eventLength, isInsertion, altBases), AlignmentContextUtils.ReadOrientation.COMPLETE);
|
return counter.computeConsensusAlleles(pileupProvider.referenceContext,
|
||||||
|
pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele),
|
||||||
}
|
AlignmentContextUtils.ReadOrientation.COMPLETE);
|
||||||
private Map<String,AlignmentContext> getContextFromAlleles(int eventLength, boolean isInsertion, String altBases) {
|
|
||||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
|
||||||
|
|
||||||
|
|
||||||
ArrayList vcAlleles = new ArrayList<Allele>();
|
|
||||||
Allele refAllele, altAllele;
|
|
||||||
if (isInsertion) {
|
|
||||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
|
||||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
refAllele =Allele.create(refBases.substring(offset,offset+eventLength),true);
|
|
||||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
int stop = loc.getStart();
|
|
||||||
vcAlleles.add(refAllele);
|
|
||||||
vcAlleles.add(altAllele);
|
|
||||||
|
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
|
||||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
|
||||||
builder.alleles(vcAlleles);
|
|
||||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
|
||||||
builder.noGenotypes();
|
|
||||||
|
|
||||||
final VariantContext vc = builder.make();
|
|
||||||
|
|
||||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
|
||||||
|
|
||||||
for (String sample: sampleNames) {
|
|
||||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
|
|
||||||
contexts.put(sample,context);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return contexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
private SAMReadGroupRecord createRG(String name) {
|
|
||||||
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
|
|
||||||
rg.setPlatform("ILLUMINA");
|
|
||||||
rg.setSample(name);
|
|
||||||
return rg;
|
|
||||||
}
|
|
||||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
|
||||||
int numReads, String sample) {
|
|
||||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
|
||||||
int readStart = contigStart;
|
|
||||||
int offset = (contigStop-contigStart+1)/2;
|
|
||||||
int refAlleleLength = 0;
|
|
||||||
int readCounter = 0;
|
|
||||||
for (Allele allele: vc.getAlleles()) {
|
|
||||||
if (allele.isReference())
|
|
||||||
refAlleleLength = allele.getBases().length;
|
|
||||||
|
|
||||||
int alleleLength = allele.getBases().length;
|
|
||||||
|
|
||||||
for ( int d = 0; d < numReads; d++ ) {
|
|
||||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
|
||||||
byte[] readQuals = new byte[readBases.length];
|
|
||||||
Arrays.fill(readQuals,(byte)50);
|
|
||||||
|
|
||||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
|
||||||
read.setBaseQualities(readQuals);
|
|
||||||
read.setReadBases(readBases);
|
|
||||||
read.setReadName(artificialReadName+readCounter++);
|
|
||||||
|
|
||||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
|
||||||
if (allele.isReference())
|
|
||||||
read.setCigarString(readBases.length + "M");
|
|
||||||
else {
|
|
||||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
|
||||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
|
||||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
|
||||||
(readBases.length-offset)+"M");
|
|
||||||
}
|
|
||||||
|
|
||||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
|
||||||
read.setReadPairedFlag(false);
|
|
||||||
read.setAlignmentStart(readStart);
|
|
||||||
read.setMappingQuality(artificialMappingQuality);
|
|
||||||
read.setReferenceName(loc.getContig());
|
|
||||||
read.setReadNegativeStrandFlag(false);
|
|
||||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
|
||||||
|
|
||||||
|
|
||||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
|
||||||
}
|
|
||||||
|
|
||||||
byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
|
||||||
// create haplotype based on a particular allele
|
|
||||||
String prefix = refBases.substring(offset);
|
|
||||||
String alleleBases = new String(allele.getBases());
|
|
||||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
|
||||||
|
|
||||||
return (prefix+alleleBases+postfix).getBytes();
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue