Merge branch 'master' of ssh://ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7d029b9a28
|
|
@ -78,8 +78,10 @@ public class CycleCovariate implements StandardCovariate {
|
|||
increment = readOrderFactor;
|
||||
}
|
||||
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
cycles[i] = BitSetUtils.bitSetFrom(cycle);
|
||||
final int CUSHION = 4;
|
||||
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
|
||||
for (int i = 0; i < MAX_CYCLE; i++) {
|
||||
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,94 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* Number of reads ending in each category.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ReadEndIndels \
|
||||
* -o output.grp \
|
||||
* -I input.bam \
|
||||
* [-L input.intervals]
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountReadEventsWalker extends ReadWalker<Map<CigarOperator, ArrayList<Integer>> , Map<Integer, Map<CigarOperator, Long>>> {
|
||||
@Output (doc = "GATKReport table output")
|
||||
PrintStream out;
|
||||
|
||||
public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
return ReadUtils.getCigarOperatorForAllBases(read);
|
||||
}
|
||||
|
||||
public Map<Integer, Map<CigarOperator, Long>> reduceInit() {
|
||||
return new HashMap<Integer, Map<CigarOperator, Long>>();
|
||||
}
|
||||
|
||||
public Map<Integer, Map<CigarOperator, Long>> reduce(Map<CigarOperator, ArrayList<Integer>> value, Map<Integer, Map<CigarOperator, Long>> sum) {
|
||||
for (Map.Entry<CigarOperator, ArrayList<Integer>> entry : value.entrySet()) {
|
||||
CigarOperator op = entry.getKey();
|
||||
ArrayList<Integer> positions = entry.getValue();
|
||||
|
||||
for (int p : positions) {
|
||||
Map<CigarOperator, Long> operatorCount = sum.get(p);
|
||||
if (operatorCount == null) {
|
||||
operatorCount = new HashMap<CigarOperator, Long>();
|
||||
sum.put(p, operatorCount);
|
||||
}
|
||||
|
||||
Long count = operatorCount.get(op);
|
||||
if (count == null)
|
||||
count = 0L;
|
||||
count++;
|
||||
operatorCount.put(op, count);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Map<Integer, Map<CigarOperator, Long>> result) {
|
||||
GATKReport report = GATKReport.newSimpleReport("Events", "Position", "Event", "Observations");
|
||||
for (Map.Entry<Integer, Map<CigarOperator, Long>> entry : result.entrySet()) {
|
||||
int position = entry.getKey();
|
||||
Map<CigarOperator, Long> operatorCount = entry.getValue();
|
||||
|
||||
for (Map.Entry<CigarOperator, Long> subEntry: operatorCount.entrySet()) {
|
||||
String operator = subEntry.getKey().name();
|
||||
Long observations = subEntry.getValue();
|
||||
report.addRow(position, operator, observations);
|
||||
}
|
||||
}
|
||||
report.print(out);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* Number of reads ending in each category.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ReadEndIndels \
|
||||
* -o output.txt \
|
||||
* -I input.bam \
|
||||
* [-L input.intervals]
|
||||
* </pre>
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountTerminusEventWalker extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {
|
||||
public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
List<CigarElement> cigarElements = read.getCigar().getCigarElements();
|
||||
|
||||
CigarElement lastElement = null;
|
||||
for (CigarElement element : cigarElements) {
|
||||
if (element.getOperator() != CigarOperator.HARD_CLIP)
|
||||
lastElement = element;
|
||||
}
|
||||
|
||||
if (lastElement == null)
|
||||
throw new UserException.MalformedBAM(read, "read does not have any bases, it's all hard clips");
|
||||
|
||||
long endsInIndel = lastElement.getOperator() == CigarOperator.INSERTION || lastElement.getOperator() == CigarOperator.DELETION? 1 : 0;
|
||||
long endsInSC = lastElement.getOperator() == CigarOperator.SOFT_CLIP ? 1 : 0;
|
||||
|
||||
return new Pair<Long, Long>(endsInIndel, endsInSC);
|
||||
}
|
||||
|
||||
public Pair<Long, Long> reduceInit() { return new Pair<Long, Long>(0L, 0L); }
|
||||
|
||||
public Pair<Long, Long> reduce(Pair<Long, Long> value, Pair<Long, Long> sum) {
|
||||
sum.set(sum.getFirst() + value.getFirst(), sum.getSecond() + value.getSecond());
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Pair<Long, Long> result) {
|
||||
System.out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
|
||||
}
|
||||
}
|
||||
|
|
@ -243,7 +243,7 @@ public class MathUtils {
|
|||
|
||||
double maxValue = arrayMax(log10p, finish);
|
||||
if(maxValue == Double.NEGATIVE_INFINITY)
|
||||
return sum;
|
||||
return maxValue;
|
||||
|
||||
for (int i = start; i < finish; i++) {
|
||||
sum += Math.pow(10.0, log10p[i] - maxValue);
|
||||
|
|
|
|||
|
|
@ -642,7 +642,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
boolean stillClipping = true;
|
||||
|
||||
while ( stillClipping ) {
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
for ( final Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
continue;
|
||||
|
||||
|
|
|
|||
|
|
@ -783,4 +783,43 @@ public class ReadUtils {
|
|||
return location;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a map with each event in the read (cigar operator) and the read coordinate where it happened.
|
||||
*
|
||||
* Example:
|
||||
* D -> 2, 34, 75
|
||||
* I -> 55
|
||||
* S -> 0, 101
|
||||
* H -> 101
|
||||
*
|
||||
* @param read the read
|
||||
* @return a map with the properties described above. See example
|
||||
*/
|
||||
public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases (GATKSAMRecord read) {
|
||||
Map<CigarOperator, ArrayList<Integer>> events = new HashMap<CigarOperator, ArrayList<Integer>>();
|
||||
|
||||
int position = 0;
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
CigarOperator op = cigarElement.getOperator();
|
||||
if (op.consumesReadBases()) {
|
||||
ArrayList<Integer> list = events.get(op);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
events.put(op, list);
|
||||
}
|
||||
for (int i = position; i < cigarElement.getLength(); i++)
|
||||
list.add(position++);
|
||||
}
|
||||
else {
|
||||
ArrayList<Integer> list = events.get(op);
|
||||
if (list == null) {
|
||||
list = new ArrayList<Integer>();
|
||||
events.put(op, list);
|
||||
}
|
||||
list.add(position);
|
||||
}
|
||||
}
|
||||
return events;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ import java.util.*;
|
|||
*
|
||||
* @author depristo
|
||||
*/
|
||||
public class VariantContext implements Feature { // to enable tribble intergration
|
||||
public class VariantContext implements Feature { // to enable tribble integration
|
||||
protected CommonInfo commonInfo = null;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
|
||||
|
|
@ -377,7 +377,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
*
|
||||
* Not currently supported:
|
||||
*
|
||||
* Heterozygous sequencea
|
||||
* Heterozygous sequence
|
||||
* The term heterozygous is used to specify a region detected by certain methods that do not
|
||||
* resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
|
||||
* sequence must be provided to define a sequence context for the variation.
|
||||
|
|
@ -922,6 +922,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return number of hom var calls
|
||||
*/
|
||||
public int getHomVarCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -931,6 +932,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return number of mixed calls
|
||||
*/
|
||||
public int getMixedCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -620,8 +620,8 @@ public class VariantContextUtils {
|
|||
String key = p.getKey();
|
||||
// if we don't like the key already, don't go anywhere
|
||||
if ( ! inconsistentAttributes.contains(key) ) {
|
||||
boolean alreadyFound = attributes.containsKey(key);
|
||||
Object boundValue = attributes.get(key);
|
||||
final boolean alreadyFound = attributes.containsKey(key);
|
||||
final Object boundValue = attributes.get(key);
|
||||
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
|
||||
|
||||
if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
|
||||
|
|
@ -802,17 +802,16 @@ public class VariantContextUtils {
|
|||
return inputVC;
|
||||
}
|
||||
|
||||
public static VariantContext reverseTrimAlleles(VariantContext inputVC) {
|
||||
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
|
||||
final int trimExtent = AbstractVCFCodec.computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true, -1);
|
||||
if ( trimExtent <= 0 )
|
||||
return inputVC;
|
||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
|
|
@ -820,8 +819,8 @@ public class VariantContextUtils {
|
|||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||
Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
|
|
@ -829,9 +828,8 @@ public class VariantContextUtils {
|
|||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||
|
||||
List<Allele> originalAlleles = genotype.getAlleles();
|
||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
final List<Allele> originalAlleles = genotype.getAlleles();
|
||||
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( final Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
|
|
@ -841,8 +839,7 @@ public class VariantContextUtils {
|
|||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length());
|
||||
return builder.alleles(alleles).genotypes(genotypes).make();
|
||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class ArtificialReadPileupTestProvider {
|
||||
final int contigStart = 1;
|
||||
final int contigStop = 10;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
|
||||
// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||
final String artificialContig = "chr1";
|
||||
// final int artificialContigIndex = 0;
|
||||
final String artificialReadName = "synth";
|
||||
final int artificialRefStart = 1;
|
||||
final int artificialMappingQuality = 60;
|
||||
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
||||
List<SAMReadGroupRecord> sampleRGs;
|
||||
|
||||
final String refBases = "AGGATACTGT";
|
||||
List<String> sampleNames = new ArrayList<String>();
|
||||
private String sampleName(int i) { return sampleNames.get(i); }
|
||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||
public final int offset = 5;
|
||||
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
||||
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||
|
||||
|
||||
public ArtificialReadPileupTestProvider(int numSamples, final String SAMPLE_PREFIX) {
|
||||
sampleRGs = new ArrayList<SAMReadGroupRecord>();
|
||||
|
||||
for ( int i = 0; i < numSamples; i++ ) {
|
||||
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
|
||||
SAMReadGroupRecord rg = createRG(sampleName(i));
|
||||
sampleRGs.add(rg);
|
||||
sample2RG.put(sampleName(i), rg);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public List<String> getSampleNames() {
|
||||
return sampleNames;
|
||||
}
|
||||
public byte getRefByte() {
|
||||
return refBases.substring(offset,offset+1).getBytes()[0];
|
||||
}
|
||||
|
||||
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
|
||||
|
||||
ArrayList vcAlleles = new ArrayList<Allele>();
|
||||
Allele refAllele, altAllele;
|
||||
if (eventLength == 0) {// SNP case
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
|
||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||
|
||||
} else if (eventLength>0){
|
||||
// insertion
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||
}
|
||||
else {
|
||||
// deletion
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
int stop = loc.getStart();
|
||||
vcAlleles.add(refAllele);
|
||||
vcAlleles.add(altAllele);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||
builder.noGenotypes();
|
||||
|
||||
final VariantContext vc = builder.make();
|
||||
|
||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||
|
||||
for (String sample: sampleNames) {
|
||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
|
||||
contexts.put(sample,context);
|
||||
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
private SAMReadGroupRecord createRG(String name) {
|
||||
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
|
||||
rg.setPlatform("ILLUMINA");
|
||||
rg.setSample(name);
|
||||
return rg;
|
||||
}
|
||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
||||
int[] numReadsPerAllele, String sample) {
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
int readStart = contigStart;
|
||||
int offset = (contigStop-contigStart+1)/2;
|
||||
int refAlleleLength = 0;
|
||||
int readCounter = 0;
|
||||
int alleleCounter = 0;
|
||||
for (Allele allele: vc.getAlleles()) {
|
||||
if (allele.isReference())
|
||||
refAlleleLength = allele.getBases().length;
|
||||
|
||||
int alleleLength = allele.getBases().length;
|
||||
|
||||
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
byte[] readQuals = new byte[readBases.length];
|
||||
Arrays.fill(readQuals, (byte) 50);
|
||||
|
||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||
read.setBaseQualities(readQuals);
|
||||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
if (allele.isReference())
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
if (isBeforeDeletion || isBeforeInsertion)
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
else // SNP case
|
||||
read.setCigarString(readBases.length+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
read.setReferenceName(loc.getContig());
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
}
|
||||
alleleCounter++;
|
||||
}
|
||||
|
||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||
}
|
||||
|
||||
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
||||
// create haplotype based on a particular allele
|
||||
String prefix = refBases.substring(offset);
|
||||
String alleleBases = new String(allele.getBases());
|
||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||
|
||||
return (prefix+alleleBases+postfix).getBytes();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -45,51 +45,23 @@ import org.testng.annotations.Test;
|
|||
*/
|
||||
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
||||
|
||||
final int contigStart = 1;
|
||||
final int contigStop = 10;
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop-contigStart+1);
|
||||
final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
|
||||
final String artificialContig = "chr1";
|
||||
final int artificialContigIndex = 0;
|
||||
final String artificialReadName = "synth";
|
||||
final int artificialRefStart = 1;
|
||||
final int artificialMappingQuality = 60;
|
||||
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
|
||||
|
||||
final String refBases = "AGGATACTGT";
|
||||
final String SAMPLE_PREFIX = "sample";
|
||||
|
||||
List<String> sampleNames = new ArrayList<String>();
|
||||
final int nSamples = 1;
|
||||
final int numReadsPerAllele = 10;
|
||||
|
||||
List<SAMReadGroupRecord> sampleRGs;
|
||||
|
||||
private String sampleName(int i) { return sampleNames.get(i); }
|
||||
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
|
||||
final int[] numReadsPerAllele = new int[]{10,10};
|
||||
final String SAMPLE_PREFIX = "sample";
|
||||
|
||||
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
final Logger logger = Logger.getLogger(Walker.class);
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger);
|
||||
final int offset = 5;
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
|
||||
final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
|
||||
final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
|
||||
|
||||
@BeforeSuite
|
||||
ArtificialReadPileupTestProvider pileupProvider;
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
sampleRGs = new ArrayList<SAMReadGroupRecord>();
|
||||
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
|
||||
SAMReadGroupRecord rg = createRG(sampleName(i));
|
||||
sampleRGs.add(rg);
|
||||
sample2RG.put(sampleName(i), rg);
|
||||
}
|
||||
|
||||
pileupProvider = new ArtificialReadPileupTestProvider(nSamples, SAMPLE_PREFIX);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicConsensusCounts() {
|
||||
// 4 inserted bases, min cnt = 10
|
||||
|
|
@ -107,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
eventLength = 3;
|
||||
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(offset,offset+eventLength));
|
||||
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
|
||||
|
||||
// same with min Reads = 11
|
||||
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
|
||||
|
|
@ -121,112 +93,10 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
|
||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(genomeLocParser, true, minCnt, minFraction);
|
||||
return counter.computeConsensusAlleles(referenceContext,getContextFromAlleles(eventLength, isInsertion, altBases), AlignmentContextUtils.ReadOrientation.COMPLETE);
|
||||
|
||||
}
|
||||
private Map<String,AlignmentContext> getContextFromAlleles(int eventLength, boolean isInsertion, String altBases) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
|
||||
|
||||
ArrayList vcAlleles = new ArrayList<Allele>();
|
||||
Allele refAllele, altAllele;
|
||||
if (isInsertion) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||
}
|
||||
else {
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+eventLength),true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
|
||||
int stop = loc.getStart();
|
||||
vcAlleles.add(refAllele);
|
||||
vcAlleles.add(altAllele);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||
builder.noGenotypes();
|
||||
|
||||
final VariantContext vc = builder.make();
|
||||
|
||||
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
|
||||
|
||||
for (String sample: sampleNames) {
|
||||
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
|
||||
contexts.put(sample,context);
|
||||
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
private SAMReadGroupRecord createRG(String name) {
|
||||
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
|
||||
rg.setPlatform("ILLUMINA");
|
||||
rg.setSample(name);
|
||||
return rg;
|
||||
}
|
||||
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
|
||||
int numReads, String sample) {
|
||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||
int readStart = contigStart;
|
||||
int offset = (contigStop-contigStart+1)/2;
|
||||
int refAlleleLength = 0;
|
||||
int readCounter = 0;
|
||||
for (Allele allele: vc.getAlleles()) {
|
||||
if (allele.isReference())
|
||||
refAlleleLength = allele.getBases().length;
|
||||
|
||||
int alleleLength = allele.getBases().length;
|
||||
|
||||
for ( int d = 0; d < numReads; d++ ) {
|
||||
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
|
||||
byte[] readQuals = new byte[readBases.length];
|
||||
Arrays.fill(readQuals,(byte)50);
|
||||
|
||||
GATKSAMRecord read = new GATKSAMRecord(header);
|
||||
read.setBaseQualities(readQuals);
|
||||
read.setReadBases(readBases);
|
||||
read.setReadName(artificialReadName+readCounter++);
|
||||
|
||||
boolean isBeforeDeletion = false, isBeforeInsertion = false;
|
||||
if (allele.isReference())
|
||||
read.setCigarString(readBases.length + "M");
|
||||
else {
|
||||
isBeforeDeletion = alleleLength<refAlleleLength;
|
||||
isBeforeInsertion = alleleLength>refAlleleLength;
|
||||
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
|
||||
(readBases.length-offset)+"M");
|
||||
}
|
||||
|
||||
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
|
||||
read.setReadPairedFlag(false);
|
||||
read.setAlignmentStart(readStart);
|
||||
read.setMappingQuality(artificialMappingQuality);
|
||||
read.setReferenceName(loc.getContig());
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
|
||||
|
||||
|
||||
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
|
||||
}
|
||||
}
|
||||
|
||||
return new ReadBackedPileupImpl(loc,pileupElements);
|
||||
}
|
||||
|
||||
byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
|
||||
// create haplotype based on a particular allele
|
||||
String prefix = refBases.substring(offset);
|
||||
String alleleBases = new String(allele.getBases());
|
||||
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
|
||||
|
||||
return (prefix+alleleBases+postfix).getBytes();
|
||||
|
||||
|
||||
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction);
|
||||
return counter.computeConsensusAlleles(pileupProvider.referenceContext,
|
||||
pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele),
|
||||
AlignmentContextUtils.ReadOrientation.COMPLETE);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -468,6 +468,28 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testGetGenotypeCounts() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
|
||||
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
|
||||
Genotype g4 = new Genotype("A.", Arrays.asList(Aref, Allele.NO_CALL));
|
||||
Genotype g5 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
|
||||
|
||||
// we need to create a new VariantContext each time
|
||||
VariantContext vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
Assert.assertEquals(1, vc.getHetCount());
|
||||
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
Assert.assertEquals(1, vc.getHomRefCount());
|
||||
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
Assert.assertEquals(1, vc.getHomVarCount());
|
||||
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
Assert.assertEquals(1, vc.getMixedCount());
|
||||
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
|
||||
Assert.assertEquals(1, vc.getNoCallCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCFfromGenotypes() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T, del);
|
||||
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
|
||||
|
|
|
|||
Loading…
Reference in New Issue