Merge branch 'master' of ssh://ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Christopher Hartl 2012-04-30 12:16:30 -04:00
commit 7d029b9a28
11 changed files with 462 additions and 163 deletions

View File

@ -78,8 +78,10 @@ public class CycleCovariate implements StandardCovariate {
increment = readOrderFactor;
}
for (int i = 0; i < read.getReadLength(); i++) {
cycles[i] = BitSetUtils.bitSetFrom(cycle);
final int CUSHION = 4;
final int MAX_CYCLE = read.getReadLength() - CUSHION - 1;
for (int i = 0; i < MAX_CYCLE; i++) {
cycles[i] = (i<CUSHION || i>MAX_CYCLE) ? null : BitSetUtils.bitSetFrom(cycle);
cycle += increment;
}
}

View File

@ -0,0 +1,94 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
* Walks over the input data set, counting how many times each cigar event (insertion, deletion, soft-clip, etc.) is observed at each read coordinate
*
* <h2>Input</h2>
* <p>
* One or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of reads ending in each category.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountReadEvents \
* -o output.grp \
* -I input.bam \
* [-L input.intervals]
* </pre>
*/
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReadEventsWalker extends ReadWalker<Map<CigarOperator, ArrayList<Integer>> , Map<Integer, Map<CigarOperator, Long>>> {
    @Output (doc = "GATKReport table output")
    PrintStream out;

    /** Per read: map each cigar operator to the read coordinates where it occurs. */
    public Map<CigarOperator, ArrayList<Integer>> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
        return ReadUtils.getCigarOperatorForAllBases(read);
    }

    /** Start with an empty position -> (operator -> count) table. */
    public Map<Integer, Map<CigarOperator, Long>> reduceInit() {
        return new HashMap<Integer, Map<CigarOperator, Long>>();
    }

    /** Fold one read's events into the running per-coordinate tallies. */
    public Map<Integer, Map<CigarOperator, Long>> reduce(Map<CigarOperator, ArrayList<Integer>> value, Map<Integer, Map<CigarOperator, Long>> sum) {
        for (Map.Entry<CigarOperator, ArrayList<Integer>> operatorEntry : value.entrySet()) {
            final CigarOperator operator = operatorEntry.getKey();
            for (int coordinate : operatorEntry.getValue()) {
                // lazily create the per-coordinate counter map
                Map<CigarOperator, Long> countsHere = sum.get(coordinate);
                if (countsHere == null) {
                    countsHere = new HashMap<CigarOperator, Long>();
                    sum.put(coordinate, countsHere);
                }
                // bump the counter for this operator (starting from 0 if unseen)
                final Long previous = countsHere.get(operator);
                countsHere.put(operator, previous == null ? 1L : previous + 1L);
            }
        }
        return sum;
    }

    /** Emit the accumulated table as a GATKReport with Position/Event/Observations columns. */
    @Override
    public void onTraversalDone(Map<Integer, Map<CigarOperator, Long>> result) {
        final GATKReport report = GATKReport.newSimpleReport("Events", "Position", "Event", "Observations");
        for (Map.Entry<Integer, Map<CigarOperator, Long>> positionEntry : result.entrySet()) {
            final int position = positionEntry.getKey();
            for (Map.Entry<CigarOperator, Long> operatorEntry : positionEntry.getValue().entrySet()) {
                report.addRow(position, operatorEntry.getKey().name(), operatorEntry.getValue());
            }
        }
        report.print(out);
    }
}

View File

@ -0,0 +1,70 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
/**
* Walks over the input data set, counting the number of reads ending in insertions/deletions or soft-clips
*
* <h2>Input</h2>
* <p>
* One or more BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of reads ending in each category.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountTerminusEvent \
* -o output.txt \
* -I input.bam \
* [-L input.intervals]
* </pre>
*/
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountTerminusEventWalker extends ReadWalker<Pair<Long, Long>, Pair<Long, Long>> {

    /** Classify one read by the last non-hard-clip element of its cigar: (endsInIndel, endsInSoftClip). */
    public Pair<Long, Long> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
        final List<CigarElement> cigarElements = read.getCigar().getCigarElements();

        // scan backwards for the last element that is not a hard clip
        CigarElement terminalElement = null;
        for (int i = cigarElements.size() - 1; i >= 0; i--) {
            final CigarElement candidate = cigarElements.get(i);
            if (candidate.getOperator() != CigarOperator.HARD_CLIP) {
                terminalElement = candidate;
                break;
            }
        }

        if (terminalElement == null)
            throw new UserException.MalformedBAM(read, "read does not have any bases, it's all hard clips");

        final CigarOperator terminalOp = terminalElement.getOperator();
        final long endsInIndel = (terminalOp == CigarOperator.INSERTION || terminalOp == CigarOperator.DELETION) ? 1 : 0;
        final long endsInSoftClip = terminalOp == CigarOperator.SOFT_CLIP ? 1 : 0;
        return new Pair<Long, Long>(endsInIndel, endsInSoftClip);
    }

    /** Both counters start at zero. */
    public Pair<Long, Long> reduceInit() { return new Pair<Long, Long>(0L, 0L); }

    /** Accumulate the per-read indicators into the running totals. */
    public Pair<Long, Long> reduce(Pair<Long, Long> value, Pair<Long, Long> sum) {
        sum.set(sum.getFirst() + value.getFirst(), sum.getSecond() + value.getSecond());
        return sum;
    }

    @Override
    public void onTraversalDone(Pair<Long, Long> result) {
        System.out.println(String.format("\tReads ending in indels : %d\n\tReads ending in soft-clips: %d\n", result.getFirst(), result.getSecond()));
    }
}

View File

@ -243,7 +243,7 @@ public class MathUtils {
double maxValue = arrayMax(log10p, finish);
if(maxValue == Double.NEGATIVE_INFINITY)
return sum;
return maxValue;
for (int i = start; i < finish; i++) {
sum += Math.pow(10.0, log10p[i] - maxValue);

View File

@ -642,7 +642,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
boolean stillClipping = true;
while ( stillClipping ) {
for ( Allele a : unclippedAlleles ) {
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
continue;

View File

@ -783,4 +783,43 @@ public class ReadUtils {
return location;
}
/**
* Creates a map with each event in the read (cigar operator) and the read coordinate where it happened.
*
* Example:
* D -> 2, 34, 75
* I -> 55
* S -> 0, 101
* H -> 101
*
* @param read the read
* @return a map with the properties described above. See example
*/
/**
 * Creates a map with each event in the read (cigar operator) and the read coordinate where it happened.
 *
 * Example:
 *   D -> 2, 34, 75
 *   I -> 55
 *   S -> 0, 101
 *   H -> 101
 *
 * Operators that consume read bases get one entry per base they cover; operators
 * that do not consume read bases get a single entry at the current read coordinate.
 *
 * @param read the read
 * @return a map from cigar operator to the list of read coordinates where it was observed. See example
 */
public static Map<CigarOperator, ArrayList<Integer>> getCigarOperatorForAllBases (GATKSAMRecord read) {
    final Map<CigarOperator, ArrayList<Integer>> events = new HashMap<CigarOperator, ArrayList<Integer>>();
    int position = 0;                                       // current 0-based coordinate within the read
    for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
        final CigarOperator op = cigarElement.getOperator();
        // get-or-create the coordinate list for this operator (shared by both branches below)
        ArrayList<Integer> list = events.get(op);
        if (list == null) {
            list = new ArrayList<Integer>();
            events.put(op, list);
        }
        if (op.consumesReadBases()) {
            // BUGFIX: the loop previously ran "i = position; i < length" so any
            // element after the first was recorded with too few (often zero)
            // entries; iterate once per base of THIS element instead.
            for (int i = 0; i < cigarElement.getLength(); i++)
                list.add(position++);
        }
        else {
            // element consumes no read bases (e.g. D/N/H/P): record it once at the current coordinate
            list.add(position);
        }
    }
    return events;
}
}

View File

@ -160,7 +160,7 @@ import java.util.*;
*
* @author depristo
*/
public class VariantContext implements Feature { // to enable tribble intergration
public class VariantContext implements Feature { // to enable tribble integration
protected CommonInfo commonInfo = null;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
@ -377,7 +377,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*
* Not currently supported:
*
* Heterozygous sequencea
* Heterozygous sequence
* The term heterozygous is used to specify a region detected by certain methods that do not
* resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
* sequence must be provided to define a sequence context for the variation.
@ -922,6 +922,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return number of hom var calls
*/
public int getHomVarCount() {
// genotype counts are computed lazily; populate the cache before indexing into it
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()];
}
@ -931,6 +932,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return number of mixed calls
*/
public int getMixedCount() {
// genotype counts are computed lazily; populate the cache before indexing into it
calculateGenotypeCounts();
return genotypeCounts[Genotype.Type.MIXED.ordinal()];
}

View File

@ -620,8 +620,8 @@ public class VariantContextUtils {
String key = p.getKey();
// if we don't like the key already, don't go anywhere
if ( ! inconsistentAttributes.contains(key) ) {
boolean alreadyFound = attributes.containsKey(key);
Object boundValue = attributes.get(key);
final boolean alreadyFound = attributes.containsKey(key);
final Object boundValue = attributes.get(key);
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
@ -802,17 +802,16 @@ public class VariantContextUtils {
return inputVC;
}
public static VariantContext reverseTrimAlleles(VariantContext inputVC) {
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
// see if we need to trim common reference base from all alleles
final int trimExtent = AbstractVCFCodec.computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true, -1);
if ( trimExtent <= 0 )
return inputVC;
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
return inputVC;
final List<Allele> alleles = new ArrayList<Allele>();
GenotypesContext genotypes = GenotypesContext.create();
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
final GenotypesContext genotypes = GenotypesContext.create();
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
@ -820,8 +819,8 @@ public class VariantContextUtils {
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
Allele trimmedAllele = Allele.create(newBases, a.isReference());
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
@ -829,9 +828,8 @@ public class VariantContextUtils {
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
List<Allele> originalAlleles = genotype.getAlleles();
List<Allele> trimmedAlleles = new ArrayList<Allele>();
final List<Allele> originalAlleles = genotype.getAlleles();
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
@ -841,8 +839,7 @@ public class VariantContextUtils {
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
}
final VariantContextBuilder builder = new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length());
return builder.alleles(alleles).genotypes(genotypes).make();
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
}
public static GenotypesContext stripPLs(GenotypesContext genotypes) {

View File

@ -0,0 +1,203 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import java.util.*;
/**
 * Test fixture that fabricates a tiny artificial reference contig ("chr1", 10bp)
 * plus synthetic reads/pileups carrying a chosen SNP or indel allele, so that
 * genotype-likelihood code can be exercised without real BAM/reference inputs.
 */
public class ArtificialReadPileupTestProvider {
// 1-based bounds of the single artificial contig
final int contigStart = 1;
final int contigStop = 10;
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop - contigStart + 1);
// final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
final String artificialContig = "chr1";
// final int artificialContigIndex = 0;
final String artificialReadName = "synth";
final int artificialRefStart = 1;
final int artificialMappingQuality = 60;
// per-sample read groups, keyed by sample name
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
List<SAMReadGroupRecord> sampleRGs;
// the entire reference sequence of the artificial contig
final String refBases = "AGGATACTGT";
List<String> sampleNames = new ArrayList<String>();
private String sampleName(int i) { return sampleNames.get(i); }
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
// position of the variant site within refBases (also used as the locus coordinate)
public final int offset = 5;
public final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
// single-base locus at the variant site, and the full-contig window around it
public final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
public final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
public final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
/**
 * Creates numSamples samples named SAMPLE_PREFIX + zero-padded index, each with
 * its own read group.
 */
public ArtificialReadPileupTestProvider(int numSamples, final String SAMPLE_PREFIX) {
sampleRGs = new ArrayList<SAMReadGroupRecord>();
for ( int i = 0; i < numSamples; i++ ) {
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
SAMReadGroupRecord rg = createRG(sampleName(i));
sampleRGs.add(rg);
sample2RG.put(sampleName(i), rg);
}
}
public List<String> getSampleNames() {
return sampleNames;
}
// reference base at the variant site
public byte getRefByte() {
return refBases.substring(offset,offset+1).getBytes()[0];
}
/**
 * Builds one AlignmentContext per sample containing a pileup of synthetic reads
 * supporting the ref and alt alleles of the requested event.
 *
 * @param eventLength       0 for a SNP, >0 for an insertion of that length, <0 for a deletion
 * @param altBases          bases supplying the alt allele (first base for a SNP, first
 *                          eventLength bases for an insertion)
 * @param numReadsPerAllele reads to generate per allele, indexed in vc.getAlleles() order
 */
public Map<String,AlignmentContext> getAlignmentContextFromAlleles(int eventLength, String altBases, int[] numReadsPerAllele) {
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
ArrayList vcAlleles = new ArrayList<Allele>();
Allele refAllele, altAllele;
if (eventLength == 0) {// SNP case
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
altAllele = Allele.create(altBases.substring(0,1), false);
} else if (eventLength>0){
// insertion: null ref allele, alt carries the inserted bases
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(altBases.substring(0,eventLength), false);
}
else {
// deletion: ref carries the deleted bases, null alt allele
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
int stop = loc.getStart();
vcAlleles.add(refAllele);
vcAlleles.add(altAllele);
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(referenceContext.getBase());
builder.noGenotypes();
final VariantContext vc = builder.make();
// one pileup/context per sample, all describing the same event
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
for (String sample: sampleNames) {
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
contexts.put(sample,context);
}
return contexts;
}
// builds a minimal Illumina read group for the given sample
private SAMReadGroupRecord createRG(String name) {
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
rg.setPlatform("ILLUMINA");
rg.setSample(name);
return rg;
}
/**
 * Generates a pileup of synthetic reads for the given variant: numReadsPerAllele[i]
 * reads supporting allele i (in vc.getAlleles() order), all Q50, MQ60, forward
 * strand, starting at the contig start.
 */
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
int[] numReadsPerAllele, String sample) {
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readStart = contigStart;
// NOTE: this local 'offset' (midpoint of the contig) shadows the class field 'offset'
int offset = (contigStop-contigStart+1)/2;
int refAlleleLength = 0;
int readCounter = 0;
int alleleCounter = 0;
for (Allele allele: vc.getAlleles()) {
// relies on the reference allele appearing first so refAlleleLength is set
// before the alt alleles are processed
if (allele.isReference())
refAlleleLength = allele.getBases().length;
int alleleLength = allele.getBases().length;
for ( int d = 0; d < numReadsPerAllele[alleleCounter]; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
byte[] readQuals = new byte[readBases.length];
Arrays.fill(readQuals, (byte) 50);
GATKSAMRecord read = new GATKSAMRecord(header);
read.setBaseQualities(readQuals);
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
if (allele.isReference())
read.setCigarString(readBases.length + "M");
else {
// alt allele shorter than ref => deletion; longer => insertion
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
if (isBeforeDeletion || isBeforeInsertion)
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
else // SNP case
read.setCigarString(readBases.length+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
read.setReferenceName(loc.getContig());
read.setReadNegativeStrandFlag(false);
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
// NOTE(review): altBases.substring(0,alleleLength) would throw if the ref
// allele is longer than altBases (large deletions) — confirm callers never do that
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
}
alleleCounter++;
}
return new ReadBackedPileupImpl(loc,pileupElements);
}
/**
 * Builds the read bases for a haplotype carrying the given allele at 'offset'.
 * NOTE(review): 'prefix' is refBases.substring(offset) — the reference SUFFIX
 * starting at offset, not a prefix (which would be substring(0, offset)).
 * Looks suspicious; confirm this matches the intended haplotype layout.
 */
private byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
// create haplotype based on a particular allele
String prefix = refBases.substring(offset);
String alleleBases = new String(allele.getBases());
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
return (prefix+alleleBases+postfix).getBytes();
}
}

View File

@ -45,51 +45,23 @@ import org.testng.annotations.Test;
*/
public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
final int contigStart = 1;
final int contigStop = 10;
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigStop-contigStart+1);
final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
final String artificialContig = "chr1";
final int artificialContigIndex = 0;
final String artificialReadName = "synth";
final int artificialRefStart = 1;
final int artificialMappingQuality = 60;
Map<String, SAMReadGroupRecord> sample2RG = new HashMap<String, SAMReadGroupRecord>();
final String refBases = "AGGATACTGT";
final String SAMPLE_PREFIX = "sample";
List<String> sampleNames = new ArrayList<String>();
final int nSamples = 1;
final int numReadsPerAllele = 10;
List<SAMReadGroupRecord> sampleRGs;
private String sampleName(int i) { return sampleNames.get(i); }
private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); }
final int[] numReadsPerAllele = new int[]{10,10};
final String SAMPLE_PREFIX = "sample";
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
final IndelGenotypeLikelihoodsCalculationModel model = new IndelGenotypeLikelihoodsCalculationModel(UAC,logger);
final int offset = 5;
final GenomeLoc loc = genomeLocParser.createGenomeLoc(artificialContig,offset,offset);
final GenomeLoc window = genomeLocParser.createGenomeLoc(artificialContig,artificialRefStart,10);
final ReferenceContext referenceContext = new ReferenceContext(genomeLocParser,loc,window,this.refBases.getBytes());
@BeforeSuite
ArtificialReadPileupTestProvider pileupProvider;
@BeforeSuite
public void before() {
sampleRGs = new ArrayList<SAMReadGroupRecord>();
for ( int i = 0; i < nSamples; i++ ) {
sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i));
SAMReadGroupRecord rg = createRG(sampleName(i));
sampleRGs.add(rg);
sample2RG.put(sampleName(i), rg);
}
pileupProvider = new ArtificialReadPileupTestProvider(nSamples, SAMPLE_PREFIX);
}
@Test
public void testBasicConsensusCounts() {
// 4 inserted bases, min cnt = 10
@ -107,7 +79,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
eventLength = 3;
alleles = getConsensusAlleles(eventLength,false,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(offset,offset+eventLength));
Assert.assertEquals(alleles.get(0).getBaseString(), refBases.substring(pileupProvider.offset,pileupProvider.offset+eventLength));
// same with min Reads = 11
alleles = getConsensusAlleles(eventLength,false,11,0.1, altBases);
@ -121,112 +93,10 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
}
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(genomeLocParser, true, minCnt, minFraction);
return counter.computeConsensusAlleles(referenceContext,getContextFromAlleles(eventLength, isInsertion, altBases), AlignmentContextUtils.ReadOrientation.COMPLETE);
}
private Map<String,AlignmentContext> getContextFromAlleles(int eventLength, boolean isInsertion, String altBases) {
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
ArrayList vcAlleles = new ArrayList<Allele>();
Allele refAllele, altAllele;
if (isInsertion) {
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(altBases.substring(0,eventLength), false);
}
else {
refAllele =Allele.create(refBases.substring(offset,offset+eventLength),true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
int stop = loc.getStart();
vcAlleles.add(refAllele);
vcAlleles.add(altAllele);
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(referenceContext.getBase());
builder.noGenotypes();
final VariantContext vc = builder.make();
Map<String,AlignmentContext> contexts = new HashMap<String,AlignmentContext>();
for (String sample: sampleNames) {
AlignmentContext context = new AlignmentContext(loc, generateRBPForVariant(loc,vc, altBases, numReadsPerAllele, sample));
contexts.put(sample,context);
}
return contexts;
}
private SAMReadGroupRecord createRG(String name) {
SAMReadGroupRecord rg = new SAMReadGroupRecord(name);
rg.setPlatform("ILLUMINA");
rg.setSample(name);
return rg;
}
private ReadBackedPileup generateRBPForVariant( GenomeLoc loc, VariantContext vc, String altBases,
int numReads, String sample) {
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
int readStart = contigStart;
int offset = (contigStop-contigStart+1)/2;
int refAlleleLength = 0;
int readCounter = 0;
for (Allele allele: vc.getAlleles()) {
if (allele.isReference())
refAlleleLength = allele.getBases().length;
int alleleLength = allele.getBases().length;
for ( int d = 0; d < numReads; d++ ) {
byte[] readBases = trueHaplotype(allele, offset, refAlleleLength);
byte[] readQuals = new byte[readBases.length];
Arrays.fill(readQuals,(byte)50);
GATKSAMRecord read = new GATKSAMRecord(header);
read.setBaseQualities(readQuals);
read.setReadBases(readBases);
read.setReadName(artificialReadName+readCounter++);
boolean isBeforeDeletion = false, isBeforeInsertion = false;
if (allele.isReference())
read.setCigarString(readBases.length + "M");
else {
isBeforeDeletion = alleleLength<refAlleleLength;
isBeforeInsertion = alleleLength>refAlleleLength;
read.setCigarString(offset+"M"+ alleleLength + (isBeforeDeletion?"D":"I") +
(readBases.length-offset)+"M");
}
int eventLength = (isBeforeDeletion?refAlleleLength:(isBeforeInsertion?alleleLength:0));
read.setReadPairedFlag(false);
read.setAlignmentStart(readStart);
read.setMappingQuality(artificialMappingQuality);
read.setReferenceName(loc.getContig());
read.setReadNegativeStrandFlag(false);
read.setAttribute("RG", sampleRG(sample).getReadGroupId());
pileupElements.add(new PileupElement(read,offset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases.substring(0,alleleLength),eventLength));
}
}
return new ReadBackedPileupImpl(loc,pileupElements);
}
byte[] trueHaplotype(Allele allele, int offset, int refAlleleLength) {
// create haplotype based on a particular allele
String prefix = refBases.substring(offset);
String alleleBases = new String(allele.getBases());
String postfix = refBases.substring(offset+refAlleleLength,refBases.length());
return (prefix+alleleBases+postfix).getBytes();
final ConsensusAlleleCounter counter = new ConsensusAlleleCounter(pileupProvider.genomeLocParser, true, minCnt, minFraction);
return counter.computeConsensusAlleles(pileupProvider.referenceContext,
pileupProvider.getAlignmentContextFromAlleles(isInsertion?eventLength:-eventLength,altBases,numReadsPerAllele),
AlignmentContextUtils.ReadOrientation.COMPLETE);
}
}

View File

@ -468,6 +468,28 @@ public class VariantContextUnitTest extends BaseTest {
}
@Test
public void testGetGenotypeCounts() {
// one genotype of each category: hom-ref, het, hom-var, mixed (call + no-call), all no-call
List<Allele> alleles = Arrays.asList(Aref, T);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));
Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T));
Genotype g3 = new Genotype("TT", Arrays.asList(T, T));
Genotype g4 = new Genotype("A.", Arrays.asList(Aref, Allele.NO_CALL));
Genotype g5 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
// we need to create a new VariantContext each time (the counts appear to be
// cached lazily per instance — see calculateGenotypeCounts in the getters)
VariantContext vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
Assert.assertEquals(1, vc.getHetCount());
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
Assert.assertEquals(1, vc.getHomRefCount());
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
Assert.assertEquals(1, vc.getHomVarCount());
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
Assert.assertEquals(1, vc.getMixedCount());
vc = new VariantContextBuilder("foo", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g1,g2,g3,g4,g5).make();
Assert.assertEquals(1, vc.getNoCallCount());
}
@Test
public void testVCFfromGenotypes() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref));