Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Ami Levy-Moonshine 2013-01-30 18:44:03 -05:00
commit b875ff4c8d
93 changed files with 615 additions and 628 deletions

View File

@@ -708,6 +708,9 @@
<include name="**/sting/gatk/**/*.R"/>
<include name="**/sting/alignment/**/*.R"/>
</fileset>
<fileset dir="${java.public.source.dir}">
<include name="**/phonehome/*.key"/>
</fileset>
<fileset dir="${key.dir}">
<include name="**/*.key"/>
</fileset>

View File

@@ -54,7 +54,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@@ -55,7 +55,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;

View File

@@ -60,7 +60,7 @@ import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;

View File

@@ -47,7 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@@ -49,9 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@@ -77,7 +77,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.*;
import java.util.*;

View File

@@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;

View File

@@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;

View File

@@ -62,7 +62,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.*;
import java.io.PrintStream;
@@ -84,7 +84,7 @@ public class GenotypingEngine {
final List<Haplotype> haplotypes,
final List<String> samples,
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
@@ -124,12 +124,12 @@ public class GenotypingEngine {
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
final List<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
final List<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
if( !in_GGA_mode ) {
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final Map<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
@@ -142,7 +142,7 @@ public class GenotypingEngine {
if( compVC.getStart() == loc ) {
int alleleCount = 0;
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
ArrayList<Allele> alleleSet = new ArrayList<Allele>(2);
List<Allele> alleleSet = new ArrayList<Allele>(2);
alleleSet.add(compVC.getReference());
alleleSet.add(compAltAllele);
final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount;
@@ -180,7 +180,7 @@ public class GenotypingEngine {
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles.");
}
final HashMap<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
final Map<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
@@ -232,7 +232,7 @@ public class GenotypingEngine {
return genotypes;
}
private void validatePriorityList( final ArrayList<String> priorityList, final ArrayList<VariantContext> eventsAtThisLoc ) {
private void validatePriorityList( final List<String> priorityList, final List<VariantContext> eventsAtThisLoc ) {
for( final VariantContext vc : eventsAtThisLoc ) {
if( !priorityList.contains(vc.getSource()) ) {
throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles.");
@@ -251,7 +251,7 @@ public class GenotypingEngine {
private static Map<String, PerReadAlleleLikelihoodMap> filterToOnlyOverlappingReads( final GenomeLocParser parser,
final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final VariantContext call ) {
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
@@ -284,7 +284,7 @@ public class GenotypingEngine {
}
protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
final List<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
for( final Haplotype h : haplotypes ) {
for( final VariantContext vc : h.getEventMap().values() ) {
if( vc.isSymbolic() ) {
@@ -407,7 +407,7 @@ public class GenotypingEngine {
// remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event
for( final Haplotype h : haplotypes ) {
final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
final Map<Integer, VariantContext> eventMap = h.getEventMap();
if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) {
eventMap.remove(thisStart);
eventMap.remove(nextStart);
@@ -418,7 +418,7 @@ public class GenotypingEngine {
boolean containsStart = false;
boolean containsNext = false;
for( final Haplotype h : haplotypes ) {
final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
final Map<Integer, VariantContext> eventMap = h.getEventMap();
if( eventMap.containsKey(thisStart) ) { containsStart = true; }
if( eventMap.containsKey(nextStart) ) { containsNext = true; }
}
@@ -457,7 +457,7 @@ public class GenotypingEngine {
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
final List<Allele> mergedAlleles = new ArrayList<Allele>();
mergedAlleles.add( Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true ) );
mergedAlleles.add( Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false ) );
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
@@ -492,10 +492,10 @@ public class GenotypingEngine {
eventMapper.put(new Event(vc), new ArrayList<Haplotype>());
}
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
final List<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
for( final Haplotype h : haplotypes ) {
if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) {
final ArrayList<Allele> alleles = new ArrayList<Allele>(2);
final List<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(h.getArtificialRefAllele());
alleles.add(h.getArtificialAltAllele());
final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype")
@@ -572,13 +572,13 @@ public class GenotypingEngine {
}
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final List<List<Haplotype>> alleleMapper, final List<Haplotype> haplotypes ) {
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
final ArrayList<Allele> eventAllelesForSample = new ArrayList<Allele>();
final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
for( final Allele a : haplotypeAllelesForSample ) {
final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
final ArrayList<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
final List<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
if( mappedHaplotypes.contains(haplotype) ) {
eventAllelesForSample.add(eventAlleles.get(iii));
break;
@@ -597,8 +597,8 @@ public class GenotypingEngine {
return false;
}
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
final Map<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
int refPos = alignmentStartHapwrtRef;
if( refPos < 0 ) { return null; } // Protection against SW failures
@@ -609,7 +609,7 @@ public class GenotypingEngine {
switch( ce.getOperator() ) {
case I:
{
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final List<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
@@ -639,7 +639,7 @@ public class GenotypingEngine {
case D:
{
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final List<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen?
//if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength)
@@ -667,7 +667,7 @@ public class GenotypingEngine {
final byte altByte = alignment[alignmentPos];
if( refByte != altByte ) { // SNP!
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) {
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
final List<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refByte, true ) );
snpAlleles.add( Allele.create( altByte, false ) );
vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make());

View File

@@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
@@ -132,7 +133,7 @@ import java.util.*;
@PartitionBy(PartitionType.LOCUS)
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
@ActiveRegionTraversalParameters(extension=65, maxRegion=300)
//@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5)
@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=20)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
@@ -180,9 +181,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
protected int minKmer = 11;
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
/**
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
@@ -276,10 +274,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// bases with quality less than or equal to this value are trimmed off the tails of the reads
private static final byte MIN_TAIL_QUALITY = 20;
private ArrayList<String> samplesList = new ArrayList<String>();
private List<String> samplesList = new ArrayList<String>();
private final static double LOG_ONE_HALF = -Math.log10(2.0);
private final static double LOG_ONE_THIRD = -Math.log10(3.0);
private final ArrayList<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
private final List<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
@@ -431,7 +429,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
final ArrayList<Allele> alleles = new ArrayList<Allele>();
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
@@ -452,7 +450,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
return 1;
final ArrayList<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
final List<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : allelesToGenotype ) {
@@ -463,7 +461,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
allelesToGenotype.removeAll( activeAllelesToGenotype );
}
if( !activeRegion.isActive()) { return 0; } // Not active so nothing to do!
if( !activeRegion.isActive() ) { return 0; } // Not active so nothing to do!
if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do!
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do!
@@ -474,8 +472,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region
final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
//int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion ));
final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do!
activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region
@@ -487,10 +484,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// evaluate each sample's reads against all haplotypes
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) );
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
final List<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes );
for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine,
@@ -561,7 +558,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final List<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( activeRegion.getReads() );
activeRegion.clearReads();
@@ -571,18 +568,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
finalizedReadList.addAll( FragmentUtils.mergeOverlappingPairedFragments(overlappingPair) );
}
Collections.shuffle(finalizedReadList, GenomeAnalysisEngine.getRandomGenerator());
// Loop through the reads hard clipping the adaptor and low quality tails
final ArrayList<GATKSAMRecord> readsToUse = new ArrayList<GATKSAMRecord>(finalizedReadList.size());
final List<GATKSAMRecord> readsToUse = new ArrayList<GATKSAMRecord>(finalizedReadList.size());
for( final GATKSAMRecord myRead : finalizedReadList ) {
final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) );
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
// protect against INTERVALS with abnormally high coverage
// TODO BUGBUG: remove when positional downsampler is hooked up to ART/HC
if( activeRegion.readOverlapsRegion(clippedRead) &&
clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) {
readsToUse.add(clippedRead);
}
}
@@ -591,7 +583,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec);
@@ -607,10 +599,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReadSpanLoc().getContig(), padLeft, padRight);
}
private HashMap<String, ArrayList<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
final HashMap<String, ArrayList<GATKSAMRecord>> returnMap = new HashMap<String, ArrayList<GATKSAMRecord>>();
private Map<String, List<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<String, List<GATKSAMRecord>>();
for( final String sample : samplesList) {
ArrayList<GATKSAMRecord> readList = returnMap.get( sample );
List<GATKSAMRecord> readList = returnMap.get( sample );
if( readList == null ) {
readList = new ArrayList<GATKSAMRecord>();
returnMap.put(sample, readList);
@@ -711,24 +703,4 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
return new Cigar(readCigarElements);
}
/*
private int determinePruneFactorFromCoverage( final ActiveRegion activeRegion ) {
final ArrayList<Integer> readLengthDistribution = new ArrayList<Integer>();
for( final GATKSAMRecord read : activeRegion.getReads() ) {
readLengthDistribution.add(read.getReadLength());
}
final double meanReadLength = MathUtils.average(readLengthDistribution);
final double meanCoveragePerSample = (double) activeRegion.getReads().size() / ((double) activeRegion.getExtendedLoc().size() / meanReadLength) / (double) samplesList.size();
int PRUNE_FACTOR = 0;
if( meanCoveragePerSample > 8.5 ) {
PRUNE_FACTOR = (int) Math.floor( Math.sqrt( meanCoveragePerSample - 5.0 ) );
} else if( meanCoveragePerSample > 3.0 ) {
PRUNE_FACTOR = 1;
}
if( DEBUG ) { System.out.println(String.format("Mean coverage per sample = %.1f --> prune factor = %d", meanCoveragePerSample, PRUNE_FACTOR)); }
return PRUNE_FACTOR;
}
*/
}

View File

@@ -91,11 +91,11 @@ public class LikelihoodCalculationEngine {
DEBUG = debug;
}
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
int X_METRIC_LENGTH = 0;
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final Map.Entry<String, List<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final GATKSAMRecord read : sample.getValue() ) {
final int readLength = read.getReadLength();
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
@@ -115,7 +115,7 @@ public class LikelihoodCalculationEngine {
pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
// for each sample's reads
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
//if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
// evaluate the likelihood of the reads given those haplotypes
stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue()));
@@ -123,7 +123,7 @@ public class LikelihoodCalculationEngine {
return stratifiedReadMap;
}
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads) {
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List<Haplotype> haplotypes, final List<GATKSAMRecord> reads) {
// first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time)
final int numHaplotypes = haplotypes.size();
final Map<Haplotype, Allele> alleleVersions = new HashMap<Haplotype, Allele>(numHaplotypes);
@@ -235,72 +235,13 @@ public class LikelihoodCalculationEngine {
return likelihoodMatrix;
}
/*
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.size() <= haplotypes.size()"})
public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
// BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping
final int numHaplotypes = haplotypes.size();
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add(0); // always start with the reference haplotype
final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes];
int sampleCount = 0;
for( final String sample : sampleKeySet ) {
haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample );
}
int hap1 = 0;
int hap2 = 0;
int chosenSample = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int kkk = 0; kkk < sampleCount; kkk++ ) {
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) {
maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj];
hap1 = iii;
hap2 = jjj;
chosenSample = kkk;
}
}
}
}
if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY;
}
}
}
if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
for( final int hIndex : bestHaplotypesIndexList ) {
bestHaplotypes.add( haplotypes.get(hIndex) );
}
return bestHaplotypes;
}
*/
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.size() <= haplotypes.size()"})
public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation ) {
public List<Haplotype> selectBestHaplotypes( final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation ) {
final int numHaplotypes = haplotypes.size();
final Set<String> sampleKeySet = stratifiedReadMap.keySet();
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
final List<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
final List<Allele> haplotypesAsAlleles = new ArrayList<Allele>();
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); }
@ -332,7 +273,7 @@ public class LikelihoodCalculationEngine {
if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
final List<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
for( final int hIndex : bestHaplotypesIndexList ) {
bestHaplotypes.add( haplotypes.get(hIndex) );
}

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -67,5 +67,5 @@ public abstract class LocalAssemblyEngine {
protected LocalAssemblyEngine() {
}
public abstract ArrayList<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList<VariantContext> activeAllelesToGenotype);
public abstract List<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, List<VariantContext> activeAllelesToGenotype);
}

View File

@ -84,7 +84,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
private final boolean DEBUG;
private final PrintStream GRAPH_WRITER;
private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
private final List<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
private final int MIN_KMER;
private int PRUNE_FACTOR = 2;
@ -96,7 +96,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
MIN_KMER = minKmer;
}
public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
public List<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final List<VariantContext> activeAllelesToGenotype ) {
this.PRUNE_FACTOR = PRUNE_FACTOR;
// create the graphs
@ -168,7 +168,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
final ArrayList<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
edgesToRemove.add(e);
@ -177,7 +177,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
graph.removeAllEdges(edgesToRemove);
// Run through the graph and clean up singular orphaned nodes
final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
final List<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) {
verticesToRemove.add(v);
@ -187,7 +187,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
protected static void eliminateNonRefPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
final List<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
boolean done = false;
while( !done ) {
done = true;
@ -313,8 +313,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
@Ensures({"result.contains(refHaplotype)"})
private ArrayList<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
final ArrayList<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
private List<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
final List<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
// add the reference haplotype separately from all the others
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
@ -343,7 +343,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
// for GGA mode, add the desired allele into the haplotype if it isn't already present
if( !activeAllelesToGenotype.isEmpty() ) {
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
final Map<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
@ -378,7 +378,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return returnHaplotypes;
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final List<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );

View File

@ -75,7 +75,7 @@ import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.File;

View File

@ -46,7 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.Arrays;
import java.util.LinkedList;

View File

@ -46,7 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

View File

@ -59,7 +59,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.SampleUtils;
@ -320,7 +320,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
return GATKVariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
}
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {

View File

@ -46,7 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.phasing;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;

View File

@ -59,6 +59,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -66,7 +67,6 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import java.io.File;
@ -274,7 +274,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
datum.loc = getToolkit().getGenomeLocParser().createGenomeLoc(vc);
datum.originalQual = vc.getPhredScaledQual();
datum.isSNP = vc.isSNP() && vc.isBiallelic();
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
datum.isTransition = datum.isSNP && GATKVariantContextUtils.isTransition(vc);
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );

View File

@ -46,6 +46,7 @@
package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import net.sf.samtools.SAMTag;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
@ -57,6 +58,8 @@ import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* Utility methods to facilitate on-the-fly base quality score recalibration.
@ -78,10 +81,6 @@ public class BaseRecalibration {
private final double globalQScorePrior;
private final boolean emitOriginalQuals;
private final NestedIntegerArray<Double> globalDeltaQs;
private final NestedIntegerArray<Double> deltaQReporteds;
/**
* Constructor using a GATK Report file
*
@ -105,44 +104,6 @@ public class BaseRecalibration {
this.preserveQLessThan = preserveQLessThan;
this.globalQScorePrior = globalQScorePrior;
this.emitOriginalQuals = emitOriginalQuals;
logger.info("Calculating cached tables...");
//
// Create a NestedIntegerArray<Double> that maps from rgKey x errorModel -> double,
// where the double is the result of this calculation. The entire calculation can
// be done upfront, on initialization of this BaseRecalibration structure
//
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
globalDeltaQs = new NestedIntegerArray<Double>( byReadGroupTable.getDimensions() );
logger.info("Calculating global delta Q table...");
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byReadGroupTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[1];
final double globalDeltaQ = calculateGlobalDeltaQ(rgKey, EventType.eventFrom(eventIndex));
globalDeltaQs.put(globalDeltaQ, rgKey, eventIndex);
}
// The calculation of the deltaQ report is constant. key[0] and key[1] are the read group and qual, respectively
// and globalDeltaQ is a constant for the read group. So technically the delta Q reported is simply a lookup
// into a matrix indexed by rgGroup, qual, and event type.
// the code below actually creates this cache with a NestedIntegerArray calling into the actual
// calculateDeltaQReported code.
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
deltaQReporteds = new NestedIntegerArray<Double>( byQualTable.getDimensions() );
logger.info("Calculating delta Q reported table...");
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int qual = leaf.keys[1];
final int eventIndex = leaf.keys[2];
final EventType event = EventType.eventFrom(eventIndex);
final double globalDeltaQ = getGlobalDeltaQ(rgKey, event);
final double deltaQReported = calculateDeltaQReported(rgKey, qual, event, globalDeltaQ, (byte)qual);
deltaQReporteds.put(deltaQReported, rgKey, qual, eventIndex);
}
logger.info("done calculating cache");
}
/**
@ -189,29 +150,37 @@ public class BaseRecalibration {
// the rg key is constant over the whole read, the global deltaQ is too
final int rgKey = fullReadKeySet[0][0];
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
final double globalDeltaQ = getGlobalDeltaQ(rgKey, errorModel);
if( empiricalQualRG != null ) {
final double epsilon = ( globalQScorePrior > 0.0 && errorModel.equals(EventType.BASE_SUBSTITUTION) ? globalQScorePrior : empiricalQualRG.getEstimatedQReported() );
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
final byte origQual = quals[offset];
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
final byte origQual = quals[offset];
// only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
if ( origQual >= preserveQLessThan ) {
// get the keyset for this base using the error model
final int[] keySet = fullReadKeySet[offset];
final double deltaQReported = getDeltaQReported(keySet[0], keySet[1], errorModel, globalDeltaQ);
final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, keySet, errorModel, globalDeltaQ, deltaQReported, origQual);
// only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
if ( origQual >= preserveQLessThan ) {
// get the keyset for this base using the error model
final int[] keySet = fullReadKeySet[offset];
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(keySet[0], keySet[1], errorModel.ordinal());
final List<RecalDatum> empiricalQualCovs = new ArrayList<RecalDatum>();
for (int i = 2; i < requestedCovariates.length; i++) {
if (keySet[i] < 0) {
continue;
}
empiricalQualCovs.add(recalibrationTables.getTable(i).get(keySet[0], keySet[1], keySet[i], errorModel.ordinal()));
}
// calculate the recalibrated qual using the BQSR formula
double recalibratedQualDouble = origQual + globalDeltaQ + deltaQReported + deltaQCovariates;
double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs );
// recalibrated quality is bound between 1 and MAX_QUAL
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
// recalibrated quality is bound between 1 and MAX_QUAL
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE);
// return the quantized version of the recalibrated quality
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
// return the quantized version of the recalibrated quality
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
quals[offset] = recalibratedQualityScore;
quals[offset] = recalibratedQualityScore;
}
}
}
@ -220,102 +189,15 @@ public class BaseRecalibration {
}
}
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
if ( calcd != cached )
throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + errorModel);
@Ensures("result > 0.0")
protected static double hierarchicalBayesianQualityEstimate( final double epsilon, final RecalDatum empiricalQualRG, final RecalDatum empiricalQualQS, final List<RecalDatum> empiricalQualCovs ) {
final double globalDeltaQ = ( empiricalQualRG == null ? 0.0 : empiricalQualRG.getEmpiricalQuality(epsilon) - epsilon );
final double deltaQReported = ( empiricalQualQS == null ? 0.0 : empiricalQualQS.getEmpiricalQuality(globalDeltaQ + epsilon) - (globalDeltaQ + epsilon) );
double deltaQCovariates = 0.0;
for( final RecalDatum empiricalQualCov : empiricalQualCovs ) {
deltaQCovariates += ( empiricalQualCov == null ? 0.0 : empiricalQualCov.getEmpiricalQuality(deltaQReported + globalDeltaQ + epsilon) - (deltaQReported + globalDeltaQ + epsilon) );
}
return cachedWithDefault(cached);
}
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
if ( calcd != cached )
throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + qualKey + " / " + errorModel);
}
return cachedWithDefault(cached);
}
/**
* @param d a Double (that may be null) that is the result of a delta Q calculation
* @return a double == d if d != null, or 0.0 if it is
*/
private double cachedWithDefault(final Double d) {
return d == null ? 0.0 : d;
}
/**
* Note that this calculation is a constant for each rgKey and errorModel. We need only
* compute this value once for all data.
*
* @param rgKey read group key
* @param errorModel the event type
* @return global delta Q
*/
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
double result = 0.0;
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
if (empiricalQualRG != null) {
final double aggregrateQReported = ( globalQScorePrior > 0.0 && errorModel.equals(EventType.BASE_SUBSTITUTION) ? globalQScorePrior : empiricalQualRG.getEstimatedQReported() );
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(aggregrateQReported);
result = globalDeltaQEmpirical - aggregrateQReported;
}
return result;
}
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
double result = 0.0;
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal());
if (empiricalQualQS != null) {
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(globalDeltaQ + qualFromRead);
result = deltaQReportedEmpirical - (globalDeltaQ + qualFromRead);
}
return result;
}
private double calculateDeltaQCovariates(final RecalibrationTables recalibrationTables, final int[] key, final EventType errorModel, final double globalDeltaQ, final double deltaQReported, final byte qualFromRead) {
double result = 0.0;
// for all optional covariates
for (int i = 2; i < requestedCovariates.length; i++) {
if (key[i] < 0)
continue;
result += calculateDeltaQCovariate(recalibrationTables.getTable(i),
key[0], key[1], key[i], errorModel,
globalDeltaQ, deltaQReported, qualFromRead);
}
return result;
}
private double calculateDeltaQCovariate(final NestedIntegerArray<RecalDatum> table,
final int rgKey,
final int qualKey,
final int tableKey,
final EventType errorModel,
final double globalDeltaQ,
final double deltaQReported,
final byte qualFromRead) {
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal());
if (empiricalQualCO != null) {
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(deltaQReported + globalDeltaQ + qualFromRead);
return deltaQCovariateEmpirical - (deltaQReported + globalDeltaQ + qualFromRead);
} else {
return 0.0;
}
return epsilon + globalDeltaQ + deltaQReported + deltaQCovariates;
}
}

View File

@ -181,6 +181,7 @@ public class RecalDatum {
if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
this.estimatedQReported = estimatedQReported;
empiricalQuality = UNINITIALIZED;
}
public final double getEstimatedQReported() {

View File

@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;

View File

@ -131,7 +131,7 @@ public class RecalibrationReport {
* Combines two recalibration reports by adding all observations and errors
*
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
* them after combining. The reason for not calculating it is because this function is inteded for combining a
* them after combining. The reason for not calculating it is because this function is intended for combining a
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
* makes this method faster

View File

@ -48,7 +48,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

View File

@ -48,7 +48,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollectio
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.Arrays;

View File

@ -125,19 +125,19 @@ public class BQSRGathererUnitTest extends BaseTest {
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
// test the RecalTable0 table
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
originalTable = originalReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
calculatedTable = calculatedReport.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
// test the RecalTable1 table
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
originalTable = originalReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE);
calculatedTable = calculatedReport.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE);
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);
// test the RecalTable2 table
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
columnsToTest = Arrays.asList(RecalUtils.READGROUP_COLUMN_NAME, RecalUtils.QUALITY_SCORE_COLUMN_NAME, RecalUtils.COVARIATE_VALUE_COLUMN_NAME, RecalUtils.COVARIATE_NAME_COLUMN_NAME, RecalUtils.EVENT_TYPE_COLUMN_NAME, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, RecalUtils.NUMBER_ERRORS_COLUMN_NAME);
originalTable = originalReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE);
calculatedTable = calculatedReport.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE);
testTablesWithColumns(originalTable, calculatedTable, columnsToTest);

View File

@ -188,12 +188,12 @@ public class BQSRIntegrationTest extends WalkerTest {
public Object[][] createPRTestData() {
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{1, new PRTest(" -qq -1", "64622d37e234e57cf472068a00b4ec89")});
tests.add(new Object[]{1, new PRTest(" -qq 6", "735270581d1ee836f3f99d68ecbd5f24")});
tests.add(new Object[]{1, new PRTest(" -DIQ", "1e3b0a11f246a238ac4c06b084142715")});
tests.add(new Object[]{1, new PRTest(" -qq -1", "b8d296fb78adc5cff7ce12073a69d985")});
tests.add(new Object[]{1, new PRTest(" -qq 6", "2ee0cedf84c1a33a807438172bc36c11")});
tests.add(new Object[]{1, new PRTest(" -DIQ", "46cf74900bf8b13438e6a195b3085c48")});
for ( final int nct : Arrays.asList(1, 2, 4) ) {
tests.add(new Object[]{nct, new PRTest("", "39ecde2c2ba4a7c109e65372618a419a")});
tests.add(new Object[]{nct, new PRTest("", "88b88f006ebb1aade85089bfba5a9e8d")});
}
return tests.toArray(new Object[][]{});

View File

@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils;

View File

@ -50,7 +50,7 @@ import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "664a14590d0966e63d3aabff2d7bab0a");
HCTest(CEUTRIO_BAM, "", "72ce6a5e46644dfd73aeffba9d6131ea");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "111f3dc086a3cea1be9bd1ad6e1d18ed");
HCTest(NA12878_BAM, "", "f9d696391f1f337092d70e3abcd32bfb");
}
@Test(enabled = false)
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"c70f753f7918a1c670ce4ed5c66de09e");
"4e8beb2cdc3d77427f14acf37cea2bd0");
}
private void HCTestComplexGGA(String bam, String args, String md5) {
@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"b1d3070f0c49becf34101e480ab6c589");
"75e1df0dcf3728fd2b6e4735c4cc88ce");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"20eba2e54266f6aebf35b7b7b7e754e3");
"1d244f2adbc72a0062eb673d56cbb5a8");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "f9805488d85e59e1ae002d0d32d7011d");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a1bc844f62a9cb60dbb70d00ad36b85d");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -124,7 +124,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "4544a2916f46f58b32db8008776b91a3");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "23956e572f19ff26d25bbdfaa307675b");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f3984a91e7562494c2a7e41fd05a6734");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "1255f466aa2d288f015cd55d8fece1ac");
}
// That problem bam came from a user on the forum and it spotted a problem where the ReadClipper
@ -146,14 +146,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3e9e025c539be6c5e0d0f2e5d8e86a62"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("103c91c4a78164949e166d3d27eb459b"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("34129e6c6310ef4eeeeb59b0e7ac0464"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("87fe31a4bbd68a9eb5d5910db5011c82"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -175,7 +175,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("5f4c07aaf1d2d34cccce43196a5fbd71"));
Arrays.asList("0fa19ec5cf737a3445544b59ecc995e9"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("6ead001b1f8e7cb433fd335f78fde5f0"));
Arrays.asList("5f4cbdcc9bffee6bba258dfac89492ed"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;

View File

@ -50,7 +50,7 @@ package org.broadinstitute.sting.utils.pairhmm;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;

View File

@ -50,6 +50,7 @@ package org.broadinstitute.sting.utils.recalibration;
// the imports for unit testing.
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -57,7 +58,9 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
public class RecalDatumUnitTest extends BaseTest {
@ -277,4 +280,31 @@ public class RecalDatumUnitTest extends BaseTest {
Assert.assertFalse(Double.isInfinite(log10likelihood));
Assert.assertFalse(Double.isNaN(log10likelihood));
}
@Test
public void basicHierarchicalBayesianQualityEstimateTest() {
for( double epsilon = 15.0; epsilon <= 60.0; epsilon += 2.0 ) {
double RG_Q = 45.0;
RecalDatum RG = new RecalDatum( (long)100000000, (long) (100000000 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q);
double Q = 30.0;
RecalDatum QS = new RecalDatum( (long)100000000, (long) (100000000 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q);
RecalDatum COV = new RecalDatum( (long)15, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality
// initial epsilon condition shouldn't matter when there are a lot of observations
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( epsilon, RG, QS, Collections.singletonList(COV)), Q, 1E-4 );
}
for( double epsilon = 15.0; epsilon <= 60.0; epsilon += 2.0 ) {
double RG_Q = 45.0;
RecalDatum RG = new RecalDatum( (long)10, (long) (10 * 1.0 / (Math.pow(10.0, RG_Q/10.0))), (byte)RG_Q);
double Q = 30.0;
RecalDatum QS = new RecalDatum( (long)10, (long) (10 * 1.0 / (Math.pow(10.0, Q/10.0))), (byte)Q);
RecalDatum COV = new RecalDatum( (long)15, (long) 1, (byte)45.0); // no data here so Bayesian prior has a huge effect on the empirical quality
// initial epsilon condition dominates when there is no data
Assert.assertEquals(BaseRecalibration.hierarchicalBayesianQualityEstimate( epsilon, RG, QS, Collections.singletonList(COV)), epsilon, 1E-4 );
}
}
}

View File

@ -52,7 +52,7 @@ import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -29,7 +29,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.alignment.Aligner;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.reference.bwt.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.contexts;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;

View File

@ -29,7 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import java.io.PrintStream;

View File

@ -31,8 +31,11 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.crypt.CryptUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.io.IOUtils;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
import org.jets3t.service.S3Service;
import org.jets3t.service.S3ServiceException;
@ -48,6 +51,7 @@ import org.simpleframework.xml.stream.HyphenStyle;
import java.io.*;
import java.security.NoSuchAlgorithmException;
import java.security.PublicKey;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@ -309,6 +313,51 @@ public class GATKRunReport {
}
}
/**
* Decrypts encrypted AWS key from encryptedKeySource
* @param encryptedKeySource a file containing an encrypted AWS key
* @return a decrypted AWS key as a String
*/
public static String decryptAWSKey(final File encryptedKeySource) throws FileNotFoundException {
return decryptAWSKey(new FileInputStream(encryptedKeySource));
}
/**
* @see #decryptAWSKey(java.io.File) but with input from an inputstream
*/
private static String decryptAWSKey(final InputStream encryptedKeySource) {
final PublicKey key = CryptUtils.loadGATKDistributedPublicKey();
final byte[] fromDisk = IOUtils.readStreamIntoByteArray(encryptedKeySource);
final byte[] decrypted = CryptUtils.decryptData(fromDisk, key);
return new String(decrypted);
}
/**
* Get the decrypted AWS key sorted in the resource directories of name
* @param name the name of the file containing the needed AWS key
* @return a non-null GATK
*/
private static String getAWSKey(final String name) {
final Resource resource = new Resource(name, GATKRunReport.class);
return decryptAWSKey(resource.getResourceContentsAsStream());
}
/**
* Get the AWS access key for the GATK user
* @return a non-null AWS access key for the GATK user
*/
protected static String getAWSAccessKey() {
return getAWSKey("GATK_AWS_access.key");
}
/**
* Get the AWS secret key for the GATK user
* @return a non-null AWS secret key for the GATK user
*/
protected static String getAWSSecretKey() {
return getAWSKey("GATK_AWS_secret.key");
}
private class S3PutRunnable implements Runnable {
public AtomicBoolean isSuccess;
@ -331,17 +380,17 @@ public class GATKRunReport {
// are stored in an AWSCredentials object:
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
final String awsAccessKey = getAWSAccessKey(); // GATK AWS user
final String awsSecretKey = getAWSSecretKey(); // GATK AWS user
final AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
final S3Service s3Service = new RestS3Service(awsCredentials);
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(key, report);
final S3Object fileObject = new S3Object(key, report);
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket");
s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);

View File

@ -0,0 +1,2 @@
B<EFBFBD>w“לX~¨[u<><1E>£›װרֵ¦e‰קב‡,םˆ‰)<29>s­/sg1µב<C2B5>L„±ƒQ¹<>aG%$·R<C2B7><52>ל<D79C>{x<>qPz׳<7A><D7B3><>J®¬}”ב\¸–´”{(<28>הגֽׁB<D6BD>K&<26>¶™<C2B6><E284A2>ץַ,`¥³ק”@`oX
%/`mגײָ₪3ְhר3<D7A8>אrQ÷ v1<14>a×W•<57>?ˆuH¼<48>ײױַ<D7B1>״זsנֵג<06>aּz¬<7A>¿­ֵ<10>Mֶֻ¦?מUzhקל·k#+<2B>xִ„M<E2809E>םk“ ¦©X<C2A9>²)÷°Mִ±<D6B4>ױ• ַipע-מ?jך<6A>תHµ@d+HR|<7C>bע³F

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.variantcontext.VariantContext;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.variant.vcf.*;

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.io.File;
import java.io.FileNotFoundException;

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;

View File

@ -41,7 +41,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.coverage;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ClippingOp;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;

View File

@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.codecs.table.TableFeature;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -32,9 +32,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
@Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicSummary extends VariantEvaluator implements StandardEval {
@ -122,7 +122,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
private void calculatePairwiseTiTv(VariantContext vc) {
for ( Allele alt : vc.getAlternateAlleles() ) {
if ( VariantContextUtils.isTransition(vc.getReference(), alt) )
if ( GATKVariantContextUtils.isTransition(vc.getReference(), alt) )
nTi++;
else
nTv++;

View File

@ -30,9 +30,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
@Analysis(description = "Ti/Tv Variant Evaluator")
public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval {
@ -61,7 +61,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphicInSamples()) {
if (VariantContextUtils.isTransition(vc)) {
if ( GATKVariantContextUtils.isTransition(vc)) {
if (updateStandard) nTiInComp++;
else nTi++;
} else {

View File

@ -35,11 +35,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.*;
@ -226,7 +226,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
// type specific calculations
if ( type == Type.SNP && eval.isBiallelic() ) {
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
titvTable = GATKVariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
titvTable.inc(type, ALL);
}

View File

@ -280,7 +280,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
Map<VariantContext.Type, List<VariantContext>> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs);
// TODO -- clean this up in a refactoring
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
@ -320,7 +320,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(builder, false);
if ( minimalVCF )
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
GATKVariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
vcfWriter.add(builder.make());
}

View File

@ -25,11 +25,9 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;
@ -277,8 +275,8 @@ public class ConcordanceMetrics {
if ( truth.isMonomorphicInSamples() )
return EVAL_ONLY;
boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth);
boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truth,eval);
boolean evalSubsetTruth = GATKVariantContextUtils.allelesAreSubset(eval, truth);
boolean truthSubsetEval = GATKVariantContextUtils.allelesAreSubset(truth, eval);
if ( evalSubsetTruth && truthSubsetEval )
return ALLELES_MATCH;

View File

@ -40,12 +40,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
@ -122,7 +122,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
if ( toInterval != null ) {
// check whether the strand flips, and if so reverse complement everything
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && vc.isPointEvent() ) {
vc = VariantContextUtils.reverseComplement(vc);
vc = GATKVariantContextUtils.reverseComplement(vc);
}
vc = new VariantContextBuilder(vc).loc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length).make();
@ -133,7 +133,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
.attribute("OriginalStart", fromInterval.getStart()).make();
}
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(vc) ) {
if ( originalVC.isSNP() && originalVC.isBiallelic() && GATKVariantContextUtils.getSNPSubstitutionType(originalVC) != GATKVariantContextUtils.getSNPSubstitutionType(vc) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
originalVC.getChr(), originalVC.getStart(), vc.getChr(), vc.getStart(),
originalVC.getReference(), originalVC.getAlternateAllele(0), vc.getReference(), vc.getAlternateAllele(0)));

View File

@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.io.PrintStream;
import java.lang.reflect.Array;
@ -432,7 +431,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } });
getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) {
if ( vc.isSNP() && vc.isBiallelic() )
return VariantContextUtils.isTransition(vc) ? "1" : "0";
return GATKVariantContextUtils.isTransition(vc) ? "1" : "0";
else
return "-1";
}});

View File

@ -39,11 +39,12 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -246,7 +247,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
}
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vcfwriter.add(vc);
}

View File

@ -23,12 +23,14 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.variant.utils;
package org.broadinstitute.sting.utils;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
import java.util.Random;
/**
* BaseUtils contains some basic utilities for manipulating nucleotides.
@ -95,9 +97,6 @@ public class BaseUtils {
baseIndexWithIupacMap['v'] = Base.N.ordinal();
}
// Use a fixed random seed to allow for deterministic results when using random bases
private static final Random randomNumberGen = new Random(47382911L);
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
// a pyrimidine to another pyrimidine nucleotide (C <-> T).
// Approximately two out of every three single nucleotide polymorphisms (SNPs) are transitions.
@ -174,7 +173,7 @@ public class BaseUtils {
if ( baseIndex == Base.N.ordinal() ) {
bases[i] = 'N';
} else if ( errorOnBadReferenceBase && baseIndex == -1 ) {
throw new IllegalStateException("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
}
}
return bases;
@ -251,7 +250,7 @@ public class BaseUtils {
*/
static public int simpleBaseToBaseIndex(final byte base) {
if ( base < 0 || base >= 256 )
throw new IllegalArgumentException("Non-standard bases were encountered in either the input reference or BAM file(s)");
throw new UserException.BadInput("Non-standard bases were encountered in either the input reference or BAM file(s)");
return baseIndexMap[base];
}
@ -491,7 +490,7 @@ public class BaseUtils {
int randomBaseIndex = excludeBaseIndex;
while (randomBaseIndex == excludeBaseIndex) {
randomBaseIndex = randomNumberGen.nextInt(4);
randomBaseIndex = GenomeAnalysisEngine.getRandomGenerator().nextInt(4);
}
return randomBaseIndex;
@ -515,7 +514,7 @@ public class BaseUtils {
case 'N':
return 'N';
default:
throw new IllegalArgumentException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
}
}
}

View File

@ -40,7 +40,7 @@ import java.util.*;
public class Haplotype extends Allele {
private GenomeLoc genomeLocation = null;
private HashMap<Integer, VariantContext> eventMap = null;
private Map<Integer, VariantContext> eventMap = null;
private Cigar cigar;
private int alignmentStartHapwrtRef;
public int leftBreakPoint = 0;
@ -81,11 +81,11 @@ public class Haplotype extends Allele {
return Arrays.hashCode(getBases());
}
public HashMap<Integer, VariantContext> getEventMap() {
public Map<Integer, VariantContext> getEventMap() {
return eventMap;
}
public void setEventMap( final HashMap<Integer, VariantContext> eventMap ) {
public void setEventMap( final Map<Integer, VariantContext> eventMap ) {
this.eventMap = eventMap;
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.utils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMProgramRecord;
@ -34,7 +35,10 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.math.BigInteger;
import java.net.InetAddress;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
/**
@ -911,4 +915,28 @@ public class Utils {
return subLists;
}
/**
* @see #calcMD5(byte[])
*/
public static String calcMD5(final String s) throws NoSuchAlgorithmException {
return calcMD5(s.getBytes());
}
/**
* Calculate the md5 for bytes, and return the result as a 32 character string
*
* @param bytes the bytes to calculate the md5 of
* @return the md5 of bytes, as a 32-character long string
* @throws NoSuchAlgorithmException
*/
@Ensures({"result != null", "result.length() == 32"})
public static String calcMD5(final byte[] bytes) throws NoSuchAlgorithmException {
if ( bytes == null ) throw new IllegalArgumentException("bytes cannot be null");
final byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytes);
final BigInteger bigInt = new BigInteger(1, thedigest);
String md5String = bigInt.toString(16);
while (md5String.length() < 32) md5String = "0" + md5String; // pad to length 32
return md5String;
}
}

View File

@ -67,7 +67,7 @@ public class ActiveRegion implements HasGenomeLocation {
* The reads included in this active region. May be empty upon creation, and expand / contract
* as reads are added or removed from this region.
*/
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
private final List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
/**
* An ordered list (by genomic coordinate) of the ActivityProfileStates that went
@ -355,7 +355,7 @@ public class ActiveRegion implements HasGenomeLocation {
* read coordinates.
*/
public void hardClipToActiveRegion() {
final ArrayList<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() );
final List<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() );
ReadUtils.sortReadsByCoordinate(clippedReads);
clearReads();
addAll(clippedReads);

View File

@ -362,8 +362,8 @@ public class ReadClipper {
return GATKSAMRecord.emptyRead(read);
}
public static ArrayList<GATKSAMRecord> hardClipToRegion( final ArrayList<GATKSAMRecord> reads, final int refStart, final int refStop ) {
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>( reads.size() );
public static List<GATKSAMRecord> hardClipToRegion( final List<GATKSAMRecord> reads, final int refStart, final int refStop ) {
final List<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>( reads.size() );
for( final GATKSAMRecord read : reads ) {
final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop );
if( !clippedRead.isEmpty() ) {

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.utils.duplicates;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils;

View File

@ -33,7 +33,7 @@ import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.StringUtil;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.io.File;
import java.io.FileNotFoundException;

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.utils.genotyper;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
public enum DiploidGenotype {
AA ('A', 'A'),

View File

@ -359,19 +359,9 @@ public class IOUtils {
*/
public static void writeResource(Resource resource, File file) {
String path = resource.getPath();
Class<?> clazz = resource.getRelativeClass();
InputStream inputStream = null;
InputStream inputStream = resource.getResourceContentsAsStream();
OutputStream outputStream = null;
try {
if (clazz == null) {
inputStream = ClassLoader.getSystemResourceAsStream(path);
if (inputStream == null)
throw new IllegalArgumentException("Resource not found: " + path);
} else {
inputStream = clazz.getResourceAsStream(path);
if (inputStream == null)
throw new IllegalArgumentException("Resource not found relative to " + clazz + ": " + path);
}
outputStream = FileUtils.openOutputStream(file);
org.apache.commons.io.IOUtils.copy(inputStream, outputStream);
} catch (IOException e) {

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.utils.io;
import java.io.File;
import java.io.InputStream;
/**
* Stores a resource by path and a relative class.
@ -64,4 +65,27 @@ public class Resource {
File.separator,
path);
}
/**
* Get the contents of this resource as an InputStream
* @throws IllegalArgumentException if resource cannot be read
* @return an input stream that will read the contents of this resource
*/
public InputStream getResourceContentsAsStream() {
final Class<?> clazz = getRelativeClass();
final InputStream inputStream;
if (clazz == null) {
inputStream = ClassLoader.getSystemResourceAsStream(path);
if (inputStream == null)
throw new IllegalArgumentException("Resource not found: " + path);
} else {
inputStream = clazz.getResourceAsStream(path);
if (inputStream == null)
throw new IllegalArgumentException("Resource not found relative to " + clazz + ": " + path);
}
return inputStream;
}
}

View File

@ -29,7 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.*;

View File

@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import java.io.File;
import java.util.*;

View File

@ -30,10 +30,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFConstants;
@ -53,6 +50,7 @@ public class GATKVariantContextUtils {
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_INTERSECTION = "Intersection";
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@ -108,6 +106,68 @@ public class GATKVariantContextUtils {
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
}
public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
if (!context.isSNP() || !context.isBiallelic())
throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + context);
return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]);
}
/**
* If this is a BiAlleic SNP, is it a transition?
*/
public static boolean isTransition(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION;
}
/**
* If this is a BiAlleic SNP, is it a transversion?
*/
public static boolean isTransversion(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
}
public static boolean isTransition(Allele ref, Allele alt) {
return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSITION;
}
public static boolean isTransversion(Allele ref, Allele alt) {
return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
}
/**
* Returns a context identical to this with the REF and ALT alleles reverse complemented.
*
* @param vc variant context
* @return new vc
*/
public static VariantContext reverseComplement(VariantContext vc) {
// create a mapping from original allele to reverse complemented allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
for ( Allele originalAllele : vc.getAlleles() ) {
Allele newAllele;
if ( originalAllele.isNoCall() )
newAllele = originalAllele;
else
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
alleleMap.put(originalAllele, newAllele);
}
// create new Genotype objects
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
List<Allele> newAlleles = new ArrayList<Allele>();
for ( Allele allele : genotype.getAlleles() ) {
Allele newAllele = alleleMap.get(allele);
if ( newAllele == null )
newAllele = Allele.NO_CALL;
newAlleles.add(newAllele);
}
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
}
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
}
/**
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
* contraction of a series of identical bases in the reference.
@ -954,6 +1014,124 @@ public class GATKVariantContextUtils {
}
}
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
for ( final String key : keysToPreserve ) {
if ( igc.hasAttribute(key) )
attributes.put(key, igc.getAttribute(key));
}
return attributes;
}
/**
* @deprecated use variant context builder version instead
* @param vc the variant context
* @param keysToPreserve the keys to preserve
* @return a pruned version of the original variant context
*/
@Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
}
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
final VariantContext vc = builder.make();
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
// VC info
final Map<String, Object> attributes = subsetAttributes(vc.getCommonInfo(), keysToPreserve);
// Genotypes
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
final GenotypeBuilder gb = new GenotypeBuilder(g);
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
gb.noAD().noDP().noPL().noAttributes();
genotypes.add(gb.make());
}
return builder.genotypes(genotypes).attributes(attributes);
}
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
// if all alleles of vc1 are a contained in alleles of vc2, return true
if (!vc1.getReference().equals(vc2.getReference()))
return false;
for (Allele a :vc1.getAlternateAlleles()) {
if (!vc2.getAlternateAlleles().contains(a))
return false;
}
return true;
}
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
for ( VariantContext vc : VCs ) {
// look at previous variant contexts of different type. If:
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
// c) neither: do nothing, just add vc to its own list
boolean addtoOwnList = true;
for (VariantContext.Type type : VariantContext.Type.values()) {
if (type.equals(vc.getType()))
continue;
if (!mappedVCs.containsKey(type))
continue;
List<VariantContext> vcList = mappedVCs.get(type);
for (int k=0; k < vcList.size(); k++) {
VariantContext otherVC = vcList.get(k);
if (allelesAreSubset(otherVC,vc)) {
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
vcList.remove(k);
// avoid having empty lists
if (vcList.size() == 0)
mappedVCs.remove(type);
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(otherVC);
break;
}
else if (allelesAreSubset(vc,otherVC)) {
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
mappedVCs.get(type).add(vc);
addtoOwnList = false;
break;
}
}
}
if (addtoOwnList) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
}
return mappedVCs;
}
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
if ( allowedAttributes == null )
return vc;
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue());
}
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
}
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
}
private static class AlleleMapper {
private VariantContext vc = null;
private Map<Allele, Allele> map = null;

View File

@ -25,7 +25,7 @@
package org.broadinstitute.variant.variantcontext;
import org.broadinstitute.variant.utils.BaseUtils;
import net.sf.samtools.util.StringUtil;
import java.util.Arrays;
import java.util.Collection;
@ -130,7 +130,7 @@ public class Allele implements Comparable<Allele> {
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
}
else {
BaseUtils.convertToUpperCase(bases);
StringUtil.toUpperCase(bases);
}
this.isRef = isRef;

View File

@ -625,6 +625,10 @@ public class VariantContext implements Feature { // to enable tribble integratio
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
public CommonInfo getCommonInfo() {
return commonInfo;
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with alleles

View File

@ -30,7 +30,6 @@ import com.google.java.contract.Requires;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlEngine;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*;
@ -317,106 +316,6 @@ public class VariantContextUtils {
return r;
}
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
for ( final String key : keysToPreserve ) {
if ( igc.hasAttribute(key) )
attributes.put(key, igc.getAttribute(key));
}
return attributes;
}
/**
* @deprecated use variant context builder version instead
* @param vc the variant context
* @param keysToPreserve the keys to preserve
* @return a pruned version of the original variant context
*/
@Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
}
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
final VariantContext vc = builder.make();
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
// VC info
final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);
// Genotypes
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
final GenotypeBuilder gb = new GenotypeBuilder(g);
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
gb.noAD().noDP().noPL().noAttributes();
genotypes.add(gb.make());
}
return builder.genotypes(genotypes).attributes(attributes);
}
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
// if all alleles of vc1 are a contained in alleles of vc2, return true
if (!vc1.getReference().equals(vc2.getReference()))
return false;
for (Allele a :vc1.getAlternateAlleles()) {
if (!vc2.getAlternateAlleles().contains(a))
return false;
}
return true;
}
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
for ( VariantContext vc : VCs ) {
// look at previous variant contexts of different type. If:
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
// c) neither: do nothing, just add vc to its own list
boolean addtoOwnList = true;
for (VariantContext.Type type : VariantContext.Type.values()) {
if (type.equals(vc.getType()))
continue;
if (!mappedVCs.containsKey(type))
continue;
List<VariantContext> vcList = mappedVCs.get(type);
for (int k=0; k < vcList.size(); k++) {
VariantContext otherVC = vcList.get(k);
if (allelesAreSubset(otherVC,vc)) {
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
vcList.remove(k);
// avoid having empty lists
if (vcList.size() == 0)
mappedVCs.remove(type);
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(otherVC);
break;
}
else if (allelesAreSubset(vc,otherVC)) {
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
mappedVCs.get(type).add(vc);
addtoOwnList = false;
break;
}
}
}
if (addtoOwnList) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
}
return mappedVCs;
}
// TODO: remove that after testing
// static private void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) {
// Set<String> names = new HashSet<String>();
@ -432,85 +331,6 @@ public class VariantContextUtils {
// }
/**
* Returns a context identical to this with the REF and ALT alleles reverse complemented.
*
* @param vc variant context
* @return new vc
*/
public static VariantContext reverseComplement(VariantContext vc) {
// create a mapping from original allele to reverse complemented allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
for ( Allele originalAllele : vc.getAlleles() ) {
Allele newAllele;
if ( originalAllele.isNoCall() )
newAllele = originalAllele;
else
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
alleleMap.put(originalAllele, newAllele);
}
// create new Genotype objects
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
List<Allele> newAlleles = new ArrayList<Allele>();
for ( Allele allele : genotype.getAlleles() ) {
Allele newAllele = alleleMap.get(allele);
if ( newAllele == null )
newAllele = Allele.NO_CALL;
newAlleles.add(newAllele);
}
newGenotypes.add(new GenotypeBuilder(genotype).alleles(newAlleles).make());
}
return new VariantContextBuilder(vc).alleles(alleleMap.values()).genotypes(newGenotypes).make();
}
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
if ( allowedAttributes == null )
return vc;
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue());
}
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
}
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
}
public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
if (!context.isSNP() || !context.isBiallelic())
throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + context);
return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]);
}
/**
* If this is a BiAlleic SNP, is it a transition?
*/
public static boolean isTransition(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION;
}
/**
* If this is a BiAlleic SNP, is it a transversion?
*/
public static boolean isTransversion(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
}
public static boolean isTransition(Allele ref, Allele alt) {
return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSITION;
}
public static boolean isTransversion(Allele ref, Allele alt) {
return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
}
public static int getSize( VariantContext vc ) {
return vc.getEnd() - vc.getStart() + 1;
}

View File

@ -28,11 +28,10 @@ package org.broadinstitute.sting;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.util.Arrays;
/**
@ -252,11 +251,7 @@ public class MD5DB {
*/
public String testFileMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
try {
byte[] bytesOfMessage = getBytesFromFile(resultsFile);
byte[] thedigest = MessageDigest.getInstance("MD5").digest(bytesOfMessage);
BigInteger bigInt = new BigInteger(1, thedigest);
String filemd5sum = bigInt.toString(16);
while (filemd5sum.length() < 32) filemd5sum = "0" + filemd5sum; // pad to length 32
final String filemd5sum = Utils.calcMD5(getBytesFromFile(resultsFile));
//
// copy md5 to integrationtests

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.phonehome;
import junit.framework.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.testng.annotations.Test;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
public class GATKRunReportUnitTest extends BaseTest {
@Test
public void testAccessKey() throws Exception {
testAWSKey(GATKRunReport.getAWSAccessKey(), "c0f0afa1ff5ba41d9bf216cfcdbf26bf");
}
@Test
public void testSecretKey() throws Exception {
testAWSKey(GATKRunReport.getAWSSecretKey(), "db2f13b3a7c98ad24e28783733ec4a62");
}
private void testAWSKey(final String accessKey, final String expectedMD5) throws Exception {
Assert.assertNotNull(accessKey, "AccessKey should not be null");
final String actualmd5 = Utils.calcMD5(accessKey);
Assert.assertEquals(actualmd5, expectedMD5);
}
}

View File

@ -23,20 +23,22 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.variant.utils;
package org.broadinstitute.sting.utils;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.testng.annotations.BeforeClass;
public class BaseUtilsUnitTest extends VariantBaseTest {
public class BaseUtilsUnitTest extends BaseTest {
@BeforeClass
public void init() { }
@Test
public void testMostFrequentBaseFraction() {
logger.warn("Executing testMostFrequentBaseFraction");
compareFrequentBaseFractionToExpected("AAAAA", 1.0);
compareFrequentBaseFractionToExpected("ACCG", 0.5);
compareFrequentBaseFractionToExpected("ACCCCTTTTG", 4.0/10.0);
@ -44,7 +46,7 @@ public class BaseUtilsUnitTest extends VariantBaseTest {
private void compareFrequentBaseFractionToExpected(String sequence, double expected) {
double fraction = BaseUtils.mostFrequentBaseFraction(sequence.getBytes());
Assert.assertTrue(GeneralUtils.compareDoubles(fraction, expected) == 0);
Assert.assertTrue(MathUtils.compareDoubles(fraction, expected) == 0);
}
@Test
@ -64,6 +66,8 @@ public class BaseUtilsUnitTest extends VariantBaseTest {
@Test
public void testTransitionTransversion() {
logger.warn("Executing testTransitionTransversion");
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'T' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION );
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'C' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION );
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'G' ) == BaseUtils.BaseSubstitutionType.TRANSITION );
@ -89,6 +93,8 @@ public class BaseUtilsUnitTest extends VariantBaseTest {
@Test
public void testReverseComplementString() {
logger.warn("Executing testReverseComplementString");
compareRCStringToExpected("ACGGT", "ACCGT");
compareRCStringToExpected("TCGTATATCTCGCTATATATATATAGCTCTAGTATA", "TATACTAGAGCTATATATATATAGCGAGATATACGA");
compareRCStringToExpected("AAAN", "NTTT");

View File

@ -25,10 +25,13 @@
package org.broadinstitute.sting.utils;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.sting.utils.io.IOUtils;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;
import java.io.File;
import java.util.LinkedHashMap;
import java.util.Map;
@ -135,4 +138,16 @@ public class UtilsUnitTest extends BaseTest {
actual = Utils.escapeExpressions(" one two 'three four' ");
Assert.assertEquals(actual, expected);
}
@Test
public void testCalcMD5() throws Exception {
final File source = new File(publicTestDir + "exampleFASTA.fasta");
final String sourceMD5 = "36880691cf9e4178216f7b52e8d85fbe";
final byte[] sourceBytes = IOUtils.readFileIntoByteArray(source);
Assert.assertEquals(Utils.calcMD5(sourceBytes), sourceMD5);
final String sourceString = FileUtils.readFileToString(source);
Assert.assertEquals(Utils.calcMD5(sourceString), sourceMD5);
}
}

View File

@ -252,7 +252,7 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
Assert.assertEquals(changingNs, preservingNs + 4);
}
@Test(enabled = true, expectedExceptions = {IllegalStateException.class})
@Test(enabled = true, expectedExceptions = {UserException.class})
public void testFailOnBadBase() throws FileNotFoundException, InterruptedException {
final String testFasta = privateTestDir + "problematicFASTA.fasta";
final CachingIndexedFastaSequenceFile fasta = new CachingIndexedFastaSequenceFile(new File(testFasta));

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

View File

@ -31,7 +31,6 @@ package org.broadinstitute.variant.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -154,9 +153,9 @@ public class GenotypeLikelihoodsUnitTest extends VariantBaseTest {
public void testGetQualFromLikelihoodsMultiAllelic() {
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
Allele ref = Allele.create(BaseUtils.Base.A.base,true);
Allele alt1 = Allele.create(BaseUtils.Base.C.base);
Allele alt2 = Allele.create(BaseUtils.Base.T.base);
Allele ref = Allele.create((byte)'A',true);
Allele alt1 = Allele.create((byte)'C');
Allele alt2 = Allele.create((byte)'T');
List<Allele> allAlleles = Arrays.asList(ref,alt1,alt2);
List<Allele> gtAlleles = Arrays.asList(alt1,alt2);
GenotypeBuilder gtBuilder = new GenotypeBuilder();