Use base List and Map types in the HaplotypeCaller when possible.

This commit is contained in:
Ryan Poplin 2013-01-30 17:09:27 -05:00
parent 5f4a063def
commit bb29bd7df7
8 changed files with 60 additions and 119 deletions

View File

@ -84,7 +84,7 @@ public class GenotypingEngine {
final List<Haplotype> haplotypes,
final List<String> samples,
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final byte[] ref,
final GenomeLoc refLoc,
final GenomeLoc activeRegionWindow,
@ -124,12 +124,12 @@ public class GenotypingEngine {
// Walk along each position in the key set and create each event to be outputted
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
final List<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
final List<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
if( !in_GGA_mode ) {
for( final Haplotype h : haplotypes ) {
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
final Map<Integer,VariantContext> eventMap = h.getEventMap();
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
@ -142,7 +142,7 @@ public class GenotypingEngine {
if( compVC.getStart() == loc ) {
int alleleCount = 0;
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
ArrayList<Allele> alleleSet = new ArrayList<Allele>(2);
List<Allele> alleleSet = new ArrayList<Allele>(2);
alleleSet.add(compVC.getReference());
alleleSet.add(compAltAllele);
final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount;
@ -180,7 +180,7 @@ public class GenotypingEngine {
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles.");
}
final HashMap<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
final Map<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
@ -232,7 +232,7 @@ public class GenotypingEngine {
return genotypes;
}
private void validatePriorityList( final ArrayList<String> priorityList, final ArrayList<VariantContext> eventsAtThisLoc ) {
private void validatePriorityList( final List<String> priorityList, final List<VariantContext> eventsAtThisLoc ) {
for( final VariantContext vc : eventsAtThisLoc ) {
if( !priorityList.contains(vc.getSource()) ) {
throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles.");
@ -251,7 +251,7 @@ public class GenotypingEngine {
private static Map<String, PerReadAlleleLikelihoodMap> filterToOnlyOverlappingReads( final GenomeLocParser parser,
final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
final VariantContext call ) {
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
@ -284,7 +284,7 @@ public class GenotypingEngine {
}
protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
final List<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
for( final Haplotype h : haplotypes ) {
for( final VariantContext vc : h.getEventMap().values() ) {
if( vc.isSymbolic() ) {
@ -407,7 +407,7 @@ public class GenotypingEngine {
// remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event
for( final Haplotype h : haplotypes ) {
final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
final Map<Integer, VariantContext> eventMap = h.getEventMap();
if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) {
eventMap.remove(thisStart);
eventMap.remove(nextStart);
@ -418,7 +418,7 @@ public class GenotypingEngine {
boolean containsStart = false;
boolean containsNext = false;
for( final Haplotype h : haplotypes ) {
final HashMap<Integer, VariantContext> eventMap = h.getEventMap();
final Map<Integer, VariantContext> eventMap = h.getEventMap();
if( eventMap.containsKey(thisStart) ) { containsStart = true; }
if( eventMap.containsKey(nextStart) ) { containsNext = true; }
}
@ -457,7 +457,7 @@ public class GenotypingEngine {
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
final List<Allele> mergedAlleles = new ArrayList<Allele>();
mergedAlleles.add( Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true ) );
mergedAlleles.add( Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false ) );
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
@ -492,10 +492,10 @@ public class GenotypingEngine {
eventMapper.put(new Event(vc), new ArrayList<Haplotype>());
}
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
final List<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
for( final Haplotype h : haplotypes ) {
if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) {
final ArrayList<Allele> alleles = new ArrayList<Allele>(2);
final List<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(h.getArtificialRefAllele());
alleles.add(h.getArtificialAltAllele());
final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype")
@ -572,13 +572,13 @@ public class GenotypingEngine {
}
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final List<List<Haplotype>> alleleMapper, final List<Haplotype> haplotypes ) {
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
final ArrayList<Allele> eventAllelesForSample = new ArrayList<Allele>();
final List<Allele> eventAllelesForSample = new ArrayList<Allele>();
for( final Allele a : haplotypeAllelesForSample ) {
final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a));
for( int iii = 0; iii < alleleMapper.size(); iii++ ) {
final ArrayList<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
final List<Haplotype> mappedHaplotypes = alleleMapper.get(iii);
if( mappedHaplotypes.contains(haplotype) ) {
eventAllelesForSample.add(eventAlleles.get(iii));
break;
@ -597,8 +597,8 @@ public class GenotypingEngine {
return false;
}
protected static HashMap<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
final HashMap<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) {
final Map<Integer,VariantContext> vcs = new HashMap<Integer,VariantContext>();
int refPos = alignmentStartHapwrtRef;
if( refPos < 0 ) { return null; } // Protection against SW failures
@ -609,7 +609,7 @@ public class GenotypingEngine {
switch( ce.getOperator() ) {
case I:
{
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final List<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
final byte refByte = ref[refPos-1];
if( BaseUtils.isRegularBase(refByte) ) {
@ -639,7 +639,7 @@ public class GenotypingEngine {
case D:
{
final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base
final ArrayList<Allele> deletionAlleles = new ArrayList<Allele>();
final List<Allele> deletionAlleles = new ArrayList<Allele>();
final int deletionStart = refLoc.getStart() + refPos - 1;
// BUGBUG: how often does this symbolic deletion allele case happen?
//if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength)
@ -667,7 +667,7 @@ public class GenotypingEngine {
final byte altByte = alignment[alignmentPos];
if( refByte != altByte ) { // SNP!
if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) {
final ArrayList<Allele> snpAlleles = new ArrayList<Allele>();
final List<Allele> snpAlleles = new ArrayList<Allele>();
snpAlleles.add( Allele.create( refByte, true ) );
snpAlleles.add( Allele.create( altByte, false ) );
vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make());

View File

@ -274,10 +274,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// bases with quality less than or equal to this value are trimmed off the tails of the reads
private static final byte MIN_TAIL_QUALITY = 20;
private ArrayList<String> samplesList = new ArrayList<String>();
private List<String> samplesList = new ArrayList<String>();
private final static double LOG_ONE_HALF = -Math.log10(2.0);
private final static double LOG_ONE_THIRD = -Math.log10(3.0);
private final ArrayList<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
private final List<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
@ -429,7 +429,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
final ArrayList<Allele> alleles = new ArrayList<Allele>();
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
@ -450,7 +450,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work
return 1;
final ArrayList<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
final List<VariantContext> activeAllelesToGenotype = new ArrayList<VariantContext>();
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : allelesToGenotype ) {
@ -472,7 +472,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region
final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
final List<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do!
activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region
@ -484,10 +484,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// evaluate each sample's reads against all haplotypes
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) );
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
final List<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ?
likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes );
for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine,
@ -558,7 +558,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final List<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( activeRegion.getReads() );
activeRegion.clearReads();
@ -569,7 +569,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
// Loop through the reads hard clipping the adaptor and low quality tails
final ArrayList<GATKSAMRecord> readsToUse = new ArrayList<GATKSAMRecord>(finalizedReadList.size());
final List<GATKSAMRecord> readsToUse = new ArrayList<GATKSAMRecord>(finalizedReadList.size());
for( final GATKSAMRecord myRead : finalizedReadList ) {
final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) );
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
@ -583,7 +583,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec);
@ -599,10 +599,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReadSpanLoc().getContig(), padLeft, padRight);
}
private HashMap<String, ArrayList<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
final HashMap<String, ArrayList<GATKSAMRecord>> returnMap = new HashMap<String, ArrayList<GATKSAMRecord>>();
private Map<String, List<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<String, List<GATKSAMRecord>>();
for( final String sample : samplesList) {
ArrayList<GATKSAMRecord> readList = returnMap.get( sample );
List<GATKSAMRecord> readList = returnMap.get( sample );
if( readList == null ) {
readList = new ArrayList<GATKSAMRecord>();
returnMap.put(sample, readList);

View File

@ -91,11 +91,11 @@ public class LikelihoodCalculationEngine {
DEBUG = debug;
}
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
int X_METRIC_LENGTH = 0;
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final Map.Entry<String, List<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
for( final GATKSAMRecord read : sample.getValue() ) {
final int readLength = read.getReadLength();
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
@ -115,7 +115,7 @@ public class LikelihoodCalculationEngine {
pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
// for each sample's reads
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
//if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
// evaluate the likelihood of the reads given those haplotypes
stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue()));
@ -123,7 +123,7 @@ public class LikelihoodCalculationEngine {
return stratifiedReadMap;
}
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads) {
private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List<Haplotype> haplotypes, final List<GATKSAMRecord> reads) {
// first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time)
final int numHaplotypes = haplotypes.size();
final Map<Haplotype, Allele> alleleVersions = new HashMap<Haplotype, Allele>(numHaplotypes);
@ -235,72 +235,13 @@ public class LikelihoodCalculationEngine {
return likelihoodMatrix;
}
/*
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.size() <= haplotypes.size()"})
public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
// BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping
final int numHaplotypes = haplotypes.size();
final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add(0); // always start with the reference haplotype
final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes];
int sampleCount = 0;
for( final String sample : sampleKeySet ) {
haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample );
}
int hap1 = 0;
int hap2 = 0;
int chosenSample = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int kkk = 0; kkk < sampleCount; kkk++ ) {
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) {
maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj];
hap1 = iii;
hap2 = jjj;
chosenSample = kkk;
}
}
}
}
if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY;
}
}
}
if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
for( final int hIndex : bestHaplotypesIndexList ) {
bestHaplotypes.add( haplotypes.get(hIndex) );
}
return bestHaplotypes;
}
*/
@Requires({"haplotypes.size() > 0"})
@Ensures({"result.size() <= haplotypes.size()"})
public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation ) {
public List<Haplotype> selectBestHaplotypes( final List<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap, final int maxNumHaplotypesInPopulation ) {
final int numHaplotypes = haplotypes.size();
final Set<String> sampleKeySet = stratifiedReadMap.keySet();
final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
final List<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
final List<Allele> haplotypesAsAlleles = new ArrayList<Allele>();
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); }
@ -332,7 +273,7 @@ public class LikelihoodCalculationEngine {
if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
final ArrayList<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
final List<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
for( final int hIndex : bestHaplotypesIndexList ) {
bestHaplotypes.add( haplotypes.get(hIndex) );
}

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -67,5 +67,5 @@ public abstract class LocalAssemblyEngine {
/**
 * Base constructor for local assembly engines. The body is empty: no state is
 * initialized here. It is declared protected, so it can be invoked only by
 * subclasses and by code in the same package.
 */
protected LocalAssemblyEngine() {
}
public abstract ArrayList<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList<VariantContext> activeAllelesToGenotype);
public abstract List<Haplotype> runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, List<VariantContext> activeAllelesToGenotype);
}

View File

@ -84,7 +84,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
private final boolean DEBUG;
private final PrintStream GRAPH_WRITER;
private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
private final List<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
private final int MIN_KMER;
private int PRUNE_FACTOR = 2;
@ -96,7 +96,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
MIN_KMER = minKmer;
}
public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
public List<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final List<VariantContext> activeAllelesToGenotype ) {
this.PRUNE_FACTOR = PRUNE_FACTOR;
// create the graphs
@ -168,7 +168,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
protected static void pruneGraph( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph, final int pruneFactor ) {
final ArrayList<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
final List<DeBruijnEdge> edgesToRemove = new ArrayList<DeBruijnEdge>();
for( final DeBruijnEdge e : graph.edgeSet() ) {
if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
edgesToRemove.add(e);
@ -177,7 +177,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
graph.removeAllEdges(edgesToRemove);
// Run through the graph and clean up singular orphaned nodes
final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
final List<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
for( final DeBruijnVertex v : graph.vertexSet() ) {
if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) {
verticesToRemove.add(v);
@ -187,7 +187,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
protected static void eliminateNonRefPaths( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph ) {
final ArrayList<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
final List<DeBruijnVertex> verticesToRemove = new ArrayList<DeBruijnVertex>();
boolean done = false;
while( !done ) {
done = true;
@ -313,8 +313,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
@Ensures({"result.contains(refHaplotype)"})
private ArrayList<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
final ArrayList<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
private List<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
final List<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
// add the reference haplotype separately from all the others
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
@ -343,7 +343,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
// for GGA mode, add the desired allele into the haplotype if it isn't already present
if( !activeAllelesToGenotype.isEmpty() ) {
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
final Map<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
@ -378,7 +378,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return returnHaplotypes;
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final List<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );

View File

@ -40,7 +40,7 @@ import java.util.*;
public class Haplotype extends Allele {
private GenomeLoc genomeLocation = null;
private HashMap<Integer, VariantContext> eventMap = null;
private Map<Integer, VariantContext> eventMap = null;
private Cigar cigar;
private int alignmentStartHapwrtRef;
public int leftBreakPoint = 0;
@ -81,11 +81,11 @@ public class Haplotype extends Allele {
return Arrays.hashCode(getBases());
}
public HashMap<Integer, VariantContext> getEventMap() {
public Map<Integer, VariantContext> getEventMap() {
return eventMap;
}
public void setEventMap( final HashMap<Integer, VariantContext> eventMap ) {
public void setEventMap( final Map<Integer, VariantContext> eventMap ) {
this.eventMap = eventMap;
}

View File

@ -67,7 +67,7 @@ public class ActiveRegion implements HasGenomeLocation {
* The reads included in this active region. May be empty upon creation, and expand / contract
* as reads are added or removed from this region.
*/
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
private final List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
/**
* An ordered list (by genomic coordinate) of the ActivityProfileStates that went
@ -355,7 +355,7 @@ public class ActiveRegion implements HasGenomeLocation {
* read coordinates.
*/
public void hardClipToActiveRegion() {
final ArrayList<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() );
final List<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() );
ReadUtils.sortReadsByCoordinate(clippedReads);
clearReads();
addAll(clippedReads);

View File

@ -362,8 +362,8 @@ public class ReadClipper {
return GATKSAMRecord.emptyRead(read);
}
public static ArrayList<GATKSAMRecord> hardClipToRegion( final ArrayList<GATKSAMRecord> reads, final int refStart, final int refStop ) {
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>( reads.size() );
public static List<GATKSAMRecord> hardClipToRegion( final List<GATKSAMRecord> reads, final int refStart, final int refStop ) {
final List<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>( reads.size() );
for( final GATKSAMRecord read : reads ) {
final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop );
if( !clippedRead.isEmpty() ) {