From bb29bd7df75b258cc122cbff0968bb5b8da46c3a Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 30 Jan 2013 17:09:27 -0500 Subject: [PATCH] Use base List and Map types in the HaplotypeCaller when possible. --- .../haplotypecaller/GenotypingEngine.java | 44 +++++------ .../haplotypecaller/HaplotypeCaller.java | 26 +++---- .../LikelihoodCalculationEngine.java | 73 ++----------------- .../haplotypecaller/LocalAssemblyEngine.java | 4 +- .../SimpleDeBruijnAssembler.java | 18 ++--- .../broadinstitute/sting/utils/Haplotype.java | 6 +- .../utils/activeregion/ActiveRegion.java | 4 +- .../sting/utils/clipping/ReadClipper.java | 4 +- 8 files changed, 60 insertions(+), 119 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index d254f5b8b..8b789791d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -84,7 +84,7 @@ public class GenotypingEngine { final List haplotypes, final List samples, final Map haplotypeReadMap, - final Map> perSampleFilteredReadList, + final Map> perSampleFilteredReadList, final byte[] ref, final GenomeLoc refLoc, final GenomeLoc activeRegionWindow, @@ -124,12 +124,12 @@ public class GenotypingEngine { // Walk along each position in the key set and create each event to be outputted for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region - final ArrayList eventsAtThisLoc = new ArrayList(); // the overlapping events to merge into a common reference view - final ArrayList priorityList = new ArrayList(); // used to merge overlapping events into common reference view + final List eventsAtThisLoc = new ArrayList(); // the overlapping events to merge into a common reference view + final List priorityList = new ArrayList(); // used to merge overlapping events into common reference view if( !in_GGA_mode ) { for( final Haplotype h : haplotypes ) { - final HashMap eventMap = h.getEventMap(); + final Map eventMap = h.getEventMap(); final VariantContext vc = eventMap.get(loc); if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) { eventsAtThisLoc.add(vc); @@ -142,7 +142,7 @@ public class GenotypingEngine { if( compVC.getStart() == loc ) { int alleleCount = 0; for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - ArrayList alleleSet = new ArrayList(2); + List alleleSet = new ArrayList(2); alleleSet.add(compVC.getReference()); alleleSet.add(compAltAllele); final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount; @@ -180,7 +180,7 @@ public class GenotypingEngine { if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles."); } - final HashMap mergeMap = new HashMap(); + final Map mergeMap = new HashMap(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) { mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function @@ -232,7 +232,7 @@ public class GenotypingEngine { return genotypes; } - private void validatePriorityList( final ArrayList priorityList, final ArrayList eventsAtThisLoc ) { + private void validatePriorityList( final List priorityList, final List eventsAtThisLoc ) { for( final VariantContext vc : eventsAtThisLoc ) { if( !priorityList.contains(vc.getSource()) ) { throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles."); @@ -251,7 +251,7 @@ public class GenotypingEngine { private static Map filterToOnlyOverlappingReads( final GenomeLocParser parser, final Map perSampleReadMap, - final Map> perSampleFilteredReadList, + final Map> perSampleFilteredReadList, final VariantContext call ) { final Map returnMap = new HashMap(); @@ -284,7 +284,7 @@ public class GenotypingEngine { } protected static void cleanUpSymbolicUnassembledEvents( final List haplotypes ) { - final ArrayList haplotypesToRemove = new ArrayList(); + final List haplotypesToRemove = new ArrayList(); for( final Haplotype h : haplotypes ) { for( final VariantContext vc : h.getEventMap().values() ) { if( vc.isSymbolic() ) { @@ -407,7 +407,7 @@ public class GenotypingEngine { // remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event for( final Haplotype h : haplotypes ) { - final HashMap eventMap = h.getEventMap(); + final Map eventMap = h.getEventMap(); if( eventMap.containsKey(thisStart) && eventMap.containsKey(nextStart) ) { eventMap.remove(thisStart); eventMap.remove(nextStart); @@ -418,7 +418,7 @@ public class GenotypingEngine { boolean containsStart = false; boolean containsNext = false; for( final Haplotype h : haplotypes ) { - final HashMap eventMap = h.getEventMap(); + final Map eventMap = h.getEventMap(); if( eventMap.containsKey(thisStart) ) { containsStart = true; } if( eventMap.containsKey(nextStart) ) { containsNext = true; } } @@ -457,7 +457,7 @@ public class GenotypingEngine { if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } } - final ArrayList mergedAlleles = new ArrayList(); + final List mergedAlleles = new ArrayList(); mergedAlleles.add( Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true ) ); mergedAlleles.add( Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false ) ); return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); @@ -492,10 +492,10 @@ public class GenotypingEngine { eventMapper.put(new Event(vc), new ArrayList()); } - final ArrayList undeterminedHaplotypes = new ArrayList(haplotypes.size()); + final List undeterminedHaplotypes = new ArrayList(haplotypes.size()); for( final Haplotype h : haplotypes ) { if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) { - final ArrayList alleles = new ArrayList(2); + final List alleles = new ArrayList(2); alleles.add(h.getArtificialRefAllele()); alleles.add(h.getArtificialAltAllele()); final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype") @@ -572,13 +572,13 @@ public class GenotypingEngine { } @Ensures({"result.size() == haplotypeAllelesForSample.size()"}) - protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final ArrayList> alleleMapper, final ArrayList haplotypes ) { + protected static List findEventAllelesInSample( final List eventAlleles, final List haplotypeAlleles, final List haplotypeAllelesForSample, final List> alleleMapper, final List haplotypes ) { if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; } - final ArrayList eventAllelesForSample = new ArrayList(); + final List eventAllelesForSample = new ArrayList(); for( final Allele a : haplotypeAllelesForSample ) { final Haplotype haplotype = haplotypes.get(haplotypeAlleles.indexOf(a)); for( int iii = 0; iii < alleleMapper.size(); iii++ ) { - final ArrayList mappedHaplotypes = alleleMapper.get(iii); + final List mappedHaplotypes = alleleMapper.get(iii); if( mappedHaplotypes.contains(haplotype) ) { eventAllelesForSample.add(eventAlleles.get(iii)); break; @@ -597,8 +597,8 @@ public class GenotypingEngine { return false; } - protected static HashMap generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { - final HashMap vcs = new HashMap(); + protected static Map generateVCsFromAlignment( final Haplotype haplotype, final int alignmentStartHapwrtRef, final Cigar cigar, final byte[] ref, final byte[] alignment, final GenomeLoc refLoc, final String sourceNameToAdd ) { + final Map vcs = new HashMap(); int refPos = alignmentStartHapwrtRef; if( refPos < 0 ) { return null; } // Protection against SW failures @@ -609,7 +609,7 @@ public class GenotypingEngine { switch( ce.getOperator() ) { case I: { - final ArrayList insertionAlleles = new ArrayList(); + final List insertionAlleles = new ArrayList(); final int insertionStart = refLoc.getStart() + refPos - 1; final byte refByte = ref[refPos-1]; if( BaseUtils.isRegularBase(refByte) ) { @@ -639,7 +639,7 @@ public class GenotypingEngine { case D: { final byte[] deletionBases = Arrays.copyOfRange( ref, refPos - 1, refPos + elementLength ); // add padding base - final ArrayList deletionAlleles = new ArrayList(); + final List deletionAlleles = new ArrayList(); final int deletionStart = refLoc.getStart() + refPos - 1; // BUGBUG: how often does this symbolic deletion allele case happen? //if( haplotype != null && ( (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 >= deletionStart && haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() + elementLength - 1 < deletionStart + elementLength) @@ -667,7 +667,7 @@ public class GenotypingEngine { final byte altByte = alignment[alignmentPos]; if( refByte != altByte ) { // SNP! if( BaseUtils.isRegularBase(refByte) && BaseUtils.isRegularBase(altByte) ) { - final ArrayList snpAlleles = new ArrayList(); + final List snpAlleles = new ArrayList(); snpAlleles.add( Allele.create( refByte, true ) ); snpAlleles.add( Allele.create( altByte, false ) ); vcs.put(refLoc.getStart() + refPos, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), refLoc.getStart() + refPos, refLoc.getStart() + refPos, snpAlleles).make()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 6cfbc3830..027c62e68 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -274,10 +274,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // bases with quality less than or equal to this value are trimmed off the tails of the reads private static final byte MIN_TAIL_QUALITY = 20; - private ArrayList samplesList = new ArrayList(); + private List samplesList = new ArrayList(); private final static double LOG_ONE_HALF = -Math.log10(2.0); private final static double LOG_ONE_THIRD = -Math.log10(3.0); - private final ArrayList allelesToGenotype = new ArrayList(); + private final List allelesToGenotype = new ArrayList(); private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file private final static Allele FAKE_ALT_ALLELE = Allele.create("", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file @@ -429,7 +429,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() ); } - final ArrayList alleles = new ArrayList(); + final List alleles = new ArrayList(); alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); @@ -450,7 +450,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // we're benchmarking ART and/or the active region determination code in the HC, just leave without doing any work return 1; - final ArrayList activeAllelesToGenotype = new ArrayList(); + final List activeAllelesToGenotype = new ArrayList(); if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : allelesToGenotype ) { @@ -472,7 +472,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING); - final ArrayList haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype ); + final List haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype ); if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do! activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region @@ -484,10 +484,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem // evaluate each sample's reads against all haplotypes final Map stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) ); - final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); + final Map> perSampleFilteredReadList = splitReadsBySample( filteredReads ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? + final List bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap, maxNumHaplotypesInPopulation ) : haplotypes ); for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine, @@ -558,7 +558,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } - final ArrayList finalizedReadList = new ArrayList(); + final List finalizedReadList = new ArrayList(); final FragmentCollection fragmentCollection = FragmentUtils.create( activeRegion.getReads() ); activeRegion.clearReads(); @@ -569,7 +569,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } // Loop through the reads hard clipping the adaptor and low quality tails - final ArrayList readsToUse = new ArrayList(finalizedReadList.size()); + final List readsToUse = new ArrayList(finalizedReadList.size()); for( final GATKSAMRecord myRead : finalizedReadList ) { final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) ); if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) { @@ -583,7 +583,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - final ArrayList readsToRemove = new ArrayList(); + final List readsToRemove = new ArrayList(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); @@ -599,10 +599,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReadSpanLoc().getContig(), padLeft, padRight); } - private HashMap> splitReadsBySample( final List reads ) { - final HashMap> returnMap = new HashMap>(); + private Map> splitReadsBySample( final List reads ) { + final Map> returnMap = new HashMap>(); for( final String sample : samplesList) { - ArrayList readList = returnMap.get( sample ); + List readList = returnMap.get( sample ); if( readList == null ) { readList = new ArrayList(); returnMap.put(sample, readList); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 57e071189..655b3e529 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -91,11 +91,11 @@ public class LikelihoodCalculationEngine { DEBUG = debug; } - public Map computeReadLikelihoods( final ArrayList haplotypes, final HashMap> perSampleReadList ) { + public Map computeReadLikelihoods( final List haplotypes, final Map> perSampleReadList ) { final Map stratifiedReadMap = new HashMap(); int X_METRIC_LENGTH = 0; - for( final Map.Entry> sample : perSampleReadList.entrySet() ) { + for( final Map.Entry> sample : perSampleReadList.entrySet() ) { for( final GATKSAMRecord read : sample.getValue() ) { final int readLength = read.getReadLength(); if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; } @@ -115,7 +115,7 @@ public class LikelihoodCalculationEngine { pairHMM.initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); // for each sample's reads - for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { + for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } // evaluate the likelihood of the reads given those haplotypes stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue())); @@ -123,7 +123,7 @@ public class LikelihoodCalculationEngine { return stratifiedReadMap; } - private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList haplotypes, final ArrayList reads) { + private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) final int numHaplotypes = haplotypes.size(); final Map alleleVersions = new HashMap(numHaplotypes); @@ -235,72 +235,13 @@ public class LikelihoodCalculationEngine { return likelihoodMatrix; } - /* @Requires({"haplotypes.size() > 0"}) @Ensures({"result.size() <= haplotypes.size()"}) - public ArrayList selectBestHaplotypes( final ArrayList haplotypes ) { - - // BUGBUG: This function needs a lot of work. Need to use 4-gamete test or Tajima's D to decide to break up events into separate pieces for genotyping - - final int numHaplotypes = haplotypes.size(); - final Set sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples - final ArrayList bestHaplotypesIndexList = new ArrayList(); - bestHaplotypesIndexList.add(0); // always start with the reference haplotype - final double[][][] haplotypeLikelihoodMatrix = new double[sampleKeySet.size()][numHaplotypes][numHaplotypes]; - - int sampleCount = 0; - for( final String sample : sampleKeySet ) { - haplotypeLikelihoodMatrix[sampleCount++] = computeDiploidHaplotypeLikelihoods( haplotypes, sample ); - } - - int hap1 = 0; - int hap2 = 0; - int chosenSample = 0; - //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 15, sampleKeySet.size() * 2 + 1 ); - while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { - double maxElement = Double.NEGATIVE_INFINITY; - for( int kkk = 0; kkk < sampleCount; kkk++ ) { - for( int iii = 0; iii < numHaplotypes; iii++ ) { - for( int jjj = 0; jjj <= iii; jjj++ ) { - if( haplotypeLikelihoodMatrix[kkk][iii][jjj] > maxElement ) { - maxElement = haplotypeLikelihoodMatrix[kkk][iii][jjj]; - hap1 = iii; - hap2 = jjj; - chosenSample = kkk; - } - } - } - } - if( maxElement == Double.NEGATIVE_INFINITY ) { break; } - - if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); } - if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); } - - for( int iii = 0; iii < numHaplotypes; iii++ ) { - for( int jjj = 0; jjj <= iii; jjj++ ) { - haplotypeLikelihoodMatrix[chosenSample][iii][jjj] = Double.NEGATIVE_INFINITY; - } - } - } - - if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); } - - final ArrayList bestHaplotypes = new ArrayList(); - for( final int hIndex : bestHaplotypesIndexList ) { - bestHaplotypes.add( haplotypes.get(hIndex) ); - } - return bestHaplotypes; - } - */ - - @Requires({"haplotypes.size() > 0"}) - @Ensures({"result.size() <= haplotypes.size()"}) - public ArrayList selectBestHaplotypes( final ArrayList haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { + public List selectBestHaplotypes( final List haplotypes, final Map stratifiedReadMap, final int maxNumHaplotypesInPopulation ) { final int numHaplotypes = haplotypes.size(); final Set sampleKeySet = stratifiedReadMap.keySet(); - final ArrayList bestHaplotypesIndexList = new ArrayList(); + final List bestHaplotypesIndexList = new ArrayList(); bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype final List haplotypesAsAlleles = new ArrayList(); for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); } @@ -332,7 +273,7 @@ public class LikelihoodCalculationEngine { if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); } - final ArrayList bestHaplotypes = new ArrayList(); + final List bestHaplotypes = new ArrayList(); for( final int hIndex : bestHaplotypesIndexList ) { bestHaplotypes.add( haplotypes.get(hIndex) ); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index b0e340dc2..3efa342b1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -67,5 +67,5 @@ public abstract class LocalAssemblyEngine { protected LocalAssemblyEngine() { } - public abstract ArrayList runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, ArrayList activeAllelesToGenotype); + public abstract List runLocalAssembly(ActiveRegion activeRegion, Haplotype refHaplotype, byte[] fullReferenceWithPadding, GenomeLoc refLoc, int PRUNE_FACTOR, List activeAllelesToGenotype); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index e16994fa4..a9768557d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -84,7 +84,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { private final boolean DEBUG; private final PrintStream GRAPH_WRITER; - private final ArrayList> graphs = new ArrayList>(); + private final List> graphs = new ArrayList>(); private final int MIN_KMER; private int PRUNE_FACTOR = 2; @@ -96,7 +96,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { MIN_KMER = minKmer; } - public ArrayList runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList activeAllelesToGenotype ) { + public List runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final List activeAllelesToGenotype ) { this.PRUNE_FACTOR = PRUNE_FACTOR; // create the graphs @@ -168,7 +168,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } protected static void pruneGraph( final DefaultDirectedGraph graph, final int pruneFactor ) { - final ArrayList edgesToRemove = new ArrayList(); + final List edgesToRemove = new ArrayList(); for( final DeBruijnEdge e : graph.edgeSet() ) { if( e.getMultiplicity() <= pruneFactor && !e.getIsRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor edgesToRemove.add(e); @@ -177,7 +177,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { graph.removeAllEdges(edgesToRemove); // Run through the graph and clean up singular orphaned nodes - final ArrayList verticesToRemove = new ArrayList(); + final List verticesToRemove = new ArrayList(); for( final DeBruijnVertex v : graph.vertexSet() ) { if( graph.inDegreeOf(v) == 0 && graph.outDegreeOf(v) == 0 ) { verticesToRemove.add(v); @@ -187,7 +187,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } protected static void eliminateNonRefPaths( final DefaultDirectedGraph graph ) { - final ArrayList verticesToRemove = new ArrayList(); + final List verticesToRemove = new ArrayList(); boolean done = false; while( !done ) { done = true; @@ -313,8 +313,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { } @Ensures({"result.contains(refHaplotype)"}) - private ArrayList findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final ArrayList activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) { - final ArrayList returnHaplotypes = new ArrayList(); + private List findBestPaths( final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final List activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) { + final List returnHaplotypes = new ArrayList(); // add the reference haplotype separately from all the others final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( fullReferenceWithPadding, refHaplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); @@ -343,7 +343,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { // for GGA mode, add the desired allele into the haplotype if it isn't already present if( !activeAllelesToGenotype.isEmpty() ) { - final HashMap eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place + final Map eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); @@ -378,7 +378,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { return returnHaplotypes; } - private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { + private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final List haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) { if( haplotype == null ) { return false; } final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index baab1f5fa..6e8a412c3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -40,7 +40,7 @@ import java.util.*; public class Haplotype extends Allele { private GenomeLoc genomeLocation = null; - private HashMap eventMap = null; + private Map eventMap = null; private Cigar cigar; private int alignmentStartHapwrtRef; public int leftBreakPoint = 0; @@ -81,11 +81,11 @@ public class Haplotype extends Allele { return Arrays.hashCode(getBases()); } - public HashMap getEventMap() { + public Map getEventMap() { return eventMap; } - public void setEventMap( final HashMap eventMap ) { + public void setEventMap( final Map eventMap ) { this.eventMap = eventMap; } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 13add5e7d..dd6735d89 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -67,7 +67,7 @@ public class ActiveRegion implements HasGenomeLocation { * The reads included in this active region. May be empty upon creation, and expand / contract * as reads are added or removed from this region. */ - private final ArrayList reads = new ArrayList(); + private final List reads = new ArrayList(); /** * An ordered list (by genomic coordinate) of the ActivityProfileStates that went @@ -355,7 +355,7 @@ public class ActiveRegion implements HasGenomeLocation { * read coordinates. */ public void hardClipToActiveRegion() { - final ArrayList clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() ); + final List clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() ); ReadUtils.sortReadsByCoordinate(clippedReads); clearReads(); addAll(clippedReads); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index 87526545d..45dd55af7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -362,8 +362,8 @@ public class ReadClipper { return GATKSAMRecord.emptyRead(read); } - public static ArrayList hardClipToRegion( final ArrayList reads, final int refStart, final int refStop ) { - final ArrayList returnList = new ArrayList( reads.size() ); + public static List hardClipToRegion( final List reads, final int refStart, final int refStop ) { + final List returnList = new ArrayList( reads.size() ); for( final GATKSAMRecord read : reads ) { final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop ); if( !clippedRead.isEmpty() ) {