From 8e3f72ced9d6f914d68db53e75415e5e4977bc0a Mon Sep 17 00:00:00 2001 From: chartl Date: Wed, 21 Oct 2009 19:04:51 +0000 Subject: [PATCH] BTTJ - Code refactoring (major) - passes integration test VariantEvalWalker - whoops, wrote PooledGenotypeAnalysis rather than PooledAnalysis, now passes tests again - PooledFrequencyAnalysis - don't bother initializing matrices if this isn't a pool git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1895 348d0f76-0448-11de-a6fe-93d51630548a --- ...seTransitionTableCalculatorJavaWalker.java | 326 ++++++++---------- .../varianteval/PooledFrequencyAnalysis.java | 18 +- .../varianteval/VariantEvalWalker.java | 2 +- 3 files changed, 152 insertions(+), 194 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index 8e58d5cd3..37b4c807e 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.By; -import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.genotyper.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -25,7 +23,8 @@ import net.sf.samtools.SAMRecord; * To change this template use File | Settings | File Templates. */ @By(DataSource.REFERENCE) -public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker{ +@Reference(window=@Window(start=-3,stop=3)) +public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker,Set> { @Argument(fullName="usePreviousBases", doc="Use previous bases of the reference as part of the calculation, uses the specified number, defaults to 0", required=false) int nPreviousBases = 0; @Argument(fullName="useSecondaryBase",doc="Use the secondary base of a read as part of the calculation", required=false) @@ -46,81 +45,121 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker conditionalTables; + // private ReferenceContextWindow refWindow; + // private Set conditionalTables; + private List usePreviousBases; + private List previousBaseLoci; public void initialize() { + if ( nPreviousBases > 3 ) { + throw new StingException("You have opted to use a number of previous bases in excess of 3. In order to do this you must change the reference window size in the walker itself."); + } ug = new UnifiedGenotyper(); ug.initialize(); - refWindow = new ReferenceContextWindow(nPreviousBases); - conditionalTables = new TreeSet(); + // refWindow = new ReferenceContextWindow(nPreviousBases); + usePreviousBases = new ArrayList(); + previousBaseLoci = new ArrayList(); + } - public Integer reduceInit() { - return 0; + public Set reduceInit() { + return new TreeSet(); } - // todo -- emit table from map and reduce just sums - public ReferenceContextWindow map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { - // todo -- change to use windowed reference itself - // todo -- move up calculations into map not reduce + public Set map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(),context); - refWindow.update(ref,pileup,baseIsUsable(tracker,ref,pileup,context)); + Set newCounts = null; + //System.out.println(pileup.getBases()); + if ( baseIsUsable(tracker, ref, pileup, context) ) { + //System.out.println("Pileup will be used"); + if ( previousLociCanBeUsed(usePreviousBases,previousBaseLoci,context.getLocation()) ) { + for ( int r = 0; r < pileup.getReads().size(); r ++ ) { + if ( useRead ( pileup.getReads().get(r), pileup.getOffsets().get(r), ref ) ) { + newCounts = updateTables( newCounts, pileup.getReads().get(r), pileup.getOffsets().get(r), ref, pileup ); + } + } + } else { + updatePreviousBases(usePreviousBases,true,previousBaseLoci,context.getLocation() ); + } + } else { + updatePreviousBases( usePreviousBases,false,previousBaseLoci,context.getLocation() ); + } - return refWindow; + return newCounts; } - public Integer reduce ( ReferenceContextWindow map, Integer prevReduce ) { - if ( map.isValidWindow() ) { - prevReduce++; - List reads = map.getPileup().getReads(); - List offsets = map.getPileup().getOffsets(); - ReferenceContext ref = map.getMiddleReferenceContext(); - // ReadBackedPileup pileup = splitPileupNonref(map.getPileup()); - // List reads = pileup.getReads(); - // List offsets = pileup.getOffsets(); - - - // System.out.println("Base and read are usable:"); - // System.out.println("Num Mismatches: "+countMismatches(map.getPileup())); - // System.out.println("Ref: "+map.getMiddleReferenceContext().getBase()+" Pileup ref: "+map.getPileup().getRef()); - // System.out.println("Pileup: "+map.getPileup().getBases()); - - for ( int r = 0; r < reads.size(); r ++ ) { - if ( Character.toUpperCase(reads.get(r).getReadBases()[offsets.get(r)]) != ref.getBase() ) { - // System.out.println("Examining read. Mapping quality is: " + reads.get(r).getMappingQuality()); - // System.out.println("Base quality is: "+reads.get(r).getBaseQualities()[offsets.get(r)]); - // System.out.println("Read base is: "+ (char) reads.get(r).getReadBases()[offsets.get(r)]); - // System.out.println("Pileup is: " + map.getPileup().getBases()); - // if ( ! useRead(reads.get(r),offsets.get(r),ref) ) { - // System.out.println("Read will not be used."); - //} + public Set reduce ( Set map, Set reduce ) { + if ( map != null && ! map.isEmpty() ) { + for ( BaseTransitionTable t : map ) { + boolean add = true; + for ( BaseTransitionTable r : reduce ) { + if ( r.conditionsMatch(t) ) { + r.incorporateTable(t); + add = false; + break; + } } - if ( useRead( reads.get(r), offsets.get(r), ref ) ) { - updateTables( reads.get(r), offsets.get(r), map ); - // prevReduce++; + if ( add ) { + reduce.add(t); } } } - - return prevReduce; + // System.out.println("Reduce: size of TransitionTable set is " + reduce.size() + " -- size of Map: " + (map != null ? map.size() : "null")); + return reduce; } - public void onTraversalDone( Integer numValidObservedMismatchingReads ) { - logger.info(numValidObservedMismatchingReads); + public void onTraversalDone( Set conditionalTables ) { out.print(createHeaderFromConditions()); - for ( BaseTransitionTable t : conditionalTables ) - t.print(out); + for ( BaseTransitionTable t : conditionalTables ) + t.print(out); } - public void updateTables ( SAMRecord read, int offset, ReferenceContextWindow map ) { - List readConditions = buildConditions(read,offset,map); + public void updatePreviousBases(List usage, boolean canUse, List loci, GenomeLoc locus) { + // early return + if ( nPreviousBases < 1 ) { + return; + } + + if ( usage.size() <= nPreviousBases ) { + usage.add(canUse); + loci.add(locus); + } else { + usage.remove(0); + usage.add(canUse); + loci.remove(0); + loci.add(locus); + } + } + + public boolean previousLociCanBeUsed( List canUse, List loci, GenomeLoc locus ) { + if ( nPreviousBases < 1 ) { + return true; + } + + boolean use = true; + for ( boolean b : canUse ) { + use = use && b; + } + + if ( use ) { + use = use && ( loci.get(0).distance(locus) == 1 ); // truly is PREVIOUS base + } + + return use; + } + + public Set updateTables ( Set tables, SAMRecord read, int offset, ReferenceContext ref, ReadBackedPileup pileup ) { + List readConditions = buildConditions(read,offset,ref, pileup); + + if ( tables == null ) { + tables = new TreeSet(); + } boolean createNewTable = true; - for ( BaseTransitionTable t : conditionalTables ) { + for ( BaseTransitionTable t : tables ) { if ( t.conditionsMatch(readConditions) ) { - updateTable(t,read,offset,map); + updateTable(t,read,offset,ref); createNewTable = false; break; } @@ -128,16 +167,19 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker buildConditions( SAMRecord read, int offset, ReferenceContextWindow map ) { + public List buildConditions( SAMRecord read, int offset, ReferenceContext ref, ReadBackedPileup pileup ) { ArrayList conditions = new ArrayList(); if ( nPreviousBases > 0 ) { - if ( ! read.getReadNegativeStrandFlag() ) - conditions.add(map.getForwardRefString()); - else - conditions.add(map.getReverseRefString()); + conditions.add(buildRefString(ref,nPreviousBases, ! read.getReadNegativeStrandFlag())); + } if ( useSecondaryBase ) { @@ -177,7 +217,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker conditions; @@ -283,7 +335,10 @@ class BaseTransitionTable implements Comparable { public boolean conditionsMatch(Object obj) { if ( obj == null ) { return false; + } else if ( obj instanceof BaseTransitionTable ) { + return ((BaseTransitionTable) obj).conditionsMatch(conditions); } else if ( ! (obj instanceof List) ) { + return false; } else if ( this.numConditions() != ((List)obj).size() ){ return false; @@ -300,6 +355,7 @@ class BaseTransitionTable implements Comparable { } } + public int compareTo(Object obj) { if ( ! ( obj instanceof BaseTransitionTable ) ) { return -1; @@ -311,11 +367,13 @@ class BaseTransitionTable implements Comparable { if ( this.numConditions() == t.numConditions() ) { ListIterator thisIter = this.conditions.listIterator(); ListIterator thatIter = t.conditions.listIterator(); - while ( thisIter.next() == thatIter.next() ) { - // todo -- compareTo - // do nothing - } - return thisIter.previous().compareTo(thatIter.previous()); + int g = 0; + do { + g = thisIter.next().compareTo(thatIter.next()); + } while ( g == 0 ); + + return g; + } else { return (this.numConditions() > t.numConditions() ) ? 1 : -1; } @@ -325,18 +383,18 @@ class BaseTransitionTable implements Comparable { } public void print( PrintStream out ) { - + StringBuilder s = new StringBuilder(); for ( char observedBase : BaseUtils.BASES ) { for ( char refBase : BaseUtils.BASES ) { - // todo -- String.format please - // todo -- in these situations use StringBuilder - String outString = observedBase+"\t"+refBase; + s.append(String.format("%s\t%s",observedBase,refBase)); for ( Comparable c : conditions ) { - outString = outString+"\t"+c.toString(); + s.append(String.format("\t%s",c.toString())); } - out.printf("%s\t%d%n",outString,table[BaseUtils.simpleBaseToBaseIndex(observedBase)][BaseUtils.simpleBaseToBaseIndex(refBase)]); + s.append(String.format("\t%d%n", table[BaseUtils.simpleBaseToBaseIndex(observedBase)][BaseUtils.simpleBaseToBaseIndex(refBase)])); } } + + out.print(s.toString()); } public void update(char observedBase, char refBase ) { @@ -362,118 +420,16 @@ class BaseTransitionTable implements Comparable { return conditions.listIterator(); } -} - - - -class ReferenceContextWindow { - - protected int windowSize; - protected int nPrevBases; - protected LinkedList prevAlignments; - protected LinkedList prevRefs; - protected LinkedList usePrevious; - protected boolean initialized; - - public ReferenceContextWindow( int nPrevBases ) { - windowSize = 2*nPrevBases + 1; - this.nPrevBases = nPrevBases; - prevAlignments = new LinkedList(); - prevRefs = new LinkedList(); - usePrevious = new LinkedList(); - initialized = false; - } - - public void update( ReferenceContext ref, ReadBackedPileup pileup, boolean useLocus ) { - if ( ! initialized ) { - prevAlignments.add(pileup); - prevRefs.add(ref); - usePrevious.add(useLocus); - if ( prevAlignments.size() == windowSize ) { - initialized = true; - } - } else { - prevAlignments.removeFirst(); - prevRefs.removeFirst(); - usePrevious.removeFirst(); - prevAlignments.add(pileup); - prevRefs.add(ref); - usePrevious.add(useLocus); - } - } - - public String getReferenceString() { - String ref = ""; - for ( ReferenceContext c : prevRefs ) { - ref = ref + c.getBase(); - } - - return ref; - } - - public String getForwardRefString() { - String ref = ""; - for ( ReferenceContext c : prevRefs.subList(0,nPrevBases) ) { - ref = ref + c.getBase(); - } - - return ref; - } - - public String getReverseRefString() { // todo -- make sure we want to flip this done (yes we do) - String ref = ""; - for ( int base = prevRefs.size()-1; base > nPrevBases; base -- ) { - ref = ref + prevRefs.get(base).getBase(); - } - - return BaseUtils.simpleComplement(ref); - } - - public ReadBackedPileup getPileup() { - // because lists are 0-indexed, this returns the alignments - // to the middle base in the window. - return prevAlignments.get(nPrevBases); - } - - public ReferenceContext getMiddleReferenceContext() { - return prevRefs.get(nPrevBases); - } - - public boolean isValidWindow() { - boolean valid; - if ( ! initialized ) { - valid = false; - } else { - valid = true; - // check if everything is confident ref - for ( Boolean b : usePrevious ) { - if ( !b ) { - valid = false; - break; - } - } - // if still valid, check distances - if ( valid ) { - ListIterator iter = prevRefs.listIterator(); - ReferenceContext prev = iter.next(); - while ( iter.hasNext() ) { - ReferenceContext cur = iter.next(); - - if ( cur.getLocus().distance(prev.getLocus()) > 1 ) { - valid = false; - break; - } - - prev = cur; - } + public void incorporateTable(BaseTransitionTable t) { + for ( int i = 0; i < BaseUtils.BASES.length; i ++ ) { + for ( int j = 0; j < BaseUtils.BASES.length; j ++ ) { + table[i][j] += t.observationsOf(i,j); } } - - return valid; } - public int getWindowSize() { - return windowSize; + public int observationsOf( int observedBaseIndex, int referenceBaseIndex ) { + return table[observedBaseIndex][referenceBaseIndex]; } -} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java index 85711f124..d1d7238c5 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledFrequencyAnalysis.java @@ -26,15 +26,17 @@ public class PooledFrequencyAnalysis extends BasicPoolVariantAnalysis implements public PooledFrequencyAnalysis(int poolSize, String knownDBSNPName ) { super("Pooled_Frequency_Analysis",poolSize); - coverageAnalysisByFrequency = new VariantDBCoverage[getNumberOfAllelesInPool()+1]; - variantCounterByFrequency = new VariantCounter[getNumberOfAllelesInPool()+1]; - transitionTransversionByFrequency = new TransitionTranversionAnalysis[getNumberOfAllelesInPool()+1]; - for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) { - coverageAnalysisByFrequency[j] = new VariantDBCoverage(knownDBSNPName); - variantCounterByFrequency[j] = new VariantCounter(); - transitionTransversionByFrequency[j] = new TransitionTranversionAnalysis(); + if ( poolSize > 0 ) { + coverageAnalysisByFrequency = new VariantDBCoverage[getNumberOfAllelesInPool()+1]; + variantCounterByFrequency = new VariantCounter[getNumberOfAllelesInPool()+1]; + transitionTransversionByFrequency = new TransitionTranversionAnalysis[getNumberOfAllelesInPool()+1]; + for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) { + coverageAnalysisByFrequency[j] = new VariantDBCoverage(knownDBSNPName); + variantCounterByFrequency[j] = new VariantCounter(); + transitionTransversionByFrequency[j] = new TransitionTranversionAnalysis(); + } } - } + } public void initialize(VariantEvalWalker master, PrintStream out1, PrintStream out2, String name) { super.initialize(master,out1,out2,name); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 146466df7..b01fa1a5c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -149,7 +149,7 @@ public class VariantEvalWalker extends RodWalker { VariantAnalysis analysis = iter.next(); boolean disableForGenotyping = evalContainsGenotypes && !(analysis instanceof GenotypeAnalysis); boolean disableForPopulation = !evalContainsGenotypes && !(analysis instanceof PopulationAnalysis); - boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PooledGenotypeConcordance); + boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PoolAnalysis); boolean disable = disableForGenotyping | disableForPopulation | disableForPools; String causeName = disableForGenotyping ? "population" : (disableForPopulation ? "genotype" : (disableForPools ? "pool" : null)); if (disable) {