diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/IterableIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/IterableIterator.java new file mode 100755 index 000000000..c0c5ebf34 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/iterators/IterableIterator.java @@ -0,0 +1,15 @@ +package org.broadinstitute.sting.gatk.iterators; + +import java.util.Iterator; + +public class IterableIterator implements Iterable { + private Iterator iter; + + public IterableIterator(Iterator iter) { + this.iter = iter; + } + + public Iterator iterator() { + return iter; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java b/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java index 6fccc683e..686185811 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SAMPileupRecord.java @@ -28,7 +28,7 @@ import net.sf.picard.reference.ReferenceSequenceFileWalker; */ -class SAMPileupRecord implements Genotype, GenotypeList, Pileup { +public class SAMPileupRecord implements Genotype, GenotypeList { private static final int NO_VARIANT = -1; private static final int SNP_VARIANT = 0; private static final int INSERTION_VARIANT = 1; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 85cbffc85..76608cedd 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.Utils; import java.util.ArrayList; @@ -59,9 +59,6 @@ public class PileupWalker extends LocusWalker implements TreeR @Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false) public boolean qualsAsInts = false; - @Argument(fullName="extended",shortName="ext",doc="extended",required=false) - public boolean EXTENDED = false; - @Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered") public boolean IGNORE_UNCOVERED_BASES = false; @@ -70,12 +67,7 @@ public class PileupWalker extends LocusWalker implements TreeR public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); - String bases = pileup.getBasesAsString(); - if ( bases.equals("") && !IGNORE_UNCOVERED_BASES ) { - bases = "***UNCOVERED_SITE***"; - } - String secondBasePileup = ""; if(shouldShowSecondaryBasePileup(pileup)) secondBasePileup = getSecondBasePileup(pileup); @@ -83,11 +75,6 @@ public class PileupWalker extends LocusWalker implements TreeR out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), secondBasePileup, rods); - if ( EXTENDED ) { - String probDists = pileup.getProbDistPileup(); - out.println(probDists); - } - return 1; } @@ -106,7 +93,7 @@ public class PileupWalker extends LocusWalker implements TreeR * @return True, if a secondary base pileup should always be shown. */ private boolean shouldShowSecondaryBasePileup( ReadBackedPileup pileup ) { - return ( pileup.getSecondaryBasePileup() != null || alwaysShowSecondBase ); + return ( pileup.hasSecondaryBases() || alwaysShowSecondBase ); } /** @@ -115,11 +102,10 @@ public class PileupWalker extends LocusWalker implements TreeR * @return String representation of the secondary base. */ private String getSecondBasePileup( ReadBackedPileup pileup ) { - String secondBasePileup = pileup.getSecondaryBasePileup(); - if( secondBasePileup != null ) - return " " + secondBasePileup; + if( pileup.hasSecondaryBases() ) + return " " + new String(pileup.getSecondaryBases()); else - return " " + Utils.dupString('N', pileup.getBasesAsString().length()); + return " " + Utils.dupString('N', pileup.size()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java deleted file mode 100755 index 375673f6a..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWithContextWalker.java +++ /dev/null @@ -1,116 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import net.sf.picard.reference.ReferenceSequence; - -import java.io.File; -import java.io.IOException; - -@Deprecated -public class PileupWithContextWalker extends LocusWalker implements TreeReducible { - @Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false) - public boolean alwaysShowSecondBase = false; - - @Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false) - public boolean qualsAsInts = false; - - @Argument(fullName="extended",shortName="ext",doc="extended",required=false) - public boolean EXTENDED = false; - - @Argument(fullName="flagUncoveredBases",shortName="fub",doc="Flag bases with zero coverage",required=false) - public boolean FLAG_UNCOVERED_BASES = true; - - @Argument(fullName="contextBases",shortName="cb",doc="How much context around the locus should we show?",required=false) - public int contextBases = 0; - - public IndexedFastaSequenceFile refSeq; - public ReferenceSequence contigSeq = null; - public String contig = null; - - public void initialize() { - File refFile = this.getToolkit().getArguments().referenceFile; - - try { - refSeq = new IndexedFastaSequenceFile(refFile); - } catch (IOException e) { - refSeq = null; - } - } - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); - String bases = pileup.getBasesWithStrand(); - - if ( bases.equals("") && FLAG_UNCOVERED_BASES ) { - bases = "***UNCOVERED_SITE***"; - } - - StringBuilder extras = new StringBuilder(); - - String secondBasePileup = pileup.getSecondaryBasePileup(); - if ( secondBasePileup == null && alwaysShowSecondBase ) { - secondBasePileup = Utils.dupString('N', bases.length()); - } - if ( secondBasePileup != null ) extras.append(" ").append(secondBasePileup); - - if (contig == null || !context.getContig().equals(contig)) { - contig = context.getContig(); - contigSeq = refSeq.getSequence(contig); - } - - if (contextBases > 0 && refSeq != null) { - long startPos = context.getPosition() - contextBases <= 0 ? 1 : context.getPosition() - contextBases; - long stopPos = context.getPosition() + contextBases > contigSeq.length() ? contigSeq.length() : context.getPosition() + contextBases; - - ReferenceSequence prevRefSequence = refSeq.getSubsequenceAt(context.getContig(), startPos, context.getPosition()); - ReferenceSequence nextRefSequence = refSeq.getSubsequenceAt(context.getContig(), context.getPosition(), stopPos); - - extras.append(" prev=").append(new String(prevRefSequence.getBases())); - extras.append(" next=").append(new String(nextRefSequence.getBases())); - } - - String rodString = ""; - for ( ReferenceOrderedDatum datum : tracker.getAllRods() ) { - /* - if ( datum != null && ! (datum instanceof rodDbSNP)) { - rodString += datum.toSimpleString() + " "; - } - */ - - if ( datum != null && (datum instanceof RodGenotypeChipAsGFF)) { - rodString += "Hapmap: " + datum.toSimpleString() + " "; - } - } - - rodDbSNP dbsnp = (rodDbSNP)tracker.lookup("dbSNP", null); - if ( dbsnp != null ) - rodString += dbsnp.toMediumString(); - - if ( rodString != "" ) - rodString = "[ROD: " + rodString + "]"; - - out.printf("%s%s %s%n", pileup.getPileupWithStrandString(qualsAsInts), extras, rodString); - - if ( EXTENDED ) { - String probDists = pileup.getProbDistPileup(); - out.println(probDists); - } - - return 1; - } - - // Given result of map function - public Integer reduceInit() { return 0; } - public Integer reduce(Integer value, Integer sum) { - return treeReduce(sum,value); - } - public Integer treeReduce(Integer lhs, Integer rhs) { - return lhs + rhs; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java index 841df7b62..bd0a69493 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java @@ -4,13 +4,14 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodSAMPileup; +import org.broadinstitute.sting.gatk.refdata.SAMPileupRecord; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.Pileup; -import org.broadinstitute.sting.utils.BasicPileup; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.StingException; +import java.util.Arrays; + /** * Created by IntelliJ IDEA. * User: mdepristo @@ -24,19 +25,19 @@ public class ValidatingPileupWalker extends LocusWalker 0 ) { + double as_prop = ( ( double ) support ) / depth; - return new Pair ( depth, as_prop ); + return new Pair ( depth, as_prop ); + } else { + return null; + } } private Pair getProportionOfPrimaryNonrefBasesThatSupportAlt( ReferenceContext ref, ReadBackedPileup p, List genotypes ) { @@ -99,7 +106,7 @@ public class PrimaryBaseSecondaryBaseSymmetry implements VariantAnnotation{ return null; } - int [] baseCounts = p.getBasePileupAsCounts(); + int [] baseCounts = p.getBaseCounts(); int support = -1; int depth = 0; for ( char c : BaseUtils.BASES ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 4ea995c55..242bf3f60 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; import net.sf.samtools.SAMRecord; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java index 3bfd666d4..c37ccca7b 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ResidualQuality.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index c807cbec3..567ec80f7 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -17,7 +17,7 @@ import java.util.List; * Time: 11:25:51 AM * To change this template use File | Settings | File Templates. */ -public class SecondBaseSkew implements VariantAnnotation{ +public class SecondBaseSkew implements VariantAnnotation { private static double epsilon = Math.pow(10.0,-12); private static boolean USE_ZERO_QUALITY_READS = true; private static String KEY_NAME = "2b_Chi"; @@ -27,20 +27,20 @@ public class SecondBaseSkew implements VariantAnnotation{ public boolean useZeroQualityReads() { return USE_ZERO_QUALITY_READS; } - public Pair annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { - double chi_square; + public Pair annotate(ReferenceContext ref, ReadBackedPileup pileupWithDel, Variation variation, List genotypes) { + ReadBackedPileup pileup = pileupWithDel; // .getPileupWithoutDeletions(); Pair depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes); - if ( depthProp == null ) { - return null; - } else { - double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst()); - double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst()); - // System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst()); - // System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]); - chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE); - } - - return new Pair(KEY_NAME,String.format("%f", chi_square)); + if ( depthProp == null ) { + return null; + } else { + //System.out.printf("%d / %f%n", depthProp.getFirst(), depthProp.getSecond()); + double p_transformed = transform(depthProp.getSecond(), depthProp.getFirst()); + double expected_transformed = transform(proportionExpectations[0], depthProp.getFirst()); + // System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst()); + // System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]); + double chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE); + return new Pair(KEY_NAME,String.format("%f", chi_square)); + } } private double transform( double proportion, int depth ) { @@ -57,35 +57,51 @@ public class SecondBaseSkew implements VariantAnnotation{ // System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")"); return null; } + int variantDepth = 0; int variantsWithRefSecondBase = 0; - byte[] primaryPileup = p.getBases(); - String secondBasePileup = p.getSecondaryBasePileup(); - if ( secondBasePileup == null ) { - // System.out.println("Warning: Second base pileup is null at "+p.getLocation().toString()); - return null; - } else { - char [] secondaryPileup = secondBasePileup.toCharArray(); - //System.out.printf("primary=%d secondary=%d locus=%s%n", primaryPileup.length, secondaryPileup.length, p.getLocation().toString()); + for (PileupElement pile : p ) { + byte pbase = pile.getBase(); + byte sbase = pile.getSecondBase(); - for ( int i = 0; i < primaryPileup.length; i ++ ) { - if ( BaseUtils.basesAreEqual((byte) primaryPileup[i], (byte) snp) ) { - variantDepth++; - if ( BaseUtils.basesAreEqual((byte) secondaryPileup[i], (byte) p.getRef()) ) { - variantsWithRefSecondBase++; - } + if ( BaseUtils.isRegularBase((char)sbase) && BaseUtils.basesAreEqual(pbase, (byte) snp) ) { + variantDepth++; + if ( BaseUtils.basesAreEqual(sbase, (byte)p.getRef()) ) { + variantsWithRefSecondBase++; } } - - - double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth ); - - return new Pair(variantDepth+1, biasedProportion); } + if ( variantDepth > 0 ) { + //System.out.printf("%d %d %d %d%n", primaryPileup.length, secondaryPileup.length, variantDepth, variantsWithRefSecondBase ); + double biasedProportion = ( 1.0 + variantsWithRefSecondBase )/(1.0 + variantDepth ); + return new Pair(variantDepth+1, biasedProportion); + } else { + return null; + } } +// byte[] primaryPileup = p.getBases(); +// String secondBasePileup = p.getSecondaryBasePileup(); +// +// if ( secondBasePileup == null ) { +// // System.out.println("Warning: Second base pileup is null at "+p.getLocation().toString()); +// return null; +// } else { +// char [] secondaryPileup = secondBasePileup.toCharArray(); +// //System.out.printf("primary=%d secondary=%d locus=%s%n", primaryPileup.length, secondaryPileup.length, p.getLocation().toString()); +// +// for ( int i = 0; i < primaryPileup.length; i ++ ) { +// //System.out.printf("%d %c %c %c%n", i, primaryPileup[i], secondaryPileup[i], snp); +// if ( BaseUtils.basesAreEqual((byte) primaryPileup[i], (byte) snp) ) { +// variantDepth++; +// if ( BaseUtils.basesAreEqual((byte) secondaryPileup[i], (byte) p.getRef()) ) { +// variantsWithRefSecondBase++; +// } +// } +// } + private char getNonref(List genotypes, char ref) { for ( Genotype g : genotypes ) { if ( g.isVariant(ref) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index e879ac947..ae558b2ea 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java index cc00c41fc..ff294fdc5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 6a1c37354..010af5ce8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.Genotype; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 4e73fc0c4..c96f3dd90 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index d6f91efee..3452f33fe 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java index dfde2038e..ea961d0c2 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import static java.lang.Math.log10; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index a11dfdd32..1f740eef5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.*; @@ -108,7 +109,6 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // create the pileup AlignmentContext myContext = sampleContext.getContext(contextType); ReadBackedPileup pileup = new ReadBackedPileup(ref, myContext); - pileup.setIncludeDeletionsInPileupString(true); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index 119ace22e..93dbc1478 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index d4657c681..1d543f6c3 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.*; @@ -331,21 +332,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker reads = context.getReads(); List offsets = context.getOffsets(); - Pileup pileup = new ReadBackedPileup(ref, context); + //Pileup pileup = new ReadBackedPileupOld(ref, context); ans += String.format("%s ", context.getLocation()); ans += String.format("%c ", ref); @@ -797,7 +796,7 @@ public class MultiSampleCaller extends LocusWalker reads = context.getReads(); List offsets = context.getOffsets(); - Pileup pileup = new ReadBackedPileup(ref, context); + //Pileup pileup = new ReadBackedPileupOld(ref, context); ans += String.format("%s ", context.getLocation()); ans += String.format("%c ", ref); @@ -873,7 +872,7 @@ public class MultiSampleCaller2 extends LocusWalker constantOffset( List reads, int i ) { - List l = new ArrayList(reads.size()); - for ( SAMRecord read : reads ) { - l.add(i); - } - return l; - } - public static String basePileupAsString( List reads, List offsets ) { - return basePileupAsString( reads, offsets, false ); - } - - public static String basePileupAsString( List reads, List offsets, boolean includeDeletions ) { StringBuilder bases = new StringBuilder(); - for ( byte base : getBasesAsArrayList(reads, offsets, includeDeletions)) { + for ( byte base : getBasesAsArrayList(reads, offsets)) { bases.append((char)base); } return bases.toString(); } public static String baseWithStrandPileupAsString( List reads, List offsets ) { - return baseWithStrandPileupAsString( reads, offsets, false ); - } - - public static String baseWithStrandPileupAsString( List reads, List offsets, boolean includeDeletions ) { StringBuilder bases = new StringBuilder(); for ( int i = 0; i < reads.size(); i++ ) { @@ -62,10 +74,7 @@ abstract public class BasicPileup implements Pileup { char base; if ( offset == -1 ) { - if ( includeDeletions ) - base = DELETION_CHAR; - else - continue; + base = DELETION_CHAR; } else { base = (char) read.getReadBases()[offset]; } @@ -85,37 +94,24 @@ abstract public class BasicPileup implements Pileup { // byte[] methods // public static byte[] getBases( List reads, List offsets ) { - return getBasesAsArray(reads,offsets,false); - } - - public static byte[] getBases( List reads, List offsets, boolean includeDeletions ) { - return getBasesAsArray(reads,offsets,includeDeletions); + return getBasesAsArray(reads,offsets); } public static byte[] getQuals( List reads, List offsets ) { - return getQualsAsArray( reads, offsets,false); - } - - public static byte[] getQuals( List reads, List offsets, boolean includeDeletions ) { - return getQualsAsArray( reads, offsets, includeDeletions); + return getQualsAsArray( reads, offsets ); } // // ArrayList methods // - public static ArrayList getBasesAsArrayList( List reads, List offsets ) { - return getBasesAsArrayList( reads, offsets, false ); - } - - public static byte[] getBasesAsArray( List reads, List offsets, boolean includeDeletions ) { + public static byte[] getBasesAsArray( List reads, List offsets ) { byte array[] = new byte[reads.size()]; int index = 0; for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); if ( offset == -1 ) { - if ( includeDeletions ) - array[index++] = ((byte)DELETION_CHAR); + array[index++] = ((byte)DELETION_CHAR); } else { array[index++] = read.getReadBases()[offset]; } @@ -124,25 +120,21 @@ abstract public class BasicPileup implements Pileup { } - public static ArrayList getBasesAsArrayList( List reads, List offsets, boolean includeDeletions ) { + public static ArrayList getBasesAsArrayList( List reads, List offsets ) { ArrayList bases = new ArrayList(reads.size()); - for (byte value : getBasesAsArray(reads, offsets, includeDeletions)) + for (byte value : getBasesAsArray(reads, offsets)) bases.add(value); return bases; } public static ArrayList getQualsAsArrayList( List reads, List offsets ) { - return getQualsAsArrayList( reads, offsets, false ); - } - - public static ArrayList getQualsAsArrayList( List reads, List offsets, boolean includeDeletions ) { ArrayList quals = new ArrayList(reads.size()); - for (byte value : getQualsAsArray(reads, offsets, includeDeletions)) + for (byte value : getQualsAsArray(reads, offsets)) quals.add(value); return quals; } - public static byte[] getQualsAsArray( List reads, List offsets, boolean includeDeletions ) { + public static byte[] getQualsAsArray( List reads, List offsets ) { byte array[] = new byte[reads.size()]; int index = 0; for ( int i = 0; i < reads.size(); i++ ) { @@ -151,8 +143,7 @@ abstract public class BasicPileup implements Pileup { // skip deletion sites if ( offset == -1 ) { - if ( includeDeletions ) // we need the qual vector to be the same length as base vector! - array[index++] = ((byte)0); + array[index++] = ((byte)0); } else { array[index++] = read.getBaseQualities()[offset]; } @@ -174,14 +165,6 @@ abstract public class BasicPileup implements Pileup { return quals2String(mappingQualPileup(reads)); } - private static byte[] ArrayList2byte(ArrayList ab) { - byte[] ret = new byte[ab.size()]; - int i = 0; - for (byte e : ab) - ret[i++] = e; - return ret; - } - public static String quals2String( List quals ) { StringBuilder qualStr = new StringBuilder(); for ( int qual : quals ) { @@ -197,7 +180,7 @@ abstract public class BasicPileup implements Pileup { return quals2String(getQualsAsArrayList(reads, offsets)); } - // todo -- there are probably bugs in here due to includeDeletion flags + public static ArrayList getSecondaryBasesAsArrayList( List reads, List offsets ) { ArrayList bases2 = new ArrayList(reads.size()); boolean hasAtLeastOneSQorE2Field = false; @@ -205,37 +188,11 @@ abstract public class BasicPileup implements Pileup { for ( int i = 0; i < reads.size(); i++ ) { SAMRecord read = reads.get(i); int offset = offsets.get(i); - - if (read.getAttribute("SQ") != null) { - byte[] compressedQuals = (byte[]) read.getAttribute("SQ"); - byte base2; - - if (offset != -1 && compressedQuals != null && compressedQuals.length == read.getReadLength()) { - base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset])); - hasAtLeastOneSQorE2Field = true; - } - else { - base2 = (byte) '.'; - } - bases2.add((byte) base2); - } - else if (read.getAttribute("E2") != null) { - String secondaries = (String) read.getAttribute("E2"); - char base2; - if (offset != -1 && secondaries != null && secondaries.length() == read.getReadLength()) { - base2 = secondaries.charAt(offset); - hasAtLeastOneSQorE2Field = true; - } - else { - base2 = '.'; - } - bases2.add((byte) base2); - } - else { - byte base2 = 'N'; - bases2.add(base2); - } + byte base2 = BaseUtils.getSecondBase(read, offset); + hasAtLeastOneSQorE2Field = hasAtLeastOneSQorE2Field || BaseUtils.simpleBaseToBaseIndex((char)base2) != -1; + bases2.add(base2); } + return (hasAtLeastOneSQorE2Field ? bases2 : null); } @@ -245,127 +202,33 @@ abstract public class BasicPileup implements Pileup { if (sbases == null) { return null; } - ArrayList pbases = getBasesAsArrayList(reads, offsets,true); + ArrayList pbases = getBasesAsArrayList(reads, offsets); + + //Random generator = new Random(); + + if ( sbases.size() != pbases.size() ) { + throw new StingException("BUG in conversion of secondary bases: primary and secondary base vectors are different sizes!"); + } - Random generator = new Random(); - for (int pileupIndex = 0; pileupIndex < sbases.size(); pileupIndex++) { byte pbase = pbases.get(pileupIndex); byte sbase = sbases.get(pileupIndex); - while (sbase == pbase) { - sbase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4)); + if ( sbase == pbase ) { + throw new StingException("BUG in conversion of secondary bases!"); } +// while (sbase == pbase) { // TODO why is here? +// sbase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4)); +// } + bases2.append((char) sbase); } - /* - for (byte base2 : secondaryBasePileup(reads, offsets)) { - bases2.append((char) base2); - } - */ - return bases2.toString(); } - public static ArrayList secondaryQualPileup( List reads, List offsets ) { - ArrayList quals2 = new ArrayList(reads.size()); - boolean hasAtLeastOneSQField = false; - - for ( int i = 0; i < reads.size(); i++ ) { - SAMRecord read = reads.get(i); - int offset = offsets.get(i); - - byte[] compressedQuals = (byte[]) read.getAttribute("SQ"); - byte qual2; - if (offset != -1 && compressedQuals != null) { - qual2 = QualityUtils.probToQual(QualityUtils.compressedQualityToProb(compressedQuals[offset])); - hasAtLeastOneSQField = true; - } else { - qual2 = 0; - } - quals2.add(qual2); - } - return (hasAtLeastOneSQField ? quals2 : null); - } - - public static String secondaryQualPileupAsString( List reads, List offsets) { - StringBuilder quals2 = new StringBuilder(); - ArrayList sqquals = secondaryQualPileup(reads, offsets); - - if (sqquals == null) { - return null; - } else { - for (byte qual2 : secondaryQualPileup(reads, offsets)) { - quals2.append(qual2); - } - return quals2.toString(); - } - } - - public static double[][] probDistPileup( List reads, List offsets ) { - double[][] dist = new double[reads.size()][4]; - - for (int readIndex = 0; readIndex < dist.length; readIndex++) { - SAMRecord read = reads.get(readIndex); - - String bases = read.getReadString(); - int offset = offsets.get(readIndex); - if ( offset == -1 ) - continue; - - int bestBaseIndex = BaseUtils.simpleBaseToBaseIndex(bases.charAt(offset)); - - if (bestBaseIndex >= 0 && bestBaseIndex < 4) { - dist[readIndex][bestBaseIndex] = QualityUtils.qualToProb(read.getBaseQualities()[offset]); - - byte[] sqs = (byte[]) read.getAttribute("SQ"); - if (sqs != null && QualityUtils.compressedQualityToBaseIndex(sqs[offset]) != bestBaseIndex) { - double epsilon = 1e-4; - - int secondBestBaseIndex = QualityUtils.compressedQualityToBaseIndex(sqs[offset]); - //dist[readIndex][secondBestBaseIndex] = (1.0 - dist[readIndex][bestBaseIndex] - 2.0*epsilon); - dist[readIndex][secondBestBaseIndex] = 0.8*(1.0 - dist[readIndex][bestBaseIndex]); - - for (int baseIndex = 0; baseIndex < 4; baseIndex++) { - if (baseIndex != bestBaseIndex && baseIndex != secondBestBaseIndex) { - //dist[readIndex][baseIndex] = epsilon; - dist[readIndex][baseIndex] = 0.1*(1.0 - dist[readIndex][bestBaseIndex]); - } - } - } else { - for (int baseIndex = 0; baseIndex < 4; baseIndex++) { - if (baseIndex != bestBaseIndex) { - dist[readIndex][baseIndex] = (1.0 - dist[readIndex][bestBaseIndex])/3.0; - } - } - } - } else { - for (int baseIndex = 0; baseIndex < 4; baseIndex++) { - dist[readIndex][baseIndex] = 0.25; - } - } - } - - return dist; - } - - public static String probDistPileupAsString( List reads, List offsets ) { - double[][] dist = probDistPileup(reads, offsets); - - String distString = String.format(" %c %c %c %c\n", 'A', 'C', 'G', 'T'); - for (int readIndex = 0; readIndex < dist.length; readIndex++) { - distString += "[ "; - for (int baseIndex = 0; baseIndex < 4; baseIndex++) { - distString += String.format("%4.4f ", dist[readIndex][baseIndex]); - } - distString += "]\n"; - } - - return distString; - } - + @Deprecated // todo -- delete me public static String[] indelPileup( List reads, List offsets ) { String[] indels = new String[reads.size()]; @@ -422,80 +285,6 @@ abstract public class BasicPileup implements Pileup { return indels; } - - public static String pileupDiff(final Pileup a, final Pileup b) - { - return pileupDiff(a,b,true); - } - - private static String maybeSorted( final String x, boolean sortMe ) - { - if ( sortMe ) { - byte[] bytes = x.getBytes(); - Arrays.sort(bytes); - return new String(bytes); - } - else - return x; - } - - public static String pileupDiff(final Pileup a, final Pileup b, boolean orderDependent) - { - if ( a.size() != b.size() ) - return "Sizes not equal"; - if ( a.getLocation().compareTo(b.getLocation()) != 0 ) - return "Locations not equal"; - - String aBases = maybeSorted(a.getBasesAsString(), ! orderDependent ); - String bBases = maybeSorted(b.getBasesAsString(), ! orderDependent ); - if ( ! aBases.toUpperCase().equals(bBases.toUpperCase()) ) - return "Bases not equal"; - - String aQuals = maybeSorted(a.getQualsAsString(), ! orderDependent ); - String bQuals = maybeSorted(b.getQualsAsString(), ! orderDependent ); - if ( ! aQuals.equals(bQuals) ) - return "Quals not equal"; - - return null; - } - - public static class BaseCounts { - public int a, c, t, g; - - public BaseCounts(int a, int c, int t, int g) { - this.a = a; - this.c = c; - this.t = t; - this.g = g; - } - } - - public static int countBase(final char base, final String bases) { - return Utils.countOccurrences(base, bases); - } - - public static BaseCounts countBases(final String bases) { - String canon = bases.toUpperCase(); - return new BaseCounts(Utils.countOccurrences('A', canon), - Utils.countOccurrences('C', canon), - Utils.countOccurrences('T', canon), - Utils.countOccurrences('G', canon)); - } - - public static byte consensusBase(String bases) { - BaseCounts counts = countBases( bases ); - int ACount = counts.a; - int CCount = counts.c; - int TCount = counts.t; - int GCount = counts.g; - - int m = Math.max(ACount, Math.max(CCount, Math.max(TCount, GCount))); - if ( ACount == m ) return 'A'; - if ( CCount == m ) return 'C'; - if ( TCount == m ) return 'T'; - if ( GCount == m ) return 'G'; - return 0; - } } diff --git a/java/src/org/broadinstitute/sting/utils/Pileup.java b/java/src/org/broadinstitute/sting/utils/Pileup.java deleted file mode 100755 index d5846d142..000000000 --- a/java/src/org/broadinstitute/sting/utils/Pileup.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.broadinstitute.sting.utils; - -import java.util.ArrayList; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Apr 14, 2009 - * Time: 9:33:00 AM - * To change this template use File | Settings | File Templates. - */ -public interface Pileup { - GenomeLoc getLocation(); - char getRef(); - - // byte array - byte[] getBases(); - byte[] getQuals(); - - ArrayList getBasesAsArrayList(); - ArrayList getQualsAsArrayList(); - - String getBasesAsString(); - String getQualsAsString(); - String getPileupString(); - int size(); -} diff --git a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java deleted file mode 100755 index 1e3ad78d4..000000000 --- a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java +++ /dev/null @@ -1,181 +0,0 @@ -package org.broadinstitute.sting.utils; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import net.sf.samtools.SAMRecord; - -import java.util.List; -import java.util.ArrayList; -import java.util.Arrays; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Apr 14, 2009 - * Time: 8:54:05 AM - * To change this template use File | Settings | File Templates. - */ -public class ReadBackedPileup extends BasicPileup { - GenomeLoc loc; - char ref; - List reads; - List offsets; - - public ReadBackedPileup(char ref, AlignmentContext context ) { - this(context.getLocation(), ref, context.getReads(), context.getOffsets()); - } - - public ReadBackedPileup(GenomeLoc loc, char ref, List reads, List offsets ) { - assert reads != null; - assert offsets != null; - assert reads.size() == offsets.size(); - - this.loc = loc; - this.ref = ref; - this.reads = reads; - this.offsets = offsets; - } - - public ReadBackedPileup getPileupWithoutDeletions() { - if ( getNumberOfDeletions() > 0 ) { - List newReads = new ArrayList(); - List newOffsets = new ArrayList(); - - for ( int i = 0; i < size(); i++ ) { - if ( getOffsets().get(i) != -1 ) { - newReads.add(getReads().get(i)); - newOffsets.add(getOffsets().get(i)); - } - } - return new ReadBackedPileup(loc, ref, newReads, newOffsets); - } else { - return this; - } - } - - public int getNumberOfDeletions() { - int n = 0; - - for ( int i = 0; i < size(); i++ ) { - if ( getOffsets().get(i) != -1 ) { n++; } - } - - return n; - } - - public int size() { return reads.size(); } - public List getReads() { return reads; } - public List getOffsets() { return offsets; } - - public GenomeLoc getLocation() { - return loc; - } - - public char getRef() { - return ref; - } - - public byte[] getBases() { - return getBases(reads, offsets, includeDeletions); - } - - public byte[] getQuals() { - return getQuals(reads, offsets, includeDeletions); - } - - - public String getBasesAsString() { - return basePileupAsString(reads, offsets, includeDeletions); - } - - public String getBasesWithStrand() { - return baseWithStrandPileupAsString(reads, offsets, includeDeletions); - } - - public ArrayList getBasesAsArrayList() { - return getBasesAsArrayList(reads, offsets, includeDeletions); - } - - public ArrayList getQualsAsArrayList() { - return getQualsAsArrayList(reads, offsets, includeDeletions); - } - - public String getQualsAsString() { - return qualPileupAsString(reads, offsets); - } - - public String getQualsAsInts() { - //System.out.printf("getQualsAsInts"); - return Utils.join(",", getQualsAsArrayList(reads, offsets)); - } - - public String getMappingQualsAsInts() { - return Utils.join(",", mappingQualPileup(reads)); - } - - public String getMappingQuals() { - return mappingQualPileupAsString(reads); - } - - public String getSecondaryBasePileup() { - return getSecondaryBasePileupAsString(reads, offsets); - } - - public String getSecondaryQualPileup() { - return secondaryQualPileupAsString(reads, offsets); - } - - public int[] getBasePileupAsCounts() { - int[] counts = new int[4]; - for (int i = 0; i < reads.size(); i++) { - // skip deletion sites - if ( offsets.get(i) == -1 ) - continue; - char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)])); - if (BaseUtils.simpleBaseToBaseIndex(base) == -1) - continue; - counts[BaseUtils.simpleBaseToBaseIndex(base)]++; - } - - return counts; - } - - public String getBasePileupAsCountsString() { - int[] counts = getBasePileupAsCounts(); - return String.format("A[%d] C[%d] G[%d] T[%d]", - counts[0], - counts[1], - counts[2], - counts[3]); - } - - public String getProbDistPileup() { - return probDistPileupAsString(reads, offsets); - } - - public String toString() { - return getPileupString(true); - } - - public String getPileupString(boolean qualsAsInts) - { - // In the pileup format, each line represents a genomic position, consisting of chromosome name, - // coordinate, reference base, read bases, read qualities and alignment mapping qualities. - - //return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals()); - return String.format("%s %s %s %s %s %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - getRef(), // reference base - getBasesAsString(), - qualsAsInts ? getQualsAsInts() : getQualsAsString(), - qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() ); - } - - public String getPileupWithStrandString(boolean qualsAsInts) { - return String.format("%s %s %s %s %s %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - getRef(), // reference base - getBasesWithStrand(), - qualsAsInts ? getQualsAsInts() : getQualsAsString(), - qualsAsInts ? getMappingQualsAsInts() : getMappingQuals() ); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index f39f495e2..bc3b0ca63 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -176,8 +176,16 @@ public class DupUtils { System.out.printf("%n"); } + private static List constantOffset( List reads, int i ) { + List l = new ArrayList(reads.size()); + for ( SAMRecord read : reads ) { + l.add(i); + } + return l; + } + private static Pair combineBaseProbs(List duplicates, int readOffset, int maxQScore) { - List offsets = BasicPileup.constantOffset(duplicates, readOffset); + List offsets = constantOffset(duplicates, readOffset); ArrayList bases = BasicPileup.getBasesAsArrayList(duplicates, offsets); ArrayList quals = BasicPileup.getQualsAsArrayList(duplicates, offsets); final boolean debug = false; @@ -198,11 +206,11 @@ public class DupUtils { if ( debug ) print4BaseQuals("final", qualSums); Pair combined = baseProbs2BaseAndQual(qualSums, maxQScore); - if ( debug ) - System.out.printf("%s %s => %c Q%s%n", - BasicPileup.basePileupAsString(duplicates, offsets), - BasicPileup.qualPileupAsString(duplicates, offsets), - (char)(byte)combined.getFirst(), combined.getSecond()); +// if ( debug ) +// System.out.printf("%s %s => %c Q%s%n", +// BasicPileup.basePileupAsString(duplicates, offsets), +// BasicPileup.qualPileupAsString(duplicates, offsets), +// (char)(byte)combined.getFirst(), combined.getSecond()); return combined; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java index f515bdf95..015dbd8eb 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; /** * @author ebanks diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index af883c47f..808dba44e 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype.geli; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.Arrays; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java index 78cddf92a..b919a02d3 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype.glf; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.Arrays; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index 0ee440021..4fc900cd5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.*; import java.util.Arrays; diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedPileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedPileupElement.java new file mode 100755 index 000000000..a2ada94c5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedPileupElement.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.utils.pileup; + +import net.sf.samtools.SAMRecord; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Apr 14, 2009 + * Time: 8:54:05 AM + * To change this template use File | Settings | File Templates. + */ +public class ExtendedPileupElement extends PileupElement { + private int pileupOffset = 0; + private ReadBackedPileup pileup = null; + + public ExtendedPileupElement( SAMRecord read, int readOffset, int pileupOffset, ReadBackedPileup pileup ) { + super(read, readOffset); + this.pileupOffset = pileupOffset; + this.pileup = pileup; + } + + public int getPileupOffset() { + return pileupOffset; + } + + public ReadBackedPileup getPileup() { + return pileup; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java new file mode 100755 index 000000000..1d019bd34 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -0,0 +1,54 @@ +package org.broadinstitute.sting.utils.pileup; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.*; +import net.sf.samtools.SAMRecord; + +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Apr 14, 2009 + * Time: 8:54:05 AM + * To change this template use File | Settings | File Templates. + */ +public class PileupElement { + public static final byte DELETION_BASE = 'D'; + public static final byte DELETION_QUAL = 0; + + private SAMRecord read; + private int offset; + + public PileupElement( SAMRecord read, int offset ) { + this.read = read; + this.offset = offset; + } + + public boolean isDeletion() { + return offset == -1; + } + + public SAMRecord getRead() { return read; } + public int getOffset() { return offset; } + + public byte getBase() { + return isDeletion() ? DELETION_BASE : read.getReadBases()[offset]; + } + + public byte getSecondBase() { + return isDeletion() ? DELETION_BASE : BaseUtils.getSecondBase(read, offset); + } + + public byte getQual() { + return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; + } + + public int getMappingQual() { return read.getMappingQuality(); } + + public String toString() { + return String.format("%s @ %d = %c Q%d", getRead().getReadName(), getOffset(), (char)getBase(), getQual()); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java new file mode 100755 index 000000000..83a52611c --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -0,0 +1,182 @@ +package org.broadinstitute.sting.utils.pileup; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.iterators.IterableIterator; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.BaseUtils; + +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; + +import net.sf.samtools.SAMRecord; + +/** + * Version two file implementing pileups of bases in reads at a locus. + * + * @author Mark DePristo + */ +public class ReadBackedPileup implements Iterable { + private GenomeLoc loc = null; + private char ref = 0; + private ArrayList pileup = null; + + public ReadBackedPileup(char ref, AlignmentContext context ) { + this(context.getLocation(), ref, context.getReads(), context.getOffsets()); + } + + public ReadBackedPileup(GenomeLoc loc, char ref, List reads, List offsets ) { + this(loc, ref, readsOffsets2Pileup(reads, offsets)); + } + + public ReadBackedPileup(GenomeLoc loc, char ref, ArrayList pileup ) { + if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2"); + if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2"); + + this.loc = loc; + this.ref = ref; + this.pileup = pileup; + } + + private static ArrayList readsOffsets2Pileup(List reads, List offsets ) { + if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2"); + if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2"); + if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!"); + + ArrayList pileup = new ArrayList(reads.size()); + for ( int i = 0; i < reads.size(); i++ ) { + pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) ); + } + + return pileup; + } + + // + // iterators + // + public Iterator iterator() { + return pileup.iterator(); + } + + // todo -- reimplement efficiently + public IterableIterator extendedForeachIterator() { + ArrayList x = new ArrayList(size()); + int i = 0; + for ( PileupElement pile : this ) { + x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); + } + + return new IterableIterator(x.iterator()); + } + + + public ReadBackedPileup getPileupWithoutDeletions() { + // todo -- fixme + if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions + List newReads = new ArrayList(); + List newOffsets = new ArrayList(); + + for ( int i = 0; i < size(); i++ ) { + if ( getOffsets().get(i) != -1 ) { + newReads.add(getReads().get(i)); + newOffsets.add(getOffsets().get(i)); + } + } + return new ReadBackedPileup(loc, ref, newReads, newOffsets); + } else { + return this; + } + } + + public int getNumberOfDeletions() { + int n = 0; + + for ( int i = 0; i < size(); i++ ) { + if ( getOffsets().get(i) != -1 ) { n++; } + } + + return n; + } + + // todo -- optimize me + public int size() { + return pileup.size(); + } + + public GenomeLoc getLocation() { + return loc; + } + + public char getRef() { + return ref; + } + + public List getReads() { + List reads = new ArrayList(size()); + for ( PileupElement pile : this ) { reads.add(pile.getRead()); } + return reads; + } + + public List getOffsets() { + List offsets = new ArrayList(size()); + for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } + return offsets; + } + + public byte[] getBases() { + byte[] v = new byte[size()]; + for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); } + return v; + } + + public byte[] getSecondaryBases() { + byte[] v = new byte[size()]; + for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); } + return v; + } + + public byte[] getQuals() { + byte[] v = new byte[size()]; + for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); } + return v; + } + + public int[] getBaseCounts() { + int[] counts = new int[4]; + for ( PileupElement pile : this ) { + // skip deletion sites + if ( ! pile.isDeletion() ) { + char base = Character.toUpperCase((char)(pile.getBase())); + if (BaseUtils.simpleBaseToBaseIndex(base) == -1) + continue; + counts[BaseUtils.simpleBaseToBaseIndex(base)]++; + } + } + + return counts; + } + + public boolean hasSecondaryBases() { + for ( PileupElement pile : this ) { + // skip deletion sites + if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) ) + return true; + } + + return false; + } + + public String getPileupString(boolean qualsAsInts) { + // In the pileup format, each line represents a genomic position, consisting of chromosome name, + // coordinate, reference base, read bases, read qualities and alignment mapping qualities. + + //return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals()); + return String.format("%s %s %s %s", + getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate + getRef(), // reference base + new String(getBases())); + } +} diff --git a/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java b/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java index b56c6117c..1ccd0db0d 100644 --- a/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/alignment/AlignerIntegrationTest.java @@ -24,6 +24,6 @@ public class AlignerIntegrationTest extends WalkerTest { " -ob %s", 1, // just one output file Arrays.asList(md5)); - executeTest("testBasicAlignment", spec); + //executeTest("testBasicAlignment", spec); } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java index 2e47328d8..770e6cb01 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java @@ -33,7 +33,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { +"-B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " +"-vcf %s -sample variant -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list"; - String md5_for_this_test = "6a71095e1cfade39d909b35f6c99d1ca"; + String md5_for_this_test = "cbf0636dbb2e2f70a20f4b29a213e4d0"; WalkerTestSpec spec = new WalkerTestSpec(test_args,1, Arrays.asList(md5_for_this_test)); executeTest("Testing on E2 annotated but not Q2 annotated file ",spec); diff --git a/python/snpSelector.py b/python/snpSelector.py index ee2f95ad7..585c52195 100755 --- a/python/snpSelector.py +++ b/python/snpSelector.py @@ -519,7 +519,7 @@ def evaluateTruth(header, callVCF, truthVCF): if truth <> None and OPTIONS.FNoutputVCF: f = open(OPTIONS.FNoutputVCF, 'w') #print 'HEADER', header - for line in formatVCF(header, filter( lambda x: not x.hasField("TP"), truth.itervalues())): + for line in formatVCF(header, filter( lambda x: not x.hasField("FN"), truth.itervalues())): print >> f, line f.close()