diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index e0a2329f8..bd0d4e3fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -33,18 +33,18 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( mendelianViolation == null ) { - if ( walker instanceof VariantAnnotator ) { + if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); } else { - throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator"); + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line."); } } Map toRet = new HashMap(1); - boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && - vc.hasGenotype(mendelianViolation.getSampleDad()) && - vc.hasGenotype(mendelianViolation.getSampleMom()); + boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() && + vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() && + vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods(); if ( hasAppropriateGenotypes ) toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index b28980d9c..3715d2d14 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -528,10 +528,10 @@ public class PairHMMIndelErrorModel { } else { - byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases, + final byte[] readBases = Arrays.copyOfRange(unclippedReadBases,numStartClippedBases, unclippedReadBases.length-numEndClippedBases); - byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases, + final byte[] readQuals = Arrays.copyOfRange(unclippedReadQuals,numStartClippedBases, unclippedReadBases.length-numEndClippedBases); int j=0; @@ -540,6 +540,7 @@ public class PairHMMIndelErrorModel { double[][] matchMetricArray = null, XMetricArray = null, YMetricArray = null; byte[] previousHaplotypeSeen = null; double[] previousGOP = null; + double[] previousGCP = null; int startIdx; for (Allele a: haplotypeMap.keySet()) { @@ -555,7 +556,7 @@ public class PairHMMIndelErrorModel { long indStart = start - haplotype.getStartPosition(); long indStop = stop - haplotype.getStartPosition(); - byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), + final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), (int)indStart, (int)indStop); double readLikelihood; @@ -572,13 +573,14 @@ public class PairHMMIndelErrorModel { if (previousHaplotypeSeen == null) startIdx = 0; else { - int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); - int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP); - startIdx = Math.min(s1,s2); + final int s1 = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); + final int s2 = computeFirstDifferingPosition(currentContextGOP, previousGOP); + final int s3 = computeFirstDifferingPosition(currentContextGCP, previousGCP); + startIdx = Math.min(Math.min(s1, s2), s3); } previousHaplotypeSeen = haplotypeBases.clone(); previousGOP = currentContextGOP.clone(); - + previousGCP = currentContextGCP.clone(); readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 2e17d68d8..609593acc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -556,9 +556,9 @@ public class SelectVariants extends RodWalker { if (vc == null) return false; - // if we're not looking at specific samples then the absense of a compVC means discordance - if (NO_SAMPLES_SPECIFIED && (compVCs == null || compVCs.isEmpty())) - return true; + // if we're not looking at specific samples then the absence of a compVC means discordance + if (NO_SAMPLES_SPECIFIED) + return (compVCs == null || compVCs.isEmpty()); // check if we find it in the variant rod Map genotypes = vc.getGenotypes(samples); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index a6df986ba..6e4ddddc4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -58,15 +58,6 @@ public class ReadClipper { return hardClipByReferenceCoordinates(refStart, -1); } - private int numDeletions(GATKSAMRecord read) { - int result = 0; - for (CigarElement e: read.getCigar().getCigarElements()) { - if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D ) - result =+ e.getLength(); - } - return result; - } - protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); @@ -90,7 +81,7 @@ public class ReadClipper { @Requires("left <= right") public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { - if (left == right) + if (read.isEmpty() || left == right) return new GATKSAMRecord(read.getHeader()); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); @@ -104,6 +95,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { + if (read.isEmpty()) + return read; + byte [] quals = read.getBaseQualities(); int leftClipIndex = 0; int rightClipIndex = read.getReadLength() - 1; @@ -126,6 +120,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipSoftClippedBases () { + if (read.isEmpty()) + return read; + int readIndex = 0; int cutLeft = -1; // first position to hard clip (inclusive) int cutRight = -1; // first position to hard clip (inclusive) @@ -182,6 +179,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipLeadingInsertions() { + if (read.isEmpty()) + return read; + for(CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java index 97c15fbb8..226c997dd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java @@ -87,7 +87,13 @@ public class TableCodec implements ReferenceDependentFeatureCodec { public Object readHeader(LineReader reader) { String line = ""; try { + boolean isFirst = true; while ((line = reader.readLine()) != null) { + System.out.println(line); + if ( isFirst && ! line.startsWith(headerDelimiter) ) { + throw new UserException.MalformedFile("TableCodec file does not have a header"); + } + isFirst &= false; if (line.startsWith(headerDelimiter)) { if (header.size() > 0) throw new IllegalStateException("Input table file seems to have two header lines. The second is = " + line); String spl[] = line.split(delimiterRegex); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java index a85849f0b..4b5c51bd4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java @@ -1,7 +1,9 @@ package org.broadinstitute.sting.utils.codecs.table; + import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import java.util.List; @@ -44,6 +46,10 @@ public class TableFeature implements Feature { return values.get(columnPosition); } + public String toString() { + return String.format("%s\t%s",position.toString(), Utils.join("\t",values)); + } + public String get(String columnName) { int position = keys.indexOf(columnName); if (position < 0) throw new IllegalArgumentException("We don't have a column named " + columnName); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index ede75817a..9e07b1112 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -24,7 +24,10 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.*; +import net.sf.samtools.BAMRecord; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.NGSPlatform; import java.util.HashMap; @@ -83,8 +86,13 @@ public class GATKSAMRecord extends BAMRecord { read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getInferredInsertSize(), - new byte[]{}); - super.clearAttributes(); + null); + SAMReadGroupRecord samRG = read.getReadGroup(); + clearAttributes(); + if (samRG != null) { + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); + setReadGroup(rg); + } } public GATKSAMRecord(final SAMFileHeader header, @@ -131,6 +139,21 @@ public class GATKSAMRecord extends BAMRecord { return mReadGroup; } + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + + if (!(o instanceof GATKSAMRecord)) return false; + + // note that we do not consider the GATKSAMRecord internal state at all + return super.equals(o); + } + /** * Efficient caching accessor that returns the GATK NGSPlatform of this read * @return @@ -144,11 +167,9 @@ public class GATKSAMRecord extends BAMRecord { retrievedReadGroup = true; } - // - // - // Reduced read functions - // - // + /////////////////////////////////////////////////////////////////////////////// + // *** ReduceReads functions ***// + /////////////////////////////////////////////////////////////////////////////// public byte[] getReducedReadCounts() { if ( ! retrievedReduceReadCounts ) { @@ -167,6 +188,12 @@ public class GATKSAMRecord extends BAMRecord { return getReducedReadCounts()[i]; } + + /////////////////////////////////////////////////////////////////////////////// + // *** GATKSAMRecord specific methods ***// + /////////////////////////////////////////////////////////////////////////////// + + /** * Checks whether an attribute has been set for the given key. * @@ -220,18 +247,26 @@ public class GATKSAMRecord extends BAMRecord { return null; } - @Override - public int hashCode() { - return super.hashCode(); + /** + * Checks whether if the read has any bases. + * + * Empty reads can be dangerous as it may have no cigar strings, no read names and + * other missing attributes. + * + * @return true if the read has no bases + */ + public boolean isEmpty() { + return this.getReadLength() == 0; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - - if (!(o instanceof GATKSAMRecord)) return false; - - // note that we do not consider the GATKSAMRecord internal state at all - return super.equals(o); + /** + * Clears all attributes except ReadGroup of the read. + */ + public void simplify () { + GATKSAMReadGroupRecord rg = getReadGroup(); + this.clearAttributes(); + setReadGroup(rg); } + + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index e4ded491b..6e994be3a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -64,6 +64,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testDiscordance--" + testFile, spec); } + @Test + public void testDiscordanceNoSampleSpecified() { + String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("5d7d899c0c4954ec59104aebfe4addd5") + ); + + executeTest("testDiscordanceNoSampleSpecified--" + testFile, spec); + } + @Test public void testConcordance() { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";