diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 7ea515591..1ef452a5c 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram { private String RECAL_FILE = "output.recal_data.csv"; @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) private String OUTPUT_DIR = "analyzeCovariates/"; - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) private String PATH_TO_RSCRIPT = "Rscript"; @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) private String PATH_TO_RESOURCES = "public/R/"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java old mode 100644 new mode 100755 index a33631c85..a73123b6c --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -24,12 +24,14 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.collections.Pair; + import java.util.*; /** * Tracks a linked list of GATKReportColumn in order by name. */ -public class GATKReportColumns extends LinkedHashMap { +public class GATKReportColumns extends LinkedHashMap implements Iterable { private List columnNames = new ArrayList(); /** @@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap { columnNames.add(key); return super.put(key, value); } + + @Override + public Iterator iterator() { + return new Iterator() { + int offset = 0; + public boolean hasNext() { return offset < columnNames.size() ; } + public GATKReportColumn next() { return getByIndex(offset++); } + public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); } + }; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 3e3aa29a7..2fd5ad7e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -286,6 +286,10 @@ public class GATKReportTable { } } + public boolean containsKey(Object primaryKey) { + return primaryKeyColumn.contains(primaryKey); + } + /** * Set the value for a given position in the table * diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java old mode 100644 new mode 100755 diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index f6a1c4f31..3be87da80 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; + @Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation") + public String familyStr = null; + + @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") + public double minGenotypeQualityP = 0.0; + private VariantAnnotatorEngine engine; private Collection indelBufferContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 529d17285..89e702b64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker { throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } } else - mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD)); + mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); } else if (!FAMILY_STRUCTURE.isEmpty()) { mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index c6a07b5ce..8da118174 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.utils; +import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.regex.Matcher; @@ -32,6 +34,9 @@ public class MendelianViolation { private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); + static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; + static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; + public String getSampleMom() { return sampleMom; @@ -168,4 +173,41 @@ public class MendelianViolation { return true; } + /** + * @return the likelihood ratio for a mendelian violation + */ + public double violationLikelihoodRatio(VariantContext vc) { + double[] logLikAssignments = new double[27]; + // the matrix to set up is + // MOM DAD CHILD + // |- AA + // AA AA | AB + // |- BB + // |- AA + // AA AB | AB + // |- BB + // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs + double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + int offset = 0; + for ( int oMom = 0; oMom < 3; oMom++ ) { + for ( int oDad = 0; oDad < 3; oDad++ ) { + for ( int oChild = 0; oChild < 3; oChild ++ ) { + logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild]; + } + } + } + double[] mvLiks = new double[12]; + double[] nonMVLiks = new double[15]; + for ( int i = 0; i < 12; i ++ ) { + mvLiks[i] = logLikAssignments[mvOffsets[i]]; + } + + for ( int i = 0; i < 15; i++) { + nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]]; + } + + return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index c0493fe22..58f7942fe 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -53,7 +53,7 @@ public class RScriptExecutor { public static class RScriptArgumentCollection { @Advanced - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) public String PATH_TO_RSCRIPT = "Rscript"; @Advanced diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index a8a3fad9e..9b9ce4fdf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.clipreads; import com.google.java.contract.Requires; +import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; @@ -8,6 +9,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; /** @@ -128,6 +130,39 @@ public class ReadClipper { return this.clipRead(ClippingRepresentation.HARDCLIP_BASES); } + public SAMRecord hardClipSoftClippedBases () { + int readIndex = 0; + int cutLeft = -1; // first position to hard clip (inclusive) + int cutRight = -1; // first position to hard clip (inclusive) + boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail + + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { + if (rightTail) { + cutRight = readIndex; + } + else { + cutLeft = readIndex + cigarElement.getLength() - 1; + } + } + else + rightTail = true; + + if (cigarElement.getOperator().consumesReadBases()) + readIndex += cigarElement.getLength(); + } + + // It is extremely important that we cut the end first otherwise the read coordinates change. + if (cutRight >= 0) + this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1)); + if (cutLeft >= 0) + this.addOp(new ClippingOp(0, cutLeft)); + + return clipRead(ClippingRepresentation.HARDCLIP_BASES); + } + + + /** * Return a new read corresponding to this.read that's been clipped according to ops, if any are present. * diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 18646b057..83c7083d0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; +import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -590,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { try { return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || - isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); + isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) || + isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { @@ -601,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) { try { byte[] buff = new byte[MAGIC_HEADER_LINE.length()]; - stream.read(buff, 0, MAGIC_HEADER_LINE.length()); - String firstLine = new String(buff); - stream.close(); - return firstLine.startsWith(MAGIC_HEADER_LINE); + int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length()); + boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes()); + return eq; +// String firstLine = new String(buff); +// return firstLine.startsWith(MAGIC_HEADER_LINE); } catch ( IOException e ) { return false; + } catch ( RuntimeException e ) { + return false; + } finally { + try { stream.close(); } catch ( IOException e ) {} } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java old mode 100644 new mode 100755 index 5d3ef3086..60c9c1780 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -681,6 +681,9 @@ public class ReadUtils { @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + if ( read.getCigar().numCigarElements() == 1 && read.getCigar().getCigarElement(0).getOperator().equals(CigarOperator.INSERTION)) { + return read.getUnclippedEnd(); + } int stop = read.getUnclippedStart(); if (readIsEntirelyInsertion(read)) @@ -787,5 +790,47 @@ public class ReadUtils { return readBases; } + public static SAMRecord unclipSoftClippedBases(SAMRecord rec) { + int newReadStart = rec.getAlignmentStart(); + int newReadEnd = rec.getAlignmentEnd(); + List newCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + int heldOver = -1; + boolean sSeen = false; + for ( CigarElement e : rec.getCigar().getCigarElements() ) { + if ( e.getOperator().equals(CigarOperator.S) ) { + newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M)); + if ( sSeen ) { + newReadEnd += e.getLength(); + sSeen = true; + } else { + newReadStart -= e.getLength(); + } + } else { + newCigarElements.add(e); + } + } + // merge duplicate operators together + int idx = 0; + List finalCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + while ( idx < newCigarElements.size() -1 ) { + if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) { + int combSize = newCigarElements.get(idx).getLength(); + int offset = 0; + while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) { + combSize += newCigarElements.get(idx+offset+1).getLength(); + offset++; + } + finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator())); + idx = idx + offset -1; + } else { + finalCigarElements.add(newCigarElements.get(idx)); + } + idx++; + } + rec.setCigar(new Cigar(finalCigarElements)); + rec.setAlignmentStart(newReadStart); + + return rec; + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index bae8e99ed..e8799e2ab 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest { private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf"); private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf"); private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz"); + private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz"); private FeatureManager manager; private GenomeLocParser genomeLocParser; @@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest { new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ); + new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP); new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); return FMTest.getTests(FMTest.class); } diff --git a/public/testdata/HiSeq.10000.bgzip.vcf.gz b/public/testdata/HiSeq.10000.bgzip.vcf.gz new file mode 100644 index 000000000..3f2b9bf14 Binary files /dev/null and b/public/testdata/HiSeq.10000.bgzip.vcf.gz differ