Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e53cb79d42
|
|
@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
private String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
private String OUTPUT_DIR = "analyzeCovariates/";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
private String PATH_TO_RESOURCES = "public/R/";
|
||||
|
|
|
|||
|
|
@ -24,12 +24,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Tracks a linked list of GATKReportColumn in order by name.
|
||||
*/
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
|
||||
private List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
|
|
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
|||
columnNames.add(key);
|
||||
return super.put(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<GATKReportColumn> iterator() {
|
||||
return new Iterator<GATKReportColumn>() {
|
||||
int offset = 0;
|
||||
public boolean hasNext() { return offset < columnNames.size() ; }
|
||||
public GATKReportColumn next() { return getByIndex(offset++); }
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -286,6 +286,10 @@ public class GATKReportTable {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean containsKey(Object primaryKey) {
|
||||
return primaryKeyColumn.contains(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value for a given position in the table
|
||||
*
|
||||
|
|
|
|||
|
|
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||
protected boolean indelsOnly = false;
|
||||
|
||||
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
||||
public String familyStr = null;
|
||||
|
||||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||
public double minGenotypeQualityP = 0.0;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
||||
private Collection<VariantContext> indelBufferContext;
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
|
||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
||||
private String[] IGNORE_INPUT_FILTERS = null;
|
||||
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required=false)
|
||||
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required=false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||
private String RSCRIPT_FILE = null;
|
||||
|
|
|
|||
|
|
@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||
}
|
||||
else
|
||||
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
}
|
||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
|
|
@ -32,6 +34,9 @@ public class MendelianViolation {
|
|||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
||||
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||
|
||||
|
||||
public String getSampleMom() {
|
||||
return sampleMom;
|
||||
|
|
@ -168,4 +173,41 @@ public class MendelianViolation {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the likelihood ratio for a mendelian violation
|
||||
*/
|
||||
public double violationLikelihoodRatio(VariantContext vc) {
|
||||
double[] logLikAssignments = new double[27];
|
||||
// the matrix to set up is
|
||||
// MOM DAD CHILD
|
||||
// |- AA
|
||||
// AA AA | AB
|
||||
// |- BB
|
||||
// |- AA
|
||||
// AA AB | AB
|
||||
// |- BB
|
||||
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||
int offset = 0;
|
||||
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||
}
|
||||
}
|
||||
}
|
||||
double[] mvLiks = new double[12];
|
||||
double[] nonMVLiks = new double[15];
|
||||
for ( int i = 0; i < 12; i ++ ) {
|
||||
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||
}
|
||||
|
||||
for ( int i = 0; i < 15; i++) {
|
||||
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||
}
|
||||
|
||||
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public class RScriptExecutor {
|
|||
|
||||
public static class RScriptArgumentCollection {
|
||||
@Advanced
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||
public String PATH_TO_RSCRIPT = "Rscript";
|
||||
|
||||
@Advanced
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -8,6 +9,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -128,6 +130,39 @@ public class ReadClipper {
|
|||
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipSoftClippedBases () {
|
||||
int readIndex = 0;
|
||||
int cutLeft = -1; // first position to hard clip (inclusive)
|
||||
int cutRight = -1; // first position to hard clip (inclusive)
|
||||
boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail
|
||||
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
|
||||
if (rightTail) {
|
||||
cutRight = readIndex;
|
||||
}
|
||||
else {
|
||||
cutLeft = readIndex + cigarElement.getLength() - 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
rightTail = true;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases())
|
||||
readIndex += cigarElement.getLength();
|
||||
}
|
||||
|
||||
// It is extremely important that we cut the end first otherwise the read coordinates change.
|
||||
if (cutRight >= 0)
|
||||
this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1));
|
||||
if (cutLeft >= 0)
|
||||
this.addOp(new ClippingOp(0, cutLeft));
|
||||
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec;
|
|||
import org.broad.tribble.NameAwareCodec;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.util.BlockCompressedInputStream;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -590,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
|
||||
try {
|
||||
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
||||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
|
||||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
|
||||
isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
return false;
|
||||
} catch ( IOException e ) {
|
||||
|
|
@ -601,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
|
||||
try {
|
||||
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
|
||||
stream.read(buff, 0, MAGIC_HEADER_LINE.length());
|
||||
String firstLine = new String(buff);
|
||||
stream.close();
|
||||
return firstLine.startsWith(MAGIC_HEADER_LINE);
|
||||
int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length());
|
||||
boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes());
|
||||
return eq;
|
||||
// String firstLine = new String(buff);
|
||||
// return firstLine.startsWith(MAGIC_HEADER_LINE);
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
} catch ( RuntimeException e ) {
|
||||
return false;
|
||||
} finally {
|
||||
try { stream.close(); } catch ( IOException e ) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -681,6 +681,9 @@ public class ReadUtils {
|
|||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||
if ( read.getCigar().numCigarElements() == 1 && read.getCigar().getCigarElement(0).getOperator().equals(CigarOperator.INSERTION)) {
|
||||
return read.getUnclippedEnd();
|
||||
}
|
||||
int stop = read.getUnclippedStart();
|
||||
|
||||
if (readIsEntirelyInsertion(read))
|
||||
|
|
@ -787,5 +790,47 @@ public class ReadUtils {
|
|||
return readBases;
|
||||
}
|
||||
|
||||
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
||||
int newReadStart = rec.getAlignmentStart();
|
||||
int newReadEnd = rec.getAlignmentEnd();
|
||||
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
|
||||
int heldOver = -1;
|
||||
boolean sSeen = false;
|
||||
for ( CigarElement e : rec.getCigar().getCigarElements() ) {
|
||||
if ( e.getOperator().equals(CigarOperator.S) ) {
|
||||
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
|
||||
if ( sSeen ) {
|
||||
newReadEnd += e.getLength();
|
||||
sSeen = true;
|
||||
} else {
|
||||
newReadStart -= e.getLength();
|
||||
}
|
||||
} else {
|
||||
newCigarElements.add(e);
|
||||
}
|
||||
}
|
||||
// merge duplicate operators together
|
||||
int idx = 0;
|
||||
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
|
||||
while ( idx < newCigarElements.size() -1 ) {
|
||||
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
|
||||
int combSize = newCigarElements.get(idx).getLength();
|
||||
int offset = 0;
|
||||
while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) {
|
||||
combSize += newCigarElements.get(idx+offset+1).getLength();
|
||||
offset++;
|
||||
}
|
||||
finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator()));
|
||||
idx = idx + offset -1;
|
||||
} else {
|
||||
finalCigarElements.add(newCigarElements.get(idx));
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
rec.setCigar(new Cigar(finalCigarElements));
|
||||
rec.setAlignmentStart(newReadStart);
|
||||
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
|||
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
|
||||
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
|
||||
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
|
||||
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
|
||||
|
||||
private FeatureManager manager;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
|||
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
|
||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
|
||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
|
||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP);
|
||||
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
|
||||
return FMTest.getTests(FMTest.class);
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Loading…
Reference in New Issue