Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-09-21 20:26:54 -04:00
commit e53cb79d42
14 changed files with 163 additions and 10 deletions

View File

@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "public/R/";

View File

@ -24,12 +24,14 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
* Tracks a linked list of GATKReportColumn in order by name.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
/**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
columnNames.add(key);
return super.put(key, value);
}
@Override
public Iterator<GATKReportColumn> iterator() {
return new Iterator<GATKReportColumn>() {
int offset = 0;
public boolean hasNext() { return offset < columnNames.size() ; }
public GATKReportColumn next() { return getByIndex(offset++); }
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
};
}
}

View File

@ -286,6 +286,10 @@ public class GATKReportTable {
}
}
public boolean containsKey(Object primaryKey) {
return primaryKeyColumn.contains(primaryKey);
}
/**
* Set the value for a given position in the table
*

View File

@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
public String familyStr = null;
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0;
private VariantAnnotatorEngine engine;
private Collection<VariantContext> indelBufferContext;

View File

@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
private String[] IGNORE_INPUT_FILTERS = null;
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required=false)
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required=false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
private String RSCRIPT_FILE = null;

View File

@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
}
else
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
}
else if (!FAMILY_STRUCTURE.isEmpty()) {
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));

View File

@ -1,11 +1,13 @@
package org.broadinstitute.sting.utils;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
@ -32,6 +34,9 @@ public class MendelianViolation {
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
public String getSampleMom() {
return sampleMom;
@ -168,4 +173,41 @@ public class MendelianViolation {
return true;
}
/**
* @return the likelihood ratio for a mendelian violation
*/
public double violationLikelihoodRatio(VariantContext vc) {
double[] logLikAssignments = new double[27];
// the matrix to set up is
// MOM DAD CHILD
// |- AA
// AA AA | AB
// |- BB
// |- AA
// AA AB | AB
// |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) {
for ( int oChild = 0; oChild < 3; oChild ++ ) {
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
}
}
}
double[] mvLiks = new double[12];
double[] nonMVLiks = new double[15];
for ( int i = 0; i < 12; i ++ ) {
mvLiks[i] = logLikAssignments[mvOffsets[i]];
}
for ( int i = 0; i < 15; i++) {
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
}
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
}
}

View File

@ -53,7 +53,7 @@ public class RScriptExecutor {
public static class RScriptArgumentCollection {
@Advanced
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
public String PATH_TO_RSCRIPT = "Rscript";
@Advanced

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.clipreads;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
@ -8,6 +9,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
@ -128,6 +130,39 @@ public class ReadClipper {
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public SAMRecord hardClipSoftClippedBases () {
int readIndex = 0;
int cutLeft = -1; // first position to hard clip (inclusive)
int cutRight = -1; // first position to hard clip (inclusive)
boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
if (rightTail) {
cutRight = readIndex;
}
else {
cutLeft = readIndex + cigarElement.getLength() - 1;
}
}
else
rightTail = true;
if (cigarElement.getOperator().consumesReadBases())
readIndex += cigarElement.getLength();
}
// It is extremely important that we cut the end first otherwise the read coordinates change.
if (cutRight >= 0)
this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1));
if (cutLeft >= 0)
this.addOp(new ClippingOp(0, cutLeft));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
/**
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
*

View File

@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -590,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
try {
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
} catch ( FileNotFoundException e ) {
return false;
} catch ( IOException e ) {
@ -601,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
try {
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
stream.read(buff, 0, MAGIC_HEADER_LINE.length());
String firstLine = new String(buff);
stream.close();
return firstLine.startsWith(MAGIC_HEADER_LINE);
int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length());
boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes());
return eq;
// String firstLine = new String(buff);
// return firstLine.startsWith(MAGIC_HEADER_LINE);
} catch ( IOException e ) {
return false;
} catch ( RuntimeException e ) {
return false;
} finally {
try { stream.close(); } catch ( IOException e ) {}
}
}
}

View File

@ -681,6 +681,9 @@ public class ReadUtils {
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
if ( read.getCigar().numCigarElements() == 1 && read.getCigar().getCigarElement(0).getOperator().equals(CigarOperator.INSERTION)) {
return read.getUnclippedEnd();
}
int stop = read.getUnclippedStart();
if (readIsEntirelyInsertion(read))
@ -787,5 +790,47 @@ public class ReadUtils {
return readBases;
}
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
int newReadStart = rec.getAlignmentStart();
int newReadEnd = rec.getAlignmentEnd();
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
int heldOver = -1;
boolean sSeen = false;
for ( CigarElement e : rec.getCigar().getCigarElements() ) {
if ( e.getOperator().equals(CigarOperator.S) ) {
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
if ( sSeen ) {
newReadEnd += e.getLength();
sSeen = true;
} else {
newReadStart -= e.getLength();
}
} else {
newCigarElements.add(e);
}
}
// merge duplicate operators together
int idx = 0;
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
while ( idx < newCigarElements.size() -1 ) {
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
int combSize = newCigarElements.get(idx).getLength();
int offset = 0;
while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) {
combSize += newCigarElements.get(idx+offset+1).getLength();
offset++;
}
finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator()));
idx = idx + offset -1;
} else {
finalCigarElements.add(newCigarElements.get(idx));
}
idx++;
}
rec.setCigar(new Cigar(finalCigarElements));
rec.setAlignmentStart(newReadStart);
return rec;
}
}

View File

@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest {
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
private FeatureManager manager;
private GenomeLocParser genomeLocParser;
@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest {
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP);
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
return FMTest.getTests(FMTest.class);
}

Binary file not shown.