Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-07-02 16:37:54 -04:00
commit 7e7b4cd1b9
12 changed files with 163 additions and 65 deletions

View File

@ -285,7 +285,10 @@ public class ClippingOp {
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"}) @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) { private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1) final int firstBaseAfterSoftClips = read.getAlignmentStart() - read.getSoftStart();
final int lastBaseBeforeSoftClips = read.getSoftEnd() - read.getSoftStart();
if (start == firstBaseAfterSoftClips && stop == lastBaseBeforeSoftClips) // note that if the read has no soft clips, these constants will be 0 and read length - 1 (beauty of math).
return GATKSAMRecord.emptyRead(read); return GATKSAMRecord.emptyRead(read);
@ -309,6 +312,7 @@ public class ClippingOp {
throw new ReviewedStingException("Where did the clone go?"); throw new ReviewedStingException("Where did the clone go?");
} }
hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End()
hardClippedRead.setBaseQualities(newQuals); hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases); hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(cigarShift.cigar); hardClippedRead.setCigar(cigarShift.cigar);

View File

@ -239,10 +239,10 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
final String contig = lookupContigName(contigOffset); final String contig = lookupContigName(contigOffset);
builder.chr(contig); builder.chr(contig);
this.pos = decoder.decodeInt(BCF2Type.INT32); this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based
final int refLength = decoder.decodeInt(BCF2Type.INT32); final int refLength = decoder.decodeInt(BCF2Type.INT32);
builder.start((long)pos); builder.start((long)pos);
builder.stop((long)(pos + refLength - 1)); // minus one because of our open intervals builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open
} }
/** /**

View File

@ -363,10 +363,10 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
int eqI = infoFieldArray[i].indexOf("="); int eqI = infoFieldArray[i].indexOf("=");
if ( eqI != -1 ) { if ( eqI != -1 ) {
key = infoFieldArray[i].substring(0, eqI); key = infoFieldArray[i].substring(0, eqI);
String str = infoFieldArray[i].substring(eqI+1); String valueString = infoFieldArray[i].substring(eqI+1);
// split on the INFO field separator // split on the INFO field separator
int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false); int infoValueSplitSize = ParsingUtils.split(valueString, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
if ( infoValueSplitSize == 1 ) { if ( infoValueSplitSize == 1 ) {
value = infoValueArray[0]; value = infoValueArray[0];
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key); final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
@ -396,6 +396,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
} }
} }
// this line ensures that key/value pairs that look like key=; are parsed correctly as MISSING
if ( "".equals(value) ) value = VCFConstants.MISSING_VALUE_v4;
attributes.put(key, value); attributes.put(key, value);
} }
} }

View File

@ -214,7 +214,7 @@ public class VCFHeader {
private final <T extends VCFCompoundHeaderLine> void addMetaDataMapBinding(final Map<String, T> map, T line) { private final <T extends VCFCompoundHeaderLine> void addMetaDataMapBinding(final Map<String, T> map, T line) {
final String key = line.getID(); final String key = line.getID();
if ( map.containsKey(key) ) if ( map.containsKey(key) )
logger.warn("Found duplicate VCF header lines for " + key + "; keeping the first only" ); logger.debug("Found duplicate VCF header lines for " + key + "; keeping the first only" );
else else
map.put(key, line); map.put(key, line);
} }

View File

@ -59,6 +59,8 @@ public class GATKSAMRecord extends BAMRecord {
private String mReadString = null; private String mReadString = null;
private GATKSAMReadGroupRecord mReadGroup = null; private GATKSAMReadGroupRecord mReadGroup = null;
private byte[] reducedReadCounts = null; private byte[] reducedReadCounts = null;
private int softStart = -1;
private int softEnd = -1;
// because some values can be null, we don't want to duplicate effort // because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false; private boolean retrievedReadGroup = false;
@ -385,15 +387,17 @@ public class GATKSAMRecord extends BAMRecord {
* @return the unclipped start of the read taking soft clips (but not hard clips) into account * @return the unclipped start of the read taking soft clips (but not hard clips) into account
*/ */
public int getSoftStart() { public int getSoftStart() {
int start = this.getUnclippedStart(); if (softStart < 0) {
for (CigarElement cigarElement : this.getCigar().getCigarElements()) { int start = this.getUnclippedStart();
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
start += cigarElement.getLength(); if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
else start += cigarElement.getLength();
break; else
break;
}
softStart = start;
} }
return softStart;
return start;
} }
/** /**
@ -404,23 +408,43 @@ public class GATKSAMRecord extends BAMRecord {
* @return the unclipped end of the read taking soft clips (but not hard clips) into account * @return the unclipped end of the read taking soft clips (but not hard clips) into account
*/ */
public int getSoftEnd() { public int getSoftEnd() {
int stop = this.getUnclippedStart(); if (softEnd < 0) {
int stop = this.getUnclippedStart();
if (ReadUtils.readIsEntirelyInsertion(this)) if (ReadUtils.readIsEntirelyInsertion(this))
return stop; return stop;
int shift = 0; int shift = 0;
CigarOperator lastOperator = null; CigarOperator lastOperator = null;
for (CigarElement cigarElement : this.getCigar().getCigarElements()) { for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
stop += shift; stop += shift;
lastOperator = cigarElement.getOperator(); lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP) if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength(); shift = cigarElement.getLength();
else else
shift = 0; shift = 0;
}
softEnd = (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
} }
return softEnd;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; /**
* If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache
* so that the next call to getSoftStart/End will recalculate it lazily.
*/
public void resetSoftStartAndEnd() {
softStart = -1;
softEnd = -1;
}
/**
* If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache
* so that the next call to getSoftStart/End will recalculate it lazily.
*/
public void resetSoftStartAndEnd(int softStart, int softEnd) {
this.softStart = softStart;
this.softEnd = softEnd;
} }
/** /**

View File

@ -520,7 +520,7 @@ public class ReadUtils {
if (allowGoalNotReached) { if (allowGoalNotReached) {
return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false); return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
} else { } else {
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Alignment " + alignmentStart + " | " + cigar);
} }
} }

View File

@ -84,7 +84,6 @@ import java.util.*;
*/ */
class BCF2Writer extends IndexingVariantContextWriter { class BCF2Writer extends IndexingVariantContextWriter {
final protected static Logger logger = Logger.getLogger(BCF2Writer.class); final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static List<Allele> MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
final private static boolean ALLOW_MISSING_CONTIG_LINES = false; final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
@ -203,10 +202,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
// note use of encodeRawValue to not insert the typing byte // note use of encodeRawValue to not insert the typing byte
encoder.encodeRawValue(contigIndex, BCF2Type.INT32); encoder.encodeRawValue(contigIndex, BCF2Type.INT32);
// pos // pos. GATK is 1 based, BCF2 is 0 based
encoder.encodeRawValue(vc.getStart(), BCF2Type.INT32); encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32);
// ref length // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1
// for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1
encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32);
// qual // qual

View File

@ -564,6 +564,6 @@ class VCFWriter extends IndexingVariantContextWriter {
+ " at " + vc.getChr() + ":" + vc.getStart() + " at " + vc.getChr() + ":" + vc.getStart()
+ " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have" + " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have"
+ " complete VCF headers by default. This error can be disabled with the engine argument" + " complete VCF headers by default. This error can be disabled with the engine argument"
+ " -U LENIENT_VCF_PROCESSING"); + " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader");
} }
} }

View File

@ -64,7 +64,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile), baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1, 1,
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b") Arrays.asList("4386fbb258dcef4437495a37f5a83c53")
); );
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testComplexSelection--" + testfile, spec); executeTest("testComplexSelection--" + testfile, spec);
@ -167,36 +167,6 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testNoGTs--" + testFile, spec); executeTest("testNoGTs--" + testFile, spec);
} }
@Test(enabled = false)
public void testParallelization2() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
);
spec.disableShadowBCF();
executeTest("testParallelization (2 threads)--" + testfile, spec);
}
@Test(enabled = false)
public void testParallelization4() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
);
spec.disableShadowBCF();
executeTest("testParallelization (4 threads)--" + testfile, spec);
}
@Test @Test
public void testSelectFromMultiAllelic() { public void testSelectFromMultiAllelic() {
String testfile = privateTestDir + "multi-allelic.bi-allelicInGIH.vcf"; String testfile = privateTestDir + "multi-allelic.bi-allelicInGIH.vcf";

View File

@ -0,0 +1,59 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
public class SelectVariantsParallelIntegrationTest extends WalkerTest {
private class ParallelSelectTestProvider extends TestDataProvider {
final String reference;
final String args;
final String md5;
final int nt;
private ParallelSelectTestProvider(final String reference, final String args, final String md5, final int nt) {
super(ParallelSelectTestProvider.class);
this.reference = reference;
this.args = args;
this.md5 = md5;
this.nt = nt;
}
public final String getCmdLine() {
return "-T SelectVariants -R " + reference + " -o %s -L 1 --no_cmdline_in_header -nt " + nt + " " + args;
}
public String toString() {
return String.format("ParallelSelectVariants nt=%d args=%s", nt, args);
}
}
@DataProvider(name = "ParallelSelectTest")
public Object[][] makeParallelSelectTestProvider() {
for ( int nt : Arrays.asList(1, 2, 4) ) {
{ // original MAF test
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
String args = " -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile;
new ParallelSelectTestProvider(b36KGReference, args, "4386fbb258dcef4437495a37f5a83c53", nt);
}
{ // new tests on b37 using testdir VCF
final String testfile = privateTestDir + "NA12878.hg19.example1.vcf";
final String args = "-select 'DP > 30' -V " + testfile;
new ParallelSelectTestProvider(b37KGReference, args, "c64b45a14d41b1e5cddbe036b47e7519", nt);
}
}
return ParallelSelectTestProvider.getTests(ParallelSelectTestProvider.class);
}
@Test(dataProvider = "ParallelSelectTest")
public void testParallelSelectTestProvider(final ParallelSelectTestProvider cfg) {
final WalkerTestSpec spec = new WalkerTestSpec( cfg.getCmdLine(), 1, Arrays.asList(cfg.md5) );
executeTest(cfg.toString(), spec);
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.codecs.vcf; package org.broadinstitute.sting.utils.codecs.vcf;
import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File; import java.io.File;
@ -58,7 +59,7 @@ public class VCFIntegrationTest extends WalkerTest {
executeTest("Test writing samtools WEx BCF example", spec1); executeTest("Test writing samtools WEx BCF example", spec1);
} }
@Test @Test(enabled = false) // TODO disabled because current BCF2 is 1 based
public void testReadingSamtoolsWExBCFExample() { public void testReadingSamtoolsWExBCFExample() {
String testVCF = privateTestDir + "ex2.bcf"; String testVCF = privateTestDir + "ex2.bcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
@ -66,4 +67,38 @@ public class VCFIntegrationTest extends WalkerTest {
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94")); WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94"));
executeTest("Test reading samtools WEx BCF example", spec1); executeTest("Test reading samtools WEx BCF example", spec1);
} }
//
//
// Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working
//
//
@Test
public void testFailingOnVCFWithoutHeaders() {
runVCFWithoutHeaders("", "", UserException.class, false);
}
@Test
public void testPassingOnVCFWithoutHeadersWithLenientProcessing() {
runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true);
}
@Test
public void testPassingOnVCFWithoutHeadersRepairingHeaders() {
runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false);
}
private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) {
final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf";
final String baseCommand = "-R " + b37KGReference
+ " --no_cmdline_in_header -o %s "
+ "-T VariantsToVCF -V " + testVCF + " " + moreArgs;
WalkerTestSpec spec1 = expectedException != null
? new WalkerTestSpec(baseCommand, 1, expectedException)
: new WalkerTestSpec(baseCommand, 1, Arrays.asList(expectedMD5));
if ( disableBCF )
spec1.disableShadowBCF();
executeTest("Test reading VCF without header lines with additional args " + moreArgs, spec1);
}
} }

View File

@ -52,6 +52,9 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME) val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean] this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
// ensure that the gather function receives the same unsafe parameter as the scattered function
this.unsafe = this.originalGATK.unsafe
super.freezeFieldValues() super.freezeFieldValues()
} }
} }