Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7e7b4cd1b9
|
|
@ -285,7 +285,10 @@ public class ClippingOp {
|
|||
|
||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
|
||||
private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
|
||||
if (start == 0 && stop == read.getReadLength() - 1)
|
||||
final int firstBaseAfterSoftClips = read.getAlignmentStart() - read.getSoftStart();
|
||||
final int lastBaseBeforeSoftClips = read.getSoftEnd() - read.getSoftStart();
|
||||
|
||||
if (start == firstBaseAfterSoftClips && stop == lastBaseBeforeSoftClips) // note that if the read has no soft clips, these constants will be 0 and read length - 1 (beauty of math).
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
|
||||
|
||||
|
|
@ -309,6 +312,7 @@ public class ClippingOp {
|
|||
throw new ReviewedStingException("Where did the clone go?");
|
||||
}
|
||||
|
||||
hardClippedRead.resetSoftStartAndEnd(); // reset the cached soft start and end because they may have changed now that the read was hard clipped. No need to calculate them now. They'll be lazily calculated on the next call to getSoftStart()/End()
|
||||
hardClippedRead.setBaseQualities(newQuals);
|
||||
hardClippedRead.setReadBases(newBases);
|
||||
hardClippedRead.setCigar(cigarShift.cigar);
|
||||
|
|
|
|||
|
|
@ -239,10 +239,10 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
final String contig = lookupContigName(contigOffset);
|
||||
builder.chr(contig);
|
||||
|
||||
this.pos = decoder.decodeInt(BCF2Type.INT32);
|
||||
this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based
|
||||
final int refLength = decoder.decodeInt(BCF2Type.INT32);
|
||||
builder.start((long)pos);
|
||||
builder.stop((long)(pos + refLength - 1)); // minus one because of our open intervals
|
||||
builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -363,10 +363,10 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
int eqI = infoFieldArray[i].indexOf("=");
|
||||
if ( eqI != -1 ) {
|
||||
key = infoFieldArray[i].substring(0, eqI);
|
||||
String str = infoFieldArray[i].substring(eqI+1);
|
||||
String valueString = infoFieldArray[i].substring(eqI+1);
|
||||
|
||||
// split on the INFO field separator
|
||||
int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
|
||||
int infoValueSplitSize = ParsingUtils.split(valueString, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
|
||||
if ( infoValueSplitSize == 1 ) {
|
||||
value = infoValueArray[0];
|
||||
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
|
||||
|
|
@ -396,6 +396,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
}
|
||||
}
|
||||
|
||||
// this line ensures that key/value pairs that look like key=; are parsed correctly as MISSING
|
||||
if ( "".equals(value) ) value = VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
attributes.put(key, value);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -214,7 +214,7 @@ public class VCFHeader {
|
|||
private final <T extends VCFCompoundHeaderLine> void addMetaDataMapBinding(final Map<String, T> map, T line) {
|
||||
final String key = line.getID();
|
||||
if ( map.containsKey(key) )
|
||||
logger.warn("Found duplicate VCF header lines for " + key + "; keeping the first only" );
|
||||
logger.debug("Found duplicate VCF header lines for " + key + "; keeping the first only" );
|
||||
else
|
||||
map.put(key, line);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
private String mReadString = null;
|
||||
private GATKSAMReadGroupRecord mReadGroup = null;
|
||||
private byte[] reducedReadCounts = null;
|
||||
private int softStart = -1;
|
||||
private int softEnd = -1;
|
||||
|
||||
// because some values can be null, we don't want to duplicate effort
|
||||
private boolean retrievedReadGroup = false;
|
||||
|
|
@ -385,15 +387,17 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* @return the unclipped start of the read taking soft clips (but not hard clips) into account
|
||||
*/
|
||||
public int getSoftStart() {
|
||||
int start = this.getUnclippedStart();
|
||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
start += cigarElement.getLength();
|
||||
else
|
||||
break;
|
||||
if (softStart < 0) {
|
||||
int start = this.getUnclippedStart();
|
||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
start += cigarElement.getLength();
|
||||
else
|
||||
break;
|
||||
}
|
||||
softStart = start;
|
||||
}
|
||||
|
||||
return start;
|
||||
return softStart;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -404,23 +408,43 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* @return the unclipped end of the read taking soft clips (but not hard clips) into account
|
||||
*/
|
||||
public int getSoftEnd() {
|
||||
int stop = this.getUnclippedStart();
|
||||
if (softEnd < 0) {
|
||||
int stop = this.getUnclippedStart();
|
||||
|
||||
if (ReadUtils.readIsEntirelyInsertion(this))
|
||||
return stop;
|
||||
if (ReadUtils.readIsEntirelyInsertion(this))
|
||||
return stop;
|
||||
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||
stop += shift;
|
||||
lastOperator = cigarElement.getOperator();
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = 0;
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : this.getCigar().getCigarElements()) {
|
||||
stop += shift;
|
||||
lastOperator = cigarElement.getOperator();
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = 0;
|
||||
}
|
||||
softEnd = (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
return softEnd;
|
||||
}
|
||||
|
||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
/**
|
||||
* If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache
|
||||
* so that the next call to getSoftStart/End will recalculate it lazily.
|
||||
*/
|
||||
public void resetSoftStartAndEnd() {
|
||||
softStart = -1;
|
||||
softEnd = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* If the read is hard clipped, the soft start and end will change. You can set manually or just reset the cache
|
||||
* so that the next call to getSoftStart/End will recalculate it lazily.
|
||||
*/
|
||||
public void resetSoftStartAndEnd(int softStart, int softEnd) {
|
||||
this.softStart = softStart;
|
||||
this.softEnd = softEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -520,7 +520,7 @@ public class ReadUtils {
|
|||
if (allowGoalNotReached) {
|
||||
return new Pair<Integer, Boolean>(CLIPPING_GOAL_NOT_REACHED, false);
|
||||
} else {
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Alignment " + alignmentStart + " | " + cigar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -84,7 +84,6 @@ import java.util.*;
|
|||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
final private static List<Allele> MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
|
||||
|
||||
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
|
||||
|
|
@ -203,10 +202,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
// note use of encodeRawValue to not insert the typing byte
|
||||
encoder.encodeRawValue(contigIndex, BCF2Type.INT32);
|
||||
|
||||
// pos
|
||||
encoder.encodeRawValue(vc.getStart(), BCF2Type.INT32);
|
||||
// pos. GATK is 1 based, BCF2 is 0 based
|
||||
encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32);
|
||||
|
||||
// ref length
|
||||
// ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1
|
||||
// for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1
|
||||
encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32);
|
||||
|
||||
// qual
|
||||
|
|
|
|||
|
|
@ -564,6 +564,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
+ " at " + vc.getChr() + ":" + vc.getStart()
|
||||
+ " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have"
|
||||
+ " complete VCF headers by default. This error can be disabled with the engine argument"
|
||||
+ " -U LENIENT_VCF_PROCESSING");
|
||||
+ " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
|
||||
Arrays.asList("4386fbb258dcef4437495a37f5a83c53")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
|
|
@ -167,36 +167,6 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testNoGTs--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testParallelization2() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testParallelization4() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("433eccaf1ac6e6be500ef0984a5d8d8b")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testParallelization (4 threads)--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSelectFromMultiAllelic() {
|
||||
String testfile = privateTestDir + "multi-allelic.bi-allelicInGIH.vcf";
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class SelectVariantsParallelIntegrationTest extends WalkerTest {
|
||||
|
||||
private class ParallelSelectTestProvider extends TestDataProvider {
|
||||
final String reference;
|
||||
final String args;
|
||||
final String md5;
|
||||
final int nt;
|
||||
|
||||
private ParallelSelectTestProvider(final String reference, final String args, final String md5, final int nt) {
|
||||
super(ParallelSelectTestProvider.class);
|
||||
this.reference = reference;
|
||||
this.args = args;
|
||||
this.md5 = md5;
|
||||
this.nt = nt;
|
||||
}
|
||||
|
||||
public final String getCmdLine() {
|
||||
return "-T SelectVariants -R " + reference + " -o %s -L 1 --no_cmdline_in_header -nt " + nt + " " + args;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("ParallelSelectVariants nt=%d args=%s", nt, args);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ParallelSelectTest")
|
||||
public Object[][] makeParallelSelectTestProvider() {
|
||||
for ( int nt : Arrays.asList(1, 2, 4) ) {
|
||||
{ // original MAF test
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
String args = " -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile;
|
||||
new ParallelSelectTestProvider(b36KGReference, args, "4386fbb258dcef4437495a37f5a83c53", nt);
|
||||
}
|
||||
{ // new tests on b37 using testdir VCF
|
||||
final String testfile = privateTestDir + "NA12878.hg19.example1.vcf";
|
||||
final String args = "-select 'DP > 30' -V " + testfile;
|
||||
new ParallelSelectTestProvider(b37KGReference, args, "c64b45a14d41b1e5cddbe036b47e7519", nt);
|
||||
}
|
||||
}
|
||||
|
||||
return ParallelSelectTestProvider.getTests(ParallelSelectTestProvider.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ParallelSelectTest")
|
||||
public void testParallelSelectTestProvider(final ParallelSelectTestProvider cfg) {
|
||||
final WalkerTestSpec spec = new WalkerTestSpec( cfg.getCmdLine(), 1, Arrays.asList(cfg.md5) );
|
||||
executeTest(cfg.toString(), spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -58,7 +59,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false) // TODO disabled because current BCF2 is 1 based
|
||||
public void testReadingSamtoolsWExBCFExample() {
|
||||
String testVCF = privateTestDir + "ex2.bcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
|
@ -66,4 +67,38 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94"));
|
||||
executeTest("Test reading samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working
|
||||
//
|
||||
//
|
||||
|
||||
@Test
|
||||
public void testFailingOnVCFWithoutHeaders() {
|
||||
runVCFWithoutHeaders("", "", UserException.class, false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPassingOnVCFWithoutHeadersWithLenientProcessing() {
|
||||
runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPassingOnVCFWithoutHeadersRepairingHeaders() {
|
||||
runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false);
|
||||
}
|
||||
|
||||
private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) {
|
||||
final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf";
|
||||
final String baseCommand = "-R " + b37KGReference
|
||||
+ " --no_cmdline_in_header -o %s "
|
||||
+ "-T VariantsToVCF -V " + testVCF + " " + moreArgs;
|
||||
WalkerTestSpec spec1 = expectedException != null
|
||||
? new WalkerTestSpec(baseCommand, 1, expectedException)
|
||||
: new WalkerTestSpec(baseCommand, 1, Arrays.asList(expectedMD5));
|
||||
if ( disableBCF )
|
||||
spec1.disableShadowBCF();
|
||||
executeTest("Test reading VCF without header lines with additional args " + moreArgs, spec1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,6 +52,9 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
|
|||
val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
|
||||
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
|
||||
|
||||
// ensure that the gather function receives the same unsafe parameter as the scattered function
|
||||
this.unsafe = this.originalGATK.unsafe
|
||||
|
||||
super.freezeFieldValues()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue