Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
0268da7560
|
|
@ -18,6 +18,7 @@ import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
|
||||||
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
|
||||||
|
|
||||||
protected final static Logger log = Logger.getLogger(VCFCodec.class);
|
protected final static Logger log = Logger.getLogger(VCFCodec.class);
|
||||||
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
||||||
|
|
@ -252,7 +253,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
|
||||||
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
||||||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
" tokens, and saw " + nParts + " )", lineNo);
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
||||||
|
|
@ -518,8 +519,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
* @param lineNo the line number for this record
|
* @param lineNo the line number for this record
|
||||||
*/
|
*/
|
||||||
private static void checkAllele(String allele, boolean isRef, int lineNo) {
|
private static void checkAllele(String allele, boolean isRef, int lineNo) {
|
||||||
if ( allele == null || allele.length() == 0 )
|
if ( allele == null || allele.length() == 0 )
|
||||||
generateException("Empty alleles are not permitted in VCF records", lineNo);
|
generateException("Empty alleles are not permitted in VCF records", lineNo);
|
||||||
|
|
||||||
|
if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING )
|
||||||
|
log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
|
||||||
|
|
||||||
if ( isSymbolicAllele(allele) ) {
|
if ( isSymbolicAllele(allele) ) {
|
||||||
if ( isRef ) {
|
if ( isRef ) {
|
||||||
|
|
@ -572,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
|
||||||
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||||
boolean clipping = true;
|
boolean clipping = true;
|
||||||
|
final byte ref0 = (byte)ref.charAt(0);
|
||||||
|
|
||||||
for ( Allele a : unclippedAlleles ) {
|
for ( Allele a : unclippedAlleles ) {
|
||||||
if ( a.isSymbolic() )
|
if ( a.isSymbolic() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) {
|
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
||||||
clipping = false;
|
clipping = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -604,7 +609,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
stillClipping = false;
|
stillClipping = false;
|
||||||
else if ( ref.length() == clipping )
|
else if ( ref.length() == clipping )
|
||||||
generateException("bad alleles encountered", lineNo);
|
generateException("bad alleles encountered", lineNo);
|
||||||
else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] )
|
else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) )
|
||||||
stillClipping = false;
|
stillClipping = false;
|
||||||
}
|
}
|
||||||
if ( stillClipping )
|
if ( stillClipping )
|
||||||
|
|
@ -613,6 +618,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
||||||
|
|
||||||
return clipping;
|
return clipping;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* clip the alleles, based on the reference
|
* clip the alleles, based on the reference
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class VCFCodecUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Provider
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class AlleleClippingTestProvider extends TestDataProvider {
|
||||||
|
final String ref;
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final int expectedClip;
|
||||||
|
|
||||||
|
private AlleleClippingTestProvider(final int expectedClip, final String ref, final String ... alleles) {
|
||||||
|
super(AlleleClippingTestProvider.class);
|
||||||
|
this.ref = ref;
|
||||||
|
for ( final String allele : alleles )
|
||||||
|
this.alleles.add(Allele.create(allele));
|
||||||
|
this.expectedClip = expectedClip;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "AlleleClippingTestProvider")
|
||||||
|
public Object[][] MakeAlleleClippingTest() {
|
||||||
|
// pair clipping
|
||||||
|
new AlleleClippingTestProvider(0, "ATT", "CCG");
|
||||||
|
new AlleleClippingTestProvider(1, "ATT", "CCT");
|
||||||
|
new AlleleClippingTestProvider(2, "ATT", "CTT");
|
||||||
|
new AlleleClippingTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||||
|
|
||||||
|
// triplets
|
||||||
|
new AlleleClippingTestProvider(0, "ATT", "CTT", "CGG");
|
||||||
|
new AlleleClippingTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||||
|
new AlleleClippingTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||||
|
|
||||||
|
return AlleleClippingTestProvider.getTests(AlleleClippingTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(dataProvider = "AlleleClippingTestProvider")
|
||||||
|
public void TestAlleleClipping(AlleleClippingTestProvider cfg) {
|
||||||
|
int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1);
|
||||||
|
Assert.assertEquals(result, cfg.expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="org.broad" module="tribble" revision="46" status="integration" />
|
<info organisation="org.broad" module="tribble" revision="53" status="integration" />
|
||||||
</ivy-module>
|
</ivy-module>
|
||||||
Loading…
Reference in New Issue