adding the original allele list to a variant context (as the annotation ORIGINAL_ALLELE_LIST), in the case where the set alleles are the result of clipping. Added tests for both cases.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3658 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1292c96e29
commit
62d22ff1aa
|
|
@ -25,6 +25,9 @@ import java.util.*;
|
|||
*/
|
||||
public class VCF4Codec implements FeatureCodec {
|
||||
|
||||
// a variant context flag for original allele strings
|
||||
public static final String ORIGINAL_ALLELE_LIST = "ORIGINAL_ALLELE_LIST";
|
||||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
private VCFHeader header = null;
|
||||
|
||||
|
|
@ -364,6 +367,7 @@ public class VCF4Codec implements FeatureCodec {
|
|||
*/
|
||||
private VariantContext parseVCFLine(String[] parts, boolean parseGenotypes) {
|
||||
try {
|
||||
// increment the line count
|
||||
lineNo++;
|
||||
|
||||
// parse out the required fields
|
||||
|
|
@ -376,15 +380,19 @@ public class VCF4Codec implements FeatureCodec {
|
|||
String filter = parts[6];
|
||||
String info = parts[7];
|
||||
|
||||
|
||||
// get our alleles, filters, and setup an attribute map
|
||||
List<Allele> alleles = parseAlleles(ref, alts);
|
||||
Set<String> filters = parseFilters(filter);
|
||||
Map<String, Object> attributes = parseInfo(info, id);
|
||||
|
||||
// find out our current location, and clip the alleles down to their minimum length
|
||||
Pair<GenomeLoc, List<Allele>> locAndAlleles = (ref.length() > 1) ?
|
||||
clipAlleles(contig, pos, ref, alleles) :
|
||||
new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
|
||||
Pair<GenomeLoc, List<Allele>> locAndAlleles;
|
||||
if (ref.length() > 1) {
|
||||
attributes.put(ORIGINAL_ALLELE_LIST,alleles);
|
||||
locAndAlleles = clipAlleles(contig, pos, ref, alleles);
|
||||
} else {
|
||||
locAndAlleles = new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
|
||||
}
|
||||
|
||||
// a map to store our genotypes
|
||||
Map<String, Genotype> genotypes = null;
|
||||
|
|
@ -477,7 +485,11 @@ public class VCF4Codec implements FeatureCodec {
|
|||
|
||||
/**
|
||||
* clip the alleles, based on the reference
|
||||
* @param unclippedAlleles the list of alleles
|
||||
*
|
||||
* @param contig our contig position
|
||||
* @param position the unadjusted start position (pre-clipping)
|
||||
* @param ref the reference string
|
||||
* @param unclippedAlleles the list of unclipped alleles
|
||||
* @return a list of alleles, clipped to the reference
|
||||
*/
|
||||
static Pair<GenomeLoc,List<Allele>> clipAlleles(String contig, long position, String ref, List<Allele> unclippedAlleles) {
|
||||
|
|
@ -502,7 +514,6 @@ public class VCF4Codec implements FeatureCodec {
|
|||
if (clipping) reverseClipped++;
|
||||
}
|
||||
|
||||
|
||||
for (Allele a : unclippedAlleles)
|
||||
newAlleleList.add(Allele.create(Arrays.copyOfRange(a.getBases(),forwardClipping,a.getBases().length-reverseClipped),a.isReference()));
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ public class VCF4UnitTest extends BaseTest {
|
|||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -197,13 +197,33 @@ public class VCF4UnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
// test too few info lines, we don't provide the DP in this line
|
||||
String twoFewInfoLine = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t0|0:48:1:51,51\t0|0:48:1:51,51";
|
||||
String twoFewInfoLine = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;AF=0.5;DB\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t0|0:48:1:51,51\t0|0:48:1:51,51";
|
||||
@Test
|
||||
public void testCheckTwoFewInfoValidation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
testSetup.codec.decode(twoFewInfoLine);
|
||||
}
|
||||
|
||||
// test that the variant context adds attributes for the original alleles when clipped
|
||||
String clippedAlleleLine = "20\t14370\trs6054257\tGGG\tG\t29\tPASS\tNS=3;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|1:48:1:51,51\t0|0:48:1:51,51\t0|0:48:1:51,51";
|
||||
@Test
|
||||
public void testClippedAllelesAddedAsAnnotation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
VariantContext context = (VariantContext)testSetup.codec.decode(clippedAlleleLine);
|
||||
Assert.assertTrue(context.hasAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST));
|
||||
List<Allele> alleles = (List<Allele>)context.getAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST);
|
||||
Assert.assertEquals("Expected allele list of 2, got " + alleles.size(),2,alleles.size());
|
||||
Assert.assertTrue(alleles.get(0).basesMatch("GGG"));
|
||||
Assert.assertTrue(alleles.get(1).basesMatch("G"));
|
||||
}
|
||||
// test that when we don't clip the alleles, we don't see the annotation
|
||||
@Test
|
||||
public void testNoClippedAllelesNoAddedAsAnnotation() {
|
||||
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
|
||||
VariantContext context = (VariantContext)testSetup.codec.decode(twoFewInfoLine);
|
||||
Assert.assertTrue(!context.hasAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST));
|
||||
}
|
||||
|
||||
// test that we're getting the right genotype for a multi-base polymorphism
|
||||
String MNPLine = "20\t14370\trs6054257\tGG\tAT\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.";
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in New Issue