Now processes VCF files with repeated loci without crashing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4481 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2010-10-12 04:36:07 +00:00
parent ccc22c2331
commit f348ca2976
2 changed files with 42 additions and 28 deletions

View File

@ -203,10 +203,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if ( tracker == null ) if ( tracker == null )
return 0; return 0;
VariantContext vc = tracker.getVariantContext(ref, "variant", null, context.getLocation(), true); Collection<VariantContext> vcs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
if ( vc == null ){ if ( vcs == null || vcs.size() == 0) {
return 0; return 0;
} }
for (VariantContext vc : vcs) {
VariantContext sub = subsetRecord(vc, samples); VariantContext sub = subsetRecord(vc, samples);
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
@ -218,6 +220,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
vcfWriter.add(sub, ref.getBase()); vcfWriter.add(sub, ref.getBase());
} }
}
return 1; return 1;
} }
@ -243,35 +246,33 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes()); HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
VariantContextUtils.calculateChromosomeCounts(sub, attributes, false); int alleleCount = 0;
int numberOfAlleles = 0;
// because we may want to select against the chromosome count attributes,
// we need to convert them to literals instead of arrays
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) && attributes.get(VCFConstants.ALLELE_COUNT_KEY) instanceof List ) {
List<Integer> counts = (List<Integer>)attributes.get(VCFConstants.ALLELE_COUNT_KEY);
if ( counts.size() == 1 )
attributes.put(VCFConstants.ALLELE_COUNT_KEY, counts.get(0));
}
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) && attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY) instanceof List ) {
List<Double> freqs = (List<Double>)attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY);
if ( freqs.size() == 1 )
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, freqs.get(0));
}
int depth = 0; int depth = 0;
for (String sample : sub.getSampleNames()) { for (String sample : sub.getSampleNames()) {
Genotype g = sub.getGenotype(sample); Genotype g = sub.getGenotype(sample);
if (g.isNotFiltered() && g.isCalled()) { if (g.isNotFiltered() && g.isCalled()) {
numberOfAlleles += g.getPloidy();
String dp = (String) g.getAttribute(VCFConstants.DEPTH_KEY); if (g.isHet()) { alleleCount++; }
else if (g.isHomVar()) { alleleCount += 2; }
String dp = (String) g.getAttribute("DP");
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
depth += Integer.valueOf(dp); depth += Integer.valueOf(dp);
} }
} }
} }
attributes.put(VCFConstants.DEPTH_KEY, depth); attributes.put("AC", alleleCount);
attributes.put("AN", numberOfAlleles);
if (numberOfAlleles == 0) {
attributes.put("AF", 0.0);
} else {
attributes.put("AF", ((double) alleleCount) / ((double) numberOfAlleles));
}
attributes.put("DP", depth);
sub = VariantContext.modifyAttributes(sub, attributes); sub = VariantContext.modifyAttributes(sub, attributes);

View File

@ -23,4 +23,17 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testComplexSelection--" + testfile, spec); executeTest("testComplexSelection--" + testfile, spec);
} }
/**
 * Runs the walker on {@code test.dup.vcf}, selecting samples A, B and C with
 * {@code -NO_HEADER}, and hands the resulting spec to {@code executeTest}.
 *
 * NOTE(review): judging by the file name, the input presumably contains repeated
 * records at the same locus — confirm against the test data.
 */
@Test
public void testRepeatedLineSelection() {
    final String testfile = validationDataLocation + "test.dup.vcf";
    final String selectionArgs = " -sn A -sn B -sn C -B:variant,VCF " + testfile + " -NO_HEADER";
    final String testName = "testRepeatedLineSelection--" + testfile;

    // NOTE(review): the literal 1 is presumably the number of expected output files and the
    // hex string its expected MD5 digest — confirm against WalkerTestSpec.
    final WalkerTestSpec spec = new WalkerTestSpec(
            baseTestString(selectionArgs),
            1,
            Arrays.asList("fae9822d5f7ad6c76b411e8ca0886409"));

    executeTest(testName, spec);
}
} }