Continuing bugfixes to get new VC working
This commit is contained in:
parent
df415da4ab
commit
e56d52006a
|
|
@ -333,11 +333,11 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
VariantContext filteredVC;
|
||||
if ( beagleVarCounts > 0 || DONT_FILTER_MONOMORPHIC_SITES )
|
||||
filteredVC = new VariantContext("outputvcf", VCFConstants.EMPTY_ID_FIELD, vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes());
|
||||
filteredVC = new VariantContext("outputvcf", vc_input.getID(), vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes());
|
||||
else {
|
||||
Set<String> removedFilters = vc_input.filtersWereApplied() ? new HashSet<String>(vc_input.getFilters()) : new HashSet<String>(1);
|
||||
removedFilters.add(String.format("BGL_RM_WAS_%s",vc_input.getAlternateAllele(0)));
|
||||
filteredVC = new VariantContext("outputvcf", VCFConstants.EMPTY_ID_FIELD, vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), new HashSet<Allele>(Arrays.asList(vc_input.getReference())), genotypes, vc_input.getNegLog10PError(), removedFilters, vc_input.getAttributes());
|
||||
filteredVC = new VariantContext("outputvcf", vc_input.getID(), vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), new HashSet<Allele>(Arrays.asList(vc_input.getReference())), genotypes, vc_input.getNegLog10PError(), removedFilters, vc_input.getAttributes());
|
||||
}
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>(filteredVC.getAttributes());
|
||||
|
|
|
|||
|
|
@ -654,7 +654,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return vc;
|
||||
|
||||
// logger.info("Genotypes in full vc: " + vc.getGenotypes());
|
||||
// logger.info("My own sub : " + vc.getGenotypes().subsetToSamples(samples));
|
||||
VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
// logger.info("Genotypes in sub vc: " + sub.getGenotypes());
|
||||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
|
|
@ -691,6 +694,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
sub = VariantContext.modifyAttributes(sub, attributes);
|
||||
|
||||
// logger.info("Genotypes in final vc: " + sub.getGenotypes());
|
||||
return sub;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,11 +30,13 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
public class GenotypeCollection implements List<Genotype> {
|
||||
public final static GenotypeCollection NO_GENOTYPES = new GenotypeCollection();
|
||||
public final static GenotypeCollection NO_GENOTYPES =
|
||||
new GenotypeCollection(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), new HashSet<String>(0), true);
|
||||
|
||||
Set<String> sampleNamesInOrder = null;
|
||||
Map<String, Integer> sampleNameToOffset = null;
|
||||
boolean cacheIsInvalid = true;
|
||||
final ArrayList<Genotype> genotypes;
|
||||
List<Genotype> genotypes;
|
||||
boolean immutable = false;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -54,6 +56,19 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
private GenotypeCollection(final ArrayList<Genotype> genotypes, final boolean immutable) {
|
||||
this.genotypes = genotypes;
|
||||
this.immutable = immutable;
|
||||
this.sampleNameToOffset = null;
|
||||
this.cacheIsInvalid = true;
|
||||
}
|
||||
|
||||
private GenotypeCollection(final ArrayList<Genotype> genotypes,
|
||||
final Map<String, Integer> sampleNameToOffset,
|
||||
final Set<String> sampleNamesInOrder,
|
||||
final boolean immutable) {
|
||||
this.genotypes = genotypes;
|
||||
this.immutable = immutable;
|
||||
this.sampleNameToOffset = sampleNameToOffset;
|
||||
this.sampleNamesInOrder = sampleNamesInOrder;
|
||||
this.cacheIsInvalid = false;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -108,12 +123,8 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
public final GenotypeCollection mutable() {
|
||||
immutable = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
public final GenotypeCollection immutable() {
|
||||
this.genotypes = Collections.unmodifiableList(genotypes);
|
||||
immutable = true;
|
||||
return this;
|
||||
}
|
||||
|
|
@ -135,17 +146,20 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
|
||||
private void invalidateCaches() {
|
||||
cacheIsInvalid = true;
|
||||
if ( sampleNameToOffset != null ) sampleNameToOffset.clear();
|
||||
sampleNamesInOrder = null;
|
||||
sampleNameToOffset = null;
|
||||
}
|
||||
|
||||
private void buildCache() {
|
||||
cacheIsInvalid = false;
|
||||
sampleNamesInOrder = new TreeSet<String>();
|
||||
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
|
||||
|
||||
if ( sampleNameToOffset == null )
|
||||
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
|
||||
|
||||
for ( int i = 0; i < genotypes.size(); i++ )
|
||||
sampleNameToOffset.put(genotypes.get(i).getSampleName(), i);
|
||||
for ( int i = 0; i < genotypes.size(); i++ ) {
|
||||
final Genotype g = genotypes.get(i);
|
||||
sampleNamesInOrder.add(g.getSampleName());
|
||||
sampleNameToOffset.put(g.getSampleName(), i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -341,7 +355,8 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
}
|
||||
|
||||
public Set<String> getSampleNamesOrderedByName() {
|
||||
return new TreeSet<String>(getSampleNames());
|
||||
buildCache();
|
||||
return sampleNamesInOrder;
|
||||
}
|
||||
|
||||
public boolean containsSample(final String sample) {
|
||||
|
|
@ -365,10 +380,41 @@ public class GenotypeCollection implements List<Genotype> {
|
|||
return NO_GENOTYPES;
|
||||
else {
|
||||
GenotypeCollection subset = create(samples.size());
|
||||
for ( final Genotype g : genotypes )
|
||||
if ( samples.contains(g.getSampleName()) )
|
||||
for ( final Genotype g : genotypes ) {
|
||||
if ( samples.contains(g.getSampleName()) ) {
|
||||
subset.add(g);
|
||||
}
|
||||
}
|
||||
return subset;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final List<String> gS = new ArrayList<String>();
|
||||
for ( final Genotype g : this.iterateInSampleNameOrder() )
|
||||
gS.add(g.toString());
|
||||
return "[" + join(",", gS) + "]";
|
||||
}
|
||||
|
||||
// copied from Utils
|
||||
private static <T> String join(final String separator, final Collection<T> objects) {
|
||||
if (objects.isEmpty()) { // fast path for empty collection
|
||||
return "";
|
||||
} else {
|
||||
final Iterator<T> iter = objects.iterator();
|
||||
final T first = iter.next();
|
||||
|
||||
if ( ! iter.hasNext() ) // fast path for singleton collections
|
||||
return first.toString();
|
||||
else { // full path for 2+ collection that actually need a join
|
||||
final StringBuilder ret = new StringBuilder(first.toString());
|
||||
while(iter.hasNext()) {
|
||||
ret.append(separator);
|
||||
ret.append(iter.next().toString());
|
||||
}
|
||||
return ret.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -411,7 +411,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
// we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles
|
||||
this.alleles = makeAlleles(alleles);
|
||||
|
||||
if ( genotypes == null ) {
|
||||
if ( genotypes == null || genotypes == NO_GENOTYPES ) {
|
||||
this.genotypes = NO_GENOTYPES;
|
||||
} else {
|
||||
this.genotypes = genotypes.immutable();
|
||||
|
|
@ -543,8 +543,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
// }
|
||||
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
|
||||
loadGenotypes();
|
||||
GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
return new VariantContext(getSource(), getID(), contig, start, stop, alleles,
|
||||
genotypes.subsetToSamples(sampleNames),
|
||||
newGenotypes,
|
||||
getNegLog10PError(),
|
||||
filtersWereApplied() ? getFilters() : null,
|
||||
getAttributes(),
|
||||
|
|
@ -552,6 +554,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames) {
|
||||
loadGenotypes();
|
||||
GenotypeCollection newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||
return new VariantContext(getSource(), getID(), contig, start, stop, allelesOfGenotypes(newGenotypes),
|
||||
newGenotypes,
|
||||
|
|
@ -562,7 +565,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromSamples(new HashSet<String>(Arrays.asList(sampleName)));
|
||||
return subContextFromSamples(Collections.singleton(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1460,7 +1463,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
public String toString() {
|
||||
return String.format("[VC %s @ %s of type=%s alleles=%s attr=%s GT=%s",
|
||||
getSource(), contig + ":" + (start - stop == 0 ? start : start + "-" + stop), this.getType(),
|
||||
ParsingUtils.sortList(this.getAlleles()), ParsingUtils.sortedString(this.getAttributes()), this.getGenotypesSortedByName());
|
||||
ParsingUtils.sortList(this.getAlleles()),
|
||||
ParsingUtils.sortedString(this.getAttributes()),
|
||||
this.getGenotypes());
|
||||
}
|
||||
|
||||
// protected basic manipulation routines
|
||||
|
|
|
|||
|
|
@ -1,51 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.phasing;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest {
|
||||
|
||||
public static String baseTestString(String reference, String VCF, int maxDist) {
|
||||
return "-T MergeSegregatingAlternateAlleles" +
|
||||
" -R " + reference +
|
||||
" --variant:vcf " + validationDataLocation + VCF +
|
||||
" --maxGenomicDistance " + maxDist +
|
||||
" -o %s" +
|
||||
" -NO_HEADER";
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 1)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("af5e1370822551c0c6f50f23447dc627"));
|
||||
executeTest("Merge sites within genomic distance of 1 [TEST ONE]", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 10)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("dd8c44ae1ef059a7fe85399467e102eb"));
|
||||
executeTest("Merge sites within genomic distance of 10 [TEST TWO]", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test3() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(hg18Reference, "merging_test_chr20_556259_756570.vcf", 100)
|
||||
+ " -L chr20:556259-756570",
|
||||
1,
|
||||
Arrays.asList("f81fd72ecaa57b3215406fcea860bcc5"));
|
||||
executeTest("Merge sites within genomic distance of 100 [TEST THREE]", spec);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue