Vastly better way of doing on-demand genotype loading
-- With our GenotypesContext class we can naturally create a LazyGenotypesContext subclass that does the on-demand loading. -- This new class has replaced all of the old, complex functionality -- Better still, there were many cases where the genotypes were being loaded unnecessarily, resulting in inefficiency. This was detected because some of the integration tests changed as the genotypes were no longer being parsed unnecessarily -- Misc. bug fixes throughout the system -- Bug fixes for PhaseByTransmission with new GenotypesContext
This commit is contained in:
parent
f392d330c3
commit
9cb3fe3a59
|
|
@ -746,11 +746,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
|
|
||||||
if (tracker != null) {
|
if (tracker != null) {
|
||||||
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
|
VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation());
|
||||||
|
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
|
|
||||||
GenotypesContext genotypeMap = vc.getGenotypes();
|
GenotypesContext genotypesContext = GenotypesContext.copy(vc.getGenotypes());
|
||||||
|
|
||||||
int mvCount;
|
|
||||||
|
|
||||||
for (Sample sample : trios) {
|
for (Sample sample : trios) {
|
||||||
Genotype mother = vc.getGenotype(sample.getMaternalID());
|
Genotype mother = vc.getGenotype(sample.getMaternalID());
|
||||||
Genotype father = vc.getGenotype(sample.getPaternalID());
|
Genotype father = vc.getGenotype(sample.getPaternalID());
|
||||||
|
|
@ -761,18 +759,18 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
|
ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
|
||||||
mvCount = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes);
|
final int mvCount = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child,trioGenotypes);
|
||||||
|
|
||||||
Genotype phasedMother = trioGenotypes.get(0);
|
Genotype phasedMother = trioGenotypes.get(0);
|
||||||
Genotype phasedFather = trioGenotypes.get(1);
|
Genotype phasedFather = trioGenotypes.get(1);
|
||||||
Genotype phasedChild = trioGenotypes.get(2);
|
Genotype phasedChild = trioGenotypes.get(2);
|
||||||
|
|
||||||
//Fill the genotype map with the new genotypes and increment metrics counters
|
//Fill the genotype map with the new genotypes and increment metrics counters
|
||||||
genotypeMap.add(phasedChild);
|
genotypesContext.replace(phasedChild);
|
||||||
if(mother != null){
|
if(mother != null){
|
||||||
genotypeMap.add(phasedMother);
|
genotypesContext.replace(phasedMother);
|
||||||
if(father != null){
|
if(father != null){
|
||||||
genotypeMap.add(phasedFather);
|
genotypesContext.replace(phasedFather);
|
||||||
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
|
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
|
||||||
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
|
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoods().toString(),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute("AD"),phasedFather.getLikelihoods().toString(),phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute("AD"),phasedChild.getLikelihoods().toString());
|
||||||
if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
||||||
|
|
@ -786,7 +784,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
genotypeMap.add(phasedFather);
|
genotypesContext.replace(phasedFather);
|
||||||
updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
|
updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters);
|
||||||
if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
|
||||||
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
|
||||||
|
|
@ -797,10 +795,10 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
||||||
//TODO: ADAPT FOR PAIRS TOO!!
|
//TODO: ADAPT FOR PAIRS TOO!!
|
||||||
if(mvCount>0 && mvFile != null)
|
if(mvCount>0 && mvFile != null)
|
||||||
mvFile.println(mvfLine);
|
mvFile.println(mvfLine);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vcfWriter.add(new VariantContextBuilder(vc).genotypes(genotypeMap).make());
|
builder.genotypes(genotypesContext);
|
||||||
|
vcfWriter.add(builder.make());
|
||||||
}
|
}
|
||||||
return metricsCounters;
|
return metricsCounters;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,10 +10,7 @@ import org.broad.tribble.util.BlockCompressedInputStream;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -255,11 +252,14 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
*/
|
*/
|
||||||
private VariantContext parseVCFLine(String[] parts) {
|
private VariantContext parseVCFLine(String[] parts) {
|
||||||
VariantContextBuilder builder = new VariantContextBuilder();
|
VariantContextBuilder builder = new VariantContextBuilder();
|
||||||
|
builder.source(getName());
|
||||||
|
|
||||||
// increment the line count
|
// increment the line count
|
||||||
lineNo++;
|
lineNo++;
|
||||||
|
|
||||||
// parse out the required fields
|
// parse out the required fields
|
||||||
builder.chr(getCachedString(parts[0]));
|
final String chr = getCachedString(parts[0]);
|
||||||
|
builder.chr(chr);
|
||||||
int pos = Integer.valueOf(parts[1]);
|
int pos = Integer.valueOf(parts[1]);
|
||||||
builder.start(pos);
|
builder.start(pos);
|
||||||
|
|
||||||
|
|
@ -294,9 +294,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
|
|
||||||
// do we have genotyping data
|
// do we have genotyping data
|
||||||
if (parts.length > NUM_STANDARD_FIELDS) {
|
if (parts.length > NUM_STANDARD_FIELDS) {
|
||||||
builder.attribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, new String(parts[8]));
|
LazyGenotypesContext lazy = new LazyGenotypesContext(this, parts[8], chr, pos, alleles, header.getGenotypeSamples().size());
|
||||||
builder.attribute(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY, this);
|
builder.genotypesNoValidation(lazy);
|
||||||
builder.genotypesAreUnparsed();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VariantContext vc = null;
|
VariantContext vc = null;
|
||||||
|
|
|
||||||
|
|
@ -219,9 +219,6 @@ public class StandardVCFWriter extends IndexingVCFWriter {
|
||||||
Map<String, String> infoFields = new TreeMap<String, String>();
|
Map<String, String> infoFields = new TreeMap<String, String>();
|
||||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||||
String key = field.getKey();
|
String key = field.getKey();
|
||||||
if ( key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
String outputValue = formatVCFField(field.getValue());
|
String outputValue = formatVCFField(field.getValue());
|
||||||
if ( outputValue != null )
|
if ( outputValue != null )
|
||||||
infoFields.put(key, outputValue);
|
infoFields.put(key, outputValue);
|
||||||
|
|
@ -229,9 +226,10 @@ public class StandardVCFWriter extends IndexingVCFWriter {
|
||||||
writeInfoString(infoFields);
|
writeInfoString(infoFields);
|
||||||
|
|
||||||
// FORMAT
|
// FORMAT
|
||||||
if ( vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) ) {
|
final GenotypesContext gc = vc.getGenotypes();
|
||||||
|
if ( gc instanceof LazyGenotypesContext && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() != null) {
|
||||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||||
mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, ""));
|
mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData());
|
||||||
} else {
|
} else {
|
||||||
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
||||||
if ( vc.hasGenotypes() ) {
|
if ( vc.hasGenotypes() ) {
|
||||||
|
|
|
||||||
|
|
@ -124,7 +124,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
||||||
|
|
||||||
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
|
||||||
|
|
||||||
GenotypesContext genotypes = GenotypesContext.create(nParts);
|
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
|
||||||
|
|
||||||
// get the format keys
|
// get the format keys
|
||||||
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
|
||||||
|
|
@ -191,7 +191,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return genotypes;
|
return GenotypesContext.create(genotypes, header.sampleNameToOffset, header.sampleNamesInOrder);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -191,8 +191,6 @@ public class GCF {
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||||
String key = field.getKey();
|
String key = field.getKey();
|
||||||
if ( key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
|
||||||
continue;
|
|
||||||
int stringIndex = GCFHeaderBuilder.encodeString(key);
|
int stringIndex = GCFHeaderBuilder.encodeString(key);
|
||||||
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
|
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
|
||||||
if ( outputValue != null ) {
|
if ( outputValue != null ) {
|
||||||
|
|
|
||||||
|
|
@ -55,8 +55,14 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
/** if true, then we need to reinitialize sampleNamesInOrder and sampleNameToOffset before we use them /*/
|
/** if true, then we need to reinitialize sampleNamesInOrder and sampleNameToOffset before we use them /*/
|
||||||
boolean cacheIsInvalid = true;
|
boolean cacheIsInvalid = true;
|
||||||
|
|
||||||
/** An ArrayList of genotypes contained in this context */
|
/**
|
||||||
List<Genotype> genotypes;
|
* An ArrayList of genotypes contained in this context
|
||||||
|
*
|
||||||
|
* WARNING: TO ENABLE THE LAZY VERSION OF THIS CLASS, NO METHODS SHOULD DIRECTLY
|
||||||
|
* ACCESS THIS VARIABLE. USE getGenotypes() INSTEAD.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
ArrayList<Genotype> notToBeDirectlyAccessedGenotypes;
|
||||||
|
|
||||||
/** Are we allowing users to modify the list? */
|
/** Are we allowing users to modify the list? */
|
||||||
boolean immutable = false;
|
boolean immutable = false;
|
||||||
|
|
@ -70,7 +76,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
/**
|
/**
|
||||||
* Create an empty GenotypeContext
|
* Create an empty GenotypeContext
|
||||||
*/
|
*/
|
||||||
private GenotypesContext() {
|
protected GenotypesContext() {
|
||||||
this(10, false);
|
this(10, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -78,7 +84,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
* Create an empty GenotypeContext, with initial capacity for n elements
|
* Create an empty GenotypeContext, with initial capacity for n elements
|
||||||
*/
|
*/
|
||||||
@Requires("n >= 0")
|
@Requires("n >= 0")
|
||||||
private GenotypesContext(final int n, final boolean immutable) {
|
protected GenotypesContext(final int n, final boolean immutable) {
|
||||||
this(new ArrayList<Genotype>(n), immutable);
|
this(new ArrayList<Genotype>(n), immutable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,8 +92,8 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
* Create an GenotypeContext containing genotypes
|
* Create an GenotypeContext containing genotypes
|
||||||
*/
|
*/
|
||||||
@Requires("genotypes != null")
|
@Requires("genotypes != null")
|
||||||
private GenotypesContext(final ArrayList<Genotype> genotypes, final boolean immutable) {
|
protected GenotypesContext(final ArrayList<Genotype> genotypes, final boolean immutable) {
|
||||||
this.genotypes = genotypes;
|
this.notToBeDirectlyAccessedGenotypes = genotypes;
|
||||||
this.immutable = immutable;
|
this.immutable = immutable;
|
||||||
this.sampleNameToOffset = null;
|
this.sampleNameToOffset = null;
|
||||||
this.cacheIsInvalid = true;
|
this.cacheIsInvalid = true;
|
||||||
|
|
@ -110,11 +116,11 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
"sampleNamesInOrder != null",
|
"sampleNamesInOrder != null",
|
||||||
"genotypes.size() == sampleNameToOffset.size()",
|
"genotypes.size() == sampleNameToOffset.size()",
|
||||||
"genotypes.size() == sampleNamesInOrder.size()"})
|
"genotypes.size() == sampleNamesInOrder.size()"})
|
||||||
private GenotypesContext(final ArrayList<Genotype> genotypes,
|
protected GenotypesContext(final ArrayList<Genotype> genotypes,
|
||||||
final Map<String, Integer> sampleNameToOffset,
|
final Map<String, Integer> sampleNameToOffset,
|
||||||
final List<String> sampleNamesInOrder,
|
final List<String> sampleNamesInOrder,
|
||||||
final boolean immutable) {
|
final boolean immutable) {
|
||||||
this.genotypes = genotypes;
|
this.notToBeDirectlyAccessedGenotypes = genotypes;
|
||||||
this.immutable = immutable;
|
this.immutable = immutable;
|
||||||
this.sampleNameToOffset = sampleNameToOffset;
|
this.sampleNameToOffset = sampleNameToOffset;
|
||||||
this.sampleNamesInOrder = sampleNamesInOrder;
|
this.sampleNamesInOrder = sampleNamesInOrder;
|
||||||
|
|
@ -203,7 +209,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
@Requires({"toCopy != null"})
|
@Requires({"toCopy != null"})
|
||||||
@Ensures({"result != null"})
|
@Ensures({"result != null"})
|
||||||
public static final GenotypesContext copy(final GenotypesContext toCopy) {
|
public static final GenotypesContext copy(final GenotypesContext toCopy) {
|
||||||
return create(new ArrayList<Genotype>(toCopy.genotypes));
|
return create(new ArrayList<Genotype>(toCopy.getGenotypes()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -225,7 +231,6 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
public final GenotypesContext immutable() {
|
public final GenotypesContext immutable() {
|
||||||
this.genotypes = Collections.unmodifiableList(genotypes);
|
|
||||||
immutable = true;
|
immutable = true;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
@ -255,16 +260,16 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
@Ensures({"cacheIsInvalid == false",
|
@Ensures({"cacheIsInvalid == false",
|
||||||
"sampleNamesInOrder != null",
|
"sampleNamesInOrder != null",
|
||||||
"sampleNameToOffset != null",
|
"sampleNameToOffset != null",
|
||||||
"sameSamples(genotypes, sampleNamesInOrder)",
|
"sameSamples(notToBeDirectlyAccessedGenotypes, sampleNamesInOrder)",
|
||||||
"sameSamples(genotypes, sampleNameToOffset.keySet())"})
|
"sameSamples(notToBeDirectlyAccessedGenotypes, sampleNameToOffset.keySet())"})
|
||||||
private synchronized void buildCache() {
|
protected synchronized void buildCache() {
|
||||||
if ( cacheIsInvalid ) {
|
if ( cacheIsInvalid ) {
|
||||||
cacheIsInvalid = false;
|
cacheIsInvalid = false;
|
||||||
sampleNamesInOrder = new ArrayList<String>(genotypes.size());
|
sampleNamesInOrder = new ArrayList<String>(size());
|
||||||
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
|
sampleNameToOffset = new HashMap<String, Integer>(size());
|
||||||
|
|
||||||
for ( int i = 0; i < genotypes.size(); i++ ) {
|
for ( int i = 0; i < size(); i++ ) {
|
||||||
final Genotype g = genotypes.get(i);
|
final Genotype g = getGenotypes().get(i);
|
||||||
sampleNamesInOrder.add(g.getSampleName());
|
sampleNamesInOrder.add(g.getSampleName());
|
||||||
sampleNameToOffset.put(g.getSampleName(), i);
|
sampleNameToOffset.put(g.getSampleName(), i);
|
||||||
}
|
}
|
||||||
|
|
@ -279,20 +284,24 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
protected ArrayList<Genotype> getGenotypes() {
|
||||||
|
return notToBeDirectlyAccessedGenotypes;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
genotypes.clear();
|
getGenotypes().clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int size() {
|
public int size() {
|
||||||
return genotypes.size();
|
return getGenotypes().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return genotypes.isEmpty();
|
return getGenotypes().isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -300,14 +309,14 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
public boolean add(final Genotype genotype) {
|
public boolean add(final Genotype genotype) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.add(genotype);
|
return getGenotypes().add(genotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("genotype != null")
|
@Requires("genotype != null")
|
||||||
public boolean add(final Genotype ... genotype) {
|
public boolean add(final Genotype ... genotype) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.addAll(Arrays.asList(genotype));
|
return getGenotypes().addAll(Arrays.asList(genotype));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -319,7 +328,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
public boolean addAll(final Collection<? extends Genotype> genotypes) {
|
public boolean addAll(final Collection<? extends Genotype> genotypes) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return this.genotypes.addAll(genotypes);
|
return getGenotypes().addAll(genotypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -329,38 +338,43 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean contains(final Object o) {
|
public boolean contains(final Object o) {
|
||||||
return this.genotypes.contains(o);
|
return getGenotypes().contains(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean containsAll(final Collection<?> objects) {
|
public boolean containsAll(final Collection<?> objects) {
|
||||||
return this.genotypes.containsAll(objects);
|
return getGenotypes().containsAll(objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Genotype get(final int i) {
|
public Genotype get(final int i) {
|
||||||
return genotypes.get(i);
|
return getGenotypes().get(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Genotype get(final String sampleName) {
|
public Genotype get(final String sampleName) {
|
||||||
buildCache();
|
buildCache();
|
||||||
Integer offset = sampleNameToOffset.get(sampleName);
|
Integer offset = getSampleI(sampleName);
|
||||||
return offset == null ? null : genotypes.get(offset);
|
return offset == null ? null : getGenotypes().get(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Integer getSampleI(final String sampleName) {
|
||||||
|
buildCache();
|
||||||
|
return sampleNameToOffset.get(sampleName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int indexOf(final Object o) {
|
public int indexOf(final Object o) {
|
||||||
return genotypes.indexOf(o);
|
return getGenotypes().indexOf(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<Genotype> iterator() {
|
public Iterator<Genotype> iterator() {
|
||||||
return genotypes.iterator();
|
return getGenotypes().iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int lastIndexOf(final Object o) {
|
public int lastIndexOf(final Object o) {
|
||||||
return genotypes.lastIndexOf(o);
|
return getGenotypes().lastIndexOf(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -381,50 +395,67 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
public Genotype remove(final int i) {
|
public Genotype remove(final int i) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.remove(i);
|
return getGenotypes().remove(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean remove(final Object o) {
|
public boolean remove(final Object o) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.remove(o);
|
return getGenotypes().remove(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean removeAll(final Collection<?> objects) {
|
public boolean removeAll(final Collection<?> objects) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.removeAll(objects);
|
return getGenotypes().removeAll(objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean retainAll(final Collection<?> objects) {
|
public boolean retainAll(final Collection<?> objects) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.retainAll(objects);
|
return getGenotypes().retainAll(objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Genotype set(final int i, final Genotype genotype) {
|
public Genotype set(final int i, final Genotype genotype) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateCaches();
|
||||||
return genotypes.set(i, genotype);
|
return getGenotypes().set(i, genotype);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces the genotype in this context -- note for efficiency
|
||||||
|
* reasons we do not add the genotype if it's not present. The
|
||||||
|
* return value will be null indicating this happened.
|
||||||
|
* @param genotype a non null genotype to bind in this context
|
||||||
|
* @return null if genotype was not added, otherwise returns the previous genotype
|
||||||
|
*/
|
||||||
|
@Requires("genotype != null")
|
||||||
|
public Genotype replace(final Genotype genotype) {
|
||||||
|
checkImmutability();
|
||||||
|
Integer offset = getSampleI(genotype.getSampleName());
|
||||||
|
if ( offset == null )
|
||||||
|
return null;
|
||||||
|
else
|
||||||
|
return getGenotypes().set(offset, genotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Genotype> subList(final int i, final int i1) {
|
public List<Genotype> subList(final int i, final int i1) {
|
||||||
return genotypes.subList(i, i1);
|
return getGenotypes().subList(i, i1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object[] toArray() {
|
public Object[] toArray() {
|
||||||
return genotypes.toArray();
|
return getGenotypes().toArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public <T> T[] toArray(final T[] ts) {
|
public <T> T[] toArray(final T[] ts) {
|
||||||
return genotypes.toArray(ts);
|
return getGenotypes().toArray(ts);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -528,13 +559,13 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
@Requires("samples != null")
|
@Requires("samples != null")
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public GenotypesContext subsetToSamples( final Set<String> samples ) {
|
public GenotypesContext subsetToSamples( final Set<String> samples ) {
|
||||||
if ( samples.size() == genotypes.size() )
|
if ( samples.size() == size() )
|
||||||
return this;
|
return this;
|
||||||
else if ( samples.isEmpty() )
|
else if ( samples.isEmpty() )
|
||||||
return NO_GENOTYPES;
|
return NO_GENOTYPES;
|
||||||
else {
|
else {
|
||||||
GenotypesContext subset = create(samples.size());
|
GenotypesContext subset = create(samples.size());
|
||||||
for ( final Genotype g : genotypes ) {
|
for ( final Genotype g : getGenotypes() ) {
|
||||||
if ( samples.contains(g.getSampleName()) ) {
|
if ( samples.contains(g.getSampleName()) ) {
|
||||||
subset.add(g);
|
subset.add(g);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFParser;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Short one sentence description of this walker]
|
||||||
|
* <p/>
|
||||||
|
* <p>
|
||||||
|
* [Functionality of this walker]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* [Input description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* [Output description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T $WalkerName
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class LazyGenotypesContext extends GenotypesContext {
|
||||||
|
final VCFParser parser;
|
||||||
|
String unparsedGenotypeData;
|
||||||
|
final List<Allele> alleles;
|
||||||
|
final String contig;
|
||||||
|
final int start;
|
||||||
|
final int nUnparsedGenotypes;
|
||||||
|
|
||||||
|
boolean loaded = false;
|
||||||
|
|
||||||
|
private final static ArrayList<Genotype> EMPTY = new ArrayList<Genotype>(0);
|
||||||
|
|
||||||
|
public LazyGenotypesContext(final VCFParser parser, final String unparsedGenotypeData,
|
||||||
|
final String contig, final int start, final List<Allele> alleles,
|
||||||
|
int nUnparsedGenotypes ) {
|
||||||
|
super(EMPTY, false);
|
||||||
|
this.unparsedGenotypeData = unparsedGenotypeData;
|
||||||
|
this.start = start;
|
||||||
|
this.parser = parser;
|
||||||
|
this.contig = contig;
|
||||||
|
this.alleles = alleles;
|
||||||
|
this.nUnparsedGenotypes = nUnparsedGenotypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ArrayList<Genotype> getGenotypes() {
|
||||||
|
if ( ! loaded ) {
|
||||||
|
//System.out.printf("Loading genotypes... %s:%d%n", contig, start);
|
||||||
|
GenotypesContext subcontext = parser.createGenotypeMap(unparsedGenotypeData, alleles, contig, start);
|
||||||
|
notToBeDirectlyAccessedGenotypes = subcontext.notToBeDirectlyAccessedGenotypes;
|
||||||
|
sampleNamesInOrder = subcontext.sampleNamesInOrder;
|
||||||
|
sampleNameToOffset = subcontext.sampleNameToOffset;
|
||||||
|
cacheIsInvalid = false;
|
||||||
|
loaded = true;
|
||||||
|
unparsedGenotypeData = null;
|
||||||
|
|
||||||
|
// warning -- this path allows us to create a VariantContext that doesn't run validateGenotypes()
|
||||||
|
// That said, it's not such an important routine -- it's just checking that the genotypes
|
||||||
|
// are well formed w.r.t. the alleles list, but this will be enforced within the VCFCodec
|
||||||
|
}
|
||||||
|
|
||||||
|
return notToBeDirectlyAccessedGenotypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected synchronized void buildCache() {
|
||||||
|
if ( cacheIsInvalid ) {
|
||||||
|
getGenotypes(); // will load up all of the necessary data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isEmpty() {
|
||||||
|
// optimization -- we know the number of samples in the unparsed data, so use it here to
|
||||||
|
// avoid parsing just to know if the genotypes context is empty
|
||||||
|
return loaded ? super.isEmpty() : nUnparsedGenotypes == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
// optimization -- we know the number of samples in the unparsed data, so use it here to
|
||||||
|
// avoid parsing just to know the size of the context
|
||||||
|
return loaded ? super.size() : nUnparsedGenotypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getUnparsedGenotypeData() {
|
||||||
|
return unparsedGenotypeData;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -165,8 +165,6 @@ import java.util.*;
|
||||||
public class VariantContext implements Feature { // to enable tribble intergration
|
public class VariantContext implements Feature { // to enable tribble intergration
|
||||||
protected CommonInfo commonInfo = null;
|
protected CommonInfo commonInfo = null;
|
||||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||||
public final static String UNPARSED_GENOTYPE_MAP_KEY = "_UNPARSED_GENOTYPE_MAP_";
|
|
||||||
public final static String UNPARSED_GENOTYPE_PARSER_KEY = "_UNPARSED_GENOTYPE_PARSER_";
|
|
||||||
|
|
||||||
@Deprecated // ID is no longer stored in the attributes map
|
@Deprecated // ID is no longer stored in the attributes map
|
||||||
private final static String ID_KEY = "ID";
|
private final static String ID_KEY = "ID";
|
||||||
|
|
@ -231,7 +229,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @param other the VariantContext to copy
|
* @param other the VariantContext to copy
|
||||||
*/
|
*/
|
||||||
protected VariantContext(VariantContext other) {
|
protected VariantContext(VariantContext other) {
|
||||||
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false, NO_VALIDATION);
|
this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
|
||||||
|
other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
|
||||||
|
other.getFiltersMaybeNull(),
|
||||||
|
other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL,
|
||||||
|
NO_VALIDATION);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -247,14 +249,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||||
* @param attributes attributes
|
* @param attributes attributes
|
||||||
* @param referenceBaseForIndel padded reference base
|
* @param referenceBaseForIndel padded reference base
|
||||||
* @param genotypesAreUnparsed true if the genotypes have not yet been parsed
|
|
||||||
* @param validationToPerform set of validation steps to take
|
* @param validationToPerform set of validation steps to take
|
||||||
*/
|
*/
|
||||||
protected VariantContext(String source, String ID,
|
protected VariantContext(String source, String ID,
|
||||||
String contig, long start, long stop,
|
String contig, long start, long stop,
|
||||||
Collection<Allele> alleles, GenotypesContext genotypes,
|
Collection<Allele> alleles, GenotypesContext genotypes,
|
||||||
double log10PError, Set<String> filters, Map<String, Object> attributes,
|
double log10PError, Set<String> filters, Map<String, Object> attributes,
|
||||||
Byte referenceBaseForIndel, boolean genotypesAreUnparsed,
|
Byte referenceBaseForIndel,
|
||||||
EnumSet<Validation> validationToPerform ) {
|
EnumSet<Validation> validationToPerform ) {
|
||||||
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
|
||||||
this.contig = contig;
|
this.contig = contig;
|
||||||
|
|
@ -265,17 +266,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
if ( ID == null || ID.equals("") ) throw new IllegalArgumentException("ID field cannot be the null or the empty string");
|
if ( ID == null || ID.equals("") ) throw new IllegalArgumentException("ID field cannot be the null or the empty string");
|
||||||
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
|
this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
|
||||||
|
|
||||||
if ( !genotypesAreUnparsed && attributes != null ) {
|
|
||||||
if ( attributes.containsKey(UNPARSED_GENOTYPE_MAP_KEY) ) {
|
|
||||||
attributes = new HashMap<String, Object>(attributes);
|
|
||||||
attributes.remove(UNPARSED_GENOTYPE_MAP_KEY);
|
|
||||||
}
|
|
||||||
if ( attributes.containsKey(UNPARSED_GENOTYPE_PARSER_KEY) ) {
|
|
||||||
attributes = new HashMap<String, Object>(attributes);
|
|
||||||
attributes.remove(UNPARSED_GENOTYPE_PARSER_KEY);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
|
this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
|
||||||
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
|
REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel;
|
||||||
|
|
||||||
|
|
@ -316,13 +306,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
|
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
|
||||||
loadGenotypes();
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||||
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
|
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext subContextFromSamples(Set<String> sampleNames) {
|
public VariantContext subContextFromSamples(Set<String> sampleNames) {
|
||||||
loadGenotypes();
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||||
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
|
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
|
||||||
return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
|
return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
|
||||||
|
|
@ -698,35 +686,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private void loadGenotypes() {
|
|
||||||
if ( !hasAttribute(UNPARSED_GENOTYPE_MAP_KEY) ) {
|
|
||||||
if ( genotypes == null )
|
|
||||||
genotypes = NO_GENOTYPES;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Object parserObj = getAttribute(UNPARSED_GENOTYPE_PARSER_KEY);
|
|
||||||
if ( parserObj == null || !(parserObj instanceof VCFParser) )
|
|
||||||
throw new IllegalStateException("There is no VCF parser stored to unparse the genotype data");
|
|
||||||
VCFParser parser = (VCFParser)parserObj;
|
|
||||||
|
|
||||||
Object mapObj = getAttribute(UNPARSED_GENOTYPE_MAP_KEY);
|
|
||||||
if ( mapObj == null )
|
|
||||||
throw new IllegalStateException("There is no mapping string stored to unparse the genotype data");
|
|
||||||
|
|
||||||
genotypes = parser.createGenotypeMap(mapObj.toString(), new ArrayList<Allele>(alleles), getChr(), getStart());
|
|
||||||
|
|
||||||
commonInfo.removeAttribute(UNPARSED_GENOTYPE_MAP_KEY);
|
|
||||||
commonInfo.removeAttribute(UNPARSED_GENOTYPE_PARSER_KEY);
|
|
||||||
|
|
||||||
validateGenotypes();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the number of samples in the context
|
* @return the number of samples in the context
|
||||||
*/
|
*/
|
||||||
public int getNSamples() {
|
public int getNSamples() {
|
||||||
loadGenotypes();
|
|
||||||
return genotypes.size();
|
return genotypes.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -734,12 +697,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @return true if the context has associated genotypes
|
* @return true if the context has associated genotypes
|
||||||
*/
|
*/
|
||||||
public boolean hasGenotypes() {
|
public boolean hasGenotypes() {
|
||||||
loadGenotypes();
|
|
||||||
return ! genotypes.isEmpty();
|
return ! genotypes.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasGenotypes(Collection<String> sampleNames) {
|
public boolean hasGenotypes(Collection<String> sampleNames) {
|
||||||
loadGenotypes();
|
|
||||||
return genotypes.containsSamples(sampleNames);
|
return genotypes.containsSamples(sampleNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -747,17 +708,14 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @return set of all Genotypes associated with this context
|
* @return set of all Genotypes associated with this context
|
||||||
*/
|
*/
|
||||||
public GenotypesContext getGenotypes() {
|
public GenotypesContext getGenotypes() {
|
||||||
loadGenotypes();
|
|
||||||
return genotypes;
|
return genotypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterable<Genotype> getGenotypesOrderedByName() {
|
public Iterable<Genotype> getGenotypesOrderedByName() {
|
||||||
loadGenotypes();
|
|
||||||
return genotypes.iterateInSampleNameOrder();
|
return genotypes.iterateInSampleNameOrder();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Iterable<Genotype> getGenotypesOrderedBy(Iterable<String> sampleOrdering) {
|
public Iterable<Genotype> getGenotypesOrderedBy(Iterable<String> sampleOrdering) {
|
||||||
loadGenotypes();
|
|
||||||
return genotypes.iterateInSampleNameOrder(sampleOrdering);
|
return genotypes.iterateInSampleNameOrder(sampleOrdering);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,6 @@ public class VariantContextBuilder {
|
||||||
private Map<String, Object> attributes = null;
|
private Map<String, Object> attributes = null;
|
||||||
private boolean attributesCanBeModified = false;
|
private boolean attributesCanBeModified = false;
|
||||||
private Byte referenceBaseForIndel = null;
|
private Byte referenceBaseForIndel = null;
|
||||||
private boolean genotypesAreUnparsed = false;
|
|
||||||
|
|
||||||
/** enum of what must be validated */
|
/** enum of what must be validated */
|
||||||
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
|
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
|
||||||
|
|
@ -112,7 +111,6 @@ public class VariantContextBuilder {
|
||||||
this.contig = parent.contig;
|
this.contig = parent.contig;
|
||||||
this.filters = parent.getFiltersMaybeNull();
|
this.filters = parent.getFiltersMaybeNull();
|
||||||
this.genotypes = parent.genotypes;
|
this.genotypes = parent.genotypes;
|
||||||
this.genotypesAreUnparsed = parent.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY);
|
|
||||||
this.ID = parent.getID();
|
this.ID = parent.getID();
|
||||||
this.log10PError = parent.getLog10PError();
|
this.log10PError = parent.getLog10PError();
|
||||||
this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
|
this.referenceBaseForIndel = parent.getReferenceBaseForIndel();
|
||||||
|
|
@ -179,7 +177,7 @@ public class VariantContextBuilder {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes the attributes field modifiable. In many cases attributes is just a pointer to an immutable
|
* Makes the attributes field modifiable. In many cases attributes is just a pointer to an immutable
|
||||||
* collection, so methods that want to add / remove records require the attributes to be copied first
|
* collection, so methods that want to add / remove records require the attributes to be copied to a modifiable map first
|
||||||
*/
|
*/
|
||||||
private void makeAttributesModifiable() {
|
private void makeAttributesModifiable() {
|
||||||
if ( ! attributesCanBeModified ) {
|
if ( ! attributesCanBeModified ) {
|
||||||
|
|
@ -243,6 +241,11 @@ public class VariantContextBuilder {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public VariantContextBuilder genotypesNoValidation(final GenotypesContext genotypes) {
|
||||||
|
this.genotypes = genotypes;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells this builder that the resulting VariantContext should use a GenotypeContext containing genotypes
|
* Tells this builder that the resulting VariantContext should use a GenotypeContext containing genotypes
|
||||||
*
|
*
|
||||||
|
|
@ -270,15 +273,6 @@ public class VariantContextBuilder {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* ADVANCED! tells us that the genotypes data is stored as an unparsed attribute
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public VariantContextBuilder genotypesAreUnparsed() {
|
|
||||||
this.genotypesAreUnparsed = true;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells us that the resulting VariantContext should have ID
|
* Tells us that the resulting VariantContext should have ID
|
||||||
* @param ID
|
* @param ID
|
||||||
|
|
@ -395,6 +389,6 @@ public class VariantContextBuilder {
|
||||||
public VariantContext make() {
|
public VariantContext make() {
|
||||||
return new VariantContext(source, ID, contig, start, stop, alleles,
|
return new VariantContext(source, ID, contig, start, stop, alleles,
|
||||||
genotypes, log10PError, filters, attributes,
|
genotypes, log10PError, filters, attributes,
|
||||||
referenceBaseForIndel, genotypesAreUnparsed, toValidate);
|
referenceBaseForIndel, toValidate);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,9 +14,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNoAction() {
|
public void testNoAction() {
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c"));
|
Arrays.asList("b7b7c218e219cd923ce5b6eefc5b7171"));
|
||||||
executeTest("test no action", spec);
|
executeTest("test no action", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -24,59 +27,86 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
public void testClusteredSnps() {
|
public void testClusteredSnps() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -window 10 --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -window 10 --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("27b13f179bb4920615dff3a32730d845"));
|
Arrays.asList("6d45a19e4066e7de6ff6a61f43ffad2b"));
|
||||||
executeTest("test clustered SNPs", spec);
|
executeTest("test clustered SNPs", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMasks() {
|
public void testMask1() {
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("578f9e774784c25871678e6464fd212b"));
|
Arrays.asList("65b5006bf3ee9d9d08a36d6b854773f2"));
|
||||||
executeTest("test mask all", spec1);
|
executeTest("test mask all", spec1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMask2() {
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f"));
|
Arrays.asList("a275d36baca81a1ce03dbb528e95a069"));
|
||||||
executeTest("test mask some", spec2);
|
executeTest("test mask some", spec2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMask3() {
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||||
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("5939f80d14b32d88587373532d7b90e5"));
|
Arrays.asList("c9489e1c1342817c36ab4f0770609bdb"));
|
||||||
executeTest("test mask extend", spec3);
|
executeTest("test mask extend", spec3);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFilter1() {
|
public void testFilter1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368"));
|
Arrays.asList("327a611bf82c6c4ae77fbb6d06359f9d"));
|
||||||
executeTest("test filter #1", spec);
|
executeTest("test filter #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFilter2() {
|
public void testFilter2() {
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("c95845e817da7352b9b72bc9794f18fb"));
|
Arrays.asList("7612b3460575402ad78fa4173178bdcc"));
|
||||||
executeTest("test filter #2", spec);
|
executeTest("test filter #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFilterWithSeparateNames() {
|
public void testFilterWithSeparateNames() {
|
||||||
|
// note that this input is slightly malformed, but with the new properly
|
||||||
|
// only when really needed genotype loading of VCF files we don't actually
|
||||||
|
// fix the file in the output
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530"));
|
Arrays.asList("dce33441f58b284ac9ab94f8e64b84e3"));
|
||||||
executeTest("test filter with separate names #2", spec);
|
executeTest("test filter with separate names #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGenotypeFilters() {
|
public void testGenotypeFilters1() {
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("96b61e4543a73fe725e433f007260039"));
|
Arrays.asList("96b61e4543a73fe725e433f007260039"));
|
||||||
executeTest("test genotype filter #1", spec1);
|
executeTest("test genotype filter #1", spec1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGenotypeFilters2() {
|
||||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e"));
|
Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e"));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue