GenotypesContext now updates cached data for add, set, replace operations when possible
-- Involved separately managing the sample -> offset and sample sorted list operations. This should improve performance throughout the system
This commit is contained in:
parent
29ca24694a
commit
e484625594
|
|
@ -52,9 +52,6 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
*/
|
*/
|
||||||
Map<String, Integer> sampleNameToOffset = null;
|
Map<String, Integer> sampleNameToOffset = null;
|
||||||
|
|
||||||
/** if true, then we need to reinitialize sampleNamesInOrder and sampleNameToOffset before we use them /*/
|
|
||||||
boolean cacheIsInvalid = true;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An ArrayList of genotypes contained in this context
|
* An ArrayList of genotypes contained in this context
|
||||||
*
|
*
|
||||||
|
|
@ -95,7 +92,6 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
protected GenotypesContext(final ArrayList<Genotype> genotypes) {
|
protected GenotypesContext(final ArrayList<Genotype> genotypes) {
|
||||||
this.notToBeDirectlyAccessedGenotypes = genotypes;
|
this.notToBeDirectlyAccessedGenotypes = genotypes;
|
||||||
this.sampleNameToOffset = null;
|
this.sampleNameToOffset = null;
|
||||||
this.cacheIsInvalid = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -120,7 +116,6 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
this.notToBeDirectlyAccessedGenotypes = genotypes;
|
this.notToBeDirectlyAccessedGenotypes = genotypes;
|
||||||
this.sampleNameToOffset = sampleNameToOffset;
|
this.sampleNameToOffset = sampleNameToOffset;
|
||||||
this.sampleNamesInOrder = sampleNamesInOrder;
|
this.sampleNamesInOrder = sampleNamesInOrder;
|
||||||
this.cacheIsInvalid = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
@ -246,33 +241,46 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@Ensures({"cacheIsInvalid == true"})
|
@Ensures({"sampleNameToOffset == null"})
|
||||||
protected void invalidateCaches() {
|
protected void invalidateSampleNameMap() {
|
||||||
cacheIsInvalid = true;
|
|
||||||
sampleNamesInOrder = null;
|
|
||||||
sampleNameToOffset = null;
|
sampleNameToOffset = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures({"cacheIsInvalid == false",
|
@Ensures({"sampleNamesInOrder == null"})
|
||||||
"sampleNamesInOrder != null",
|
protected void invalidateSampleOrdering() {
|
||||||
"sampleNameToOffset != null",
|
sampleNamesInOrder = null;
|
||||||
"sameSamples(notToBeDirectlyAccessedGenotypes, sampleNamesInOrder)",
|
}
|
||||||
"sameSamples(notToBeDirectlyAccessedGenotypes, sampleNameToOffset.keySet())"})
|
|
||||||
protected void buildCache() {
|
@Ensures({"sampleNamesInOrder != null",
|
||||||
if ( cacheIsInvalid ) {
|
"sameSamples(notToBeDirectlyAccessedGenotypes, sampleNamesInOrder)"})
|
||||||
cacheIsInvalid = false;
|
protected void ensureSampleOrdering() {
|
||||||
|
if ( sampleNamesInOrder == null ) {
|
||||||
sampleNamesInOrder = new ArrayList<String>(size());
|
sampleNamesInOrder = new ArrayList<String>(size());
|
||||||
sampleNameToOffset = new HashMap<String, Integer>(size());
|
|
||||||
|
|
||||||
for ( int i = 0; i < size(); i++ ) {
|
for ( int i = 0; i < size(); i++ ) {
|
||||||
final Genotype g = getGenotypes().get(i);
|
sampleNamesInOrder.add(getGenotypes().get(i).getSampleName());
|
||||||
sampleNamesInOrder.add(g.getSampleName());
|
|
||||||
sampleNameToOffset.put(g.getSampleName(), i);
|
|
||||||
}
|
}
|
||||||
Collections.sort(sampleNamesInOrder);
|
Collections.sort(sampleNamesInOrder);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures({"sampleNameToOffset != null",
|
||||||
|
"sameSamples(notToBeDirectlyAccessedGenotypes, sampleNameToOffset.keySet())"})
|
||||||
|
protected void ensureSampleNameMap() {
|
||||||
|
if ( sampleNameToOffset == null ) {
|
||||||
|
sampleNameToOffset = new HashMap<String, Integer>(size());
|
||||||
|
|
||||||
|
for ( int i = 0; i < size(); i++ ) {
|
||||||
|
sampleNameToOffset.put(getGenotypes().get(i).getSampleName(), i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for testing purposes
|
||||||
|
protected void ensureAll() {
|
||||||
|
ensureSampleNameMap();
|
||||||
|
ensureSampleOrdering();
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -287,7 +295,8 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
@Override
|
@Override
|
||||||
public void clear() {
|
public void clear() {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleNameMap();
|
||||||
|
invalidateSampleOrdering();
|
||||||
getGenotypes().clear();
|
getGenotypes().clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -301,21 +310,43 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
return getGenotypes().isEmpty();
|
return getGenotypes().isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a single genotype to this context.
|
||||||
|
*
|
||||||
|
* There are many constraints on this input, and important
|
||||||
|
* impacts on the performance of other functions provided by this
|
||||||
|
* context.
|
||||||
|
*
|
||||||
|
* First, the sample name of genotype must be unique within this
|
||||||
|
* context. However, this is not enforced in the code itself, though
|
||||||
|
* you will violate the contract on this context if you add duplicate
|
||||||
|
* samples and are running with CoFoJa enabled.
|
||||||
|
*
|
||||||
|
* Second, adding genotype also updates the sample name -> index map,
|
||||||
|
* so add() followed by containsSample and related function is an efficient
|
||||||
|
* series of operations.
|
||||||
|
*
|
||||||
|
* Third, adding the genotype invalidates the sorted list of sample names, so
|
||||||
|
* add() followed by any of the SampleNamesInOrder operations is inefficient, as
|
||||||
|
* each SampleNamesInOrder must rebuild the sorted list of sample names at
|
||||||
|
* an O(n log n) cost.
|
||||||
|
*
|
||||||
|
* @param genotype
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
@Requires({"genotype != null", "get(genotype.getSampleName()) == null"})
|
@Requires({"genotype != null", "get(genotype.getSampleName()) == null"})
|
||||||
@Ensures("noDups(getGenotypes())")
|
@Ensures("noDups(getGenotypes())")
|
||||||
public boolean add(final Genotype genotype) {
|
public boolean add(final Genotype genotype) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleOrdering();
|
||||||
return getGenotypes().add(genotype);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Requires({"genotype != null", "! containsAny(Arrays.asList(genotype))"})
|
if ( sampleNameToOffset != null ) {
|
||||||
@Ensures("noDups(getGenotypes())")
|
// update the name map by adding entries
|
||||||
public boolean add(final Genotype ... genotype) {
|
sampleNameToOffset.put(genotype.getSampleName(), size());
|
||||||
checkImmutability();
|
}
|
||||||
invalidateCaches();
|
|
||||||
return getGenotypes().addAll(Arrays.asList(genotype));
|
return getGenotypes().add(genotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -325,12 +356,30 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds all of the genotypes to this context
|
||||||
|
*
|
||||||
|
* See {@link #add(Genotype)} for important information about this function's
|
||||||
|
* constraints and performance costs
|
||||||
|
*
|
||||||
|
* @param genotypes
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
@Requires("! containsAny(genotypes)")
|
@Requires("! containsAny(genotypes)")
|
||||||
@Ensures("noDups(getGenotypes())")
|
@Ensures("noDups(getGenotypes())")
|
||||||
public boolean addAll(final Collection<? extends Genotype> genotypes) {
|
public boolean addAll(final Collection<? extends Genotype> genotypes) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleOrdering();
|
||||||
|
|
||||||
|
if ( sampleNameToOffset != null ) {
|
||||||
|
// update the name map by adding entries
|
||||||
|
int pos = size();
|
||||||
|
for ( final Genotype g : genotypes ) {
|
||||||
|
sampleNameToOffset.put(g.getSampleName(), pos++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return getGenotypes().addAll(genotypes);
|
return getGenotypes().addAll(genotypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -362,13 +411,12 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Genotype get(final String sampleName) {
|
public Genotype get(final String sampleName) {
|
||||||
buildCache();
|
|
||||||
Integer offset = getSampleI(sampleName);
|
Integer offset = getSampleI(sampleName);
|
||||||
return offset == null ? null : getGenotypes().get(offset);
|
return offset == null ? null : getGenotypes().get(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Integer getSampleI(final String sampleName) {
|
private Integer getSampleI(final String sampleName) {
|
||||||
buildCache();
|
ensureSampleNameMap();
|
||||||
return sampleNameToOffset.get(sampleName);
|
return sampleNameToOffset.get(sampleName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -401,31 +449,58 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
// return genotypes.listIterator(i);
|
// return genotypes.listIterator(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Note that remove requires us to invalidate our sample -> index
|
||||||
|
* cache. The loop:
|
||||||
|
*
|
||||||
|
* GenotypesContext gc = ...
|
||||||
|
* for ( sample in samples )
|
||||||
|
* if ( gc.containsSample(sample) )
|
||||||
|
* gc.remove(sample)
|
||||||
|
*
|
||||||
|
* is extremely inefficient, as each call to remove invalidates the cache
|
||||||
|
* and containsSample requires us to rebuild it, an O(n) operation.
|
||||||
|
*
|
||||||
|
* If you must remove many samples from the GC, use either removeAll or retainAll
|
||||||
|
* to avoid this O(n * m) operation.
|
||||||
|
*
|
||||||
|
* @param i
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Genotype remove(final int i) {
|
public Genotype remove(final int i) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleNameMap();
|
||||||
|
invalidateSampleOrdering();
|
||||||
return getGenotypes().remove(i);
|
return getGenotypes().remove(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See {@link #remove(int)} for an important warning
|
||||||
|
* @param o
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean remove(final Object o) {
|
public boolean remove(final Object o) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleNameMap();
|
||||||
|
invalidateSampleOrdering();
|
||||||
return getGenotypes().remove(o);
|
return getGenotypes().remove(o);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean removeAll(final Collection<?> objects) {
|
public boolean removeAll(final Collection<?> objects) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleNameMap();
|
||||||
|
invalidateSampleOrdering();
|
||||||
return getGenotypes().removeAll(objects);
|
return getGenotypes().removeAll(objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean retainAll(final Collection<?> objects) {
|
public boolean retainAll(final Collection<?> objects) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
invalidateSampleNameMap();
|
||||||
|
invalidateSampleOrdering();
|
||||||
return getGenotypes().retainAll(objects);
|
return getGenotypes().retainAll(objects);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -433,14 +508,28 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
@Ensures("noDups(getGenotypes())")
|
@Ensures("noDups(getGenotypes())")
|
||||||
public Genotype set(final int i, final Genotype genotype) {
|
public Genotype set(final int i, final Genotype genotype) {
|
||||||
checkImmutability();
|
checkImmutability();
|
||||||
invalidateCaches();
|
final Genotype prev = getGenotypes().set(i, genotype);
|
||||||
return getGenotypes().set(i, genotype);
|
|
||||||
|
invalidateSampleOrdering();
|
||||||
|
if ( sampleNameToOffset != null ) {
|
||||||
|
// update the name map by removing the old entry and replacing it with the new one
|
||||||
|
sampleNameToOffset.remove(prev.getSampleName());
|
||||||
|
sampleNameToOffset.put(genotype.getSampleName(), i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return prev;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Replaces the genotype in this context -- note for efficiency
|
* Replaces the genotype in this context -- note for efficiency
|
||||||
* reasons we do not add the genotype if it's not present. The
|
* reasons we do not add the genotype if it's not present. The
|
||||||
* return value will be null indicating this happened.
|
* return value will be null indicating this happened.
|
||||||
|
*
|
||||||
|
* Note this operation preserves the map cache Sample -> Offset but
|
||||||
|
* invalidates the sorted list of samples. Using replace within a loop
|
||||||
|
* containing any of the SampleNamesInOrder operations requires an O(n log n)
|
||||||
|
* resorting after each replace operation.
|
||||||
|
*
|
||||||
* @param genotype a non null genotype to bind in this context
|
* @param genotype a non null genotype to bind in this context
|
||||||
* @return null if genotype was not added, otherwise returns the previous genotype
|
* @return null if genotype was not added, otherwise returns the previous genotype
|
||||||
*/
|
*/
|
||||||
|
|
@ -451,7 +540,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
if ( offset == null )
|
if ( offset == null )
|
||||||
return null;
|
return null;
|
||||||
else
|
else
|
||||||
return getGenotypes().set(offset, genotype);
|
return set(offset, genotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -523,7 +612,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
*/
|
*/
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public Set<String> getSampleNames() {
|
public Set<String> getSampleNames() {
|
||||||
buildCache();
|
ensureSampleNameMap();
|
||||||
return sampleNameToOffset.keySet();
|
return sampleNameToOffset.keySet();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -532,19 +621,18 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
*/
|
*/
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public List<String> getSampleNamesOrderedByName() {
|
public List<String> getSampleNamesOrderedByName() {
|
||||||
buildCache();
|
ensureSampleOrdering();
|
||||||
return sampleNamesInOrder;
|
return sampleNamesInOrder;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("sample != null")
|
@Requires("sample != null")
|
||||||
public boolean containsSample(final String sample) {
|
public boolean containsSample(final String sample) {
|
||||||
buildCache();
|
ensureSampleNameMap();
|
||||||
return sampleNameToOffset.containsKey(sample);
|
return sampleNameToOffset.containsKey(sample);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("samples != null")
|
@Requires("samples != null")
|
||||||
public boolean containsSamples(final Collection<String> samples) {
|
public boolean containsSamples(final Collection<String> samples) {
|
||||||
buildCache();
|
|
||||||
return getSampleNames().containsAll(samples);
|
return getSampleNames().containsAll(samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -81,8 +81,8 @@ public class LazyGenotypesContext extends GenotypesContext {
|
||||||
final List<String> sampleNamesInOrder;
|
final List<String> sampleNamesInOrder;
|
||||||
|
|
||||||
@Requires({"genotypes != null", "sampleNamesInOrder != null", "sampleNameToOffset != null",
|
@Requires({"genotypes != null", "sampleNamesInOrder != null", "sampleNameToOffset != null",
|
||||||
"sameSamples(genotypes, sampleNamesInOrder)",
|
"sameSamples(genotypes, sampleNamesInOrder)",
|
||||||
"sameSamples(genotypes, sampleNameToOffset.keySet())"})
|
"sameSamples(genotypes, sampleNameToOffset.keySet())"})
|
||||||
public LazyData(final ArrayList<Genotype> genotypes,
|
public LazyData(final ArrayList<Genotype> genotypes,
|
||||||
final List<String> sampleNamesInOrder,
|
final List<String> sampleNamesInOrder,
|
||||||
final Map<String, Integer> sampleNameToOffset) {
|
final Map<String, Integer> sampleNameToOffset) {
|
||||||
|
|
@ -119,13 +119,20 @@ public class LazyGenotypesContext extends GenotypesContext {
|
||||||
@Override
|
@Override
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
protected ArrayList<Genotype> getGenotypes() {
|
protected ArrayList<Genotype> getGenotypes() {
|
||||||
|
decode();
|
||||||
|
return notToBeDirectlyAccessedGenotypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Force us to decode the genotypes, if not already done
|
||||||
|
*/
|
||||||
|
public void decode() {
|
||||||
if ( ! loaded ) {
|
if ( ! loaded ) {
|
||||||
//System.out.printf("Loading genotypes... %s:%d%n", contig, start);
|
//System.out.printf("Loading genotypes... %s:%d%n", contig, start);
|
||||||
LazyData parsed = parser.parse(unparsedGenotypeData);
|
LazyData parsed = parser.parse(unparsedGenotypeData);
|
||||||
notToBeDirectlyAccessedGenotypes = parsed.genotypes;
|
notToBeDirectlyAccessedGenotypes = parsed.genotypes;
|
||||||
sampleNamesInOrder = parsed.sampleNamesInOrder;
|
sampleNamesInOrder = parsed.sampleNamesInOrder;
|
||||||
sampleNameToOffset = parsed.sampleNameToOffset;
|
sampleNameToOffset = parsed.sampleNameToOffset;
|
||||||
cacheIsInvalid = false; // these values build the cache
|
|
||||||
loaded = true;
|
loaded = true;
|
||||||
unparsedGenotypeData = null; // don't hold the unparsed data any longer
|
unparsedGenotypeData = null; // don't hold the unparsed data any longer
|
||||||
|
|
||||||
|
|
@ -133,31 +140,43 @@ public class LazyGenotypesContext extends GenotypesContext {
|
||||||
// That said, it's not such an important routine -- it's just checking that the genotypes
|
// That said, it's not such an important routine -- it's just checking that the genotypes
|
||||||
// are well formed w.r.t. the alleles list, but this will be enforced within the VCFCodec
|
// are well formed w.r.t. the alleles list, but this will be enforced within the VCFCodec
|
||||||
}
|
}
|
||||||
|
|
||||||
return notToBeDirectlyAccessedGenotypes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Overrides the buildCache functionality. If the data hasn't been loaded
|
* Overrides the ensure* functionality. If the data hasn't been loaded
|
||||||
* yet and we want to build the cache, just decode it and we're done. If we've
|
* yet and we want to build the cache, just decode it and we're done. If we've
|
||||||
* already decoded the data, though, go through the super class
|
* already decoded the data, though, go through the super class
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected synchronized void buildCache() {
|
protected synchronized void ensureSampleNameMap() {
|
||||||
if ( cacheIsInvalid ) {
|
if ( ! loaded ) {
|
||||||
if ( ! loaded ) {
|
decode(); // will load up all of the necessary data
|
||||||
getGenotypes(); // will load up all of the necessary data
|
} else {
|
||||||
} else {
|
super.ensureSampleNameMap();
|
||||||
super.buildCache();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void invalidateCaches() {
|
protected synchronized void ensureSampleOrdering() {
|
||||||
|
if ( ! loaded ) {
|
||||||
|
decode(); // will load up all of the necessary data
|
||||||
|
} else {
|
||||||
|
super.ensureSampleOrdering();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void invalidateSampleNameMap() {
|
||||||
// if the cache is invalidated, and we haven't loaded our data yet, do so
|
// if the cache is invalidated, and we haven't loaded our data yet, do so
|
||||||
if ( ! loaded ) getGenotypes();
|
if ( ! loaded ) decode();
|
||||||
super.invalidateCaches();
|
super.invalidateSampleNameMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void invalidateSampleOrdering() {
|
||||||
|
// if the cache is invalidated, and we haven't loaded our data yet, do so
|
||||||
|
if ( ! loaded ) decode();
|
||||||
|
super.invalidateSampleOrdering();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -177,11 +196,4 @@ public class LazyGenotypesContext extends GenotypesContext {
|
||||||
public Object getUnparsedGenotypeData() {
|
public Object getUnparsedGenotypeData() {
|
||||||
return unparsedGenotypeData;
|
return unparsedGenotypeData;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Force us to decode the genotypes
|
|
||||||
*/
|
|
||||||
public void decode() {
|
|
||||||
buildCache();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -532,7 +532,6 @@ public class VariantContextUtils {
|
||||||
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
||||||
double log10PError = 1;
|
double log10PError = 1;
|
||||||
VariantContext vcWithMaxAC = null;
|
VariantContext vcWithMaxAC = null;
|
||||||
Set<String> addedSamples = new HashSet<String>(first.getNSamples());
|
|
||||||
GenotypesContext genotypes = GenotypesContext.create();
|
GenotypesContext genotypes = GenotypesContext.create();
|
||||||
|
|
||||||
// counting the number of filtered and variant VCs
|
// counting the number of filtered and variant VCs
|
||||||
|
|
@ -557,7 +556,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
alleles.addAll(alleleMapping.values());
|
alleles.addAll(alleleMapping.values());
|
||||||
|
|
||||||
mergeGenotypes(genotypes, addedSamples, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
|
mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
|
||||||
|
|
||||||
log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1);
|
log10PError = Math.min(log10PError, vc.isVariant() ? vc.getLog10PError() : 1);
|
||||||
|
|
||||||
|
|
@ -963,10 +962,10 @@ public class VariantContextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void mergeGenotypes(GenotypesContext mergedGenotypes, Set<String> addedSamples, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
|
private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
|
||||||
for ( Genotype g : oneVC.getGenotypes() ) {
|
for ( Genotype g : oneVC.getGenotypes() ) {
|
||||||
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
|
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
|
||||||
if ( ! addedSamples.contains(name) ) {
|
if ( mergedGenotypes.containsSample(name) ) {
|
||||||
// only add if the name is new
|
// only add if the name is new
|
||||||
Genotype newG = g;
|
Genotype newG = g;
|
||||||
|
|
||||||
|
|
@ -976,7 +975,6 @@ public class VariantContextUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
mergedGenotypes.add(newG);
|
mergedGenotypes.add(newG);
|
||||||
addedSamples.add(name);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -88,7 +88,8 @@ public class GenotypesContextUnitTest extends BaseTest {
|
||||||
@Override
|
@Override
|
||||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||||
GenotypesContext gc = GenotypesContext.copy((List<Genotype>)data);
|
GenotypesContext gc = GenotypesContext.copy((List<Genotype>)data);
|
||||||
gc.buildCache();
|
gc.ensureSampleNameMap();
|
||||||
|
gc.ensureSampleOrdering();
|
||||||
return new LazyGenotypesContext.LazyData(gc.notToBeDirectlyAccessedGenotypes, gc.sampleNamesInOrder, gc.sampleNameToOffset);
|
return new LazyGenotypesContext.LazyData(gc.notToBeDirectlyAccessedGenotypes, gc.sampleNamesInOrder, gc.sampleNameToOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -234,10 +235,6 @@ public class GenotypesContextUnitTest extends BaseTest {
|
||||||
gc.add(add2);
|
gc.add(add2);
|
||||||
testGenotypesContextContainsExpectedSamples(gc, with(cfg.initialSamples, add1, add2));
|
testGenotypesContextContainsExpectedSamples(gc, with(cfg.initialSamples, add1, add2));
|
||||||
|
|
||||||
gc = cfg.makeContext();
|
|
||||||
gc.add(add1, add2);
|
|
||||||
testGenotypesContextContainsExpectedSamples(gc, with(cfg.initialSamples, add1, add2));
|
|
||||||
|
|
||||||
gc = cfg.makeContext();
|
gc = cfg.makeContext();
|
||||||
gc.addAll(Arrays.asList(add1, add2));
|
gc.addAll(Arrays.asList(add1, add2));
|
||||||
testGenotypesContextContainsExpectedSamples(gc, with(cfg.initialSamples, add1, add2));
|
testGenotypesContextContainsExpectedSamples(gc, with(cfg.initialSamples, add1, add2));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue