Moving ValidationSiteSelector to validation package in public under my ownership. JunctionGenotyper added and modified several times, this commit is due to merging conflix fixes.
This commit is contained in:
parent
3642a73c07
commit
69661da37d
|
|
@ -776,9 +776,9 @@ public class UnifiedGenotyperEngine {
|
|||
*
|
||||
* @return genotypes
|
||||
*/
|
||||
public GenotypesContext assignGenotypes(final VariantContext vc,
|
||||
final boolean[] allelesToUse,
|
||||
final List<Allele> newAlleles) {
|
||||
public static GenotypesContext assignGenotypes(final VariantContext vc,
|
||||
final boolean[] allelesToUse,
|
||||
final List<Allele> newAlleles) {
|
||||
|
||||
// the no-called genotypes
|
||||
final GenotypesContext GLs = vc.getGenotypes();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
public abstract class FrequencyModeSelector implements Cloneable{
|
||||
|
||||
protected GenomeLocParser parser;
|
||||
|
||||
protected FrequencyModeSelector(GenomeLocParser parser) {
|
||||
this.parser = parser;
|
||||
}
|
||||
protected void logCurrentSiteData(VariantContext vc, VariantContext subVC) {
|
||||
logCurrentSiteData(vc, subVC, false, false);
|
||||
}
|
||||
protected abstract void logCurrentSiteData(VariantContext vc, VariantContext subVC, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC);
|
||||
protected abstract ArrayList<VariantContext> selectValidationSites(int numValidationSites);
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
public class GLBasedSampleSelector extends SampleSelector {
|
||||
public GLBasedSampleSelector(TreeSet<String> sm) {
|
||||
super(sm);
|
||||
}
|
||||
|
||||
public VariantContext subsetSiteToSamples(VariantContext vc) {
|
||||
/* todo - Look at sample array, and create a new vc with samples for which GL's indicate they should be included.
|
||||
For example, include all samples (and corresponding genotypes) whose GL's are such that argmax(GL) = HET or HOMVAR. */
|
||||
throw new ReviewedStingException("GLBasedSampleSelector not implemented yet!");
|
||||
//return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
public class GTBasedSampleSelector extends SampleSelector{
|
||||
public GTBasedSampleSelector(TreeSet<String> sm) {
|
||||
super(sm);
|
||||
}
|
||||
|
||||
public VariantContext subsetSiteToSamples(VariantContext vc) {
|
||||
// Super class already defined initialization which filled data structure "samples" with desired samples.
|
||||
// We only need to check if current vc if polymorphic in that set of samples
|
||||
|
||||
if ( samples == null || samples.isEmpty() )
|
||||
return vc;
|
||||
|
||||
return vc.subContextFromSamples(samples, vc.getAlleles());
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class GenomeEvent implements Comparable {
|
||||
final protected GenomeLoc loc;
|
||||
/** A set of the alleles segregating in this context */
|
||||
final protected List<Allele> alleles;
|
||||
final protected Byte refBase;
|
||||
// final protected HashMap<String, Object> attributes;
|
||||
|
||||
public GenomeEvent(GenomeLocParser parser, final String contig, final int start, final int stop, final List<Allele> alleles, HashMap<String, Object> attributes,
|
||||
byte base) {
|
||||
this.loc = parser.createGenomeLoc(contig, start, stop);
|
||||
this.alleles = alleles;
|
||||
this.refBase = base;
|
||||
// this.attributes = attributes;
|
||||
}
|
||||
|
||||
// Routine to compare two variant contexts (useful to sort collections of vc's).
|
||||
// By default, we want to sort first by contig, then by start location
|
||||
|
||||
public GenomeLoc getGenomeLoc() {
|
||||
return loc;
|
||||
}
|
||||
public int compareTo(final Object o) {
|
||||
if (!(o instanceof GenomeEvent))
|
||||
throw new ReviewedStingException("BUG: comparing variant context with non-VC object");
|
||||
|
||||
GenomeEvent otherEvent = (GenomeEvent)o;
|
||||
|
||||
return loc.compareTo(otherEvent.getGenomeLoc());
|
||||
}
|
||||
|
||||
public VariantContext createVariantContextFromEvent() {
|
||||
return new VariantContextBuilder("event", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||
.log10PError(0.0).referenceBaseForIndel(refBase).make();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
||||
|
||||
public class KeepAFSpectrumFrequencySelector extends FrequencyModeSelector {
|
||||
|
||||
private static final boolean DEBUG = true;
|
||||
|
||||
private int NUM_BINS = 20;
|
||||
|
||||
private int[] preSampleSelectionHistogram;
|
||||
private int numTotalSites = 0;
|
||||
private int[] postSampleSelectionHistogram;
|
||||
private int numSampleSelectedSites = 0;
|
||||
private ArrayList<GenomeEvent>[] binnedEventArray;
|
||||
|
||||
public KeepAFSpectrumFrequencySelector(int numBins, GenomeLocParser parser) {
|
||||
super(parser);
|
||||
NUM_BINS = numBins;
|
||||
// initialize arrays dependent on NUM_BINS
|
||||
binnedEventArray = new ArrayList[NUM_BINS];
|
||||
|
||||
for (int k=0; k < NUM_BINS; k++)
|
||||
binnedEventArray[k] = new ArrayList<GenomeEvent>();
|
||||
|
||||
preSampleSelectionHistogram = new int[NUM_BINS];
|
||||
postSampleSelectionHistogram = new int[NUM_BINS];
|
||||
}
|
||||
|
||||
public void logCurrentSiteData(VariantContext vc, VariantContext subVC, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC) {
|
||||
|
||||
// this method is called for every variant of a selected type, regardless of whether it will be selectable or not
|
||||
// get AC,AF,AN attributes from vc
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
double[] afArray = null;
|
||||
|
||||
if (vc.hasGenotypes() && !IGNORE_GENOTYPES) {
|
||||
// recompute AF,AC,AN based on genotypes:
|
||||
// todo - - maybe too inefficient??
|
||||
VariantContextUtils.calculateChromosomeCounts(vc, attributes, false);
|
||||
afArray = new double[] {Double.valueOf((String)attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY))};
|
||||
} else {
|
||||
// sites-only vc or we explicitly tell to ignore genotypes; we trust the AF field if present
|
||||
if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
||||
String afo = vc.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY, null);
|
||||
|
||||
if (afo.contains(",")) {
|
||||
String[] afs = afo.split(",");
|
||||
afs[0] = afs[0].substring(1,afs[0].length());
|
||||
afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1);
|
||||
|
||||
afArray = new double[afs.length];
|
||||
|
||||
for (int k=0; k < afArray.length; k++)
|
||||
afArray[k] = Double.valueOf(afs[k]);
|
||||
}
|
||||
else
|
||||
afArray = new double[] {Double.valueOf(afo)};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (afArray == null )
|
||||
return;
|
||||
|
||||
double af0 = MathUtils.arrayMax(afArray);
|
||||
|
||||
int binIndex = (NUM_BINS-1) - (int) Math.floor(((1.0-af0)*NUM_BINS));
|
||||
// deal with round-off issue: low-AC sites with large samples can have AF rounded down to 0.000
|
||||
if (binIndex < 0)
|
||||
binIndex = 0;
|
||||
// System.out.format("Pre:%4.4f %d\n",af0, binIndex);
|
||||
preSampleSelectionHistogram[binIndex]++;
|
||||
numTotalSites++;
|
||||
|
||||
// now process VC subsetted to samples of interest
|
||||
if (!subVC.isPolymorphicInSamples() && !IGNORE_POLYMORPHIC)
|
||||
return;
|
||||
|
||||
//System.out.format("Post:%4.4f %d\n",af0, binIndex);
|
||||
postSampleSelectionHistogram[binIndex]++;
|
||||
numSampleSelectedSites++;
|
||||
|
||||
// create bare-bones event and log in corresponding bin
|
||||
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
|
||||
|
||||
binnedEventArray[binIndex].add(event);
|
||||
|
||||
}
|
||||
|
||||
public ArrayList<VariantContext> selectValidationSites(int numValidationSites) {
|
||||
// number of sites to choose at random for each frequency bin = #desired validation sites/# total sites * #sites in original bin
|
||||
int[] sitesToChoosePerBin = new int[NUM_BINS];
|
||||
int totalSites = 0;
|
||||
for (int k=0; k < NUM_BINS; k++) {
|
||||
int sites = (int)Math.round((double)numValidationSites * preSampleSelectionHistogram[k]/ (double)numTotalSites);
|
||||
sitesToChoosePerBin[k] = sites;
|
||||
totalSites += sites;
|
||||
}
|
||||
|
||||
// deal with rounding artifacts
|
||||
while (totalSites > numValidationSites) {
|
||||
// take off one from randomly selected bin
|
||||
int k= GenomeAnalysisEngine.getRandomGenerator().nextInt(NUM_BINS);
|
||||
sitesToChoosePerBin[k]--;
|
||||
totalSites--;
|
||||
}
|
||||
while (totalSites < numValidationSites) {
|
||||
// take off one from randomly selected bin
|
||||
int k= GenomeAnalysisEngine.getRandomGenerator().nextInt( NUM_BINS);
|
||||
sitesToChoosePerBin[k]++;
|
||||
totalSites++;
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("sitesToChoosePerBin:");
|
||||
for (int k=0; k < NUM_BINS; k++)
|
||||
System.out.format("%d ", sitesToChoosePerBin[k]);
|
||||
System.out.println();
|
||||
|
||||
System.out.println("preSampleSelectionHistogram:");
|
||||
for (int k=0; k < NUM_BINS; k++)
|
||||
System.out.format("%d ", preSampleSelectionHistogram[k]);
|
||||
System.out.println();
|
||||
|
||||
System.out.println("postSampleSelectionHistogram:");
|
||||
for (int k=0; k < NUM_BINS; k++)
|
||||
System.out.format("%d ", postSampleSelectionHistogram[k]);
|
||||
System.out.println();
|
||||
|
||||
}
|
||||
|
||||
// take randomly sitesToChoosePerBin[k] elements from each bin
|
||||
ArrayList<GenomeEvent> selectedEvents = new ArrayList<GenomeEvent>();
|
||||
|
||||
for (int k=0; k < NUM_BINS; k++) {
|
||||
selectedEvents.addAll(MathUtils.randomSubset(binnedEventArray[k], sitesToChoosePerBin[k]));
|
||||
}
|
||||
|
||||
Collections.sort(selectedEvents);
|
||||
|
||||
// now convert to VC
|
||||
ArrayList<VariantContext> selectedSites = new ArrayList<VariantContext>();
|
||||
for (GenomeEvent event : selectedEvents)
|
||||
selectedSites.add(event.createVariantContextFromEvent());
|
||||
|
||||
return selectedSites;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
public class NullSampleSelector extends SampleSelector{
|
||||
public NullSampleSelector(TreeSet<String> sm) {
|
||||
super(sm);
|
||||
}
|
||||
|
||||
public VariantContext subsetSiteToSamples(VariantContext vc) {
|
||||
return vc;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
public abstract class SampleSelector implements Cloneable {
|
||||
|
||||
TreeSet<String> samples;
|
||||
protected SampleSelector(TreeSet<String> sm) {
|
||||
samples = new TreeSet<String>(sm);
|
||||
}
|
||||
|
||||
protected abstract VariantContext subsetSiteToSamples(VariantContext vc);
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class UniformSamplingFrequencySelector extends FrequencyModeSelector {
|
||||
private ArrayList<GenomeEvent> binnedEventArray;
|
||||
|
||||
public UniformSamplingFrequencySelector(GenomeLocParser parser) {
|
||||
super(parser);
|
||||
binnedEventArray = new ArrayList<GenomeEvent>();
|
||||
|
||||
}
|
||||
|
||||
public void logCurrentSiteData(VariantContext vc, VariantContext subVC, boolean IGNORE_GENOTYPES, boolean IGNORE_POLYMORPHIC) {
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
|
||||
if (vc.hasGenotypes() && !IGNORE_GENOTYPES) {
|
||||
// recompute AF,AC,AN based on genotypes:
|
||||
VariantContextUtils.calculateChromosomeCounts(vc, attributes, false);
|
||||
if (!subVC.isPolymorphicInSamples() && !IGNORE_POLYMORPHIC)
|
||||
return;
|
||||
} else {
|
||||
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
int ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
if (ac == 0) return; // site not polymorphic
|
||||
}
|
||||
else
|
||||
return;
|
||||
|
||||
}
|
||||
// create bare-bones event and log in corresponding bin
|
||||
// attributes contains AC,AF,AN pulled from original vc, and we keep them here and log in output file for bookkeeping purposes
|
||||
GenomeEvent event = new GenomeEvent(parser, vc.getChr(), vc.getStart(), vc.getEnd(),vc.getAlleles(), attributes, vc.getReferenceBaseForIndel());
|
||||
binnedEventArray.add(event);
|
||||
|
||||
}
|
||||
|
||||
public ArrayList<VariantContext> selectValidationSites(int numValidationSites) {
|
||||
|
||||
// take randomly sitesToChoosePerBin[k] elements from each bin
|
||||
ArrayList<GenomeEvent> selectedEvents = new ArrayList<GenomeEvent>();
|
||||
|
||||
selectedEvents.addAll(MathUtils.randomSubset(binnedEventArray, numValidationSites));
|
||||
|
||||
Collections.sort(selectedEvents);
|
||||
|
||||
// now convert to VC
|
||||
ArrayList<VariantContext> selectedSites = new ArrayList<VariantContext>();
|
||||
for (GenomeEvent event : selectedEvents)
|
||||
selectedSites.add(event.createVariantContextFromEvent());
|
||||
|
||||
return selectedSites;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* Randomly selects VCF records according to specified options.
|
||||
*
|
||||
* <p>
|
||||
* ValidationSiteSelectorWalker is intended for use in experiments where we sample data randomly from a set of variants, for example
|
||||
* in order to choose sites for a follow-up validation study.
|
||||
*
|
||||
* Sites are selected randomly but within certain restrictions. There are two main sources of restrictions
|
||||
* a) Sample restrictions. A user can specify a set of samples, and we will only consider sites which are polymorphic within such given sample subset.
|
||||
* These sample restrictions can be given as a set of individual samples, a text file (each line containing a sample name), or a regular expression.
|
||||
* A user can additionally specify whether samples will be considered based on their genotypes (a non-reference genotype means that such sample is polymorphic in that variant,
|
||||
* and hence that variant will be considered for inclusion in set), or based on their PLs.
|
||||
* b) A user can additionally specify a sampling method based on allele frequency. Two sampling methods are currently supported.
|
||||
* 1. Uniform sampling will just sample uniformly from variants polymorphic in selected samples.
|
||||
* 2. Sampling based on Allele Frequency spectrum will ensure that output sites have the same AF distribution as the input set.
|
||||
*
|
||||
* User can additionally restrict output to a particular type of variant (SNP, Indel, etc.)
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more variant sets to choose from.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A sites-only VCF with the desired number of randomly selected sites.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ValidationSiteSelectorWalker \
|
||||
* --variant input1.vcf \
|
||||
* --variant input2.vcf \
|
||||
* -sn NA12878 \
|
||||
* -o output.vcf \
|
||||
* --numValidationSites 200 \
|
||||
* -sampleMode POLY_BASED_ON_GT \
|
||||
* -freqMode KEEP_AF_SPECTRUM
|
||||
*
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T ValidationSiteSelectorWalker \
|
||||
* --variant:foo input1.vcf \
|
||||
* --variant:bar input2.vcf \
|
||||
* --numValidationSites 200 \
|
||||
* -sf samples.txt \
|
||||
* -o output.vcf \
|
||||
* -sampleMode POLY_BASED_ON_GT \
|
||||
* -freqMode UNIFORM
|
||||
* -selectType INDEL
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class ValidationSiteSelectorWalker extends RodWalker<Integer, Integer> {
|
||||
|
||||
public enum AF_COMPUTATION_MODE {
|
||||
KEEP_AF_SPECTRUM,
|
||||
UNIFORM
|
||||
}
|
||||
|
||||
public enum SAMPLE_SELECTION_MODE {
|
||||
NONE,
|
||||
POLY_BASED_ON_GT,
|
||||
POLY_BASED_ON_GL
|
||||
}
|
||||
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file, can be specified multiple times", required=true)
|
||||
public List<RodBinding<VariantContext>> variants;
|
||||
|
||||
@Output(doc="File to which variants should be written",required=true)
|
||||
protected VCFWriter vcfWriter = null;
|
||||
|
||||
@Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false)
|
||||
public Set<String> sampleNames = new HashSet<String>(0);
|
||||
|
||||
@Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
|
||||
public Set<String> sampleExpressions ;
|
||||
|
||||
@Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
|
||||
public Set<File> sampleFiles;
|
||||
|
||||
@Argument(fullName="sampleMode", shortName="sampleMode", doc="Sample selection mode", required=false)
|
||||
private SAMPLE_SELECTION_MODE sampleMode = SAMPLE_SELECTION_MODE.NONE;
|
||||
|
||||
@Argument(fullName="numValidationSites", shortName="numSites", doc="Number of output validation sites", required=true)
|
||||
private int numValidationSites;
|
||||
|
||||
@Argument(fullName="includeFilteredSites", shortName="ifs", doc="If true, will include filtered sites in set to choose variants from", required=false)
|
||||
private boolean INCLUDE_FILTERED_SITES = false;
|
||||
|
||||
@Argument(fullName="ignoreGenotypes", shortName="ignoreGenotypes", doc="If true, will ignore genotypes in VCF, will take AC,AF from annotations and will make no sample selection", required=false)
|
||||
private boolean IGNORE_GENOTYPES = false;
|
||||
|
||||
@Argument(fullName="ignorePolymorphicStatus", shortName="ignorePolymorphicStatus", doc="If true, will ignore polymorphic status in VCF, and will take VCF record directly without pre-selection", required=false)
|
||||
private boolean IGNORE_POLYMORPHIC = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="numFrequencyBins", shortName="numBins", doc="Number of frequency bins if we're to match AF distribution", required=false)
|
||||
private int numFrequencyBins = 20;
|
||||
|
||||
/**
|
||||
* This argument selects allele frequency selection mode:
|
||||
* KEEP_AF_SPECTRUM will choose variants so that the resulting allele frequency spectrum matches as closely as possible the input set
|
||||
* UNIFORM will choose variants uniformly without regard to their allele frequency.
|
||||
*
|
||||
*/
|
||||
@Argument(fullName="frequencySelectionMode", shortName="freqMode", doc="Allele Frequency selection mode", required=false)
|
||||
private AF_COMPUTATION_MODE freqMode = AF_COMPUTATION_MODE.KEEP_AF_SPECTRUM;
|
||||
|
||||
/**
|
||||
* This argument selects particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* When specified one or more times, a particular type of variant is selected.
|
||||
*
|
||||
*/
|
||||
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
|
||||
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
|
||||
|
||||
|
||||
private TreeSet<String> samples = new TreeSet<String>();
|
||||
SampleSelector sampleSelector = null;
|
||||
FrequencyModeSelector frequencyModeSelector = null;
|
||||
private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
|
||||
|
||||
public void initialize() {
|
||||
// Get list of samples to include in the output
|
||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit());
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
|
||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
||||
// first, add any requested samples
|
||||
samples.addAll(samplesFromFile);
|
||||
samples.addAll(samplesFromExpressions);
|
||||
samples.addAll(sampleNames);
|
||||
|
||||
// if none were requested, we want all of them
|
||||
if ( samples.isEmpty() ) {
|
||||
samples.addAll(vcfSamples);
|
||||
|
||||
}
|
||||
|
||||
sampleSelector = getSampleSelectorObject(sampleMode, samples);
|
||||
|
||||
// initialize frequency mode selector
|
||||
frequencyModeSelector = getFrequencyModeSelectorObject(freqMode, getToolkit().getGenomeLocParser());
|
||||
|
||||
// if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include
|
||||
if (TYPES_TO_INCLUDE.isEmpty()) {
|
||||
|
||||
for (VariantContext.Type t : VariantContext.Type.values())
|
||||
selectedTypes.add(t);
|
||||
|
||||
}
|
||||
else {
|
||||
for (VariantContext.Type t : TYPES_TO_INCLUDE)
|
||||
selectedTypes.add(t);
|
||||
|
||||
}
|
||||
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
headerLines.add(new VCFHeaderLine("source", "ValidationSiteSelector"));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
|
||||
|
||||
if ( vcs == null || vcs.size() == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if (!selectedTypes.contains(vc.getType()))
|
||||
continue;
|
||||
|
||||
// skip if site isn't polymorphic and if user didn't request to ignore polymorphic status
|
||||
if (!vc.isPolymorphicInSamples() && !IGNORE_POLYMORPHIC)
|
||||
continue;
|
||||
|
||||
if (!INCLUDE_FILTERED_SITES && vc.filtersWereApplied() && vc.isFiltered())
|
||||
continue;
|
||||
|
||||
|
||||
// do anything required by frequency selector before we select for samples
|
||||
VariantContext subVC;
|
||||
if (samples.isEmpty())
|
||||
subVC = vc;
|
||||
else
|
||||
subVC = sampleSelector.subsetSiteToSamples(vc);
|
||||
frequencyModeSelector.logCurrentSiteData(vc, subVC, IGNORE_GENOTYPES, IGNORE_POLYMORPHIC);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
logger.info("Outputting validation sites...");
|
||||
ArrayList<VariantContext> selectedSites = frequencyModeSelector.selectValidationSites(numValidationSites);
|
||||
|
||||
for (VariantContext vc : selectedSites) {
|
||||
vcfWriter.add(vc);
|
||||
}
|
||||
logger.info(result + " records processed.");
|
||||
|
||||
}
|
||||
|
||||
private SampleSelector getSampleSelectorObject(SAMPLE_SELECTION_MODE sampleMode, TreeSet<String> samples) {
|
||||
SampleSelector sm;
|
||||
switch ( sampleMode ) {
|
||||
case POLY_BASED_ON_GL:
|
||||
sm = new GLBasedSampleSelector(samples);
|
||||
break;
|
||||
case POLY_BASED_ON_GT:
|
||||
sm = new GTBasedSampleSelector(samples);
|
||||
break;
|
||||
case NONE:
|
||||
sm = new NullSampleSelector(samples);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unsupported Sample Selection Mode: " + sampleMode);
|
||||
}
|
||||
|
||||
return sm;
|
||||
}
|
||||
|
||||
private FrequencyModeSelector getFrequencyModeSelectorObject (AF_COMPUTATION_MODE freqMode, GenomeLocParser parser) {
|
||||
FrequencyModeSelector fm;
|
||||
|
||||
switch (freqMode) {
|
||||
case KEEP_AF_SPECTRUM:
|
||||
fm = new KeepAFSpectrumFrequencySelector(numFrequencyBins, parser);
|
||||
break;
|
||||
case UNIFORM:
|
||||
fm = new UniformSamplingFrequencySelector(parser);
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected Frequency Selection Mode: "+ freqMode);
|
||||
|
||||
}
|
||||
return fm;
|
||||
}
|
||||
}
|
||||
|
|
@ -6,8 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* the ref seq feature
|
||||
|
|
@ -111,6 +110,34 @@ public class RefSeqFeature implements Transcript, Feature {
|
|||
return overlapString.toString();
|
||||
}
|
||||
|
||||
ArrayList<GenomeLoc> exonInRefOrderCache = null;
|
||||
|
||||
public Integer getSortedOverlapInteger(GenomeLoc position) {
|
||||
int exonNo = -1;
|
||||
ArrayList<GenomeLoc> exonsInReferenceOrder = exonInRefOrderCache != null ? exonInRefOrderCache : new ArrayList<GenomeLoc>(exons);
|
||||
if ( exonInRefOrderCache == null ) {
|
||||
Collections.sort(exonsInReferenceOrder);
|
||||
}
|
||||
exonInRefOrderCache = exonsInReferenceOrder;
|
||||
for ( GenomeLoc exon : exonsInReferenceOrder ) {
|
||||
if ( exon.overlapsP(position) ) {
|
||||
return ++exonNo;
|
||||
}
|
||||
++exonNo;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
public GenomeLoc getSortedExonLoc(int offset) {
|
||||
ArrayList<GenomeLoc> exonsInReferenceOrder = exonInRefOrderCache != null ? exonInRefOrderCache : new ArrayList<GenomeLoc>(exons);
|
||||
if ( exonInRefOrderCache == null ) {
|
||||
Collections.sort(exonsInReferenceOrder);
|
||||
}
|
||||
exonInRefOrderCache = exonsInReferenceOrder;
|
||||
return exonsInReferenceOrder.get(offset);
|
||||
}
|
||||
|
||||
/** Returns true if the specified interval 'that' overlaps with the full genomic interval of this transcript */
|
||||
public boolean overlapsP (GenomeLoc that) {
|
||||
return getLocation().overlapsP(that);
|
||||
|
|
|
|||
|
|
@ -88,7 +88,6 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
|
|||
try {
|
||||
boolean isFirst = true;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
System.out.println(line);
|
||||
if ( isFirst && ! line.startsWith(headerDelimiter) && ! line.startsWith(commentDelimiter)) {
|
||||
throw new UserException.MalformedFile("TableCodec file does not have a header");
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue