Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
efce2ece9d
|
|
@ -163,6 +163,14 @@
|
|||
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
|
||||
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
|
||||
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
|
||||
<!--
|
||||
An old versions of the ivy-1.4.1.xml does not contain /ivy-module/configuration/conf/@name="compile".
|
||||
Easier to upgrade to 1.4.4 than try to deal with xmlproperty and conditional deletion in ant.
|
||||
Just in case we remove explicit 1.4.4 and go back to 1.4.1, try to clean out the file for now.
|
||||
-->
|
||||
<delete file="${ivy.home}/cache/javax.mail/mail/ivy-1.4.1.xml"/>
|
||||
<delete file="${ivy.home}/cache/javax.mail/mail/ivydata-1.4.1.properties"/>
|
||||
<delete file="${ivy.home}/cache/javax.mail/mail/jars/mail-1.4.1.jar"/>
|
||||
</target>
|
||||
|
||||
<target name="init.buildall">
|
||||
|
|
|
|||
6
ivy.xml
6
ivy.xml
|
|
@ -15,10 +15,8 @@
|
|||
<!-- Tribble -->
|
||||
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
||||
|
||||
<dependency org="log4j" name="log4j" rev="1.2.15">
|
||||
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
|
||||
<exclude org="javax.mail" />
|
||||
</dependency>
|
||||
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||
|
|
|
|||
|
|
@ -12,14 +12,14 @@ if ( onCMDLine ) {
|
|||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
inputFileName = "~/Desktop/Q-30033@gsa1.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||
outputPDF = NA
|
||||
}
|
||||
|
||||
RUNTIME_UNITS = "(sec)"
|
||||
ORIGINAL_UNITS_TO_SECONDS = 1/1000
|
||||
RUNTIME_UNITS = "(hours)"
|
||||
ORIGINAL_UNITS_TO_SECONDS = 1/1000/60/60
|
||||
|
||||
#
|
||||
# Helper function to aggregate all of the jobs in the report across all tables
|
||||
|
|
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
|
|||
#
|
||||
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
||||
#
|
||||
plotJobsGantt <- function(gatkReport, sortOverall) {
|
||||
plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
|
||||
allJobs = allJobsFromReport(gatkReport)
|
||||
if ( sortOverall ) {
|
||||
title = "All jobs, by analysis, by start time"
|
||||
|
|
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
|
|||
}
|
||||
allJobs$index = 1:nrow(allJobs)
|
||||
minTime = min(allJobs$startTime)
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
||||
allJobs$relStartTime = (allJobs$startTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
|
||||
allJobs$relDoneTime = (allJobs$doneTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
|
||||
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
|
||||
maxRelTime = max(allJobs$relDoneTime)
|
||||
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
|
||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
|
||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||
p <- p + theme_bw()
|
||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
|
||||
if ( includeText )
|
||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||
p <- p + xlim(0, maxRelTime * 1.1)
|
||||
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
|
||||
p <- p + ylab("Job")
|
||||
p <- p + ylab("Job number")
|
||||
p <- p + opts(title=title)
|
||||
print(p)
|
||||
}
|
||||
|
|
@ -157,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
|
|||
pdf(outputPDF, height=8.5, width=11)
|
||||
}
|
||||
|
||||
plotJobsGantt(gatkReportData, T)
|
||||
plotJobsGantt(gatkReportData, F)
|
||||
plotJobsGantt(gatkReportData, T, F)
|
||||
plotJobsGantt(gatkReportData, F, F)
|
||||
plotProgressByTime(gatkReportData)
|
||||
for ( group in gatkReportData ) {
|
||||
plotGroup(group)
|
||||
|
|
|
|||
|
|
@ -293,15 +293,16 @@ public class GATKRunReport {
|
|||
* That is, postReport() is guarenteed not to fail for any reason.
|
||||
*/
|
||||
private File postReportToLocalDisk(File rootDir) {
|
||||
String filename = getID() + ".report.xml.gz";
|
||||
File file = new File(rootDir, filename);
|
||||
try {
|
||||
String filename = getID() + ".report.xml.gz";
|
||||
File file = new File(rootDir, filename);
|
||||
postReportToFile(file);
|
||||
logger.debug("Wrote report to " + file);
|
||||
return file;
|
||||
} catch ( Exception e ) {
|
||||
// we catch everything, and no matter what eat the error
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
file.delete();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -312,6 +313,7 @@ public class GATKRunReport {
|
|||
File localFile = postReportToLocalDisk(new File("./"));
|
||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||
if ( localFile != null ) { // we succeeded in creating the local file
|
||||
localFile.deleteOnExit();
|
||||
try {
|
||||
// stop us from printing the annoying, and meaningless, mime types warning
|
||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||
|
|
@ -336,14 +338,13 @@ public class GATKRunReport {
|
|||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||
logger.debug("Uploaded to AWS: " + s3Object);
|
||||
logger.info("Uploaded run statistics report to AWS S3");
|
||||
} catch ( S3ServiceException e ) {
|
||||
exceptDuringRunReport("S3 exception occurred", e);
|
||||
} catch ( NoSuchAlgorithmException e ) {
|
||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||
} catch ( IOException e ) {
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
} finally {
|
||||
localFile.delete();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -419,9 +419,10 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
if (dict == null) return;
|
||||
|
||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
||||
|
||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
||||
|
||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
||||
if (dict.getSequence(seq.getSequenceName()) == null)
|
||||
continue;
|
||||
|
|
@ -435,6 +436,13 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
}
|
||||
}
|
||||
|
||||
public void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
|
||||
for ( SAMSequenceRecord seq : dict.getSequences() ) {
|
||||
final String contig = SequenceDictionaryPropertyPredicate + seq.getSequenceName();
|
||||
final String length = String.valueOf(seq.getSequenceLength());
|
||||
index.addProperty(contig,length);
|
||||
}
|
||||
}
|
||||
|
||||
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
|
||||
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class SBByDepth extends AnnotationByDepth {
|
|||
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
||||
return null;
|
||||
|
||||
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
|
||||
double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
|
|
|
|||
|
|
@ -63,9 +63,12 @@ import java.util.*;
|
|||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more bam files (with proper headers) to be analyzed for coverage statistics
|
||||
* (Optional) A REFSEQ Rod to aggregate coverage to the gene level
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
*(Optional) A REFSEQ Rod to aggregate coverage to the gene level
|
||||
* <p>
|
||||
* (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
|
||||
*</p></p>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
|
||||
|
|
@ -93,7 +96,7 @@ import java.util.*;
|
|||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T VariantEval \
|
||||
* -T DepthOfCoverage \
|
||||
* -o file_name_base \
|
||||
* -I input_bams.list
|
||||
* [-geneList refSeq.sorted.txt] \
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class HaplotypeIndelErrorModel {
|
|||
baseMatchArray = new double[MAX_CACHED_QUAL+1];
|
||||
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
|
||||
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
|
||||
double baseProb = QualityUtils.qualToProb(k);
|
||||
double baseProb = QualityUtils.qualToProb((byte)k);
|
||||
|
||||
|
||||
baseMatchArray[k] = probToQual(baseProb);
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ public class PhasingRead extends BaseArray {
|
|||
public PhasingRead(int length, int mappingQual) {
|
||||
super(length);
|
||||
|
||||
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
|
||||
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb((byte)mappingQual));
|
||||
|
||||
this.baseProbs = new PreciseNonNegativeDouble[length];
|
||||
Arrays.fill(this.baseProbs, null);
|
||||
|
|
|
|||
|
|
@ -44,12 +44,12 @@ public class RefSeqDataParser {
|
|||
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
||||
|
||||
Map<String, String> entriesToNames = new HashMap<String, String>();
|
||||
Integer numRecords = vc.getAttributeAsIntegerNoException(NUM_RECORDS_KEY);
|
||||
if (numRecords != null) {
|
||||
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
|
||||
if (numRecords != -1) {
|
||||
boolean done = false;
|
||||
|
||||
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
||||
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
|
||||
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||
if (name != null) {
|
||||
entriesToNames.put(nameKeyToUse, name);
|
||||
done = true;
|
||||
|
|
@ -59,14 +59,14 @@ public class RefSeqDataParser {
|
|||
if (!done) {
|
||||
for (int i = 1; i <= numRecords; i++) {
|
||||
String key = nameKeyToUseMultiplePrefix + i;
|
||||
String name = vc.getAttributeAsStringNoException(key);
|
||||
String name = vc.getAttributeAsString(key, null);
|
||||
if (name != null)
|
||||
entriesToNames.put(key, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
else { // no entry with the # of records:
|
||||
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
|
||||
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||
if (name != null) {
|
||||
entriesToNames.put(nameKeyToUse, name);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -110,12 +110,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
case SNP:
|
||||
nVariantLoci++;
|
||||
nSNPs++;
|
||||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
||||
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||
break;
|
||||
case MNP:
|
||||
nVariantLoci++;
|
||||
nMNPs++;
|
||||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
||||
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||
break;
|
||||
case INDEL:
|
||||
nVariantLoci++;
|
||||
|
|
@ -137,7 +137,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
|
||||
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
||||
|
||||
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
|
||||
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
|
||||
// if (aaStr.equals(".")) {
|
||||
// aaStr = refStr;
|
||||
// }
|
||||
|
|
|
|||
|
|
@ -219,7 +219,8 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public static Double getPQ(Genotype gt) {
|
||||
return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
||||
Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||
return d == -1 ? null : d;
|
||||
}
|
||||
|
||||
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
|||
if ( eval.hasGenotypes() )
|
||||
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||
else if ( eval.hasAttribute("AC") ) {
|
||||
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
||||
ac = eval.getAttributeAsInt("AC", -1);
|
||||
}
|
||||
|
||||
if ( ac != -1 ) {
|
||||
|
|
|
|||
|
|
@ -49,18 +49,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
else nTv++;
|
||||
}
|
||||
|
||||
String refStr = vc.getReference().getBaseString().toUpperCase();
|
||||
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
|
||||
|
||||
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
|
||||
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
|
||||
|
||||
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
|
||||
|
||||
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
|
||||
nTiDerived++;
|
||||
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
|
||||
nTvDerived++;
|
||||
if (vc.hasAttribute("ANCESTRALALLELE")) {
|
||||
final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
|
||||
if ( ! aaStr.equals(".") ) {
|
||||
switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
|
||||
case TRANSITION: nTiDerived++; break;
|
||||
case TRANSVERSION: nTvDerived++; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
//// System.out.printf(" ac = %d%n", ac);
|
||||
}
|
||||
else
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else {
|
||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class AlleleCount extends VariantStratifier {
|
|||
if (eval != null) {
|
||||
int AC = -1;
|
||||
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
||||
AC = eval.getAttributeAsInt("AC");
|
||||
AC = eval.getAttributeAsInt("AC", 0);
|
||||
} else if ( eval.isVariant() ) {
|
||||
for (Allele allele : eval.getAlternateAlleles())
|
||||
AC = Math.max(AC, eval.getChromosomeCount(allele));
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class AlleleFrequency extends VariantStratifier {
|
|||
|
||||
if (eval != null) {
|
||||
try {
|
||||
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF") / 5.0, 3))));
|
||||
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF", 0.0) / 5.0, 3))));
|
||||
} catch (Exception e) {
|
||||
return relevantStates;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -90,8 +90,8 @@ public class Degeneracy extends VariantStratifier {
|
|||
Integer frame = null;
|
||||
|
||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||
aa = eval.getAttributeAsString("refseq.variantAA");
|
||||
frame = eval.getAttributeAsInt("refseq.frame");
|
||||
aa = eval.getAttributeAsString("refseq.variantAA", null);
|
||||
frame = eval.getAttributeAsInt("refseq.frame", 0);
|
||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||
int annotationId = 1;
|
||||
String key;
|
||||
|
|
@ -99,7 +99,7 @@ public class Degeneracy extends VariantStratifier {
|
|||
do {
|
||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||
|
||||
String newtype = eval.getAttributeAsString(key);
|
||||
String newtype = eval.getAttributeAsString(key, null);
|
||||
|
||||
if ( newtype != null &&
|
||||
( type == null ||
|
||||
|
|
@ -109,13 +109,13 @@ public class Degeneracy extends VariantStratifier {
|
|||
type = newtype;
|
||||
|
||||
String aakey = String.format("refseq.variantAA_%d", annotationId);
|
||||
aa = eval.getAttributeAsString(aakey);
|
||||
aa = eval.getAttributeAsString(aakey, null);
|
||||
|
||||
if (aa != null) {
|
||||
String framekey = String.format("refseq.frame_%d", annotationId);
|
||||
|
||||
if (eval.hasAttribute(framekey)) {
|
||||
frame = eval.getAttributeAsInt(framekey);
|
||||
frame = eval.getAttributeAsInt(framekey, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
}
|
||||
|
||||
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
|
|
@ -38,7 +38,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
|
||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||
try {
|
||||
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass"));
|
||||
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass", null));
|
||||
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||
int annotationId = 1;
|
||||
|
|
@ -47,7 +47,7 @@ public class FunctionalClass extends VariantStratifier {
|
|||
do {
|
||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||
|
||||
String newtypeStr = eval.getAttributeAsString(key);
|
||||
String newtypeStr = eval.getAttributeAsString(key, null);
|
||||
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
|
||||
try {
|
||||
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ public class VQSRCalibrationCurve {
|
|||
if ( vc.isFiltered() )
|
||||
return 0.0;
|
||||
else if ( vc.hasAttribute(VQSRQualKey) ) {
|
||||
double qual = vc.getAttributeAsDouble(VQSRQualKey);
|
||||
double qual = vc.getAttributeAsDouble(VQSRQualKey, 0.0);
|
||||
return probTrueVariant(qual);
|
||||
} else {
|
||||
throw new UserException.VariantContextMissingRequiredField(VQSRQualKey, vc);
|
||||
|
|
@ -143,7 +143,7 @@ public class VQSRCalibrationCurve {
|
|||
for ( int i = 0; i < log10Likelihoods.length; i++) {
|
||||
double p = Math.pow(10, log10Likelihoods[i]);
|
||||
double q = alpha * p + (1-alpha) * noInfoPr;
|
||||
if ( DEBUG ) System.out.printf(" vqslod = %.2f, p = %.2e, alpha = %.2e, q = %.2e%n", vc.getAttributeAsDouble(VQSRQualKey), p, alpha, q);
|
||||
if ( DEBUG ) System.out.printf(" vqslod = %.2f, p = %.2e, alpha = %.2e, q = %.2e%n", vc.getAttributeAsDouble(VQSRQualKey, 0.0), p, alpha, q);
|
||||
updated[i] = Math.log10(q);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -574,7 +574,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
|
||||
// We then pick a variant with probablity AF*desiredFraction
|
||||
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
||||
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY, null);
|
||||
|
||||
double af;
|
||||
double afBoost = 1.0;
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
if ( getters.containsKey(field) ) {
|
||||
val = getters.get(field).get(vc);
|
||||
} else if ( vc.hasAttribute(field) ) {
|
||||
val = vc.getAttributeAsString(field);
|
||||
val = vc.getAttributeAsString(field, null);
|
||||
} else if ( isWildCard(field) ) {
|
||||
Set<String> wildVals = new HashSet<String>();
|
||||
for ( Map.Entry<String,Object> elt : vc.getAttributes().entrySet()) {
|
||||
|
|
@ -309,6 +309,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } });
|
||||
getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } });
|
||||
getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } });
|
||||
getters.put("TYPE", new Getter() { public String get(VariantContext vc) { return vc.getType().toString(); } });
|
||||
getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } });
|
||||
getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } });
|
||||
getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } });
|
||||
|
|
|
|||
|
|
@ -306,7 +306,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (int)( start << 16 + stop << 4 + contigIndex );
|
||||
return start << 16 | stop << 4 | contigIndex;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -9,14 +9,17 @@ import net.sf.samtools.SAMUtils;
|
|||
* @author Kiran Garimella
|
||||
*/
|
||||
public class QualityUtils {
|
||||
|
||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
||||
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
||||
public final static byte MIN_USABLE_Q_SCORE = 6;
|
||||
|
||||
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
|
||||
|
||||
private static double qualToErrorProbCache[] = new double[256];
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Private constructor. No instantiating this class!
|
||||
*/
|
||||
|
|
@ -33,10 +36,6 @@ public class QualityUtils {
|
|||
return 1.0 - qualToErrorProb(qual);
|
||||
}
|
||||
|
||||
static public double qualToProb(int qual) {
|
||||
return qualToProb( (double)qual );
|
||||
}
|
||||
|
||||
static public double qualToProb(double qual) {
|
||||
return 1.0 - Math.pow(10.0, qual/(-10.0));
|
||||
}
|
||||
|
|
@ -48,10 +47,14 @@ public class QualityUtils {
|
|||
* @param qual a quality score (0-40)
|
||||
* @return a probability (0.0-1.0)
|
||||
*/
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
static public double qualToErrorProbRaw(byte qual) {
|
||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||
}
|
||||
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
return qualToErrorProbCache[qual];
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||
*
|
||||
|
|
@ -110,88 +113,4 @@ public class QualityUtils {
|
|||
//return (byte) Math.min(qual, maxQual);
|
||||
return (byte) Math.max(Math.min(qual, maxQual), 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compress a base and a probability into a single byte so that it can be output in a SAMRecord's SQ field.
|
||||
* Note: the highest probability this function can encode is 64%, so this function should only never be used on the best base hypothesis.
|
||||
* Another note: the probability encoded here gets rounded to the nearest 1%.
|
||||
*
|
||||
* @param baseIndex the base index
|
||||
* @param prob the base probability
|
||||
* @return a byte containing the index and the probability
|
||||
*/
|
||||
static public byte baseAndProbToCompressedQuality(int baseIndex, double prob) {
|
||||
byte compressedQual = 0;
|
||||
|
||||
compressedQual = (byte) baseIndex;
|
||||
|
||||
byte cprob = (byte) (100.0*prob);
|
||||
byte qualmask = (byte) 252;
|
||||
compressedQual += ((cprob << 2) & qualmask);
|
||||
|
||||
return compressedQual;
|
||||
}
|
||||
|
||||
/**
|
||||
* From a compressed base, extract the base index (0:A, 1:C, 2:G, 3:T)
|
||||
*
|
||||
* @param compressedQual the compressed quality score, as returned by baseAndProbToCompressedQuality
|
||||
* @return base index
|
||||
*/
|
||||
static public int compressedQualityToBaseIndex(byte compressedQual) {
|
||||
return (int) (compressedQual & 0x3);
|
||||
}
|
||||
|
||||
/**
|
||||
* From a compressed base, extract the base probability
|
||||
*
|
||||
* @param compressedQual the compressed quality score, as returned by baseAndProbToCompressedQuality
|
||||
* @return the probability
|
||||
*/
|
||||
static public double compressedQualityToProb(byte compressedQual) {
|
||||
// Because java natives are signed, extra care must be taken to avoid
|
||||
// shifting a 1 into the sign bit in the implicit promotion of 2 to an int.
|
||||
int x2 = ((int) compressedQual) & 0xff;
|
||||
x2 = (x2 >>> 2);
|
||||
|
||||
return ((double) x2)/100.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the complement of a compressed quality
|
||||
*
|
||||
* @param compressedQual the compressed quality score (as returned by baseAndProbToCompressedQuality)
|
||||
* @return the complementary compressed quality
|
||||
*/
|
||||
static public byte complementCompressedQuality(byte compressedQual) {
|
||||
int baseIndex = compressedQualityToBaseIndex(compressedQual);
|
||||
double prob = compressedQualityToProb(compressedQual);
|
||||
|
||||
return baseAndProbToCompressedQuality(BaseUtils.complementIndex(baseIndex), prob);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the reverse complement of a byte array of compressed qualities
|
||||
*
|
||||
* @param compressedQuals a byte array of compressed quality scores
|
||||
* @return the reverse complement of the byte array
|
||||
*/
|
||||
static public byte[] reverseComplementCompressedQualityArray(byte[] compressedQuals) {
|
||||
byte[] rcCompressedQuals = new byte[compressedQuals.length];
|
||||
|
||||
for (int pos = 0; pos < compressedQuals.length; pos++) {
|
||||
rcCompressedQuals[compressedQuals.length - pos - 1] = complementCompressedQuality(compressedQuals[pos]);
|
||||
}
|
||||
|
||||
return rcCompressedQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the reverse of a byte array of qualities (compressed or otherwise)
|
||||
* @param quals the array of bytes to be reversed
|
||||
* @return the reverse of the quality array
|
||||
*/
|
||||
static public byte[] reverseQualityArray( byte[] quals ) {
|
||||
return Utils.reverse(quals); // no sense in duplicating functionality
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -190,11 +190,21 @@ public class SampleUtils {
|
|||
|
||||
}
|
||||
|
||||
public static List<String> getSamplesFromCommandLineInput(Collection<String> sampleArgs) {
|
||||
/**
|
||||
* Returns a new set of samples, containing a final list of samples expanded from sampleArgs
|
||||
*
|
||||
* Each element E of sampleArgs can either be a literal sample name or a file. For each E,
|
||||
* we try to read a file named E from disk, and if possible all lines from that file are expanded
|
||||
* into unique sample names.
|
||||
*
|
||||
* @param sampleArgs
|
||||
* @return
|
||||
*/
|
||||
public static Set<String> getSamplesFromCommandLineInput(Collection<String> sampleArgs) {
|
||||
if (sampleArgs != null) {
|
||||
// Let's first go through the list and see if we were given any files. We'll add every entry in the file to our
|
||||
// sample list set, and treat the entries as if they had been specified on the command line.
|
||||
List<String> samplesFromFiles = new ArrayList<String>();
|
||||
Set<String> samplesFromFiles = new HashSet<String>();
|
||||
for (String SAMPLE_EXPRESSION : sampleArgs) {
|
||||
File sampleFile = new File(SAMPLE_EXPRESSION);
|
||||
|
||||
|
|
@ -203,7 +213,7 @@ public class SampleUtils {
|
|||
|
||||
List<String> lines = reader.readLines();
|
||||
for (String line : lines) {
|
||||
samplesFromFiles.add(line);
|
||||
samplesFromFiles.add(line.trim());
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
samplesFromFiles.add(SAMPLE_EXPRESSION); // not a file, so must be a sample
|
||||
|
|
@ -212,7 +222,8 @@ public class SampleUtils {
|
|||
|
||||
return samplesFromFiles;
|
||||
}
|
||||
return new ArrayList<String>();
|
||||
|
||||
return new HashSet<String>();
|
||||
}
|
||||
|
||||
public static Set<String> getSamplesFromCommandLineInput(Collection<String> vcfSamples, Collection<String> sampleExpressions) {
|
||||
|
|
|
|||
|
|
@ -12,19 +12,35 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* TODO FOR CHRIS HARTL
|
||||
* Allows for reading in RefSeq information
|
||||
*
|
||||
* <p>
|
||||
* Codec Description
|
||||
* Parses a sorted UCSC RefSeq file (see below) into relevant features: the gene name, the unique gene name (if multiple transcrips get separate entries), exons, gene start/stop, coding start/stop,
|
||||
* strandedness of transcription.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* See also: link to file specification
|
||||
* Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here
|
||||
* <a href="http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq">http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq</a>
|
||||
* </p>
|
||||
* <h2> Usage </h2>
|
||||
* The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example
|
||||
* <pre>
|
||||
* -refSeqBinding:REFSEQ /path/to/refSeq.txt
|
||||
* </pre>
|
||||
*
|
||||
* You will need to consult individual walkers for the binding name ("refSeqBinding", above)
|
||||
*
|
||||
* <h2>File format example</h2>
|
||||
* If you want to define your own file for use, the format is (tab delimited):
|
||||
* bin, name, chrom, strand, transcription start, transcription end, coding start, coding end, num exons, exon starts, exon ends, id, alt. name, coding start status (complete/incomplete), coding end status (complete,incomplete)
|
||||
* and exon frames, for example:
|
||||
* <pre>
|
||||
* 76 NM_001011874 1 - 3204562 3661579 3206102 3661429 3 3204562,3411782,3660632, 3207049,3411982,3661579, 0 Xkr4 cmpl cmpl 1,2,0,
|
||||
* </pre>
|
||||
* for more information see <a href="http://skip.ucsc.edu/cgi-bin/hgTables?hgsid=5651&hgta_doSchemaDb=mm8&hgta_doSchemaTable=refGene">here</a>
|
||||
* <p>
|
||||
* A BAM file containing <b>exactly one sample</b>.
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
* @author Mark DePristo
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
throw new UserException.MalformedVCF(message, lineNo);
|
||||
}
|
||||
|
||||
private static void generateException(String message, int lineNo) {
|
||||
protected static void generateException(String message, int lineNo) {
|
||||
throw new UserException.MalformedVCF(message, lineNo);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.index.DynamicIndexCreator;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broad.tribble.util.PositionalStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* this class writes VCF files
|
||||
*/
|
||||
public abstract class IndexingVCFWriter implements VCFWriter {
|
||||
final private String name;
|
||||
|
||||
private File indexFile = null;
|
||||
private OutputStream outputStream;
|
||||
private PositionalStream positionalStream = null;
|
||||
private DynamicIndexCreator indexer = null;
|
||||
private LittleEndianOutputStream idxStream = null;
|
||||
|
||||
protected IndexingVCFWriter(String name, File location, OutputStream output, boolean enableOnTheFlyIndexing) {
|
||||
outputStream = output;
|
||||
this.name = name;
|
||||
|
||||
if ( enableOnTheFlyIndexing ) {
|
||||
indexFile = Tribble.indexFile(location);
|
||||
try {
|
||||
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||
//System.out.println("Creating index on the fly for " + location);
|
||||
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
indexer.initialize(location, indexer.defaultBinSize());
|
||||
positionalStream = new PositionalStream(output);
|
||||
outputStream = positionalStream;
|
||||
} catch ( IOException ex ) {
|
||||
// No matter what we keep going, since we don't care if we can't create the index file
|
||||
idxStream = null;
|
||||
indexer = null;
|
||||
positionalStream = null;
|
||||
indexFile = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public OutputStream getOutputStream() {
|
||||
return outputStream;
|
||||
}
|
||||
|
||||
public String getStreamName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public abstract void writeHeader(VCFHeader header);
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
public void close() {
|
||||
// try to close the index stream (keep it separate to help debugging efforts)
|
||||
if ( indexer != null ) {
|
||||
try {
|
||||
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
||||
index.write(idxStream);
|
||||
idxStream.close();
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
*/
|
||||
public void add(VariantContext vc) {
|
||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||
if ( indexer != null )
|
||||
indexer.addFeature(vc, positionalStream.getPosition());
|
||||
}
|
||||
|
||||
protected static final String writerName(File location, OutputStream stream) {
|
||||
return location == null ? stream.toString() : location.getAbsolutePath();
|
||||
}
|
||||
|
||||
protected static OutputStream openOutputStream(File location) {
|
||||
try {
|
||||
return new FileOutputStream(location);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Unable to create VCF file at location: " + location, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -44,26 +44,19 @@ import java.util.*;
|
|||
/**
|
||||
* this class writes VCF files
|
||||
*/
|
||||
public class StandardVCFWriter implements VCFWriter {
|
||||
public class StandardVCFWriter extends IndexingVCFWriter {
|
||||
// the print stream we're writing to
|
||||
final protected BufferedWriter mWriter;
|
||||
|
||||
// should we write genotypes or just sites?
|
||||
final protected boolean doNotWriteGenotypes;
|
||||
|
||||
// the VCF header we're storing
|
||||
protected VCFHeader mHeader = null;
|
||||
|
||||
// the print stream we're writing to
|
||||
protected BufferedWriter mWriter;
|
||||
protected PositionalStream positionalStream = null;
|
||||
|
||||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
// should we write genotypes or just sites?
|
||||
protected boolean doNotWriteGenotypes = false;
|
||||
|
||||
protected DynamicIndexCreator indexer = null;
|
||||
protected File indexFile = null;
|
||||
LittleEndianOutputStream idxStream = null;
|
||||
File location = null;
|
||||
|
||||
/**
|
||||
* create a VCF writer, given a file to write to
|
||||
*
|
||||
|
|
@ -93,32 +86,22 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
* @param doNotWriteGenotypes do not write genotypes
|
||||
*/
|
||||
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
this(null, output, false, doNotWriteGenotypes);
|
||||
}
|
||||
|
||||
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||
this.location = location;
|
||||
|
||||
if ( enableOnTheFlyIndexing ) {
|
||||
indexFile = Tribble.indexFile(location);
|
||||
try {
|
||||
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||
//System.out.println("Creating index on the fly for " + location);
|
||||
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
indexer.initialize(location, indexer.defaultBinSize());
|
||||
positionalStream = new PositionalStream(output);
|
||||
output = positionalStream;
|
||||
} catch ( IOException ex ) {
|
||||
// No matter what we keep going, since we don't care if we can't create the index file
|
||||
}
|
||||
}
|
||||
|
||||
//mWriter = new BufferedWriter(new OutputStreamWriter(new PositionalStream(output)));
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
||||
super(writerName(location, output), location, output, enableOnTheFlyIndexing);
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// VCFWriter interface functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void writeHeader(VCFHeader header) {
|
||||
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||
|
||||
|
|
@ -158,44 +141,24 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new TribbleException("IOException writing the VCF header to " + locationString(), e);
|
||||
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private String locationString() {
|
||||
return location == null ? mWriter.toString() : location.getAbsolutePath();
|
||||
}
|
||||
|
||||
/**
|
||||
* attempt to close the VCF file
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
// try to close the vcf stream
|
||||
try {
|
||||
mWriter.flush();
|
||||
mWriter.close();
|
||||
} catch (IOException e) {
|
||||
throw new TribbleException("Unable to close " + locationString() + " because of " + e.getMessage());
|
||||
throw new ReviewedStingException("Unable to close " + getStreamName(), e);
|
||||
}
|
||||
|
||||
// try to close the index stream (keep it separate to help debugging efforts)
|
||||
if ( indexer != null ) {
|
||||
try {
|
||||
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
||||
index.write(idxStream);
|
||||
idxStream.close();
|
||||
} catch (IOException e) {
|
||||
throw new TribbleException("Unable to close index for " + locationString() + " because of " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected static OutputStream openOutputStream(File location) {
|
||||
try {
|
||||
return new FileOutputStream(location);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new TribbleException("Unable to create VCF file at location: " + location);
|
||||
}
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -203,28 +166,17 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
*
|
||||
* @param vc the Variant Context object
|
||||
*/
|
||||
@Override
|
||||
public void add(VariantContext vc) {
|
||||
add(vc, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
|
||||
*/
|
||||
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName());
|
||||
|
||||
if ( doNotWriteGenotypes )
|
||||
vc = VariantContext.modifyGenotypes(vc, null);
|
||||
|
||||
try {
|
||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
|
||||
|
||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
|
||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
|
||||
super.add(vc);
|
||||
|
||||
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||
|
|
@ -275,7 +227,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
String filters = vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
String filters = getFilterString(vc, filtersWereAppliedToContext);
|
||||
mWriter.write(filters);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
|
|
@ -317,9 +269,22 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to " + locationString());
|
||||
throw new RuntimeException("Unable to write the VCF object to " + getStreamName());
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implementation functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static final String getFilterString(final VariantContext vc) {
|
||||
return getFilterString(vc, false);
|
||||
}
|
||||
|
||||
public static final String getFilterString(final VariantContext vc, boolean forcePASS) {
|
||||
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
private String getQualValue(double qual) {
|
||||
|
|
@ -462,7 +427,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
mWriter.write(encoding);
|
||||
}
|
||||
|
||||
private static String formatVCFField(Object val) {
|
||||
public static String formatVCFField(Object val) {
|
||||
String result;
|
||||
if ( val == null )
|
||||
result = VCFConstants.MISSING_VALUE_v4;
|
||||
|
|
@ -524,12 +489,11 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
}
|
||||
|
||||
|
||||
public static int countOccurrences(char c, String s) {
|
||||
private static int countOccurrences(char c, String s) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
count += s.charAt(i) == c ? 1 : 0;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -105,34 +105,37 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
|
||||
*/
|
||||
protected Set<String> parseFilters(String filterString) {
|
||||
return parseFilters(filterHash, lineNo, filterString);
|
||||
}
|
||||
|
||||
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
||||
return fFields;
|
||||
return Collections.emptySet();
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4");
|
||||
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
|
||||
if ( filterString.length() == 0 )
|
||||
generateException("The VCF specification requires a valid filter status");
|
||||
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
if ( cache != null && cache.containsKey(filterString) )
|
||||
return Collections.unmodifiableSet(cache.get(filterString));
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
fFields.add(filterString);
|
||||
else
|
||||
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
|
||||
|
||||
filterHash.put(filterString, fFields);
|
||||
fFields = fFields;
|
||||
if ( cache != null ) cache.put(filterString, fFields);
|
||||
|
||||
return fFields;
|
||||
return Collections.unmodifiableSet(fFields);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,256 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.gcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* GATK binary VCF record
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class GCF {
|
||||
private final static int RECORD_TERMINATOR = 123456789;
|
||||
private int chromOffset;
|
||||
private int start, stop;
|
||||
private String id;
|
||||
private List<Allele> alleleMap;
|
||||
private int alleleOffsets[];
|
||||
private float qual;
|
||||
private byte refPad;
|
||||
private String info;
|
||||
private int filterOffset;
|
||||
|
||||
private List<GCFGenotype> genotypes = Collections.emptyList();
|
||||
|
||||
public GCF(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
|
||||
chromOffset = GCFHeaderBuilder.encodeString(vc.getChr());
|
||||
start = vc.getStart();
|
||||
stop = vc.getEnd();
|
||||
refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
|
||||
id = vc.getID();
|
||||
|
||||
// encode alleles
|
||||
alleleMap = new ArrayList<Allele>(vc.getNAlleles());
|
||||
alleleOffsets = new int[vc.getNAlleles()];
|
||||
alleleMap.add(vc.getReference());
|
||||
alleleOffsets[0] = GCFHeaderBuilder.encodeAllele(vc.getReference());
|
||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||
alleleMap.add(vc.getAlternateAllele(i));
|
||||
alleleOffsets[i+1] = GCFHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
|
||||
}
|
||||
|
||||
qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
|
||||
info = infoFieldString(vc, GCFHeaderBuilder);
|
||||
filterOffset = GCFHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));
|
||||
|
||||
if ( ! skipGenotypes ) {
|
||||
genotypes = encodeGenotypes(GCFHeaderBuilder, vc);
|
||||
}
|
||||
}
|
||||
|
||||
public GCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException, EOFException {
|
||||
chromOffset = inputStream.readInt();
|
||||
|
||||
// have we reached the footer?
|
||||
if ( chromOffset == GCFHeader.FOOTER_START_MARKER )
|
||||
throw new EOFException();
|
||||
|
||||
start = inputStream.readInt();
|
||||
stop = inputStream.readInt();
|
||||
id = inputStream.readUTF();
|
||||
refPad = inputStream.readByte();
|
||||
alleleOffsets = readIntArray(inputStream);
|
||||
qual = inputStream.readFloat();
|
||||
info = inputStream.readUTF();
|
||||
filterOffset = inputStream.readInt();
|
||||
|
||||
int nGenotypes = inputStream.readInt();
|
||||
int sizeOfGenotypes = inputStream.readInt();
|
||||
if ( skipGenotypes ) {
|
||||
genotypes = Collections.emptyList();
|
||||
inputStream.skipBytes(sizeOfGenotypes);
|
||||
} else {
|
||||
genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||
for ( int i = 0; i < nGenotypes; i++ )
|
||||
genotypes.add(new GCFGenotype(this, inputStream));
|
||||
}
|
||||
|
||||
int recordDone = inputStream.readInt();
|
||||
if ( recordDone != RECORD_TERMINATOR )
|
||||
throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
|
||||
}
|
||||
|
||||
public int write(DataOutputStream outputStream) throws IOException {
|
||||
int startSize = outputStream.size();
|
||||
outputStream.writeInt(chromOffset);
|
||||
outputStream.writeInt(start);
|
||||
outputStream.writeInt(stop);
|
||||
outputStream.writeUTF(id);
|
||||
outputStream.writeByte(refPad);
|
||||
writeIntArray(alleleOffsets, outputStream, true);
|
||||
outputStream.writeFloat(qual);
|
||||
outputStream.writeUTF(info);
|
||||
outputStream.writeInt(filterOffset);
|
||||
|
||||
int nGenotypes = genotypes.size();
|
||||
int expectedSizeOfGenotypes = nGenotypes == 0 ? 0 : genotypes.get(0).sizeInBytes() * nGenotypes;
|
||||
outputStream.writeInt(nGenotypes);
|
||||
outputStream.writeInt(expectedSizeOfGenotypes);
|
||||
int obsSizeOfGenotypes = 0;
|
||||
for ( GCFGenotype g : genotypes )
|
||||
obsSizeOfGenotypes += g.write(outputStream);
|
||||
if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
|
||||
throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes);
|
||||
|
||||
outputStream.writeInt(RECORD_TERMINATOR);
|
||||
return outputStream.size() - startSize;
|
||||
}
|
||||
|
||||
public VariantContext decode(final String source, final GCFHeader header) {
|
||||
final String contig = header.getString(chromOffset);
|
||||
alleleMap = header.getAlleles(alleleOffsets);
|
||||
double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
|
||||
Set<String> filters = header.getFilters(filterOffset);
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put("INFO", info);
|
||||
Byte refPadByte = refPad == 0 ? null : refPad;
|
||||
Map<String, Genotype> genotypes = decodeGenotypes(header);
|
||||
|
||||
return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte);
|
||||
}
|
||||
|
||||
private Map<String, Genotype> decodeGenotypes(final GCFHeader header) {
|
||||
if ( genotypes.isEmpty() )
|
||||
return VariantContext.NO_GENOTYPES;
|
||||
else {
|
||||
Map<String, Genotype> map = new TreeMap<String, Genotype>();
|
||||
|
||||
for ( int i = 0; i < genotypes.size(); i++ ) {
|
||||
final String sampleName = header.getSample(i);
|
||||
final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap);
|
||||
map.put(sampleName, g);
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
}
|
||||
|
||||
private List<GCFGenotype> encodeGenotypes(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc) {
|
||||
int nGenotypes = vc.getNSamples();
|
||||
if ( nGenotypes > 0 ) {
|
||||
List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
|
||||
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
|
||||
genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
|
||||
}
|
||||
|
||||
return genotypes;
|
||||
} else {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
public int getNAlleles() { return alleleOffsets.length; }
|
||||
|
||||
|
||||
private final String infoFieldString(VariantContext vc, final GCFHeaderBuilder GCFHeaderBuilder) {
|
||||
StringBuilder s = new StringBuilder();
|
||||
|
||||
boolean first = true;
|
||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||
String key = field.getKey();
|
||||
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
||||
continue;
|
||||
int stringIndex = GCFHeaderBuilder.encodeString(key);
|
||||
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
|
||||
if ( outputValue != null ) {
|
||||
if ( ! first ) s.append(";");
|
||||
s.append(stringIndex).append("=").append(outputValue);
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
protected final static int BUFFER_SIZE = 1048576; // 2**20
|
||||
|
||||
public static DataInputStream createDataInputStream(final InputStream stream) {
|
||||
return new DataInputStream(new BufferedInputStream(stream, BUFFER_SIZE));
|
||||
}
|
||||
|
||||
public static FileInputStream createFileInputStream(final File file) throws FileNotFoundException {
|
||||
return new FileInputStream(file);
|
||||
}
|
||||
|
||||
protected final static int[] readIntArray(final DataInputStream inputStream) throws IOException {
|
||||
return readIntArray(inputStream, inputStream.readInt());
|
||||
}
|
||||
|
||||
protected final static int[] readIntArray(final DataInputStream inputStream, int size) throws IOException {
|
||||
int[] array = new int[size];
|
||||
for ( int i = 0; i < array.length; i++ )
|
||||
array[i] = inputStream.readInt();
|
||||
return array;
|
||||
}
|
||||
|
||||
protected final static void writeIntArray(int[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
|
||||
if ( writeSize ) outputStream.writeInt(array.length);
|
||||
for ( int i : array )
|
||||
outputStream.writeInt(i);
|
||||
}
|
||||
|
||||
protected final static byte[] readByteArray(final DataInputStream inputStream) throws IOException {
|
||||
return readByteArray(inputStream, inputStream.readInt());
|
||||
}
|
||||
|
||||
protected final static byte[] readByteArray(final DataInputStream inputStream, int size) throws IOException {
|
||||
byte[] array = new byte[size];
|
||||
for ( int i = 0; i < array.length; i++ )
|
||||
array[i] = inputStream.readByte();
|
||||
return array;
|
||||
}
|
||||
|
||||
protected final static void writeByteArray(byte[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
|
||||
if ( writeSize ) outputStream.writeInt(array.length);
|
||||
for ( byte i : array )
|
||||
outputStream.writeByte(i);
|
||||
}
|
||||
|
||||
protected final static byte qualToByte(double phredScaledQual) {
|
||||
return (byte)Math.round(Math.min(phredScaledQual, 255));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.gcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* GATK binary VCF record
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class GCFGenotype {
|
||||
private byte gq;
|
||||
private int gt;
|
||||
private int dp;
|
||||
private int ad[];
|
||||
private byte[] pl;
|
||||
|
||||
// todo -- what to do about phasing? Perhaps we shouldn't support it
|
||||
// todo -- is the FL field generic or just a flag? Should we even support per sample filtering?
|
||||
|
||||
public GCFGenotype(final GCFHeaderBuilder GCFHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
|
||||
gq = GCF.qualToByte(genotype.getPhredScaledQual());
|
||||
gt = encodeAlleles(genotype.getAlleles(), allAlleles);
|
||||
|
||||
dp = genotype.getAttributeAsInt("DP", 0);
|
||||
|
||||
int nAlleles = allAlleles.size();
|
||||
ad = new int[nAlleles];
|
||||
|
||||
int npls = nAllelesToNPls(nAlleles);
|
||||
pl = new byte[npls];
|
||||
}
|
||||
|
||||
private int nAllelesToNPls( int nAlleles ) {
|
||||
return nAlleles*(nAlleles+1) / 2;
|
||||
}
|
||||
|
||||
public GCFGenotype(GCF GCF, DataInputStream inputStream) throws IOException {
|
||||
int gqInt = inputStream.readUnsignedByte();
|
||||
gq = (byte)gqInt;
|
||||
gt = inputStream.readInt();
|
||||
dp = inputStream.readInt();
|
||||
ad = GCF.readIntArray(inputStream, GCF.getNAlleles());
|
||||
pl = GCF.readByteArray(inputStream, nAllelesToNPls(GCF.getNAlleles()));
|
||||
}
|
||||
|
||||
// 2 alleles => 1 + 8 + 8 + 3 => 20
|
||||
protected int sizeInBytes() {
|
||||
return 1 // gq
|
||||
+ 4 * 2 // gt + dp
|
||||
+ 4 * ad.length // ad
|
||||
+ 1 * pl.length; // pl
|
||||
}
|
||||
|
||||
public Genotype decode(final String sampleName, final GCFHeader header, GCF GCF, List<Allele> alleleIndex) {
|
||||
final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
|
||||
final double negLog10PError = gq / 10.0;
|
||||
final Set<String> filters = Collections.emptySet();
|
||||
final Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put("DP", dp);
|
||||
attributes.put("AD", ad);
|
||||
attributes.put("PL", pl);
|
||||
|
||||
return new Genotype(sampleName, alleles, negLog10PError, filters, attributes, false);
|
||||
}
|
||||
|
||||
private static int encodeAlleles(List<Allele> gtList, List<Allele> allAlleles) {
|
||||
final int nAlleles = gtList.size();
|
||||
if ( nAlleles > 4 )
|
||||
throw new IllegalArgumentException("encodeAlleles doesn't support more than 4 alt alleles, but I saw " + gtList);
|
||||
|
||||
int gtInt = 0;
|
||||
for ( int i = 0; i < nAlleles ; i++ ) {
|
||||
final int bitOffset = i * 8;
|
||||
final int allelei = getAlleleIndex(gtList.get(i), allAlleles);
|
||||
final int gti = (allelei + 1) << bitOffset;
|
||||
gtInt = gtInt | gti;
|
||||
}
|
||||
|
||||
return gtInt;
|
||||
}
|
||||
|
||||
private static int getAlleleIndex(Allele q, List<Allele> allAlleles) {
|
||||
if ( q.isNoCall() )
|
||||
return 254;
|
||||
for ( int i = 0; i < allAlleles.size(); i++ )
|
||||
if ( q.equals(allAlleles.get(i)) )
|
||||
return i;
|
||||
throw new IllegalStateException("getAlleleIndex passed allele not in map! allele " + q + " allAlleles " + allAlleles);
|
||||
}
|
||||
|
||||
private static List<Allele> decodeAlleles(int gtInt, List<Allele> alleleIndex) {
|
||||
List<Allele> alleles = new ArrayList<Allele>(4);
|
||||
|
||||
for ( int i = 0; i < 32; i += 8 ) {
|
||||
final int gi = (gtInt & (0x000000FF << i)) >> i;
|
||||
if ( gi != 0 ) {
|
||||
final int allelei = gi - 1;
|
||||
alleles.add( allelei == 254 ? Allele.NO_CALL : alleleIndex.get(allelei) );
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public int write(DataOutputStream outputStream) throws IOException {
|
||||
int startSize = outputStream.size();
|
||||
outputStream.writeByte(gq);
|
||||
outputStream.writeInt(gt);
|
||||
outputStream.writeInt(dp);
|
||||
GCF.writeIntArray(ad, outputStream, false);
|
||||
GCF.writeByteArray(pl, outputStream, false);
|
||||
return outputStream.size() - startSize;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,205 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.gcf;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class GCFHeader {
|
||||
final protected static Logger logger = Logger.getLogger(GCFHeader.class);
|
||||
|
||||
public final static int GCF_VERSION = 1;
|
||||
public final static byte[] GCF_FILE_START_MARKER = "GCF\1".getBytes();
|
||||
public final static int FOOTER_START_MARKER = -1;
|
||||
public final static long HEADER_FORWARD_REFERENCE_OFFSET = GCF_FILE_START_MARKER.length + 4; // for the version
|
||||
|
||||
final int version;
|
||||
long footerPosition;
|
||||
final List<Allele> alleles;
|
||||
final List<String> strings;
|
||||
final List<String> samples;
|
||||
final List<Set<String>> filters;
|
||||
|
||||
public GCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
|
||||
version = GCF_VERSION;
|
||||
footerPosition = 0;
|
||||
this.alleles = linearize(allelesIn);
|
||||
this.strings = linearize(stringIn);
|
||||
this.samples = linearize(samplesIn);
|
||||
this.filters = null; // not used with this constructor
|
||||
}
|
||||
|
||||
public GCFHeader(FileInputStream fileInputStream) throws IOException {
|
||||
DataInputStream inputStream = new DataInputStream(fileInputStream);
|
||||
byte[] headerTest = new byte[GCF_FILE_START_MARKER.length];
|
||||
inputStream.read(headerTest);
|
||||
if ( ! Arrays.equals(headerTest, GCF_FILE_START_MARKER) ) {
|
||||
throw new UserException("Could not read GVCF file. GCF_FILE_START_MARKER missing. Saw " + new String(headerTest));
|
||||
} else {
|
||||
version = inputStream.readInt();
|
||||
logger.info("Read GCF version " + version);
|
||||
footerPosition = inputStream.readLong();
|
||||
logger.info("Read footer position of " + footerPosition);
|
||||
long lastPos = fileInputStream.getChannel().position();
|
||||
logger.info(" Last position is " + lastPos);
|
||||
|
||||
// seek to the footer
|
||||
fileInputStream.getChannel().position(footerPosition);
|
||||
if ( inputStream.readInt() != FOOTER_START_MARKER )
|
||||
throw new UserException.MalformedFile("Malformed GCF file: couldn't find the footer marker");
|
||||
alleles = stringsToAlleles(readStrings(inputStream));
|
||||
strings = readStrings(inputStream);
|
||||
samples = readStrings(inputStream);
|
||||
logger.info(String.format("Allele map of %d elements", alleles.size()));
|
||||
logger.info(String.format("String map of %d elements", strings.size()));
|
||||
logger.info(String.format("Sample map of %d elements", samples.size()));
|
||||
filters = initializeFilterCache();
|
||||
fileInputStream.getChannel().position(lastPos);
|
||||
}
|
||||
}
|
||||
|
||||
public static int writeHeader(final DataOutputStream outputStream) throws IOException {
|
||||
int startBytes = outputStream.size();
|
||||
outputStream.write(GCF_FILE_START_MARKER);
|
||||
outputStream.writeInt(GCF_VERSION);
|
||||
outputStream.writeLong(0);
|
||||
return outputStream.size() - startBytes;
|
||||
}
|
||||
|
||||
public int writeFooter(final DataOutputStream outputStream) throws IOException {
|
||||
int startBytes = outputStream.size();
|
||||
outputStream.writeInt(FOOTER_START_MARKER); // has to be the same as chrom encoding
|
||||
write(outputStream, allelesToStrings(alleles));
|
||||
write(outputStream, strings);
|
||||
write(outputStream, samples);
|
||||
return outputStream.size() - startBytes;
|
||||
}
|
||||
|
||||
private void write(DataOutputStream outputStream, List<String> l) throws IOException {
|
||||
outputStream.writeInt(l.size());
|
||||
for ( String elt : l ) outputStream.writeUTF(elt);
|
||||
}
|
||||
|
||||
private List<String> allelesToStrings(List<Allele> alleles) {
|
||||
List<String> strings = new ArrayList<String>(alleles.size());
|
||||
for ( Allele allele : alleles ) strings.add(allele.toString());
|
||||
return strings;
|
||||
}
|
||||
|
||||
private List<Set<String>> initializeFilterCache() {
|
||||
// required to allow offset -> set lookup
|
||||
List<Set<String>> l = new ArrayList<Set<String>>(strings.size());
|
||||
for ( int i = 0; i < strings.size(); i++ ) l.add(null);
|
||||
return l;
|
||||
}
|
||||
|
||||
private static List<Allele> stringsToAlleles(final List<String> strings) {
|
||||
final List<Allele> alleles = new ArrayList<Allele>(strings.size());
|
||||
for ( String string : strings ) {
|
||||
boolean isRef = string.endsWith("*");
|
||||
if ( isRef ) string = string.substring(0, string.length() - 1);
|
||||
alleles.add(Allele.create(string, isRef));
|
||||
}
|
||||
return alleles;
|
||||
}
|
||||
|
||||
private static List<String> readStrings(final DataInputStream inputStream) throws IOException {
|
||||
final int nStrings = inputStream.readInt();
|
||||
|
||||
final List<String> strings = new ArrayList<String>(nStrings);
|
||||
for ( int i = 0; i < nStrings; i++ ) {
|
||||
strings.add(inputStream.readUTF());
|
||||
}
|
||||
|
||||
return strings;
|
||||
}
|
||||
|
||||
private static <T> List<T> linearize(final Map<T, Integer> map) {
|
||||
final ArrayList<T> l = new ArrayList<T>(map.size());
|
||||
for ( int i = 0; i < map.size(); i++ ) l.add(null);
|
||||
for ( final Map.Entry<T, Integer> elt : map.entrySet() )
|
||||
l.set(elt.getValue(), elt.getKey());
|
||||
return l;
|
||||
}
|
||||
|
||||
public String getSample(final int offset) { return samples.get(offset); }
|
||||
public String getString(final int offset) { return strings.get(offset); }
|
||||
public Allele getAllele(final int offset) { return alleles.get(offset); }
|
||||
public List<Allele> getAlleles(final int[] offsets) {
|
||||
final List<Allele> alleles = new ArrayList<Allele>(offsets.length);
|
||||
for ( int i : offsets ) alleles.add(getAllele(i));
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public Set<String> getFilters(final int offset) {
|
||||
Set<String> cached = filters.get(offset);
|
||||
|
||||
if ( cached != null )
|
||||
return cached;
|
||||
else {
|
||||
final String filterString = getString(offset);
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null; // UNFILTERED records are represented by null
|
||||
else {
|
||||
Set<String> set = VCFCodec.parseFilters(null, -1, filterString);
|
||||
filters.set(offset, set); // remember the result
|
||||
return set;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.gcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class GCFHeaderBuilder {
|
||||
Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
|
||||
Map<String, Integer> strings = new HashMap<String, Integer>();
|
||||
Map<String, Integer> samples = new HashMap<String, Integer>();
|
||||
|
||||
public GCFHeader createHeader() {
|
||||
return new GCFHeader(alleles, strings, samples);
|
||||
}
|
||||
|
||||
public int encodeString(final String chr) { return encode(strings, chr); }
|
||||
public int encodeAllele(final Allele allele) { return encode(alleles, allele); }
|
||||
public int encodeSample(final String sampleName) { return encode(samples, sampleName); }
|
||||
|
||||
private <T> int encode(Map<T, Integer> map, T key) {
|
||||
Integer v = map.get(key);
|
||||
if ( v == null ) {
|
||||
v = map.size();
|
||||
map.put(key, v);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.gcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.IndexingVCFWriter;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* GCFWriter implementing the VCFWriter interface
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class GCFWriter extends IndexingVCFWriter {
|
||||
final boolean skipGenotypes;
|
||||
final FileOutputStream fileOutputStream;
|
||||
final DataOutputStream dataOutputStream;
|
||||
final GCFHeaderBuilder gcfHeaderBuilder;
|
||||
int nbytes = 0;
|
||||
VCFHeader header = null;
|
||||
File location;
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public GCFWriter(File location, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||
super(writerName(location, null), location, null, enableOnTheFlyIndexing);
|
||||
this.location = location;
|
||||
this.skipGenotypes = doNotWriteGenotypes;
|
||||
|
||||
// write the output
|
||||
try {
|
||||
fileOutputStream = new FileOutputStream(location);
|
||||
dataOutputStream = createDataOutputStream(fileOutputStream);
|
||||
gcfHeaderBuilder = new GCFHeaderBuilder();
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(location, e);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// VCFWriter interface functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void writeHeader(VCFHeader header) {
|
||||
this.header = header;
|
||||
try {
|
||||
nbytes += GCFHeader.writeHeader(dataOutputStream);
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Couldn't write header", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(VariantContext vc) {
|
||||
super.add(vc);
|
||||
GCF gcf = new GCF(gcfHeaderBuilder, vc, skipGenotypes);
|
||||
try {
|
||||
nbytes += gcf.write(dataOutputStream);
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Failed to add gcf record " + gcf + " to stream " + getStreamName(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// todo -- write out VCF header lines
|
||||
GCFHeader gcfHeader = gcfHeaderBuilder.createHeader();
|
||||
try {
|
||||
long headerPosition = nbytes;
|
||||
nbytes += gcfHeader.writeFooter(dataOutputStream);
|
||||
dataOutputStream.close();
|
||||
//System.out.println("Writing forward reference to " + headerPosition);
|
||||
|
||||
RandomAccessFile raFile = new RandomAccessFile(location, "rw");
|
||||
raFile.seek(GCFHeader.HEADER_FORWARD_REFERENCE_OFFSET);
|
||||
raFile.writeLong(headerPosition);
|
||||
raFile.close();
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("Failed to close GCFWriter " + getStreamName(), e);
|
||||
}
|
||||
|
||||
super.close();
|
||||
}
|
||||
|
||||
private static final DataOutputStream createDataOutputStream(final OutputStream stream) {
|
||||
return new DataOutputStream(new BufferedOutputStream(stream, GCF.BUFFER_SIZE));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -334,24 +334,44 @@ public class IntervalUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Splits an interval list into multiple files.
|
||||
* @param fileHeader The sam file header.
|
||||
* Splits an interval list into multiple sublists.
|
||||
* @param locs The genome locs to split.
|
||||
* @param splits The stop points for the genome locs returned by splitFixedIntervals.
|
||||
* @param scatterParts The output interval lists to write to.
|
||||
* @return A list of lists of genome locs, split according to splits
|
||||
*/
|
||||
public static void scatterFixedIntervals(SAMFileHeader fileHeader, List<GenomeLoc> locs, List<Integer> splits, List<File> scatterParts) {
|
||||
if (splits.size() != scatterParts.size())
|
||||
throw new UserException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size()));
|
||||
int fileIndex = 0;
|
||||
public static List<List<GenomeLoc>> splitIntervalsToSubLists(List<GenomeLoc> locs, List<Integer> splits) {
|
||||
int locIndex = 1;
|
||||
int start = 0;
|
||||
List<List<GenomeLoc>> sublists = new ArrayList<List<GenomeLoc>>(splits.size());
|
||||
for (Integer stop: splits) {
|
||||
IntervalList intervalList = new IntervalList(fileHeader);
|
||||
List<GenomeLoc> curList = new ArrayList<GenomeLoc>();
|
||||
for (int i = start; i < stop; i++)
|
||||
intervalList.add(toInterval(locs.get(i), locIndex++));
|
||||
intervalList.write(scatterParts.get(fileIndex++));
|
||||
curList.add(locs.get(i));
|
||||
start = stop;
|
||||
sublists.add(curList);
|
||||
}
|
||||
|
||||
return sublists;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Splits an interval list into multiple files.
|
||||
* @param fileHeader The sam file header.
|
||||
* @param splits Pre-divided genome locs returned by splitFixedIntervals.
|
||||
* @param scatterParts The output interval lists to write to.
|
||||
*/
|
||||
public static void scatterFixedIntervals(SAMFileHeader fileHeader, List<List<GenomeLoc>> splits, List<File> scatterParts) {
|
||||
if (splits.size() != scatterParts.size())
|
||||
throw new UserException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size()));
|
||||
|
||||
int fileIndex = 0;
|
||||
int locIndex = 1;
|
||||
for (final List<GenomeLoc> split : splits) {
|
||||
IntervalList intervalList = new IntervalList(fileHeader);
|
||||
for (final GenomeLoc loc : split)
|
||||
intervalList.add(toInterval(loc, locIndex++));
|
||||
intervalList.write(scatterParts.get(fileIndex++));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -361,17 +381,15 @@ public class IntervalUtils {
|
|||
* @param numParts Number of parts to split the locs into.
|
||||
* @return The stop points to split the genome locs.
|
||||
*/
|
||||
public static List<Integer> splitFixedIntervals(List<GenomeLoc> locs, int numParts) {
|
||||
public static List<List<GenomeLoc>> splitFixedIntervals(List<GenomeLoc> locs, int numParts) {
|
||||
if (locs.size() < numParts)
|
||||
throw new UserException.BadArgumentValue("scatterParts", String.format("Cannot scatter %d locs into %d parts.", locs.size(), numParts));
|
||||
long locsSize = 0;
|
||||
for (GenomeLoc loc: locs)
|
||||
locsSize += loc.size();
|
||||
List<Integer> splitPoints = new ArrayList<Integer>();
|
||||
final long locsSize = intervalSize(locs);
|
||||
final List<Integer> splitPoints = new ArrayList<Integer>();
|
||||
addFixedSplit(splitPoints, locs, locsSize, 0, locs.size(), numParts);
|
||||
Collections.sort(splitPoints);
|
||||
splitPoints.add(locs.size());
|
||||
return splitPoints;
|
||||
return splitIntervalsToSubLists(locs, splitPoints);
|
||||
}
|
||||
|
||||
private static void addFixedSplit(List<Integer> splitPoints, List<GenomeLoc> locs, long locsSize, int startIndex, int stopIndex, int numParts) {
|
||||
|
|
@ -441,4 +459,11 @@ public class IntervalUtils {
|
|||
return merged;
|
||||
}
|
||||
}
|
||||
|
||||
public static final long intervalSize(final List<GenomeLoc> locs) {
|
||||
long size = 0;
|
||||
for ( final GenomeLoc loc : locs )
|
||||
size += loc.size();
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,31 +118,40 @@ public class ReadUtils {
|
|||
/**
|
||||
* This enum represents all the different ways in which a read can overlap an interval.
|
||||
*
|
||||
* NO_OVERLAP:
|
||||
* NO_OVERLAP_CONTIG:
|
||||
* read and interval are in different contigs.
|
||||
*
|
||||
* NO_OVERLAP_LEFT:
|
||||
* the read does not overlap the interval.
|
||||
*
|
||||
* |----------------| (interval)
|
||||
* <----------------> (read)
|
||||
*
|
||||
* NO_OVERLAP_RIGHT:
|
||||
* the read does not overlap the interval.
|
||||
*
|
||||
* |----------------| (interval)
|
||||
* <----------------> (read)
|
||||
*
|
||||
* LEFT_OVERLAP:
|
||||
* OVERLAP_LEFT:
|
||||
* the read starts before the beginning of the interval but ends inside of it
|
||||
*
|
||||
* |----------------| (interval)
|
||||
* <----------------> (read)
|
||||
*
|
||||
* RIGHT_OVERLAP:
|
||||
* OVERLAP_RIGHT:
|
||||
* the read starts inside the interval but ends outside of it
|
||||
*
|
||||
* |----------------| (interval)
|
||||
* <----------------> (read)
|
||||
*
|
||||
* FULL_OVERLAP:
|
||||
* OVERLAP_LEFT_AND_RIGHT:
|
||||
* the read starts before the interval and ends after the interval
|
||||
*
|
||||
* |-----------| (interval)
|
||||
* <-------------------> (read)
|
||||
*
|
||||
* CONTAINED:
|
||||
* OVERLAP_CONTAINED:
|
||||
* the read starts and ends inside the interval
|
||||
*
|
||||
* |----------------| (interval)
|
||||
|
|
@ -658,7 +667,7 @@ public class ReadUtils {
|
|||
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
||||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
||||
int start = read.getUnclippedStart();
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
|
|
@ -670,9 +679,13 @@ public class ReadUtils {
|
|||
return start;
|
||||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||
int stop = read.getUnclippedStart();
|
||||
|
||||
if (readIsEntirelyInsertion(read))
|
||||
return stop;
|
||||
|
||||
int shift = 0;
|
||||
CigarOperator lastOperator = null;
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
|
|
@ -686,6 +699,14 @@ public class ReadUtils {
|
|||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
private static boolean readIsEntirelyInsertion(SAMRecord read) {
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() != CigarOperator.INSERTION)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
|
||||
* the alignment start of the read.
|
||||
|
|
|
|||
|
|
@ -293,17 +293,8 @@ public class Genotype {
|
|||
return commonInfo.getAttribute(key, defaultValue);
|
||||
}
|
||||
|
||||
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
|
||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
public boolean getAttributeAsBoolean(String key) { return commonInfo.getAttributeAsBoolean(key); }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||
|
||||
public Integer getAttributeAsIntegerNoException(String key) { return commonInfo.getAttributeAsIntegerNoException(key); }
|
||||
public Double getAttributeAsDoubleNoException(String key) { return commonInfo.getAttributeAsDoubleNoException(key); }
|
||||
public String getAttributeAsStringNoException(String key) { return commonInfo.getAttributeAsStringNoException(key); }
|
||||
public Boolean getAttributeAsBooleanNoException(String key) { return commonInfo.getAttributeAsBooleanNoException(key); }
|
||||
}
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
|
|
@ -204,27 +206,40 @@ public final class InferredGeneticContext {
|
|||
return defaultValue;
|
||||
}
|
||||
|
||||
// public AttributedObject getAttributes(Collection<Object> keys) {
|
||||
// AttributedObject selected = new AttributedObject();
|
||||
//
|
||||
// for ( Object key : keys )
|
||||
// selected.putAttribute(key, this.getAttribute(key));
|
||||
//
|
||||
// return selected;
|
||||
// }
|
||||
public String getAttributeAsString(String key, String defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof String ) return (String)x;
|
||||
return String.valueOf(x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
|
||||
public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
|
||||
public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
|
||||
public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
|
||||
if ( x instanceof Integer ) return (Integer)x;
|
||||
return Integer.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
public String getAttributeAsString(String key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue){ return (Boolean)getAttribute(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof Double ) return (Double)x;
|
||||
return Double.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
|
||||
public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
|
||||
public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
|
||||
public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) {
|
||||
Object x = getAttribute(key);
|
||||
if ( x == null ) return defaultValue;
|
||||
if ( x instanceof Boolean ) return (Boolean)x;
|
||||
return Boolean.valueOf((String)x); // throws an exception if this isn't a string
|
||||
}
|
||||
|
||||
// public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
|
||||
// public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
|
||||
// public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
|
||||
// public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
|
||||
// public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
|
||||
// public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
|
||||
// public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
|
||||
// public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
|
||||
}
|
||||
|
|
@ -666,21 +666,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return commonInfo.getAttribute(key, defaultValue);
|
||||
}
|
||||
|
||||
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
|
||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
public boolean getAttributeAsBoolean(String key) { return commonInfo.getAttributeAsBoolean(key); }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||
|
||||
public Integer getAttributeAsIntegerNoException(String key) { return commonInfo.getAttributeAsIntegerNoException(key); }
|
||||
public Double getAttributeAsDoubleNoException(String key) { return commonInfo.getAttributeAsDoubleNoException(key); }
|
||||
public String getAttributeAsStringNoException(String key) { return commonInfo.getAttributeAsStringNoException(key); }
|
||||
public Boolean getAttributeAsBooleanNoException(String key) { return commonInfo.getAttributeAsBooleanNoException(key); }
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with alleles
|
||||
|
|
|
|||
|
|
@ -565,11 +565,11 @@ public class VariantContextUtils {
|
|||
// special case DP (add it up) and ID (just preserve it)
|
||||
//
|
||||
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
|
||||
depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY));
|
||||
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
|
||||
if (rsID == null && vc.hasID())
|
||||
rsID = vc.getID();
|
||||
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
|
||||
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY);
|
||||
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
|
||||
// lets see if the string contains a , separator
|
||||
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
|
||||
List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
|
||||
|
|
@ -1159,9 +1159,7 @@ public class VariantContextUtils {
|
|||
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
||||
boolean attribVal = false;
|
||||
for (VariantContext vc : vcList) {
|
||||
Boolean val = vc.getAttributeAsBooleanNoException(orAttrib);
|
||||
if (val != null)
|
||||
attribVal = (attribVal || val);
|
||||
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
||||
if (attribVal) // already true, so no reason to continue:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1171,7 +1169,7 @@ public class VariantContextUtils {
|
|||
// Merge ID fields:
|
||||
String iDVal = null;
|
||||
for (VariantContext vc : vcList) {
|
||||
String val = vc.getAttributeAsStringNoException(VariantContext.ID_KEY);
|
||||
String val = vc.getAttributeAsString(VariantContext.ID_KEY, null);
|
||||
if (val != null && !val.equals(VCFConstants.EMPTY_ID_FIELD)) {
|
||||
if (iDVal == null)
|
||||
iDVal = val;
|
||||
|
|
@ -1251,8 +1249,10 @@ public class VariantContextUtils {
|
|||
|
||||
public PhaseAndQuality(Genotype gt) {
|
||||
this.isPhased = gt.isPhased();
|
||||
if (this.isPhased)
|
||||
this.PQ = gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
||||
if (this.isPhased) {
|
||||
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||
if ( this.PQ == -1 ) this.PQ = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,34 +22,6 @@ public class IndexFactoryUnitTest {
|
|||
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
|
||||
File outputFileIndex = Tribble.indexFile(outputFile);
|
||||
|
||||
/**
|
||||
* test out scoring the indexes
|
||||
*/
|
||||
@Test
|
||||
public void testScoreIndexes() {
|
||||
/*// make a list of indexes to score
|
||||
Map<Class,IndexCreator> creators = new HashMap<Class,IndexCreator>();
|
||||
// add a linear index with the default bin size
|
||||
LinearIndexCreator linearNormal = new LinearIndexCreator();
|
||||
linearNormal.initialize(inputFile, linearNormal.defaultBinSize());
|
||||
creators.add(LInearIndexlinearNormal);
|
||||
|
||||
// create a tree index with a small index size
|
||||
IntervalIndexCreator treeSmallBin = new IntervalIndexCreator();
|
||||
treeSmallBin.initialize(inputFile, Math.max(200,treeSmallBin.defaultBinSize()/10));
|
||||
creators.add(treeSmallBin);
|
||||
|
||||
List<Index> indexes = new ArrayList<Index>();
|
||||
for (IndexCreator creator : creators)
|
||||
indexes.add(creator.finalizeIndex(0));
|
||||
|
||||
ArrayList<Double> scores = IndexFactory.scoreIndexes(0.5,indexes,100, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
System.err.println("scores are : ");
|
||||
for (Double score : scores) {
|
||||
System.err.println(score);
|
||||
*/
|
||||
}
|
||||
|
||||
//
|
||||
// test out scoring the indexes
|
||||
//
|
||||
|
|
|
|||
|
|
@ -30,6 +30,20 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
private SAMFileHeader hg19Header;
|
||||
private GenomeLocParser hg19GenomeLocParser;
|
||||
private List<GenomeLoc> hg19ReferenceLocs;
|
||||
private List<GenomeLoc> hg19exomeIntervals;
|
||||
|
||||
private List<GenomeLoc> getLocs(String... intervals) {
|
||||
return getLocs(Arrays.asList(intervals));
|
||||
}
|
||||
|
||||
private List<GenomeLoc> getLocs(List<String> intervals) {
|
||||
if (intervals.size() == 0)
|
||||
return hg18ReferenceLocs;
|
||||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for (String interval: intervals)
|
||||
locs.add(hg18GenomeLocParser.parseGenomeLoc(interval));
|
||||
return locs;
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
|
|
@ -54,12 +68,69 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(hg19Ref);
|
||||
hg19GenomeLocParser = new GenomeLocParser(seq);
|
||||
hg19ReferenceLocs = Collections.unmodifiableList(GenomeLocSortedSet.createSetFromSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()).toList()) ;
|
||||
|
||||
hg19exomeIntervals = Collections.unmodifiableList(IntervalUtils.parseIntervalArguments(hg19GenomeLocParser, Arrays.asList(hg19Intervals), false));
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new UserException.CouldNotReadInputFile(hg19Ref,ex);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// tests to ensure the quality of the interval cuts of the interval cutting functions
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
private class IntervalSlicingTest extends TestDataProvider {
|
||||
public int parts;
|
||||
public double maxAllowableVariance;
|
||||
|
||||
private IntervalSlicingTest(final int parts, final double maxAllowableVariance) {
|
||||
super(IntervalSlicingTest.class);
|
||||
this.parts = parts;
|
||||
this.maxAllowableVariance = maxAllowableVariance;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("IntervalSlicingTest parts=%d maxVar=%.2f", parts, maxAllowableVariance);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "intervalslicingdata")
|
||||
public Object[][] createTrees() {
|
||||
new IntervalSlicingTest(1, 0);
|
||||
new IntervalSlicingTest(2, 1);
|
||||
new IntervalSlicingTest(5, 1);
|
||||
new IntervalSlicingTest(10, 1);
|
||||
new IntervalSlicingTest(67, 1);
|
||||
new IntervalSlicingTest(100, 1);
|
||||
new IntervalSlicingTest(500, 1);
|
||||
new IntervalSlicingTest(1000, 1);
|
||||
return IntervalSlicingTest.getTests(IntervalSlicingTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "intervalslicingdata")
|
||||
public void testFixedScatterIntervalsAlgorithm(IntervalSlicingTest test) {
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(hg19exomeIntervals, test.parts);
|
||||
|
||||
long totalSize = IntervalUtils.intervalSize(hg19exomeIntervals);
|
||||
long idealSplitSize = totalSize / test.parts;
|
||||
|
||||
long sumOfSplitSizes = 0;
|
||||
int counter = 0;
|
||||
for ( final List<GenomeLoc> split : splits ) {
|
||||
long splitSize = IntervalUtils.intervalSize(split);
|
||||
double sigma = (splitSize - idealSplitSize) / (1.0 * idealSplitSize);
|
||||
//logger.warn(String.format("Split %d size %d ideal %d sigma %.2f", counter, splitSize, idealSplitSize, sigma));
|
||||
counter++;
|
||||
sumOfSplitSizes += splitSize;
|
||||
Assert.assertTrue(Math.abs(sigma) <= test.maxAllowableVariance, String.format("Interval %d (size %d ideal %d) has a variance %.2f outside of the tolerated range %.2f", counter, splitSize, idealSplitSize, sigma, test.maxAllowableVariance));
|
||||
}
|
||||
|
||||
Assert.assertEquals(totalSize, sumOfSplitSizes, "Split intervals don't contain the exact number of bases in the origianl intervals");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions=UserException.class)
|
||||
public void testMergeListsBySetOperatorNoOverlap() {
|
||||
// a couple of lists we'll use for the testing
|
||||
|
|
@ -129,19 +200,6 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals((long)lengths.get("chrX"), 154913754);
|
||||
}
|
||||
|
||||
private List<GenomeLoc> getLocs(String... intervals) {
|
||||
return getLocs(Arrays.asList(intervals));
|
||||
}
|
||||
|
||||
private List<GenomeLoc> getLocs(List<String> intervals) {
|
||||
if (intervals.size() == 0)
|
||||
return hg18ReferenceLocs;
|
||||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for (String interval: intervals)
|
||||
locs.add(hg18GenomeLocParser.parseGenomeLoc(interval));
|
||||
return locs;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseIntervalArguments() {
|
||||
Assert.assertEquals(getLocs().size(), 45);
|
||||
|
|
@ -174,8 +232,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<File> files = testFiles("basic.", 3, ".intervals");
|
||||
|
||||
List<GenomeLoc> locs = getLocs("chr1", "chr2", "chr3");
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
|
||||
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
||||
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
||||
|
|
@ -200,8 +258,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<File> files = testFiles("less.", 3, ".intervals");
|
||||
|
||||
List<GenomeLoc> locs = getLocs("chr1", "chr2", "chr3", "chr4");
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
|
||||
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
||||
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
||||
|
|
@ -228,8 +286,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
public void testScatterFixedIntervalsMoreFiles() {
|
||||
List<File> files = testFiles("more.", 3, ".intervals");
|
||||
List<GenomeLoc> locs = getLocs("chr1", "chr2");
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, locs.size()); // locs.size() instead of files.size()
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, locs.size()); // locs.size() instead of files.size()
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
}
|
||||
@Test
|
||||
public void testScatterFixedIntervalsStart() {
|
||||
|
|
@ -242,8 +300,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
List<GenomeLoc> locs = getLocs(intervals);
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
|
||||
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
||||
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
||||
|
|
@ -270,8 +328,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
List<GenomeLoc> locs = getLocs(intervals);
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
|
||||
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
||||
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
||||
|
|
@ -298,8 +356,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
List<GenomeLoc> locs = getLocs(intervals);
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
|
||||
List<GenomeLoc> locs1 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(0).toString()), false);
|
||||
List<GenomeLoc> locs2 = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(files.get(1).toString()), false);
|
||||
|
|
@ -319,7 +377,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
public void testScatterFixedIntervalsFile() {
|
||||
List<File> files = testFiles("sg.", 20, ".intervals");
|
||||
List<GenomeLoc> locs = IntervalUtils.parseIntervalArguments(hg18GenomeLocParser, Arrays.asList(BaseTest.GATKDataLocation + "whole_exome_agilent_designed_120.targets.hg18.chr20.interval_list"), false);
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
|
||||
int[] counts = {
|
||||
125, 138, 287, 291, 312, 105, 155, 324,
|
||||
|
|
@ -332,16 +390,13 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
};
|
||||
|
||||
//String splitCounts = "";
|
||||
for (int lastIndex = 0, i = 0; i < splits.size(); i++) {
|
||||
int splitIndex = splits.get(i);
|
||||
int splitCount = (splitIndex - lastIndex);
|
||||
//splitCounts += ", " + splitCount;
|
||||
lastIndex = splitIndex;
|
||||
for (int i = 0; i < splits.size(); i++) {
|
||||
int splitCount = splits.get(i).size();
|
||||
Assert.assertEquals(splitCount, counts[i], "Num intervals in split " + i);
|
||||
}
|
||||
//System.out.println(splitCounts.substring(2));
|
||||
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, locs, splits, files);
|
||||
IntervalUtils.scatterFixedIntervals(hg18Header, splits, files);
|
||||
|
||||
int locIndex = 0;
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
|
|
@ -357,8 +412,8 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterFixedIntervalsMax() {
|
||||
List<File> files = testFiles("sg.", 85, ".intervals");
|
||||
List<Integer> splits = IntervalUtils.splitFixedIntervals(hg19ReferenceLocs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg19Header, hg19ReferenceLocs, splits, files);
|
||||
List<List<GenomeLoc>> splits = IntervalUtils.splitFixedIntervals(hg19ReferenceLocs, files.size());
|
||||
IntervalUtils.scatterFixedIntervals(hg19Header, splits, files);
|
||||
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
String file = files.get(i).toString();
|
||||
|
|
|
|||
|
|
@ -131,11 +131,11 @@ class GATKResourcesBundle extends QScript {
|
|||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf",
|
||||
"hapmap_3.3", b37, true, true))
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf",
|
||||
"1000G_indels_for_realignment", b37, true, false))
|
||||
addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf",
|
||||
"1000G_biallelic.indels", b37, true, false))
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
|
||||
"indels_mills_devine", b37, true, true))
|
||||
"Mills_Devine_2hit.indels", b37, true, true))
|
||||
|
||||
//
|
||||
// example call set for wiki tutorial
|
||||
|
|
@ -300,9 +300,9 @@ class GATKResourcesBundle extends QScript {
|
|||
bamFile = bamIn
|
||||
}
|
||||
|
||||
class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRod with UNIVERSAL_GATK_ARGS {
|
||||
class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRODs with UNIVERSAL_GATK_ARGS {
|
||||
//@Output val vcfIndex: File = swapExt(vcf.getParent, vcf, ".vcf", ".vcf.idx")
|
||||
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
||||
this.rod :+= vcf
|
||||
this.reference_sequence = ref
|
||||
}
|
||||
|
||||
|
|
@ -313,7 +313,7 @@ class GATKResourcesBundle extends QScript {
|
|||
}
|
||||
|
||||
class MakeDBSNP129(@Input dbsnp: File, @Input ref: File, @Output dbsnp129: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
|
||||
this.rodBind :+= RodBind("variant", "VCF", dbsnp)
|
||||
this.variant = dbsnp
|
||||
this.select ++= List("\"dbSNPBuildID <= 129\"")
|
||||
this.reference_sequence = ref
|
||||
this.out = dbsnp129
|
||||
|
|
|
|||
|
|
@ -1,65 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import java.io.File
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
|
||||
import net.sf.samtools.SAMFileHeader
|
||||
import java.util.Collections
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
|
||||
|
||||
case class GATKIntervals(reference: File, intervals: List[String]) {
|
||||
private lazy val referenceDataSource = new ReferenceDataSource(reference)
|
||||
private var splitsBySize = Map.empty[Int, java.util.List[java.lang.Integer]]
|
||||
|
||||
lazy val samFileHeader = {
|
||||
val header = new SAMFileHeader
|
||||
header.setSequenceDictionary(referenceDataSource.getReference.getSequenceDictionary)
|
||||
header
|
||||
}
|
||||
|
||||
lazy val locs: java.util.List[GenomeLoc] = {
|
||||
val parser = new GenomeLocParser(referenceDataSource.getReference)
|
||||
val parsedLocs =
|
||||
if (intervals.isEmpty)
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(samFileHeader.getSequenceDictionary).toList
|
||||
else
|
||||
IntervalUtils.parseIntervalArguments(parser, intervals, false)
|
||||
Collections.sort(parsedLocs)
|
||||
Collections.unmodifiableList(parsedLocs)
|
||||
}
|
||||
|
||||
lazy val contigs = locs.map(_.getContig).distinct.toList
|
||||
|
||||
def getSplits(size: Int) = {
|
||||
splitsBySize.getOrElse(size, {
|
||||
val splits: java.util.List[java.lang.Integer] = IntervalUtils.splitFixedIntervals(locs, size)
|
||||
splitsBySize += size -> splits
|
||||
splits
|
||||
})
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import java.io.File
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
|
||||
import net.sf.samtools.SAMFileHeader
|
||||
import java.util.Collections
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
|
||||
|
||||
case class GATKIntervals(reference: File, intervals: List[String]) {
|
||||
private lazy val referenceDataSource = new ReferenceDataSource(reference)
|
||||
// private var splitsBySize = Map.empty[Int, java.util.List[java.lang.Integer]]
|
||||
|
||||
lazy val samFileHeader = {
|
||||
val header = new SAMFileHeader
|
||||
header.setSequenceDictionary(referenceDataSource.getReference.getSequenceDictionary)
|
||||
header
|
||||
}
|
||||
|
||||
lazy val locs: java.util.List[GenomeLoc] = {
|
||||
val parser = new GenomeLocParser(referenceDataSource.getReference)
|
||||
val parsedLocs =
|
||||
if (intervals.isEmpty)
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(samFileHeader.getSequenceDictionary).toList
|
||||
else
|
||||
IntervalUtils.parseIntervalArguments(parser, intervals, false)
|
||||
Collections.sort(parsedLocs)
|
||||
Collections.unmodifiableList(parsedLocs)
|
||||
}
|
||||
|
||||
lazy val contigs = locs.map(_.getContig).distinct.toList
|
||||
|
||||
// def getSplits(size: Int) = {
|
||||
// splitsBySize.getOrElse(size, {
|
||||
// val splits: java.util.List[java.lang.Integer] = IntervalUtils.splitFixedIntervals(locs, size)
|
||||
// splitsBySize += size -> splits
|
||||
// splits
|
||||
// })
|
||||
// }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ class IntervalScatterFunction extends GATKScatterFunction with InProcessFunction
|
|||
|
||||
def run() {
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
|
||||
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, gi.locs,
|
||||
gi.getSplits(this.scatterOutputFiles.size), this.scatterOutputFiles)
|
||||
val splits = IntervalUtils.splitFixedIntervals(gi.locs, this.scatterOutputFiles.size)
|
||||
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,8 +53,8 @@ class GATKIntervalsUnitTest {
|
|||
val gi = new GATKIntervals(hg18Reference, List("chr1:1-1", "chr2:2-3", "chr3:3-5"))
|
||||
Assert.assertEquals(gi.locs.toList, List(chr1, chr2, chr3))
|
||||
Assert.assertEquals(gi.contigs, List("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(gi.getSplits(2).toList, List(2, 3))
|
||||
Assert.assertEquals(gi.getSplits(3).toList, List(1, 2, 3))
|
||||
// Assert.assertEquals(gi.getSplits(2).toList, List(2, 3))
|
||||
// Assert.assertEquals(gi.getSplits(3).toList, List(1, 2, 3))
|
||||
}
|
||||
|
||||
@Test(timeOut = 30000)
|
||||
|
|
@ -65,7 +65,7 @@ class GATKIntervalsUnitTest {
|
|||
// for(Item item: javaConvertedScalaList)
|
||||
// This for loop is actually an O(N^2) operation as the iterator calls the
|
||||
// O(N) javaConvertedScalaList.size() for each iteration of the loop.
|
||||
Assert.assertEquals(gi.getSplits(gi.locs.size).size, 189894)
|
||||
//Assert.assertEquals(gi.getSplits(gi.locs.size).size, 189894)
|
||||
Assert.assertEquals(gi.contigs.size, 24)
|
||||
}
|
||||
|
||||
|
|
@ -74,8 +74,8 @@ class GATKIntervalsUnitTest {
|
|||
val gi = new GATKIntervals(hg18Reference, Nil)
|
||||
Assert.assertEquals(gi.locs, hg18ReferenceLocs)
|
||||
Assert.assertEquals(gi.contigs.size, hg18ReferenceLocs.size)
|
||||
Assert.assertEquals(gi.getSplits(2).toList, List(10, 45))
|
||||
Assert.assertEquals(gi.getSplits(4).toList, List(5, 10, 16, 45))
|
||||
// Assert.assertEquals(gi.getSplits(2).toList, List(10, 45))
|
||||
// Assert.assertEquals(gi.getSplits(4).toList, List(5, 10, 16, 45))
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in New Issue