diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 071b4d806..ac1c751ca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.traversals; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -38,10 +39,12 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.activeregion.*; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.io.PrintStream; import java.util.*; /** @@ -79,26 +82,35 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine 0" + }) + private void printIGVFormatHeader(final PrintStream out, final String graphType, final String ... columns ) { + out.printf("#track graphType=%s%n", graphType); + out.printf("Chromosome\tStart\tEnd\tFeature\t%s%n", Utils.join("\t", columns)); + + } + + /** + * Helper function to write out a IGV formatted line to out, at loc, with values + * + * http://www.broadinstitute.org/software/igv/IGV + * + * @param out a non-null PrintStream where we'll write our line + * @param loc the location of values + * @param featureName string name of this feature (see IGV format) + * @param values the floating point values to associate with loc and feature name in out + */ + @Requires({ + "out != null", + "loc != null", + "values.length > 0" + }) + private void printIGVFormatRow(final PrintStream out, final GenomeLoc loc, final String featureName, final double ... values) { + // note that start and stop are 0 based, but the stop is exclusive so we don't subtract 1 + out.printf("%s\t%d\t%d\t%s", loc.getContig(), loc.getStart() - 1, loc.getStop(), featureName); + for ( final double value : values ) + out.print(String.format("\t%.3f", value)); + out.println(); + } + + /** + * Write out activity profile information, if requested by the walker + * + * @param states the states in the current activity profile + */ + @Requires("states != null") + private void writeActivityProfile(final List states) { + if ( walker.activityProfileOutStream != null ) { + initializeOutputStreamsIfNecessary(); + for ( final ActivityProfileState state : states ) { + printIGVFormatRow(walker.activityProfileOutStream, state.getLoc(), "state", state.isActiveProb); + } + } + } + + /** + * Write out each active region to the walker activeRegionOutStream + * + * @param region the region we're currently operating on + */ + @Requires("region != null") + private void writeActiveRegion(final ActiveRegion region) { + if( walker.activeRegionOutStream != null ) { + initializeOutputStreamsIfNecessary(); + printIGVFormatRow(walker.activeRegionOutStream, region.getLocation().getStartLocation(), + "end-marker", 0.0); + printIGVFormatRow(walker.activeRegionOutStream, region.getLocation(), + "size=" + region.getLocation().size(), region.isActive ? 1.0 : -1.0); + } + } + + // ------------------------------------------------------------------------------------- // // Functions to process active regions that are ready for map / reduce calls @@ -349,26 +459,6 @@ public class TraverseActiveRegions extends TraversalEngine walker ) { - // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : workQueue ) { - if( activeRegion.isActive ) { - walker.activeRegionOutStream.println( activeRegion.getLocation() ); - } - } } /** @@ -384,23 +474,20 @@ public class TraverseActiveRegions extends TraversalEngine walker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index e268bba0d..24e512a7b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -61,10 +61,26 @@ import java.util.*; @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { - @Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false) + /** + * If provided, this walker will write out its activity profile (per bp probabilities of being active) + * to this file in the IGV formatted TAB deliminated output: + * + * http://www.broadinstitute.org/software/igv/IGV + * + * Intended to make debugging the activity profile calculations easier + */ + @Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results in IGV format", required = false) public PrintStream activityProfileOutStream = null; - @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) + /** + * If provided, this walker will write out its active and inactive regions + * to this file in the IGV formatted TAB deliminated output: + * + * http://www.broadinstitute.org/software/igv/IGV + * + * Intended to make debugging the active region calculations easier + */ + @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this IGV formatted file", required = false) public PrintStream activeRegionOutStream = null; @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false) diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 8f04c09cb..66485c8cf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -44,6 +45,7 @@ import java.util.ArrayList; public class ActiveRegion implements HasGenomeLocation { private final ArrayList reads = new ArrayList(); + private final List supportingStates; private final GenomeLoc activeRegionLoc; private final GenomeLoc extendedLoc; private final int extension; @@ -51,8 +53,9 @@ public class ActiveRegion implements HasGenomeLocation { private final GenomeLocParser genomeLocParser; public final boolean isActive; - public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { + public ActiveRegion( final GenomeLoc activeRegionLoc, final List supportingStates, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { this.activeRegionLoc = activeRegionLoc; + this.supportingStates = new ArrayList(supportingStates); this.isActive = isActive; this.genomeLocParser = genomeLocParser; this.extension = extension; @@ -112,6 +115,8 @@ public class ActiveRegion implements HasGenomeLocation { public GenomeLoc getExtendedLoc() { return extendedLoc; } public GenomeLoc getReferenceLoc() { return fullExtentReferenceLoc; } + public List getSupportingStates() { return supportingStates; } + public int getExtension() { return extension; } public int size() { return reads.size(); } public void clearReads() { reads.clear(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index a863d695e..ab9095106 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -336,7 +336,9 @@ public class ActivityProfile { return null; // we need to create the active region, and clip out the states we're extracting from this profile - stateList.subList(0, offsetOfNextRegionEnd + 1).clear(); + final List sub = stateList.subList(0, offsetOfNextRegionEnd + 1); + final List supportingStates = new ArrayList(sub); + sub.clear(); // update the start and stop locations as necessary if ( stateList.isEmpty() ) { @@ -345,7 +347,7 @@ public class ActivityProfile { regionStartLoc = stateList.get(0).getLoc(); } final GenomeLoc regionLoc = parser.createGenomeLoc(first.getLoc().getContig(), first.getLoc().getStart(), first.getLoc().getStart() + offsetOfNextRegionEnd); - return new ActiveRegion(regionLoc, isActiveRegion, parser, activeRegionExtension); + return new ActiveRegion(regionLoc, supportingStates, isActiveRegion, parser, activeRegionExtension); } /**