diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java index 95d1da5b7..3e8d884be 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java @@ -18,6 +18,7 @@ import java.util.*; public class TableCodec implements ReferenceDependentFeatureCodec { private String delimiterRegex = "\\s+"; private String headerDelimiter = "HEADER"; + private String igvHeaderDelimiter = "track"; private String commentDelimiter = "#"; private ArrayList header = new ArrayList(); @@ -43,7 +44,7 @@ public class TableCodec implements ReferenceDependentFeatureCodec { @Override public Feature decode(String line) { - if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter)) + if (line.startsWith(headerDelimiter) || line.startsWith(commentDelimiter) || line.startsWith(igvHeaderDelimiter)) return null; String[] split = line.split(delimiterRegex); if (split.length < 1) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java index 59e38035d..193e3c883 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/AssociationTestRunner.java @@ -26,7 +26,7 @@ public class AssociationTestRunner { // todo -- this was written when ACs could implement interfaces, now that they extend, there's no multiple inheritance static Normal standardNormal = new Normal(0.0,1.0,null); - private static int pToQ(double p) { + public static int pToQ(double p) { return Math.min((int) Math.floor(QualityUtils.phredScaleErrorRate(p)),MAX_Q_VALUE); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationRecalibrator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationRecalibrator.java new file mode 100755 index 000000000..39f357f59 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationRecalibrator.java @@ -0,0 +1,177 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.association; + +import cern.colt.function.DoubleDoubleFunction; +import cern.colt.matrix.DoubleMatrix1D; +import cern.colt.matrix.DoubleMatrix2D; +import cern.colt.matrix.impl.DenseDoubleMatrix1D; +import cern.colt.matrix.impl.DenseDoubleMatrix2D; +import cern.colt.matrix.linalg.Algebra; +import cern.colt.matrix.linalg.EigenvalueDecomposition; +import org.broad.tribble.bed.BEDFeature; +import org.broad.tribble.bed.SimpleBEDFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.bed.BedParser; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.io.File; +import java.io.PrintStream; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: 3/25/11 + * Time: 11:05 AM + * To change this template use File | Settings | File Templates. + */ +public class RegionalAssociationRecalibrator extends RodWalker> { + + @Argument(shortName="w",fullName="whiteningFile",doc="File containing the mean, max vectors and covariance matrix",required=false) + File whitenFile = null; + @Argument(shortName="p",fullName="normParameter",doc="Exponent for vector norm; p > 4 will induce a p=infinity approximation",required=false) + double pNorm = 1.0; + @Hidden + @Argument(shortName="cg",fullName="capacityGuess",doc="this is hidden",required = false) + int initialCapacity = 3500000; + + @Output + public PrintStream out; + + private DataWhitener whitener; + private double[] dataHolder; + + public void initialize() { + if ( whitenFile != null ) { + whitener = getWhitenerFromFile(whitenFile); + } + } + + public Set reduceInit() { return new HashSet(initialCapacity); } + + public LocHashingPair map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext ali ) { + if ( tracker == null ) { return null; } + + ArrayList features = new ArrayList(7); + + for ( GATKFeature feature : tracker.getAllRods() ) { + Object uo = feature.getUnderlyingObject(); + if ( uo instanceof TableFeature ) { + features.add((TableFeature)uo); + } + } + + if ( dataHolder == null ) { + dataHolder = new double[features.size()]; + } + + int idx = 0; + for ( TableFeature f : features ) { + dataHolder[idx] = Double.parseDouble(f.getValue(3)); + idx++; + } + + double normVal; + + if ( whitener != null ) { + normVal = calculateNorm(whitener.whiten(dataHolder),pNorm); + } else { + normVal = calculateNorm(dataHolder,pNorm); + } + + return new LocHashingPair(features.get(0).getLocation(),normVal); + } + + public Set reduce(LocHashingPair map, Set red) { + if ( map != null ) { red.add(map); } + return red; + } + + public void onTraversalDone(Set normByLoc) { + double[] normRanks = new double[(normByLoc.size())]; + int offset = 0; + for ( LocHashingPair lhp : normByLoc ) { + normRanks[offset]=(lhp.second); + offset++; + } + Arrays.sort(normRanks); + for ( LocHashingPair lhp : normByLoc ) { + int rank = Arrays.binarySearch(normRanks,lhp.second); + double prob = ((double) rank)/((double)normRanks.length); + int qual = AssociationTestRunner.pToQ(prob); + out.printf("%s\t%d\t%d\t%d\t%.2e\t%d",lhp.getFirst().getContig(),lhp.getFirst().getStart(),lhp.getFirst().getStop(), + qual,prob,rank); + } + } + + private static DataWhitener getWhitenerFromFile(File file) { + // todo -- implement me + return null; + } + + private static double calculateNorm(double[] vec, double p) { + double sumExp = 0.0; + for ( double v : vec ) { + sumExp += Math.pow(Math.abs(v),p); + } + + return Math.pow(sumExp,1.0/p); + } + + class DataWhitener { + + private final Algebra ALGEBRA = new Algebra(); + private final DoubleDoubleFunction SUBTRACT = new DoubleDoubleFunction() { + @Override + public double apply(double v, double v1) { + return v - v1; + } + }; + + private DenseDoubleMatrix1D means; + private DenseDoubleMatrix1D maxAbs; + private DoubleMatrix2D transform; + + public DataWhitener() { + } + + public void setMeans(double[] inMeans) { + means = new DenseDoubleMatrix1D(inMeans); + } + + public void setMaxs(double[] maxs) { + maxAbs = new DenseDoubleMatrix1D(maxs); + } + + public void setCov(double[][] cov) { + EigenvalueDecomposition decomp = new EigenvalueDecomposition(new DenseDoubleMatrix2D(cov)); + DoubleMatrix2D diag = decomp.getD(); + for ( int i = 0; i < diag.size(); i ++ ) { + diag.set(i,i, Math.pow(diag.get(i,i),-0.5)); + } + transform = ALGEBRA.mult(diag,ALGEBRA.transpose(decomp.getV())); + } + + public double[] whiten(double[] value) { + DenseDoubleMatrix1D vec = new DenseDoubleMatrix1D(value); + return ALGEBRA.mult(transform, vec.assign(means, SUBTRACT) ).toArray(); + } + } + + class LocHashingPair extends Pair { + + public LocHashingPair(GenomeLoc g, Double d) { + super(g,d); + } + + public int hashCode() { return first.hashCode(); } + } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java index 016884ca4..d3f6b0585 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/association/RegionalAssociationWalker.java @@ -40,6 +40,13 @@ public class RegionalAssociationWalker extends LocusWalker validAssociations = getAssociations(); if ( bedGraph ) { diff --git a/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q b/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q index 8d883abee..b8b150e54 100755 --- a/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q +++ b/scala/qscript/oneoffs/chartl/ScatterGatherAssociation.q @@ -23,7 +23,7 @@ class ScatterGatherAssociation extends QScript { @Argument(fullName="memoryLimit",shortName="M",doc="Memory limit for SG jobs",required=false) var memLimit : Int = 4 @Argument(fullName="scatterJobs",shortName="SJ",doc="Number of scatter jobs",required=false) - var scatterJobs : Int = 75 + var scatterJobs : Int = 125 val ASSOCIATION_TESTS = List("BaseQualityScore","InsertSizeDistribution","MappingQuality0", "MateMappingQuality","MateOtherContig","MateSameStrand","MateUnmapped","MismatchRate", @@ -33,6 +33,9 @@ class ScatterGatherAssociation extends QScript { class RegionalAssociationSG(base : String, ext : String) extends CommandLineGATK with ScatterGatherableFunction{ this.analysis_type = "RegionalAssociation" + @Argument(doc="useBed") + var useBed : Boolean = true + // the rest are output files implicitly constructed by the multiplexer @Output(doc="bqs") @@ -78,7 +81,13 @@ class ScatterGatherAssociation extends QScript { @Gather(classOf[SimpleTextGatherFunction]) var sd : File = new File(String.format("%s.%s.%s",base,"SampleDepth",ext)) - override def commandLine = super.commandLine + " -AT ALL -o %s".format(base) + override def commandLine = { + var bedStr : String = "" + if ( useBed ) { + bedStr = " -bg " + } + super.commandLine + " -AT ALL -o %s%s".format(base,bedStr) + } } def script = { @@ -91,6 +100,8 @@ class ScatterGatherAssociation extends QScript { } var association = new RegionalAssociationSG(outBase,ext) + association.useBed = ! dontUseBedGraph + association.sample_metadata :+= metaData association.intervals :+= intervalsFile association.reference_sequence = referenceFile association.jarFile = gatkJar