Changes in three files to make the HapMap RODs work:
- HapMapAlleleFrequenciesROD.java - the ReferenceOrderedDatum implementation for HapMap allele frequencies
- PrepareROD.java - has a static block that loads the known ROD classes; the class above had to be added to it
- GenomeAnalysisTK.java - when supplied a hapmap argument, loads the file as a HapMap ROD

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@265 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b4cdd1d9a1
commit
c192a95998
|
|
@ -10,6 +10,7 @@ import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
|
@ -26,7 +27,6 @@ import java.io.File;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class GenomeAnalysisTK extends CommandLineProgram {
|
public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
|
|
@ -83,7 +83,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
* Used by the walker.
|
* Used by the walker.
|
||||||
*/
|
*/
|
||||||
public PrintStream err = System.err;
|
public PrintStream err = System.err;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* our log, which we want to capture anything from this class
|
* our log, which we want to capture anything from this class
|
||||||
*/
|
*/
|
||||||
|
|
@ -103,7 +103,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
m_parser.addOptionalArg("genome_region", "L", "Genome region to operation on: from chr:start-end", "REGION_STR");
|
m_parser.addOptionalArg("genome_region", "L", "Genome region to operation on: from chr:start-end", "REGION_STR");
|
||||||
m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name");
|
m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name");
|
||||||
m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE");
|
m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE");
|
||||||
m_parser.addOptionalArg("Hapmap", "H", "Hapmap file", "HAPMAP_FILE");
|
m_parser.addOptionalArg("hapmap", "H", "Hapmap file", "HAPMAP_FILE");
|
||||||
m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO");
|
m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO");
|
||||||
m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE");
|
m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE");
|
||||||
m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS");
|
m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS");
|
||||||
|
|
@ -113,7 +113,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
m_parser.addOptionalArg("all_loci", "A", "Should we process all loci, not just those covered by reads", "WALK_ALL_LOCI");
|
m_parser.addOptionalArg("all_loci", "A", "Should we process all loci, not just those covered by reads", "WALK_ALL_LOCI");
|
||||||
m_parser.addOptionalArg("out", "o", "An output file presented to the walker. Will overwrite contents if file exists.", "outFileName" );
|
m_parser.addOptionalArg("out", "o", "An output file presented to the walker. Will overwrite contents if file exists.", "outFileName" );
|
||||||
m_parser.addOptionalArg("err", "e", "An error output file presented to the walker. Will overwrite contents if file exists.", "errFileName" );
|
m_parser.addOptionalArg("err", "e", "An error output file presented to the walker. Will overwrite contents if file exists.", "errFileName" );
|
||||||
m_parser.addOptionalArg("outerr", "oe", "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", "outErrFileName");
|
m_parser.addOptionalArg("outerr", "oe", "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", "outErrFileName");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -142,7 +142,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String getArgumentSourceName( Class argumentSource ) {
|
protected String getArgumentSourceName( Class argumentSource ) {
|
||||||
return WalkerManager.getWalkerName( (Class<Walker>)argumentSource );
|
return WalkerManager.getWalkerName( (Class<Walker>)argumentSource );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -157,23 +157,27 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
final boolean TEST_ROD = false;
|
final boolean TEST_ROD = false;
|
||||||
List<ReferenceOrderedData> rods = new ArrayList<ReferenceOrderedData>();
|
List<ReferenceOrderedData> rods = new ArrayList<ReferenceOrderedData>();
|
||||||
|
|
||||||
if (TEST_ROD) {
|
|
||||||
ReferenceOrderedData gff = new ReferenceOrderedData(new File("single.gff"), rodGFF.class);
|
|
||||||
|
if ( TEST_ROD ) {
|
||||||
|
ReferenceOrderedData gff = new ReferenceOrderedData(new File("trunk/data/gFFTest.gff"), rodGFF.class );
|
||||||
gff.testMe();
|
gff.testMe();
|
||||||
|
|
||||||
//ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class );
|
//ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class );
|
||||||
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt"), rodDbSNP.class);
|
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt"), rodDbSNP.class );
|
||||||
//dbsnp.testMe();
|
//dbsnp.testMe();
|
||||||
rods.add(dbsnp); // { gff, dbsnp };
|
rods.add(dbsnp); // { gff, dbsnp };
|
||||||
} else if (DBSNP_FILE != null) {
|
} else {
|
||||||
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File(DBSNP_FILE), rodDbSNP.class);
|
if ( DBSNP_FILE != null ) {
|
||||||
//dbsnp.testMe();
|
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File(DBSNP_FILE), rodDbSNP.class );
|
||||||
rods.add(dbsnp); // { gff, dbsnp };
|
//dbsnp.testMe();
|
||||||
}
|
rods.add(dbsnp); // { gff, dbsnp };
|
||||||
|
}
|
||||||
if (HAPMAP_FILE != null) {
|
if ( HAPMAP_FILE != null ) {
|
||||||
ReferenceOrderedData gff = new ReferenceOrderedData(new File(HAPMAP_FILE), rodGFF.class);
|
ReferenceOrderedData hapmap = new ReferenceOrderedData(new File(HAPMAP_FILE), HapMapAlleleFrequenciesROD.class );
|
||||||
rods.add(gff);
|
//dbsnp.testMe();
|
||||||
|
rods.add(hapmap); // { gff, dbsnp };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
initializeOutputStreams();
|
initializeOutputStreams();
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,105 @@
|
||||||
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import edu.mit.broad.picard.util.SequenceUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ReferenceOrderedDatum class to hold HapMap AlleleFrequency Data
|
||||||
|
*/
|
||||||
|
public class HapMapAlleleFrequenciesROD extends ReferenceOrderedDatum {
|
||||||
|
public GenomeLoc loc; // genome location of SNP
|
||||||
|
// Reference sequence chromosome or scaffold
|
||||||
|
// Start and stop positions in chrom
|
||||||
|
|
||||||
|
public String rsNumber; // dbsnp rsNumber for this site
|
||||||
|
|
||||||
|
public String hgBuild;
|
||||||
|
|
||||||
|
public char Strand; // strand of the supplied alleles
|
||||||
|
|
||||||
|
public char refAllele;
|
||||||
|
public char varAllele;
|
||||||
|
|
||||||
|
public double refFreq;
|
||||||
|
public double varFreq;
|
||||||
|
|
||||||
|
|
||||||
|
public String strand; // maybe we don't need these?
|
||||||
|
public String alleles; // maybe we don't need these?
|
||||||
|
public Integer refCounts; // maybe we don't need these?
|
||||||
|
public Integer varCounts; // maybe we don't need these?
|
||||||
|
public Integer totalCounts; // maybe we don't need these?
|
||||||
|
|
||||||
|
|
||||||
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
//rs11511647 HG18 chr10 62765 + T/C T C 21 97 0.178 0.822 118
|
||||||
|
|
||||||
|
return String.format(
|
||||||
|
"%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%1.3f\t%1.3f\t%d",
|
||||||
|
rsNumber, hgBuild, getContig(), getStart(), strand, alleles, refAllele, varAllele,
|
||||||
|
refCounts, varCounts, refFreq, varFreq, totalCounts);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toSimpleString() {
|
||||||
|
return String.format("%s:%s:%s:%1.3f", rsNumber, alleles, strand, varFreq);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String repl() {
|
||||||
|
return toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseLine(final String[] parts) {
|
||||||
|
try {
|
||||||
|
// rs11511647 <=> HG18 <=> chr10 <=> 62765 <=> + <=> T/C <=> T <=> C <=> 21 <=> 97 <=> 0.178 <=> 0.822 <=> 118
|
||||||
|
|
||||||
|
rsNumber = parts[0]; //rs#
|
||||||
|
hgBuild = parts[1]; // build
|
||||||
|
|
||||||
|
String contig = parts[2]; // chrom
|
||||||
|
long start = Long.parseLong(parts[3]); // The final is 1 based
|
||||||
|
long stop = start;
|
||||||
|
|
||||||
|
strand = parts[4]; // strand
|
||||||
|
alleles = parts[5]; //alleles
|
||||||
|
refAllele = parts[6].charAt(0); // ref_allele
|
||||||
|
varAllele = parts[7].charAt(0); // var_allele
|
||||||
|
refCounts = Integer.parseInt(parts[8]); // CEU_ref
|
||||||
|
varCounts = Integer.parseInt(parts[9]); // CEU_var
|
||||||
|
refFreq = Double.parseDouble(parts[10]); // CEU_ref_freq
|
||||||
|
varFreq = Double.parseDouble(parts[11]); // CEU_var_freq
|
||||||
|
totalCounts = Integer.parseInt(parts[12]); // CEU_var
|
||||||
|
|
||||||
|
loc = new GenomeLoc(contig, start, stop);
|
||||||
|
|
||||||
|
} catch ( RuntimeException e ) {
|
||||||
|
System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts));
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getVarAlleleFreq() { return this.varFreq; }
|
||||||
|
|
||||||
|
public List<String> getAllelesFWD() {
|
||||||
|
List<String> alleleList;
|
||||||
|
if ( onFwdStrand() )
|
||||||
|
alleleList = Arrays.asList(alleles.split("/"));
|
||||||
|
else
|
||||||
|
alleleList = Arrays.asList(SequenceUtil.reverseComplement(alleles).split("/"));
|
||||||
|
|
||||||
|
return alleleList;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean onFwdStrand() {
|
||||||
|
return strand.equals("+");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -8,10 +8,7 @@ import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
@ -36,6 +33,7 @@ public class PrepareROD extends CommandLineProgram {
|
||||||
static {
|
static {
|
||||||
addModule("GFF", rodGFF.class);
|
addModule("GFF", rodGFF.class);
|
||||||
addModule("dbSNP", rodDbSNP.class);
|
addModule("dbSNP", rodDbSNP.class);
|
||||||
|
addModule("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Required main method implementation. */
|
/** Required main method implementation. */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue