changes in three files to make the HapMap RODs work:
- HapMapAlleleFrequenciesROD.java - the referenceOrderedDatum implementation - PrepareROD.java - has a static block that loads the known ROD classes, had to add the above - GenomeAnalysisTK.java - when supplied a hapmap argument... loads the ROD git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@265 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b4cdd1d9a1
commit
c192a95998
|
|
@ -10,6 +10,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
|
@ -26,7 +27,6 @@ import java.io.File;
|
|||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
public class GenomeAnalysisTK extends CommandLineProgram {
|
||||
|
|
@ -83,7 +83,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
* Used by the walker.
|
||||
*/
|
||||
public PrintStream err = System.err;
|
||||
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
|
|
@ -103,7 +103,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
m_parser.addOptionalArg("genome_region", "L", "Genome region to operation on: from chr:start-end", "REGION_STR");
|
||||
m_parser.addRequiredArg("analysis_type", "T", "Type of analysis to run", "Analysis_Name");
|
||||
m_parser.addOptionalArg("DBSNP", "D", "DBSNP file", "DBSNP_FILE");
|
||||
m_parser.addOptionalArg("Hapmap", "H", "Hapmap file", "HAPMAP_FILE");
|
||||
m_parser.addOptionalArg("hapmap", "H", "Hapmap file", "HAPMAP_FILE");
|
||||
m_parser.addOptionalFlag("threaded_IO", "P", "If set, enables threaded I/O operations", "ENABLED_THREADED_IO");
|
||||
m_parser.addOptionalFlag("unsafe", "U", "If set, enables unsafe operations, nothing will be checked at runtime.", "UNSAFE");
|
||||
m_parser.addOptionalArg("sort_on_the_fly", "sort", "Maximum number of reads to sort on the fly", "MAX_ON_FLY_SORTS");
|
||||
|
|
@ -113,7 +113,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
m_parser.addOptionalArg("all_loci", "A", "Should we process all loci, not just those covered by reads", "WALK_ALL_LOCI");
|
||||
m_parser.addOptionalArg("out", "o", "An output file presented to the walker. Will overwrite contents if file exists.", "outFileName" );
|
||||
m_parser.addOptionalArg("err", "e", "An error output file presented to the walker. Will overwrite contents if file exists.", "errFileName" );
|
||||
m_parser.addOptionalArg("outerr", "oe", "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", "outErrFileName");
|
||||
m_parser.addOptionalArg("outerr", "oe", "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", "outErrFileName");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -142,7 +142,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
|
||||
@Override
|
||||
protected String getArgumentSourceName( Class argumentSource ) {
|
||||
return WalkerManager.getWalkerName( (Class<Walker>)argumentSource );
|
||||
return WalkerManager.getWalkerName( (Class<Walker>)argumentSource );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -157,23 +157,27 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
final boolean TEST_ROD = false;
|
||||
List<ReferenceOrderedData> rods = new ArrayList<ReferenceOrderedData>();
|
||||
|
||||
if (TEST_ROD) {
|
||||
ReferenceOrderedData gff = new ReferenceOrderedData(new File("single.gff"), rodGFF.class);
|
||||
|
||||
|
||||
if ( TEST_ROD ) {
|
||||
ReferenceOrderedData gff = new ReferenceOrderedData(new File("trunk/data/gFFTest.gff"), rodGFF.class );
|
||||
gff.testMe();
|
||||
|
||||
//ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class );
|
||||
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt"), rodDbSNP.class);
|
||||
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt"), rodDbSNP.class );
|
||||
//dbsnp.testMe();
|
||||
rods.add(dbsnp); // { gff, dbsnp };
|
||||
} else if (DBSNP_FILE != null) {
|
||||
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File(DBSNP_FILE), rodDbSNP.class);
|
||||
//dbsnp.testMe();
|
||||
rods.add(dbsnp); // { gff, dbsnp };
|
||||
}
|
||||
|
||||
if (HAPMAP_FILE != null) {
|
||||
ReferenceOrderedData gff = new ReferenceOrderedData(new File(HAPMAP_FILE), rodGFF.class);
|
||||
rods.add(gff);
|
||||
} else {
|
||||
if ( DBSNP_FILE != null ) {
|
||||
ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File(DBSNP_FILE), rodDbSNP.class );
|
||||
//dbsnp.testMe();
|
||||
rods.add(dbsnp); // { gff, dbsnp };
|
||||
}
|
||||
if ( HAPMAP_FILE != null ) {
|
||||
ReferenceOrderedData hapmap = new ReferenceOrderedData(new File(HAPMAP_FILE), HapMapAlleleFrequenciesROD.class );
|
||||
//dbsnp.testMe();
|
||||
rods.add(hapmap); // { gff, dbsnp };
|
||||
}
|
||||
}
|
||||
|
||||
initializeOutputStreams();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,105 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
import edu.mit.broad.picard.util.SequenceUtil;
|
||||
|
||||
/**
|
||||
* ReferenceOrderedDatum class to hold HapMap AlleleFrequency Data
|
||||
*/
|
||||
public class HapMapAlleleFrequenciesROD extends ReferenceOrderedDatum {
|
||||
public GenomeLoc loc; // genome location of SNP
|
||||
// Reference sequence chromosome or scaffold
|
||||
// Start and stop positions in chrom
|
||||
|
||||
public String rsNumber; // dbsnp rsNumber for this site
|
||||
|
||||
public String hgBuild;
|
||||
|
||||
public char Strand; // strand of the supplied alleles
|
||||
|
||||
public char refAllele;
|
||||
public char varAllele;
|
||||
|
||||
public double refFreq;
|
||||
public double varFreq;
|
||||
|
||||
|
||||
public String strand; // maybe we don't need these?
|
||||
public String alleles; // maybe we don't need these?
|
||||
public Integer refCounts; // maybe we don't need these?
|
||||
public Integer varCounts; // maybe we don't need these?
|
||||
public Integer totalCounts; // maybe we don't need these?
|
||||
|
||||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
public String toString() {
|
||||
//rs11511647 HG18 chr10 62765 + T/C T C 21 97 0.178 0.822 118
|
||||
|
||||
return String.format(
|
||||
"%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%d\t%d\t%1.3f\t%1.3f\t%d",
|
||||
rsNumber, hgBuild, getContig(), getStart(), strand, alleles, refAllele, varAllele,
|
||||
refCounts, varCounts, refFreq, varFreq, totalCounts);
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
return String.format("%s:%s:%s:%1.3f", rsNumber, alleles, strand, varFreq);
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
return toString();
|
||||
}
|
||||
|
||||
public void parseLine(final String[] parts) {
|
||||
try {
|
||||
// rs11511647 <=> HG18 <=> chr10 <=> 62765 <=> + <=> T/C <=> T <=> C <=> 21 <=> 97 <=> 0.178 <=> 0.822 <=> 118
|
||||
|
||||
rsNumber = parts[0]; //rs#
|
||||
hgBuild = parts[1]; // build
|
||||
|
||||
String contig = parts[2]; // chrom
|
||||
long start = Long.parseLong(parts[3]); // The final is 1 based
|
||||
long stop = start;
|
||||
|
||||
strand = parts[4]; // strand
|
||||
alleles = parts[5]; //alleles
|
||||
refAllele = parts[6].charAt(0); // ref_allele
|
||||
varAllele = parts[7].charAt(0); // var_allele
|
||||
refCounts = Integer.parseInt(parts[8]); // CEU_ref
|
||||
varCounts = Integer.parseInt(parts[9]); // CEU_var
|
||||
refFreq = Double.parseDouble(parts[10]); // CEU_ref_freq
|
||||
varFreq = Double.parseDouble(parts[11]); // CEU_var_freq
|
||||
totalCounts = Integer.parseInt(parts[12]); // CEU_var
|
||||
|
||||
loc = new GenomeLoc(contig, start, stop);
|
||||
|
||||
} catch ( RuntimeException e ) {
|
||||
System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts));
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public double getVarAlleleFreq() { return this.varFreq; }
|
||||
|
||||
public List<String> getAllelesFWD() {
|
||||
List<String> alleleList;
|
||||
if ( onFwdStrand() )
|
||||
alleleList = Arrays.asList(alleles.split("/"));
|
||||
else
|
||||
alleleList = Arrays.asList(SequenceUtil.reverseComplement(alleles).split("/"));
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
public boolean onFwdStrand() {
|
||||
return strand.equals("+");
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -8,10 +8,7 @@ import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
|||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -36,6 +33,7 @@ public class PrepareROD extends CommandLineProgram {
|
|||
static {
|
||||
addModule("GFF", rodGFF.class);
|
||||
addModule("dbSNP", rodDbSNP.class);
|
||||
addModule("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class);
|
||||
}
|
||||
|
||||
/** Required main method implementation. */
|
||||
|
|
|
|||
Loading…
Reference in New Issue