Updates to handle Complete Genomics (CG) data, file headers, etc.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5215 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-02-08 03:16:05 +00:00
parent 8d0f1b75d5
commit 4fe0fcd707
1 changed file with 29 additions and 15 deletions

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.playground.tools;
import org.apache.log4j.BasicConfigurator; import org.apache.log4j.BasicConfigurator;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.completegenomics.CGVarCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.soapsnp.SoapSNPCodec; import org.broad.tribble.soapsnp.SoapSNPCodec;
import org.broad.tribble.gelitext.GeliTextCodec; import org.broad.tribble.gelitext.GeliTextCodec;
import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPCodec;
@ -87,7 +89,7 @@ public class SortROD {
} }
String rodType = null; String rodType = null;
String inputArg = null; String inputArg;
// our feature file // our feature file
int pos = args[1].indexOf(":"); int pos = args[1].indexOf(":");
if ( pos == -1 ) { if ( pos == -1 ) {
@ -114,13 +116,17 @@ public class SortROD {
FeatureCodec featureCodec = getFeatureCodec(featureFile,rodType); FeatureCodec featureCodec = getFeatureCodec(featureFile,rodType);
ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile); ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile);
XReadLines reader = null; AsciiLineReader reader = null;
try { try {
reader = new XReadLines(featureFile); reader = new AsciiLineReader(new FileInputStream(featureFile));
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
System.err.println("File "+featureFile.getAbsolutePath()+" doesn't exist"); System.err.println("File "+featureFile.getAbsolutePath()+" doesn't exist");
System.exit(1);
} }
// read the headers
featureCodec.readHeader(reader);
GenomeLocParser parser = new GenomeLocParser(ref.getSequenceDictionary()); GenomeLocParser parser = new GenomeLocParser(ref.getSequenceDictionary());
SortingCollection<String> sorter = SortingCollection.newInstance(String.class, SortingCollection<String> sorter = SortingCollection.newInstance(String.class,
@ -128,13 +134,18 @@ public class SortROD {
new FeatureComparator(featureCodec,parser),200000); new FeatureComparator(featureCodec,parser),200000);
int nLines = 0; int nLines = 0;
while ( reader.hasNext() ) {
String line = reader.next();
nLines++;
sorter.add(line);
}
try { try {
String currentLine = reader.readLine();
while ( currentLine != null ) {
nLines++;
// uncomment if null returns should be ignored
//if ( featureCodec.decodeLoc(currentLine) != null )
sorter.add(currentLine);
currentLine = reader.readLine();
}
for ( String s : sorter ) { for ( String s : sorter ) {
out.write(s); out.write(s);
out.write('\n'); out.write('\n');
@ -155,14 +166,14 @@ public class SortROD {
* print usage information * print usage information
*/ */
public static void printUsage() { public static void printUsage() {
System.err.println("Usage: java -jar CountRecords.jar <reference> [<rodType>:]<inputFile> <outputFile>"); System.err.println("Usage: java -jar SortROD.jar <reference> [<rodType>:]<inputFile> <outputFile>");
System.err.println(" Where input can be of type: VCF (ends in .vcf or .VCF"); System.err.println(" Where input can be of type: VCF (ends in .vcf or .VCF)");
System.err.println(" Bed (ends in .bed or .bed"); System.err.println(" Bed (ends in .bed or .bed)");
System.err.println(" DbSNP (ends in .snp or .rod"); System.err.println(" DbSNP (ends in .snp or .rod)");
System.err.println(" MAF (ends in .maf"); System.err.println(" MAF (ends in .maf)");
System.err.println(" If input file has non-standard extension, rodType can be specified"); System.err.println(" If input file has non-standard extension, rodType can be specified");
System.err.println(" (rodType always takes precedence over file extension, even if the"); System.err.println(" (rodType always takes precedence over file extension, even if the");
System.err.println(" latter is otherwise recognizable. rodType can be vcf, bed, dbsnp, or maf"); System.err.println(" latter is otherwise recognizable). rodType can be vcf, bed, dbsnp, or maf");
System.err.println(" Reference is what the input file needs to be sorted against"); System.err.println(" Reference is what the input file needs to be sorted against");
/** /**
@ -178,6 +189,7 @@ public class SortROD {
if ( rodType != null ) { if ( rodType != null ) {
if (rodType.equals("vcf") ) return new VCFCodec(); if (rodType.equals("vcf") ) return new VCFCodec();
if (rodType.equals("bed") ) return new BEDCodec(); if (rodType.equals("bed") ) return new BEDCodec();
if (rodType.equals("cgvar") || rodType.equals("CGVar") ) return new CGVarCodec();
if (rodType.equals("snp") || rodType.equals("dbsnp") ) return new DbSNPCodec(); if (rodType.equals("snp") || rodType.equals("dbsnp") ) return new DbSNPCodec();
if (rodType.equals("geli.calls") || rodType.equals("geli") ) return new GeliTextCodec(); if (rodType.equals("geli.calls") || rodType.equals("geli") ) return new GeliTextCodec();
if (rodType.equals("txt") ) return new SoapSNPCodec(); if (rodType.equals("txt") ) return new SoapSNPCodec();
@ -188,6 +200,8 @@ public class SortROD {
return new VCFCodec(); return new VCFCodec();
if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") ) if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") )
return new BEDCodec(); return new BEDCodec();
if ( featureFile.getName().endsWith(".tsv") || featureFile.getName().endsWith(".TSV") )
return new CGVarCodec();
if (featureFile.getName().endsWith(".snp") || featureFile.getName().endsWith(".rod") ) if (featureFile.getName().endsWith(".snp") || featureFile.getName().endsWith(".rod") )
return new DbSNPCodec(); return new DbSNPCodec();
if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") ) if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") )