Updates to handle CG data, headers, etc.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5215 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-02-08 03:16:05 +00:00
parent 8d0f1b75d5
commit 4fe0fcd707
1 changed files with 29 additions and 15 deletions

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.playground.tools;
import org.apache.log4j.BasicConfigurator;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.completegenomics.CGVarCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.soapsnp.SoapSNPCodec;
import org.broad.tribble.gelitext.GeliTextCodec;
import org.broad.tribble.dbsnp.DbSNPCodec;
@ -87,7 +89,7 @@ public class SortROD {
}
String rodType = null;
String inputArg = null;
String inputArg;
// our feature file
int pos = args[1].indexOf(":");
if ( pos == -1 ) {
@ -114,13 +116,17 @@ public class SortROD {
FeatureCodec featureCodec = getFeatureCodec(featureFile,rodType);
ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile);
XReadLines reader = null;
AsciiLineReader reader = null;
try {
reader = new XReadLines(featureFile);
reader = new AsciiLineReader(new FileInputStream(featureFile));
} catch (FileNotFoundException e) {
System.err.println("File "+featureFile.getAbsolutePath()+" doesn't exist");
System.exit(1);
}
// read the headers
featureCodec.readHeader(reader);
GenomeLocParser parser = new GenomeLocParser(ref.getSequenceDictionary());
SortingCollection<String> sorter = SortingCollection.newInstance(String.class,
@ -128,13 +134,18 @@ public class SortROD {
new FeatureComparator(featureCodec,parser),200000);
int nLines = 0;
while ( reader.hasNext() ) {
String line = reader.next();
nLines++;
sorter.add(line);
}
try {
String currentLine = reader.readLine();
while ( currentLine != null ) {
nLines++;
// uncomment if null returns should be ignored
//if ( featureCodec.decodeLoc(currentLine) != null )
sorter.add(currentLine);
currentLine = reader.readLine();
}
for ( String s : sorter ) {
out.write(s);
out.write('\n');
@ -155,14 +166,14 @@ public class SortROD {
* print usage information
*/
public static void printUsage() {
System.err.println("Usage: java -jar CountRecords.jar <reference> [<rodType>:]<inputFile> <outputFile>");
System.err.println(" Where input can be of type: VCF (ends in .vcf or .VCF");
System.err.println(" Bed (ends in .bed or .bed");
System.err.println(" DbSNP (ends in .snp or .rod");
System.err.println(" MAF (ends in .maf");
System.err.println("Usage: java -jar SortROD.jar <reference> [<rodType>:]<inputFile> <outputFile>");
System.err.println(" Where input can be of type: VCF (ends in .vcf or .VCF)");
System.err.println(" Bed (ends in .bed or .bed)");
System.err.println(" DbSNP (ends in .snp or .rod)");
System.err.println(" MAF (ends in .maf)");
System.err.println(" If input file has non-standard extension, rodType can be specified");
System.err.println(" (rodType always takes precedence over file extension, even if the");
System.err.println(" latter is otherwise recognizable. rodType can be vcf, bed, dbsnp, or maf");
System.err.println(" latter is otherwise recognizable). rodType can be vcf, bed, dbsnp, or maf");
System.err.println(" Reference is what the input file needs to be sorted against");
/**
@ -178,6 +189,7 @@ public class SortROD {
if ( rodType != null ) {
if (rodType.equals("vcf") ) return new VCFCodec();
if (rodType.equals("bed") ) return new BEDCodec();
if (rodType.equals("cgvar") || rodType.equals("CGVar") ) return new CGVarCodec();
if (rodType.equals("snp") || rodType.equals("dbsnp") ) return new DbSNPCodec();
if (rodType.equals("geli.calls") || rodType.equals("geli") ) return new GeliTextCodec();
if (rodType.equals("txt") ) return new SoapSNPCodec();
@ -188,6 +200,8 @@ public class SortROD {
return new VCFCodec();
if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") )
return new BEDCodec();
if ( featureFile.getName().endsWith(".tsv") || featureFile.getName().endsWith(".TSV") )
return new CGVarCodec();
if (featureFile.getName().endsWith(".snp") || featureFile.getName().endsWith(".rod") )
return new DbSNPCodec();
if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") )