Update SortROD to handle Complete Genomics (CG) data, read file headers before sorting, etc.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5215 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8d0f1b75d5
commit
4fe0fcd707
|
|
@ -28,6 +28,8 @@ package org.broadinstitute.sting.playground.tools;
|
||||||
import org.apache.log4j.BasicConfigurator;
|
import org.apache.log4j.BasicConfigurator;
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broad.tribble.completegenomics.CGVarCodec;
|
||||||
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.soapsnp.SoapSNPCodec;
|
import org.broad.tribble.soapsnp.SoapSNPCodec;
|
||||||
import org.broad.tribble.gelitext.GeliTextCodec;
|
import org.broad.tribble.gelitext.GeliTextCodec;
|
||||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||||
|
|
@ -87,7 +89,7 @@ public class SortROD {
|
||||||
}
|
}
|
||||||
|
|
||||||
String rodType = null;
|
String rodType = null;
|
||||||
String inputArg = null;
|
String inputArg;
|
||||||
// our feature file
|
// our feature file
|
||||||
int pos = args[1].indexOf(":");
|
int pos = args[1].indexOf(":");
|
||||||
if ( pos == -1 ) {
|
if ( pos == -1 ) {
|
||||||
|
|
@ -114,13 +116,17 @@ public class SortROD {
|
||||||
FeatureCodec featureCodec = getFeatureCodec(featureFile,rodType);
|
FeatureCodec featureCodec = getFeatureCodec(featureFile,rodType);
|
||||||
ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile);
|
ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile);
|
||||||
|
|
||||||
XReadLines reader = null;
|
AsciiLineReader reader = null;
|
||||||
try {
|
try {
|
||||||
reader = new XReadLines(featureFile);
|
reader = new AsciiLineReader(new FileInputStream(featureFile));
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
System.err.println("File "+featureFile.getAbsolutePath()+" doesn't exist");
|
System.err.println("File "+featureFile.getAbsolutePath()+" doesn't exist");
|
||||||
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// read the headers
|
||||||
|
featureCodec.readHeader(reader);
|
||||||
|
|
||||||
GenomeLocParser parser = new GenomeLocParser(ref.getSequenceDictionary());
|
GenomeLocParser parser = new GenomeLocParser(ref.getSequenceDictionary());
|
||||||
|
|
||||||
SortingCollection<String> sorter = SortingCollection.newInstance(String.class,
|
SortingCollection<String> sorter = SortingCollection.newInstance(String.class,
|
||||||
|
|
@ -128,13 +134,18 @@ public class SortROD {
|
||||||
new FeatureComparator(featureCodec,parser),200000);
|
new FeatureComparator(featureCodec,parser),200000);
|
||||||
|
|
||||||
int nLines = 0;
|
int nLines = 0;
|
||||||
while ( reader.hasNext() ) {
|
try {
|
||||||
String line = reader.next();
|
String currentLine = reader.readLine();
|
||||||
|
while ( currentLine != null ) {
|
||||||
nLines++;
|
nLines++;
|
||||||
sorter.add(line);
|
|
||||||
|
// uncomment if null returns should be ignored
|
||||||
|
//if ( featureCodec.decodeLoc(currentLine) != null )
|
||||||
|
sorter.add(currentLine);
|
||||||
|
|
||||||
|
currentLine = reader.readLine();
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
for ( String s : sorter ) {
|
for ( String s : sorter ) {
|
||||||
out.write(s);
|
out.write(s);
|
||||||
out.write('\n');
|
out.write('\n');
|
||||||
|
|
@ -155,14 +166,14 @@ public class SortROD {
|
||||||
* print usage information
|
* print usage information
|
||||||
*/
|
*/
|
||||||
public static void printUsage() {
|
public static void printUsage() {
|
||||||
System.err.println("Usage: java -jar CountRecords.jar <reference> [<rodType>:]<inputFile> <outputFile>");
|
System.err.println("Usage: java -jar SortROD.jar <reference> [<rodType>:]<inputFile> <outputFile>");
|
||||||
System.err.println(" Where input can be of type: VCF (ends in .vcf or .VCF");
|
System.err.println(" Where input can be of type: VCF (ends in .vcf or .VCF)");
|
||||||
System.err.println(" Bed (ends in .bed or .bed");
|
System.err.println(" Bed (ends in .bed or .bed)");
|
||||||
System.err.println(" DbSNP (ends in .snp or .rod");
|
System.err.println(" DbSNP (ends in .snp or .rod)");
|
||||||
System.err.println(" MAF (ends in .maf");
|
System.err.println(" MAF (ends in .maf)");
|
||||||
System.err.println(" If input file has non-standard extension, rodType can be specified");
|
System.err.println(" If input file has non-standard extension, rodType can be specified");
|
||||||
System.err.println(" (rodType always takes precedence over file extension, even if the");
|
System.err.println(" (rodType always takes precedence over file extension, even if the");
|
||||||
System.err.println(" latter is otherwise recognizable. rodType can be vcf, bed, dbsnp, or maf");
|
System.err.println(" latter is otherwise recognizable). rodType can be vcf, bed, dbsnp, or maf");
|
||||||
System.err.println(" Reference is what the input file needs to be sorted against");
|
System.err.println(" Reference is what the input file needs to be sorted against");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -178,6 +189,7 @@ public class SortROD {
|
||||||
if ( rodType != null ) {
|
if ( rodType != null ) {
|
||||||
if (rodType.equals("vcf") ) return new VCFCodec();
|
if (rodType.equals("vcf") ) return new VCFCodec();
|
||||||
if (rodType.equals("bed") ) return new BEDCodec();
|
if (rodType.equals("bed") ) return new BEDCodec();
|
||||||
|
if (rodType.equals("cgvar") || rodType.equals("CGVar") ) return new CGVarCodec();
|
||||||
if (rodType.equals("snp") || rodType.equals("dbsnp") ) return new DbSNPCodec();
|
if (rodType.equals("snp") || rodType.equals("dbsnp") ) return new DbSNPCodec();
|
||||||
if (rodType.equals("geli.calls") || rodType.equals("geli") ) return new GeliTextCodec();
|
if (rodType.equals("geli.calls") || rodType.equals("geli") ) return new GeliTextCodec();
|
||||||
if (rodType.equals("txt") ) return new SoapSNPCodec();
|
if (rodType.equals("txt") ) return new SoapSNPCodec();
|
||||||
|
|
@ -188,6 +200,8 @@ public class SortROD {
|
||||||
return new VCFCodec();
|
return new VCFCodec();
|
||||||
if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") )
|
if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") )
|
||||||
return new BEDCodec();
|
return new BEDCodec();
|
||||||
|
if ( featureFile.getName().endsWith(".tsv") || featureFile.getName().endsWith(".TSV") )
|
||||||
|
return new CGVarCodec();
|
||||||
if (featureFile.getName().endsWith(".snp") || featureFile.getName().endsWith(".rod") )
|
if (featureFile.getName().endsWith(".snp") || featureFile.getName().endsWith(".rod") )
|
||||||
return new DbSNPCodec();
|
return new DbSNPCodec();
|
||||||
if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") )
|
if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") )
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue