minor improvements to snp selector
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2275 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8f461d3c40
commit
2632cb6b58
|
|
@ -67,13 +67,19 @@ class RecalibratedCall:
|
|||
def __str__(self):
|
||||
return '[%s: %s => Q%d]' % (str(self.call), self.featureStringList(), phredScale(self.jointFPErrorRate()))
|
||||
|
||||
def readVariants( file, maxRecords = None, decodeAll = True, downsampleFraction = 1 ):
|
||||
def readVariants( file, maxRecords = None, decodeAll = True, downsampleFraction = 1, filter = None ):
|
||||
if filter == None:
|
||||
filter = not OPTIONS.unfiltered
|
||||
|
||||
f = open(file)
|
||||
header, columnNames, lines = readVCFHeader(f)
|
||||
|
||||
def parseVariant(args):
|
||||
header1, VCF, counter = args
|
||||
if random.random() <= downsampleFraction:
|
||||
if filter and not VCF.passesFilters():
|
||||
#print 'filtering', VCF
|
||||
return None
|
||||
elif random.random() <= downsampleFraction:
|
||||
return VCF
|
||||
else:
|
||||
return None
|
||||
|
|
@ -479,9 +485,9 @@ def setup():
|
|||
parser.add_option("-l", "--recalLog", dest="recalLog",
|
||||
type='string', default="recal.log",
|
||||
help="VCF formated truth file. If provided, the script will compare the input calls with the truth calls. It also emits calls tagged as TP and a separate file of FP calls")
|
||||
parser.add_option("", "--unFilteredTruth", dest="unFilteredTruth",
|
||||
parser.add_option("-u", "--unfiltered", dest="unfiltered",
|
||||
action='store_true', default=False,
|
||||
help="If provided, the unfiltered truth calls will be used in comparisons [default: %default]")
|
||||
help="If provided, unfiltered calls will be used in comparisons [default: %default]")
|
||||
parser.add_option("", "--plottable", dest="plottableNones",
|
||||
action='store_true', default=False,
|
||||
help="If provided, will generate fake plottable points for annotations with None values -- doesn't effect the behavior of the system just makes it easy to plot outputs [default: %default]")
|
||||
|
|
@ -573,10 +579,7 @@ def writeRecalibratedCalls(file, header, calls):
|
|||
def readTruth(truthVCF):
|
||||
print 'Reading truth file', truthVCF
|
||||
rawTruth = list(readVariants(truthVCF, maxRecords = None, decodeAll = True)[1])
|
||||
def keepVariant(t):
|
||||
#print t.getPos(), t.getLoc()
|
||||
return OPTIONS.unFilteredTruth or t.passesFilters()
|
||||
truth = dict( [[v.getLoc(), v] for v in filter(keepVariant, rawTruth)])
|
||||
truth = dict( [[v.getLoc(), v] for v in rawTruth])
|
||||
print 'Number of raw and passing filter truth calls', len(rawTruth), len(truth)
|
||||
return truth
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ VCF_KEYS_FORMAT = "CHROM POS ID REF ALT QUAL FILTER".split()
|
|||
|
||||
TRANSITIONS = dict()
|
||||
for p in ["AG", "CT"]:
|
||||
TRANSITIONS[p] = True
|
||||
TRANSITIONS[''.join(reversed(p))] = True
|
||||
for x in [p, ''.join(reversed(p))]:
|
||||
TRANSITIONS[x.lower()] = True
|
||||
TRANSITIONS[x] = True
|
||||
|
||||
def is_nan(x):
|
||||
return type(x) is float and x != x
|
||||
|
|
@ -68,8 +69,10 @@ class VCFRecord:
|
|||
def getFilter(self): return self.get("FILTER")
|
||||
def failsFilters(self): return not self.passesFilters()
|
||||
def passesFilters(self):
|
||||
#print self.getFilter(), ">>>", self
|
||||
return self.getFilter() == "." or self.getFilter() == "0"
|
||||
v = self.getFilter() == "." or self.getFilter() == "0"
|
||||
#print self.getFilter(), ">>>", v, self
|
||||
return v
|
||||
|
||||
|
||||
def hasField(self, field):
|
||||
return field in self.bindings or field in self.info
|
||||
|
|
|
|||
Loading…
Reference in New Issue