# Source code for checkQC.parsers.interop_parser
from checkQC.parsers.parser import Parser
from interop import py_interop_run_metrics, py_interop_run, py_interop_summary
class InteropParser(Parser):
    """
    This Parser gets data from the Illumina Interop binary files and sends it to its subscribers as a
    tuple, with the first element being the name of the metric and the second one being the actual data.

    At this point the following data is fetched from the Interop files and sent in the following format:

        - ("error_rate", {"lane": <lane nbr>, "read": <read nbr>, "error_rate": <error rate>})
        - ("percent_q30", {"lane": <lane nbr>, "read": <read nbr>, "percent_q30": <percent q30>})
        - ("percent_phix", {"lane": <lane nbr>, "read": <read nbr>, "percent_phix": <percent phix aligned>})

    Lane and read numbers in the messages are one-based, and index reads are skipped.
    """

    def __init__(self, runfolder, parser_configurations, *args, **kwargs):
        """
        Create an InteropParser instance for the specified runfolder

        :param runfolder: to create InteropParser instance for
        :param parser_configurations: dict containing any extra configuration required by
                                      the parser under class name key (accepted for interface
                                      compatibility; not read by this constructor)
        """
        super().__init__(*args, **kwargs)
        self.runfolder = runfolder

    @staticmethod
    def get_non_index_reads(summary):
        """
        Pick out the reads which are not index reads

        :param summary: an Interop read summary object to parse the read numbers from
        :returns: list of the zero-based positions in `summary` of all reads which are not index reads
        """
        return [
            read_nbr
            for read_nbr in range(summary.size())
            if not summary.at(read_nbr).read().is_index()
        ]

    def run(self):
        """
        Read the Interop metrics found in `self.runfolder`, summarize them, and send the
        "error_rate", "percent_q30" and "percent_phix" values for every lane and every
        non-index read to the subscribers.

        :returns: None
        """
        run_metrics = py_interop_run_metrics.run_metrics()
        # NOTE(review): the return value is discarded here; the call is kept to preserve the
        # original call sequence against the interop bindings -- confirm whether it is needed.
        run_metrics.run_info()

        # Only load the metric files which are needed to build the run summary.
        valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
        py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
        run_metrics.read(self.runfolder, valid_to_load)

        summary = py_interop_summary.run_summary()
        py_interop_summary.summarize_run_metrics(run_metrics, summary)

        lanes = summary.lane_count()
        reads = self.get_non_index_reads(summary)

        for lane in range(lanes):
            lane_nbr = lane + 1
            # The interop library uses zero-based indexing, however most people use read 1/2
            # to denote the different reads. This enumeration is used to transform from
            # zero-based indexing to that form, renumbering the non-index reads
            # consecutively. /JD 2017-10-27
            for new_read_nbr, original_read_nbr in enumerate(reads):
                read_nbr = new_read_nbr + 1
                read = summary.at(original_read_nbr).at(lane)
                error_rate = read.error_rate().mean()
                percent_q30 = read.percent_gt_q30()
                percent_phix_aligned = read.percent_aligned().mean()

                self._send_to_subscribers(
                    ("error_rate",
                     {"lane": lane_nbr, "read": read_nbr, "error_rate": error_rate}))
                self._send_to_subscribers(
                    ("percent_q30",
                     {"lane": lane_nbr, "read": read_nbr, "percent_q30": percent_q30}))
                self._send_to_subscribers(
                    ("percent_phix",
                     {"lane": lane_nbr, "read": read_nbr, "percent_phix": percent_phix_aligned}))

    def __eq__(self, other):
        """
        Two parsers are equal when `other` is an instance of this parser's class and both
        refer to the same runfolder.
        """
        return isinstance(other, self.__class__) and self.runfolder == other.runfolder

    def __hash__(self):
        """
        Hash on the class name and the runfolder, so that parsers which compare equal
        through the same class hash equally.
        """
        # A tuple is hashed instead of concatenating strings so that a runfolder which is
        # not a plain `str` (e.g. a `pathlib.Path`) does not raise a TypeError.
        return hash((self.__class__.__name__, self.runfolder))