Source code for checkQC.parsers.demux_summary_parser
from pathlib import Path
from checkQC.runfolder_reader import RunfolderReader
from checkQC.parsers.parser import Parser
from checkQC.exceptions import ConfigurationError, DemuxSummaryNotFound
[docs]class DemuxSummaryParser(Parser):
"""
The DemuxSummaryParser will read information from the DemuxSummaryF1L<Lane number>.txt files.
At the moment it fetches the information about 'Most Popular Unknown Index Sequences'.
It will send the information lane wise as a tuple on the format:
("index_counts", {"lane": <lane nbr>, "indices": [<{"index": <index string>, "count": <nbr>}>]}
"""
def __init__(self, runfolder, parser_configurations, *args, **kwargs):
"""
Create a DemuxSummaryParser instance for the specified runfolder
:param runfolder: path to the runfolder to parse
:param parser_configurations: dict containing any extra configuration required by
the parser under class name key
"""
super().__init__(*args, **kwargs)
self.runfolder = runfolder
# NOTE: This parser will use the same config entry a the StatsJsonParser in order
# to not break backward compatibility. And it feel unnecessary to add this
# value to the config twice. /JD 2018-09-14
self.parser_conf = parser_configurations.get("StatsJsonParser")
if not self.parser_conf:
raise ConfigurationError("The configuration must contain parser_configurations "
"key with subkey StatsJsonParser. E.g: \n"
"parser_configurations:\n"
"\tStatsJsonParser:\n"
"\t\tbcl2fastq_output_path: Data/Intensities/BaseCalls")
self._bcl2fastq_output_path = self.parser_conf.get("bcl2fastq_output_path")
if not self._bcl2fastq_output_path:
raise ConfigurationError("The configuration must contain the key bcl2fastq_output_path, specifying "
"where the bcl2fastq output is, relative to the runfolder root.")
self._nbr_of_lanes = RunfolderReader.get_nbr_of_lanes(self.runfolder)
self._validate_demux_summary_files_exist(self.runfolder, self._bcl2fastq_output_path)
def _validate_demux_summary_files_exist(self, runfolder, bcl2fastq_output_path):
for i in range(1, self._nbr_of_lanes):
path = Path(runfolder, bcl2fastq_output_path, 'Stats', 'DemuxSummaryF1L{}.txt'.format(i))
if not path.exists():
raise DemuxSummaryNotFound("Could not identify expected demux summary file: {}. "
"We expect to find {} files matching the pattern, "
"'DemuxSummaryF1L<Lane number>.txt'".format(path))
@staticmethod
def _read_most_popular_unknown_indexes(demux_summary_file):
with open(demux_summary_file, 'r') as f:
reached_data = False
for line in f:
if reached_data:
split_data = line.split('\t')
yield {'index': split_data[0].strip(), 'count': int(split_data[1].strip())}
if line.startswith("### Columns: Index_Sequence Hit_Count"):
reached_data = True
[docs] def run(self):
for i in range(1, self._nbr_of_lanes+1):
path = Path(self.runfolder, self._bcl2fastq_output_path, 'Stats', 'DemuxSummaryF1L{}.txt'.format(i))
self._send_to_subscribers(("index_counts",
{"lane": i, "indices": list(self._read_most_popular_unknown_indexes(path))}))
def __eq__(self, other):
if isinstance(other, self.__class__) and self.runfolder == other.runfolder:
return True
else:
return False
def __hash__(self):
return hash(self.__class__.__name__ + self.runfolder)