Source code for checkQC.parsers.stats_json_parser
import json
import os
import logging
from checkQC.parsers.parser import Parser
from checkQC.exceptions import StatsJsonNotFound, ConfigurationError
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
class StatsJsonParser(Parser):
    """
    The StatsJsonParser reads the values from the Illumina Stats.json file (which is created by
    bcl2fastq) and sends each top-level key value pair as a tuple to the subscribers, e.g.:

        ('Flowcell', 'CB1TVANXX')
        ('RunNumber', 303)
        ('RunId', '170726_D00118_0303_BCB1TVANXX')

    The subscribers decide which of these values they are interested in.
    """

    def __init__(self, runfolder, parser_configurations, *args, **kwargs):
        """
        Create a StatsJsonParser instance for the specified runfolder

        :param runfolder: path to the runfolder to parse
        :param parser_configurations: dict containing any extra configuration required by
                                      the parser under class name key
        :raises ConfigurationError: if there is no (non-empty) configuration section for this
                                    parser, or if it lacks the key bcl2fastq_output_path
        :raises StatsJsonNotFound: if no Stats.json exists at the configured location
        """
        super().__init__(*args, **kwargs)

        # Tolerate a missing (None) configuration so that the caller gets the descriptive
        # ConfigurationError below instead of an AttributeError.
        self.parser_conf = (parser_configurations or {}).get(self.__class__.__name__)
        if not self.parser_conf:
            raise ConfigurationError("The configuration must contain parser_configurations "
                                     "key with subkey StatsJsonParser. E.g: \n"
                                     "parser_configurations:\n"
                                     "\tStatsJsonParser:\n"
                                     "\t\tbcl2fastq_output_path: Data/Intensities/BaseCalls")

        bcl2fastq_output_path = self.parser_conf.get("bcl2fastq_output_path")
        if not bcl2fastq_output_path:
            raise ConfigurationError("The configuration must contain the key bcl2fastq_output_path, specifying "
                                     "where the bcl2fastq output is, relative to the runfolder root.")

        self.file_path = os.path.join(runfolder, bcl2fastq_output_path, "Stats", "Stats.json")

        # Fail early, at construction time, rather than when run() tries to open the file.
        if not os.path.exists(self.file_path):
            log.error("Could not identify a Stats.json file at: %s. This file is "
                      "created by bcl2fastq, please ensure that you have run "
                      "bcl2fastq on this runfolder before running checkqc. "
                      "If this file is not located under <RUNFOLDER>/Data/Intensities/BaseCalls/Stats/Stats.json "
                      "which is the default option for bcl2fastq, you can specify where the 'Stats' directory is "
                      "located by changing the 'bcl2fastq_output_path' in the 'StatsJsonParser' part of the "
                      "checkqc configuration file.", self.file_path)
            raise StatsJsonNotFound("Could not find a Stats.json file at: {}".format(self.file_path))

    def run(self):
        """
        Load the Stats.json file and pass each top-level (key, value) tuple to
        `_send_to_subscribers`, in the order the keys appear in the file.
        """
        # JSON text is UTF-8 encoded, so do not depend on the platform's default encoding.
        with open(self.file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # The top level of the file is expected to be a JSON object (loaded as a dict).
        for key_value in data.items():
            self._send_to_subscribers(key_value)

    def __eq__(self, other):
        """
        Two parsers are equal when `other` is an instance of this parser's class and
        both point at the same Stats.json path.
        """
        return isinstance(other, self.__class__) and self.file_path == other.file_path

    def __hash__(self):
        """Hash on the class name combined with the Stats.json path."""
        return hash(self.__class__.__name__ + self.file_path)