Source code for checkQC.handlers.reads_per_sample_handler

from checkQC.handlers.qc_handler import QCHandler, QCErrorFatal, QCErrorWarning
from checkQC.parsers.stats_json_parser import StatsJsonParser
from math import pow
from collections import defaultdict


class ReadsPerSampleHandler(QCHandler):
    """
    Check that the number of reads assigned to each sample is high enough.

    The value specified in the configuration is interpreted as the number of
    reads demanded for a single sample, i.e. the number of reads per sample on
    a lane which has multiple samples is the threshold divided by the total
    number of samples on the lane.

    Read counts are converted to millions of reads before they are compared
    with the thresholds, so the configured values are compared on that scale.
    """

    # NumberReads is a raw count; comparisons and reports use millions.
    _READS_PER_MILLION = 1e6

    def __init__(self, *args, **kwargs):
        """Forward all arguments to QCHandler and start with no collected data."""
        super().__init__(*args, **kwargs)
        # Stays None until collect() receives a "ConversionResults" signal.
        self.conversion_results = None

    def parser(self):
        """
        The ReadsPerSampleHandler fetches its information from the Stats.json
        file.

        :returns: A StatsJsonParser callable
        """
        return StatsJsonParser

    def collect(self, signal):
        """
        Store the payload of the "ConversionResults" signal; ignore all others.

        :param signal: a (key, value) pair
        """
        key, value = signal
        if key == "ConversionResults":
            self.conversion_results = value

    def check_qc(self):
        """
        Compare the reads of every sample on every lane with the thresholds.

        Reads are summed per "SampleName" within a lane, so a sample that
        occurs in several demux entries is judged on its lane total. A sample
        below the error threshold is reported as fatal; otherwise a sample
        below the warning threshold is reported as a warning. A threshold
        equal to ``self.UNKNOWN`` disables the corresponding check.

        NOTE: ``self.conversion_results`` is iterated as-is, so calling this
        before any "ConversionResults" signal was collected raises TypeError.

        :returns: a generator of QCErrorFatal and QCErrorWarning objects
        """
        for lane_dict in self.conversion_results:
            lane_nbr = int(lane_dict["LaneNumber"])

            total_reads = defaultdict(float)
            for sample_id_info in lane_dict["DemuxResults"]:
                total_reads[sample_id_info["SampleName"]] += \
                    sample_id_info["NumberReads"] / self._READS_PER_MILLION

            if not total_reads:
                # Nothing to report for a lane without samples; returning
                # early here also keeps the division below away from zero.
                continue

            # The thresholds depend only on the lane, not on the sample, so
            # they are computed once per lane instead of once per sample.
            nbr_of_samples = len(total_reads)
            error_threshold = self._per_sample_threshold(self.error(), nbr_of_samples)
            warning_threshold = self._per_sample_threshold(self.warning(), nbr_of_samples)

            for sample, sample_total_reads in total_reads.items():
                if error_threshold is not None and sample_total_reads < error_threshold:
                    yield self._too_few_reads(QCErrorFatal, lane_nbr, nbr_of_samples,
                                              sample, sample_total_reads, error_threshold)
                elif warning_threshold is not None and sample_total_reads < warning_threshold:
                    yield self._too_few_reads(QCErrorWarning, lane_nbr, nbr_of_samples,
                                              sample, sample_total_reads, warning_threshold)

    def _per_sample_threshold(self, configured, nbr_of_samples):
        """Return the configured limit split over the lane's samples, or None if unset."""
        if configured != self.UNKNOWN:
            return float(configured) / float(nbr_of_samples)
        return None

    @staticmethod
    def _too_few_reads(error_cls, lane_nbr, nbr_of_samples, sample, sample_reads, threshold):
        """Build the fatal/warning report object for a sample with too few reads."""
        return error_cls("Number of reads for sample {} was too low on lane {}, "
                         "it was: {:.3f} M".format(sample, lane_nbr, sample_reads),
                         ordering=lane_nbr,
                         data={"lane": lane_nbr,
                               "number_of_samples": nbr_of_samples,
                               "sample_name": sample,
                               "sample_reads": sample_reads,
                               "threshold": threshold})