Source code for checkQC.parsers.interop_parser


from checkQC.parsers.parser import Parser

from interop import py_interop_run_metrics, py_interop_run, py_interop_summary


class InteropParser(Parser):
    """
    This Parser will get data from the Illumina Interop binary files, and send it to its subscribers as a
    tuple with the first element being the name of the element and the second one being the actual data.

    At this point the following data is fetched from the Interop files and is sent in the following format:

        - ("error_rate", {"lane": <lane nbr>, "read": <read nbr>, "error_rate": <error rate>})
        - ("percent_q30", {"lane": <lane nbr>, "read": <read nbr>, "percent_q30": <percent q30>})
        - ("percent_phix", {"lane": <lane nbr>, "read": <read nbr>, "percent_phix": <percent phix aligned>})

    Lane and read numbers in the messages are one-based, and index reads are skipped.
    """

    def __init__(self, runfolder, parser_configurations, *args, **kwargs):
        """
        Create a InteropParser instance for the specified runfolder

        :param runfolder: to create InteropParser instance for
        :param parser_configurations: dict containing any extra configuration required by
        the parser under class name key (accepted for interface compatibility; this
        parser does not read it)
        """
        super().__init__(*args, **kwargs)
        self.runfolder = runfolder

    @staticmethod
    def get_non_index_reads(summary):
        """
        Pick-out the reads which are not index reads

        :param summary: a Interop read summary object to parse the read numbers from
        :returns: list of the zero-based positions in `summary` of all reads which are
        not index reads, in ascending order
        """
        return [read_nbr
                for read_nbr in range(summary.size())
                if not summary.at(read_nbr).read().is_index()]

    def run(self):
        """
        Read the summary metrics from the Interop files in the runfolder and send the
        error rate, percent Q30 and percent PhiX aligned to the subscribers, once per
        lane and non-index read.

        For each lane/read combination the messages are sent in the order
        "error_rate", "percent_q30", "percent_phix".
        """
        run_metrics = py_interop_run_metrics.run_metrics()
        # NOTE(review): the return value of this call is discarded. It is kept as-is
        # since it cannot be confirmed from here that it is free of side effects.
        run_metrics.run_info()

        valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
        py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
        run_metrics.read(self.runfolder, valid_to_load)

        summary = py_interop_summary.run_summary()
        py_interop_summary.summarize_run_metrics(run_metrics, summary)

        lanes = summary.lane_count()
        reads = self.get_non_index_reads(summary)
        for lane in range(lanes):
            # The interop library uses zero based indexing, however most people uses read 1/2
            # to denote the different reads, this enumeration is used to transform from
            # zero based indexing to this form. /JD 2017-10-27
            for new_read_nbr, original_read_nbr in enumerate(reads):
                read = summary.at(original_read_nbr).at(lane)
                # All values are fetched before anything is sent, so that a failure when
                # reading one of them does not leave subscribers with a partial set.
                metrics = (
                    ("error_rate", read.error_rate().mean()),
                    ("percent_q30", read.percent_gt_q30()),
                    ("percent_phix", read.percent_aligned().mean()),
                )
                for name, value in metrics:
                    self._send_to_subscribers(
                        (name, {"lane": lane + 1, "read": new_read_nbr + 1, name: value}))

    def __eq__(self, other):
        """
        Two parsers are equal when `other` is an instance of this parser's class and
        both refer to the same runfolder.
        """
        return isinstance(other, self.__class__) and self.runfolder == other.runfolder

    def __hash__(self):
        """
        Hash on the class name and the runfolder, so that equal parsers hash equally.
        """
        # A tuple is hashed instead of a concatenated string, so that the runfolder
        # does not have to be a str (e.g. a path object works as well).
        return hash((self.__class__.__name__, self.runfolder))
Source code for checkQC.parsers.interop_parser

Related Topics

Related Topics