# Source code for checkQC.run_type_recognizer
import logging
from checkQC.exceptions import *
from checkQC.runfolder_reader import RunfolderReader
log = logging.getLogger(__name__)
class IlluminaInstrument(object):
    """
    Base class representing an Illumina instrument.

    Both `name` and `reagent_version` need to be implemented by the specific
    subclasses; the versions defined here only raise NotImplementedError.
    """

    @staticmethod
    def name():
        """
        Name of the instrument, e.g. 'novaseq'

        :returns: name of instrument as string
        :raises: NotImplementedError unless overridden by a subclass
        """
        raise NotImplementedError("Subclasses of IlluminaInstrument must implement name()")

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        Reagent version, e.g. `v1`

        Implementations can use the provided runtype_recognizer to determine
        the exact reagent version.

        :param runtype_recognizer: An instance of RunTypeRecognizer
        :returns: reagent version as a string
        :raises: NotImplementedError unless overridden by a subclass
        """
        raise NotImplementedError("Subclasses of IlluminaInstrument must implement reagent_version()")
class ISeq(IlluminaInstrument):
    """
    Instrument description for the iSeq. Both values are fixed and do not
    depend on the contents of the runfolder.
    """

    @staticmethod
    def name():
        """
        :returns: the instrument name, always 'iseq'
        """
        return "iseq"

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        :param runtype_recognizer: accepted for interface compatibility, not used
        :returns: the reagent version, always 'v1'
        """
        return "v1"
class NovaSeq(IlluminaInstrument):
    """
    Instrument description for the NovaSeq. The reagent version is the
    flowcell mode recorded in the run parameters.
    """

    @staticmethod
    def name():
        """
        :returns: the instrument name, always 'novaseq'
        """
        return "novaseq"

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        Look up the flowcell mode under RunParameters/RfidsInfo/FlowCellMode.

        :param runtype_recognizer: An instance of RunTypeRecognizer, whose
                                   `run_parameters` are consulted
        :returns: the flowcell mode as found in the run parameters
        :raises: ReagentVersionUnknown if the flowcell mode cannot be looked up
        """
        try:
            return runtype_recognizer.run_parameters["RunParameters"]["RfidsInfo"]["FlowCellMode"]
        except (KeyError, TypeError):
            # KeyError: one of the expected keys is missing.
            # TypeError: an intermediate node is not a mapping (e.g. None),
            # which should be reported the same way rather than crash.
            raise ReagentVersionUnknown("Could not identify flowcell mode for Novaseq")
class NovaSeqXPlus(IlluminaInstrument):
    """
    Instrument description for the NovaSeq X Plus. The reagent version is the
    mode of the flowcell listed among the run's consumables.
    """

    @staticmethod
    def name():
        """
        :returns: the instrument name, always 'novaseqxplus'
        """
        return "novaseqxplus"

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        Find the first consumable of type 'FlowCell' under
        RunParameters/ConsumableInfo/ConsumableInfo and return its 'Mode'.

        :param runtype_recognizer: An instance of RunTypeRecognizer, whose
                                   `run_parameters` are consulted
        :returns: the flowcell mode as found in the run parameters
        :raises: ReagentVersionUnknown if no flowcell consumable with a mode
                 can be found
        """
        try:
            run_parameters = runtype_recognizer.run_parameters['RunParameters']
            consumables = run_parameters["ConsumableInfo"]["ConsumableInfo"]
            # A lone consumable may arrive as a single mapping instead of a
            # list of mappings (presumably how the XML reader represents a
            # single child element -- verify against RunfolderReader).
            # Iterating a mapping would yield its keys, so wrap it.
            if isinstance(consumables, dict):
                consumables = [consumables]
            flowcell = next(
                consumable for consumable in consumables
                if consumable['Type'] == 'FlowCell'
            )
            return flowcell['Mode']
        except (KeyError, TypeError, StopIteration):
            # KeyError: an expected key is missing; TypeError: a node has an
            # unexpected shape (e.g. None); StopIteration: no flowcell listed.
            raise ReagentVersionUnknown("Could not identify flowcell mode for NovaSeqXPlus")
class HiSeqX(IlluminaInstrument):
    """
    Instrument description for the HiSeq X. Both values are fixed and do not
    depend on the contents of the runfolder.
    """

    @staticmethod
    def name():
        """
        :returns: the instrument name, always 'hiseqx'
        """
        return "hiseqx"

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        :param runtype_recognizer: accepted for interface compatibility, not used
        :returns: the reagent version, always 'v2'
        """
        return "v2"
class MiSeq(IlluminaInstrument):
    """
    Instrument description for the MiSeq. The reagent version combines the
    flowcell type (derived from the number of tiles per swath) with the
    reagent kit version recorded in the run parameters.
    """

    @staticmethod
    def name():
        """
        :returns: the instrument name, always 'miseq'
        """
        return "miseq"

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        Find the reagent kit version (and flowcell mode if applicable) for this run

        :param runtype_recognizer: An instance of RunTypeRecognizer, whose
                                   `run_parameters` are consulted
        :returns: reagent version in format v[reagent kit version] or
                  [flowcell mode]_v[reagent kit version], e.g. v3 or nano_v2
        :raises: ReagentVersionUnknown if the flowcell type or the reagent kit
                 version cannot be determined
        """

        def _reagent_kit_version(runtype_recognizer):
            # Turns e.g. "Version3" into "v3".
            try:
                kit_version = runtype_recognizer.run_parameters["RunParameters"]["ReagentKitVersion"]
                return kit_version.replace("Version", "v")
            except (KeyError, TypeError, AttributeError):
                # TypeError/AttributeError cover a node or value that is
                # present but not of the expected type (e.g. None).
                raise ReagentVersionUnknown("No reagent version specified for this instrument type")

        def _flowcell_type(runtype_recognizer):
            unknown_message = "Unable to identify flowcell type through number of tiles per swath"
            try:
                tiles_per_swath = int(
                    runtype_recognizer.run_parameters["RunParameters"]["Setup"]["NumTilesPerSwath"])
            except (KeyError, TypeError, ValueError):
                # ValueError/TypeError: the value is present but is not a
                # number; report it like a missing value instead of crashing.
                raise ReagentVersionUnknown(unknown_message)

            if tiles_per_swath == 2:
                return "nano"
            if tiles_per_swath == 4:
                return "micro"
            if tiles_per_swath >= 14:
                return "standard"
            # Any other tile count does not map to a known flowcell type.
            raise ReagentVersionUnknown(unknown_message)

        # The flowcell type is resolved first, so when both pieces of
        # information are missing it is the flowcell error that is raised.
        flowcell_version = _flowcell_type(runtype_recognizer)
        reagent_version = _reagent_kit_version(runtype_recognizer)

        # Standard flowcells are identified by the kit version alone.
        if flowcell_version == "standard":
            return reagent_version
        return "_".join([flowcell_version, reagent_version])
class HiSeq2500(IlluminaInstrument):
    """
    Instrument description for the HiSeq 2500. The reagent version combines
    the run mode with the SBS kit version, both read from the `Setup` section
    of the run parameters.
    """

    @staticmethod
    def name():
        """
        :returns: the instrument name, always 'hiseq2500'
        """
        return "hiseq2500"

    @staticmethod
    def reagent_version(runtype_recognizer):
        """
        Find run mode (rapid or not) and reagent version used for this run

        :param runtype_recognizer: An instance of RunTypeRecognizer, whose
                                   `run_parameters` are consulted
        :returns: lower-cased run mode (as specified in the run parameters)
                  and reagent version joined as one string,
                  e.g. rapidhighoutput_v4 or rapidrun_v2
        :raises: RunModeUnknown if no run mode is specified
        :raises: ReagentVersionUnknown if no reagent version is specified
        """
        try:
            run_mode = runtype_recognizer.run_parameters["RunParameters"]["Setup"]["RunMode"].lower()
        except (KeyError, TypeError, AttributeError):
            # TypeError/AttributeError: a node or the value itself is present
            # but not of the expected type (e.g. None).
            raise RunModeUnknown("No run mode specified for this instrument type")

        no_version_message = "No reagent version specified for this instrument type and run mode"
        try:
            sbs_kit = runtype_recognizer.run_parameters["RunParameters"]["Setup"]["Sbs"]
            # Splitting on any whitespace keeps leading/trailing blanks from
            # producing an empty last element.
            kit_words = sbs_kit.split()
        except (KeyError, TypeError, AttributeError):
            raise ReagentVersionUnknown(no_version_message)

        if not kit_words:
            # A blank value carries no version information.
            raise ReagentVersionUnknown(no_version_message)

        # Select last element from string "HiSeq SBS Kit v4"
        kit_version = kit_words[-1].lower()
        return "{}_{}".format(run_mode, kit_version)
class RunTypeRecognizer(object):
    """
    RunTypeRecognizer will read files in the runfolder to determine information about the run,
    such as the instrument type, the read length, etc.
    The runfolder needs to have a 'RunInfo.xml' and a '[R|r]unParameters.xml' file.
    """

    def __init__(self, runfolder, runfolder_reader=None):
        """
        Create a RunTypeRecognizer instance

        :param runfolder: to gather data about
        :param runfolder_reader: reader for runfolders; a new RunfolderReader is
                                 created when omitted. Here to make testing easier.
        """
        # Create the default reader per instance instead of in the signature,
        # where it would be built once at definition time and shared by all
        # instances.
        if runfolder_reader is None:
            runfolder_reader = RunfolderReader()

        self._runfolder = runfolder
        self.run_info = runfolder_reader.read_run_info_xml(runfolder)
        self.run_parameters = runfolder_reader.read_run_parameters_xml(runfolder)

    def instrument_type(self):
        """
        This will look in the RunInfo.xml and determine the run type, based on the
        mappings from instrument names to instrument types

        :raises: InstrumentTypeUnknown
        :returns: the instrument type of the runfolder
        """
        instrument_name = self.run_info["RunInfo"]["Run"]["Instrument"]

        # Instrument name prefix -> instrument class.
        # NOTE(review): matching is by prefix only, so e.g. every name starting
        # with "M" is treated as a MiSeq -- confirm this is intended.
        machine_type_mappings = {
            "M": MiSeq,
            "D": HiSeq2500,
            "ST": HiSeqX,
            "A": NovaSeq,
            "FS": ISeq,
            "LH": NovaSeqXPlus,
        }

        for instrument_code, instrument_class in machine_type_mappings.items():
            if instrument_name.startswith(instrument_code):
                return instrument_class()

        raise InstrumentTypeUnknown("Did not recognize instrument type of: {}".format(instrument_name))

    def instrument_and_reagent_version(self):
        """
        Get the instrument and reagent version associated with this runfolder.

        :returns: the joined instrument and reagent version, e.g. 'hiseq2500_rapidrun_v2'
        """
        instrument_type = self.instrument_type()
        return "_".join([instrument_type.name(), instrument_type.reagent_version(self)])

    def read_length(self):
        """
        Gather information on the read length of the run. Index reads
        (those with '@IsIndexedRead' set to 'Y') are left out.

        :raises: RunModeUnknown if there is no non-index read
        :returns: The read length. If multiple reads delimited by "-", e.g. 150-150.
        """
        reads = self.run_info["RunInfo"]["Run"]["Reads"]["Read"]

        # A run with a single read may be represented as one mapping rather
        # than a list of mappings (presumably how the XML reader represents a
        # single child element -- verify against RunfolderReader). Iterating
        # a mapping would yield its keys, so wrap it.
        if isinstance(reads, dict):
            reads = [reads]

        read_lengths = [
            int(read['@NumCycles'])
            for read in reads
            if read['@IsIndexedRead'] != 'Y'
        ]

        if not read_lengths:
            raise RunModeUnknown("Found no NumCycles in RunInfo.xml, could not determine read length")

        return "-".join(map(str, read_lengths))