Source code for bluepymm.prepare_combos.parse_files

"""Create sqlite database"""


from __future__ import print_function

import itertools
"""
Copyright (c) 2018, EPFL/Blue Brain Project
 This file is part of BluePyMM <https://github.com/BlueBrain/BluePyMM>
 This library is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License version 3.0 as published
 by the Free Software Foundation.
 This library is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 details.
 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""

"""Some code based on BrainBuilder and morph repair code"""

# pylint: disable=R0912

import pandas
import re
import os

import lxml
import lxml.etree

from bluepymm import tools


def _parse_xml_tree(filename):
    """Read xml tree from file.
    Args:
        filename(str): filename of recipe (XML)
    Returns:
        xml.etree.ElementTree
    """
    parser = lxml.etree.XMLParser(resolve_entities=False)
    return lxml.etree.parse(filename, parser=parser)


[docs] def verify_no_zero_percentage(tree_element_list): """Verify that none of the elements of a given list have a zero value for the field 'percentage'. Args: tree_element_list(list of xml.etree.ElementTree): list of tree elements with 'percentage' field Returns: True if no percentage of zero is found. Raises: ValueError: if a percentage of zero is found. """ for element in tree_element_list: if float(element.attrib['percentage']) == 0.0: raise ValueError('Found a percentage of 0.0 in recipe, script' ' cannot handle this case: tag' ' {}'.format(element.tag)) return True
[docs] def read_recipe_records(recipe_tree): """Parse recipe tree and yield (layer, m-type, e-type)-tuples. Args: recipe_tree: xml.etree.ElementTree.ElementTree or xml.etree.ElementTree.Element Yields: (layer, m-type, e-type)-tuples """ for layer in list(recipe_tree.findall('NeuronTypes')[0]): for mtype in list(layer): if mtype.tag == "StructuralType": for etype in list(mtype): if etype.tag == "ElectroType": verify_no_zero_percentage([layer, mtype, etype]) yield (layer.attrib['id'], mtype.attrib['id'], etype.attrib['id'])
[docs] def read_mm_recipe(recipe_filename): """Read a BBP builder recipe and return a pandas.DataFrame with all possible (layer, m-type, e-type)-combinations. Args: recipe_filename(str): filename of recipe (XML/YAML) Returns: A pandas.DataFrame with fields "layer", "fullmtype", and "etype". """ if os.path.splitext(recipe_filename)[1] == '.xml': return read_mm_recipe_xml(recipe_filename) elif os.path.splitext(recipe_filename)[1] == '.yaml': return read_mm_recipe_yaml(recipe_filename) else: raise Exception('Please provide an .xml or .yaml as recipe file')
[docs] def read_mm_recipe_yaml(recipe_filename): """Read a BBP builder recipe and return a pandas.DataFrame with all possible (layer, m-type, e-type)-combinations. Args: recipe_filename(str): filename of recipe (YAML) Returns: A pandas.DataFrame with fields "layer", "fullmtype", and "etype". """ import yaml with open(recipe_filename, 'r') as f: recipe = yaml.safe_load(f) if recipe['version'] not in ('v2.0',): raise Exception('Only v2.0 of recipe yaml files are supported') mecombos = pandas.DataFrame(columns=["layer", "fullmtype", "etype"]) for region in recipe['neurons']: for etype in region['traits']['etype'].keys(): end = len(mecombos) mecombos.loc[end, 'layer'] = str(region['traits']['layer']) mecombos.loc[end, 'fullmtype'] = str(region['traits']['mtype']) mecombos.loc[end, 'etype'] = str(etype) return mecombos
[docs] def read_mm_recipe_xml(recipe_filename): """Read a BBP builder recipe and return a pandas.DataFrame with all possible (layer, m-type, e-type)-combinations. Args: recipe_filename(str): filename of recipe (XML) Returns: A pandas.DataFrame with fields "layer", "fullmtype", and "etype". """ recipe_tree = _parse_xml_tree(recipe_filename) return pandas.DataFrame(read_recipe_records(recipe_tree), columns=["layer", "fullmtype", "etype"])
[docs] def read_morph_records(morph_tree): """Parse morphology tree and yield (name, fullmtype, mtype, msubtype, layer)-tuples. Args: morph_tree: xml.etree.ElementTree.ElementTree or xml.etree.ElementTree.Element Yields: (name, fullmtype, mtype, msubtype, layer)-tuples """ for morph in morph_tree.findall('.//morphology'): name = morph.findtext('name') mtype = morph.findtext('mtype') msubtype = morph.findtext('msubtype') fullmtype = '%s:%s' % (mtype, msubtype) if msubtype != '' else mtype layer = morph.findtext('layer') yield (name, fullmtype, mtype, msubtype, layer)
[docs] def read_mtype_morph_map(neurondb_filename): """Read morphology database and return a pandas.DataFrame with all morphology records. Args: neurondb_filename(str): filename of morphology database (XML) Returns: A pandas.DataFrame with field "morph_name", "fullmtype", "mtype", "submtype", "layer". """ xml_tree = _parse_xml_tree(neurondb_filename) column_labels = ["morph_name", "fullmtype", "mtype", "submtype", "layer"] return pandas.DataFrame(read_morph_records(xml_tree), columns=column_labels)
[docs] def read_circuitmvd3(circuitmvd3_path): """Read data from circuit.mvd3""" print("Reading circuit.mvd3 at %s" % circuitmvd3_path) import h5py circuitmvd3_file = h5py.File(circuitmvd3_path, 'r') cell_etype_ids = circuitmvd3_file['cells']['properties']['etype'][()] cell_mtype_ids = circuitmvd3_file['cells']['properties']['mtype'][()] cell_morph_ids = \ circuitmvd3_file['cells']['properties']['morphology'][()] cell_layer_ids = \ circuitmvd3_file['cells']['properties']['layer'][()] # Layer number or stored without library in the h5 if 'layer' in circuitmvd3_file['library']: layer_ids = circuitmvd3_file['library']['layer'][()] cell_layers = [layer_ids[cell_layer_id] for cell_layer_id in cell_layer_ids] else: cell_layers = [ str(layer) for layer in circuitmvd3_file['cells']['properties']['layer'][()]] mtype_ids = circuitmvd3_file['library']['mtype'][()] etype_ids = circuitmvd3_file['library']['etype'][()] morph_ids = circuitmvd3_file['library']['morphology'][()] cell_mtypes = [mtype_ids[cell_mtype_id] for cell_mtype_id in cell_mtype_ids] cell_etypes = [etype_ids[cell_etype_id] for cell_etype_id in cell_etype_ids] cell_morphs = [morph_ids[cell_morph_id] for cell_morph_id in cell_morph_ids] cell_layers = [tools.decode_bstring(layer) for layer in cell_layers] cell_mtypes = [tools.decode_bstring(mtype) for mtype in cell_mtypes] cell_etypes = [tools.decode_bstring(etype) for etype in cell_etypes] cell_morphs = [tools.decode_bstring(morph) for morph in cell_morphs] # Write out in order layer, fullmtype, etype, morph cells = zip(cell_layers, cell_mtypes, cell_etypes, cell_morphs) return pandas.DataFrame( cells, columns=['layer', 'fullmtype', 'etype', 'morph_name'])
[docs] def fullmatch(regex, string): """Make sure string matches regex fully""" match = regex.match(string) if match and match.span()[1] == len(string): return match
[docs] def convert_emodel_etype_map(emodel_etype_map, fullmtypes, etypes): """Resolve regular expressions in an e-model e-type map and convert the result to a pandas.DataFrame. In the absence of the key "etype", "mtype", or "morph_name" in the e-model e-type map, the regular expression ".*" is assumed. Args: emodel_etype_map: A dict mapping e-models to a dict with keys "mm_recipe" and "layer". Optional additional keys are "etype", "mtype", and "morph_name", which may contain regular expressions. In absence of these keys, the regular expression ".*" is assumed. fullmtypes: A set of unique full m-types etypes: A set of unique e-types Returns: A pandas.DataFrame with fields 'emodel', 'layer', 'fullmtype', 'etype', 'morph_regex', and 'original_emodel'. Each row corresponds to a unique e-model description. """ morph_name_regexs_cache = {} def read_records(): """Read records """ for original_emodel, etype_map in emodel_etype_map.items(): etype_regex = re.compile(etype_map.get('etype', '.*')) mtype_regex = re.compile(etype_map.get('mtype', '.*')) morph_name_regex = etype_map.get('morph_name', '.*') morph_name_regex = morph_name_regexs_cache.setdefault( morph_name_regex, re.compile(morph_name_regex)) emodel = etype_map['mm_recipe'] layer_mtypes = itertools.product(etype_map['layer'], fullmtypes) for layer, fullmtype in layer_mtypes: if fullmatch(mtype_regex, fullmtype): for etype in etypes: if fullmatch(etype_regex, etype): yield (emodel, str(layer), fullmtype, etype, morph_name_regex, original_emodel,) columns = ['emodel', 'layer', 'fullmtype', 'etype', 'morph_regex', 'original_emodel'] return pandas.DataFrame(read_records(), columns=columns)