Source code for bluepymm.prepare_combos.create_mm_sqlite

"""Create sqlite database"""

from __future__ import print_function

"""
Copyright (c) 2018, EPFL/Blue Brain Project

 This file is part of BluePyMM <https://github.com/BlueBrain/BluePyMM>

 This library is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License version 3.0 as published
 by the Free Software Foundation.

 This library is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 details.

 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""

"""Some Code based on BrainBuilder and morph repair code"""

# pylint: disable=R0914

import os
import json

import pandas
import sqlite3

from bluepymm import tools
from . import parse_files


def check_morphology_existence(morph_name, morph_type, morph_path):
    """Verify that a morphology file is present on disk.

    Args:
        morph_name: a string with the name of the morphology. Only used to
            build a readable error message.
        morph_type: a string with the type of the morphology. Only used
            (capitalized) to build a readable error message.
        morph_path: the path to the morphology file.

    Returns:
        True if a regular file exists at morph_path.

    Raises:
        ValueError: no file exists at morph_path.
    """
    if os.path.isfile(morph_path):
        return True

    message = "{} morphology {} doesn't exist at {}".format(
        morph_type.capitalize(), morph_name, morph_path)
    raise ValueError(message)
def create_exemplar_rows(
        final_dict,
        rep_fullmtype_morph_map,
        emodel_etype_map,
        emodels,
        emodel_dirs,
        rep_morph_dir,
        unrep_morph_dir,
        skip_repaired_exemplar=False):
    """Build the table of exemplar me-combinations.

    Args:
        final_dict: final e-model map. For every original e-model it must
            provide the keys 'fitness' and 'morph_path'.
        rep_fullmtype_morph_map: pandas.DataFrame with the repaired
            morphology database. Rows are selected on the 'morph_name'
            column and unpacked positionally as five values, of which the
            second to fourth are used as fullmtype, mtype and msubtype.
        emodel_etype_map: e-model e-type map, keyed by original e-model.
            Every entry must provide the keys 'mm_recipe' and 'etype'.
        emodels: collection of e-model names that are part of the me-model
            database. Entries whose 'mm_recipe' e-model is not in this
            collection are skipped.
        emodel_dirs: a dict mapping e-models to prepared e-model
            directories. Only used when unrep_morph_dir is None.
        rep_morph_dir: directory with repaired morphologies.
        unrep_morph_dir: directory with unrepaired morphologies. If None,
            the directory is derived from the e-model directory and the
            'morph_path' of the e-model.
        skip_repaired_exemplar: indicates whether repaired exemplar should
            be skipped. Default value is False.

    Returns:
        pandas.DataFrame with one row for each exemplar. Keys are 'layer',
        'fullmtype', 'mtype', 'msubtype', 'etype', 'morph_name',
        'morph_ext', 'emodel', 'original_emodel', 'morph_dir', 'scores',
        'opt_scores', 'exception', 'to_run', 'is_exemplar', 'is_repaired',
        and 'is_original'.

    Raises:
        ValueError: a required morphology file does not exist.
        Exception: the exemplar morphology is not listed in
            rep_fullmtype_morph_map (only when repaired exemplars are
            not skipped).
    """
    rows = []

    for original_emodel in emodel_etype_map:
        map_entry = emodel_etype_map[original_emodel]
        emodel = map_entry['mm_recipe']

        if emodel not in emodels:
            print(
                'Skipping exemplar row for e-model %s, '
                'not part of me-model db ' % emodel)
            continue

        print('Adding exemplar row for e-model %s' % emodel)

        emodel_info = final_dict[original_emodel]
        opt_scores = emodel_info['fitness']
        relative_morph_path = emodel_info['morph_path']
        morph_filename = os.path.basename(relative_morph_path)
        morph_name, morph_ext = os.path.splitext(morph_filename)

        # The directory is resolved per e-model in a dedicated variable so
        # that the result of one iteration never leaks into the next one.
        if unrep_morph_dir is not None:
            this_unrep_morph_dir = unrep_morph_dir
        else:
            resolved_path = os.path.abspath(
                os.path.join(emodel_dirs[emodel], relative_morph_path))
            this_unrep_morph_dir = os.path.dirname(resolved_path)

        check_morphology_existence(
            morph_filename,
            'unrepaired',
            os.path.join(this_unrep_morph_dir, morph_filename))

        # Combos are (stored e-model, is_original, is_repaired) triples
        unrepaired_combos = [(emodel, False, False),
                             (original_emodel, True, False)]

        if skip_repaired_exemplar:
            # Don't run repaired version
            fullmtype = mtype = msubtype = None
            combos = unrepaired_combos
        else:
            name_matches = (
                rep_fullmtype_morph_map['morph_name'] == morph_name)
            morph_records = rep_fullmtype_morph_map[name_matches].values
            if not len(morph_records):
                raise Exception(
                    'Morphology %s for %s e-model not found in morphology '
                    'release' % (morph_name, original_emodel))

            _, fullmtype, mtype, msubtype, _ = morph_records[0]
            check_morphology_existence(
                morph_filename,
                'repaired',
                os.path.join(rep_morph_dir, morph_filename))

            # Run repaired version
            combos = [(emodel, False, True),
                      (original_emodel, True, True)] + unrepaired_combos

        etype = map_entry['etype']
        serialized_opt_scores = json.dumps(opt_scores)

        for stored_emodel, is_original, is_repaired in combos:
            if is_repaired:
                row_morph_dir = rep_morph_dir
                row_opt_scores = None
            else:
                row_morph_dir = this_unrep_morph_dir
                row_opt_scores = serialized_opt_scores

            rows.append({
                'layer': None,
                'fullmtype': fullmtype,
                'mtype': mtype,
                'msubtype': msubtype,
                'etype': etype,
                'morph_name': morph_name,
                'morph_ext': morph_ext,
                'emodel': stored_emodel,
                'original_emodel': original_emodel,
                'morph_dir': row_morph_dir,
                'scores': None,
                'opt_scores': row_opt_scores,
                'exception': None,
                'to_run': True,
                'is_exemplar': True,
                'is_repaired': is_repaired,
                'is_original': is_original})

    return pandas.DataFrame(rows)
def remove_morph_regex_failures(full_map):
    """Remove all rows where morph_name doesn't match morph_regex.

    The input table is left untouched: no helper column is added to it and
    the function can safely be called more than once on the same frame.

    Args:
        full_map: pandas.DataFrame with keys 'morph_name' and 'morph_regex'.
            Every 'morph_regex' value must provide a match() method (e.g. a
            compiled regular expression); it is applied to the 'morph_name'
            value of the same row.

    Returns:
        A new pandas.DataFrame with all rows of the input table where
        'morph_name' matched 'morph_regex'. The column 'morph_regex' is
        removed and the index is reset to 0..n-1.
    """
    # Evaluate the regex of every row against the name in that row. An
    # explicit bool dtype keeps the mask a valid boolean indexer even when
    # the table is empty.
    matches = pandas.Series(
        [bool(regex.match(name))
         for regex, name in zip(full_map['morph_regex'],
                                full_map['morph_name'])],
        index=full_map.index,
        dtype=bool)

    # Prune all the rows that didn't match; .loc with a boolean mask returns
    # a new frame, so the caller's table is not modified
    matching_rows = full_map.loc[matches]

    # Delete the obsolete regex column and reset index
    return matching_rows.drop('morph_regex', axis=1).reset_index(drop=True)
def create_mm_sqlite_circuitmvd3(
        output_filename,
        circuitmvd3_path,
        morph_dir,
        rep_morph_dir,
        unrep_morph_dir,
        original_emodel_etype_map,
        final_dict,
        emodel_dirs,
        skip_repaired_exemplar=False):
    """Create SQLite database using circuit.mvd3.

    Args:
        output_filename: path of the SQLite database to write. An existing
            'scores' table in that database is replaced.
        circuitmvd3_path: path to the circuit.mvd3 file, passed to
            parse_files.read_circuitmvd3.
        morph_dir: morphology directory stored in the 'morph_dir' column of
            all non-exemplar rows.
        rep_morph_dir: directory with repaired morphologies, must contain a
            neuronDB.xml file.
        unrep_morph_dir: directory with unrepaired morphologies, forwarded
            to create_exemplar_rows.
        original_emodel_etype_map: e-model e-type map
        final_dict: e-model parameters
        emodel_dirs: prepared e-model directories
        skip_repaired_exemplar: indicates whether repaired exemplar should
            be skipped. Default value is False.

    Raises:
        Exception: no e-model was found for at least one
            (layer, etype, fullmtype) combination.
    """
    rep_neurondb_filename = os.path.join(rep_morph_dir, 'neuronDB.xml')

    # Contains layer, fullmtype, mtype, submtype, morph_name
    print(
        'Reading repaired-morphologies neuronDB at %s' %
        rep_neurondb_filename)
    rep_fullmtype_morph_map = parse_files.read_mtype_morph_map(
        rep_neurondb_filename)
    tools.check_no_null_nan_values(rep_fullmtype_morph_map,
                                   "the full m-type morphology map")

    # Contains layer, fullmtype, etype, morph_name
    morph_fullmtype_etype_map = parse_files.read_circuitmvd3(
        circuitmvd3_path)
    tools.check_no_null_nan_values(morph_fullmtype_etype_map,
                                   "morph_fullmtype_etype_map")

    fullmtypes = morph_fullmtype_etype_map.fullmtype.unique()
    etypes = morph_fullmtype_etype_map.etype.unique()

    print('Creating emodel etype table')
    # Contains layer, fullmtype, etype, emodel, morph_regex, original_emodel
    emodel_fullmtype_etype_map = parse_files.convert_emodel_etype_map(
        original_emodel_etype_map, fullmtypes, etypes)
    tools.check_no_null_nan_values(emodel_fullmtype_etype_map,
                                   "e-model e-type map")

    print('Creating full table by merging subtables')
    # Contains layer, fullmtype, etype, morph_name, e_model, morph_regex
    full_map = morph_fullmtype_etype_map.merge(
        emodel_fullmtype_etype_map,
        on=['layer', 'etype', 'fullmtype'], how='left')

    # The left merge leaves a null e-model wherever no e-model is available
    # for a (layer, etype, fullmtype) combination
    null_emodel_rows = full_map[pandas.isnull(full_map['emodel'])]
    if len(null_emodel_rows) > 0:
        raise Exception(
            'No emodels found for the following layer, etype, fullmtype'
            ' combinations: \n%s' %
            null_emodel_rows[['layer', 'etype', 'fullmtype']])

    # Collected before regex filtering, so it holds every merged e-model
    emodels = full_map['emodel'].unique().tolist()

    print('Filtering out morp_names that dont match regex')
    # Contains layer, fullmtype, etype, morph_name, e_model
    full_map = remove_morph_regex_failures(full_map)
    tools.check_no_null_nan_values(full_map, "the full map")

    print('Adding exemplar rows')
    full_map.insert(len(full_map.columns), 'morph_dir', morph_dir)
    full_map.insert(len(full_map.columns), 'morph_ext', None)
    full_map.insert(len(full_map.columns), 'is_exemplar', False)
    full_map.insert(len(full_map.columns), 'is_repaired', True)
    full_map.insert(len(full_map.columns), 'is_original', False)
    full_map.insert(len(full_map.columns), 'scores', None)
    full_map.insert(len(full_map.columns), 'opt_scores', None)
    full_map.insert(len(full_map.columns), 'extra_values', None)
    full_map.insert(len(full_map.columns), 'exception', None)
    full_map.insert(len(full_map.columns), 'to_run', True)

    exemplar_rows = create_exemplar_rows(
        final_dict,
        rep_fullmtype_morph_map,
        original_emodel_etype_map,
        emodels,
        emodel_dirs,
        rep_morph_dir,
        unrep_morph_dir,
        skip_repaired_exemplar=skip_repaired_exemplar)

    # Prepend exemplar rows to full_map
    # NOTE(review): sort=True is used here whereas create_mm_sqlite uses
    # sort=False - confirm whether the difference is intentional
    full_map = pandas.concat(
        [exemplar_rows, full_map], ignore_index=True, sort=True)

    # Write full table to sqlite database. Using a sqlite3 connection as a
    # context manager only commits or rolls back the transaction, it does
    # not close the connection, so it is closed explicitly.
    conn = sqlite3.connect(output_filename)
    try:
        with conn:
            full_map.to_sql('scores', conn, if_exists='replace')
    finally:
        conn.close()

    print('Created sqlite db at %s' % output_filename)
def create_mm_sqlite(
        output_filename,
        recipe_filename,
        morph_dir,
        rep_morph_dir,
        unrep_morph_dir,
        original_emodel_etype_map,
        final_dict,
        emodel_dirs,
        skip_repaired_exemplar=False):
    """Create SQLite database with all possible me-combinations.

    Args:
        output_filename: path of the SQLite database to write. An existing
            'scores' table in that database is replaced.
        recipe_filename: path to the recipe file, passed to
            parse_files.read_mm_recipe.
        morph_dir: directory with morphology release, contains neuronDB.xml
            file
        rep_morph_dir: directory with repaired morphologies, must contain a
            neuronDB.xml file.
        unrep_morph_dir: directory with unrepaired morphologies, forwarded
            to create_exemplar_rows.
        original_emodel_etype_map: e-model e-type map
        final_dict: e-model parameters
        emodel_dirs: prepared e-model directories
        skip_repaired_exemplar: indicates whether repaired exemplar should
            be skipped. Default value is False.

    Raises:
        Exception: no e-model was found for at least one
            (layer, etype, fullmtype) combination.
    """
    neurondb_filename = os.path.join(morph_dir, 'neuronDB.xml')
    rep_neurondb_filename = os.path.join(rep_morph_dir, 'neuronDB.xml')

    # Contains layer, fullmtype, etype
    print('Reading recipe at %s' % recipe_filename)
    fullmtype_etype_map = parse_files.read_mm_recipe(recipe_filename)
    tools.check_no_null_nan_values(fullmtype_etype_map,
                                   "the full m-type e-type map")

    # Contains layer, fullmtype, mtype, submtype, morph_name
    print('Reading neuronDB at %s' % neurondb_filename)
    fullmtype_morph_map = parse_files.read_mtype_morph_map(neurondb_filename)
    tools.check_no_null_nan_values(fullmtype_morph_map,
                                   "the full m-type morphology map")

    # Contains layer, fullmtype, mtype, submtype, morph_name
    print(
        'Reading repaired-morphologies neuronDB at %s' %
        rep_neurondb_filename)
    rep_fullmtype_morph_map = parse_files.read_mtype_morph_map(
        rep_neurondb_filename)
    tools.check_no_null_nan_values(rep_fullmtype_morph_map,
                                   "the full m-type morphology map")

    # Contains layer, fullmtype, etype, morph_name
    print('Merging recipe and neuronDB tables')
    morph_fullmtype_etype_map = fullmtype_morph_map.merge(
        fullmtype_etype_map, on=['fullmtype', 'layer'], how='left')
    tools.check_no_null_nan_values(morph_fullmtype_etype_map,
                                   "morph_fullmtype_etype_map")

    fullmtypes = morph_fullmtype_etype_map.fullmtype.unique()
    etypes = morph_fullmtype_etype_map.etype.unique()

    print('Creating emodel etype table')
    # Contains layer, fullmtype, etype, emodel, morph_regex, original_emodel
    emodel_fullmtype_etype_map = parse_files.convert_emodel_etype_map(
        original_emodel_etype_map, fullmtypes, etypes)
    tools.check_no_null_nan_values(emodel_fullmtype_etype_map,
                                   "e-model e-type map")

    print('Creating full table by merging subtables')
    # Contains layer, fullmtype, etype, morph_name, e_model, morph_regex
    full_map = morph_fullmtype_etype_map.merge(
        emodel_fullmtype_etype_map,
        on=['layer', 'etype', 'fullmtype'], how='left')

    # The left merge leaves a null e-model wherever no e-model is available
    # for a (layer, etype, fullmtype) combination
    null_emodel_rows = full_map[pandas.isnull(full_map['emodel'])]
    if len(null_emodel_rows) > 0:
        raise Exception(
            'No emodels found for the following layer, etype, fullmtype'
            ' combinations: \n%s' %
            null_emodel_rows[['layer', 'etype', 'fullmtype']])

    # Collected before regex filtering, so it holds every merged e-model
    emodels = full_map['emodel'].unique().tolist()

    print('Filtering out morp_names that dont match regex')
    # Contains layer, fullmtype, etype, morph_name, e_model
    full_map = remove_morph_regex_failures(full_map)
    tools.check_no_null_nan_values(full_map, "the full map")

    print('Adding exemplar rows')
    full_map.insert(len(full_map.columns), 'morph_dir', morph_dir)
    full_map.insert(len(full_map.columns), 'morph_ext', None)
    full_map.insert(len(full_map.columns), 'is_exemplar', False)
    full_map.insert(len(full_map.columns), 'is_repaired', True)
    full_map.insert(len(full_map.columns), 'is_original', False)
    full_map.insert(len(full_map.columns), 'scores', None)
    full_map.insert(len(full_map.columns), 'opt_scores', None)
    full_map.insert(len(full_map.columns), 'extra_values', None)
    full_map.insert(len(full_map.columns), 'exception', None)
    full_map.insert(len(full_map.columns), 'to_run', True)

    exemplar_rows = create_exemplar_rows(
        final_dict,
        rep_fullmtype_morph_map,
        original_emodel_etype_map,
        emodels,
        emodel_dirs,
        rep_morph_dir,
        unrep_morph_dir,
        skip_repaired_exemplar=skip_repaired_exemplar)

    # Prepend exemplar rows to full_map
    full_map = pandas.concat(
        [exemplar_rows, full_map], ignore_index=True, sort=False)

    # Write full table to sqlite database. Using a sqlite3 connection as a
    # context manager only commits or rolls back the transaction, it does
    # not close the connection, so it is closed explicitly.
    conn = sqlite3.connect(output_filename)
    try:
        with conn:
            full_map.to_sql('scores', conn, if_exists='replace')
    finally:
        conn.close()

    print('Created sqlite db at %s' % output_filename)