Source code for bluepymm.select_combos.table_processing

"""Functions to process tables produced by BluePyMM."""

Copyright (c) 2018, EPFL/Blue Brain Project

 This file is part of BluePyMM <https://github.com/BlueBrain/BluePyMM>

 This library is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License version 3.0 as published
 by the Free Software Foundation.

 This library is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 details.

 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# pylint: disable=R0914, C0325, W0640
# pylama: ignore=E402

import math
import json
import pandas
import multiprocessing

from bluepymm import tools

def _row_transform(row, exemplar_row, to_skip_patterns,
    """Transform row values (scores) to booleans, where True means that a
    feature did not exceed the corresponding feature threshold, or can be

    for column in row.index[1:]:
        for megate_feature_threshold in row['megate_feature_threshold']:
            if megate_feature_threshold['features'].match(column):
                megate_threshold = megate_feature_threshold['megate_threshold']

        # transform score
        if skip_repaired_exemplar:
            row[column] = row[column] <= megate_threshold
            row[column] = row[column] <= max(
                megate_threshold, megate_threshold * exemplar_row[column])

    return row

def convert_extra_values(row):
    """Extract 'threshold_current' and 'holding_current' information from key
    'extra_values' and convert to new (key, value)-pairs in given row data.

    Args:
        row: contains key 'extra_values', with string value (a JSON document)
            or None

    Returns:
        row, with extra keys 'threshold_current' and/or 'holding_current', and
        associated value, as extracted from row['extra_values']. The row is
        returned unchanged when 'extra_values' is None or decodes to an empty
        value.
    """
    extra_values_str = row['extra_values']
    if extra_values_str is None:
        return row

    extra_values = json.loads(extra_values_str)
    if extra_values:
        for field in ('threshold_current', 'holding_current'):
            if field in extra_values:
                row[field] = extra_values[field]

    return row
def row_threshold_transform(row, megate_patterns):
    """Transform threshold row based on me-gate rule: add matching me-gate
    patterns to row data.

    Args:
        row: has keys 'emodel', 'fullmtype', 'etype', and
            'megate_feature_threshold'
        megate_patterns: a list of megate patterns; each is a dict with keys
            'emodel', 'fullmtype' and 'etype' (objects with a ``match``
            method) and 'megate_feature_threshold'

    Returns:
        Processed row data: for all me-gate patterns that match the row data,
        the corresponding megate feature threshold is appended to
        row['megate_feature_threshold']. When nothing matches, that entry is
        left as it was.
    """
    emodel = row['emodel']
    fullmtype = row['fullmtype']
    etype = row['etype']

    for pattern_dict in megate_patterns:
        matches = (pattern_dict['emodel'].match(emodel) and
                   pattern_dict['fullmtype'].match(fullmtype) and
                   pattern_dict['etype'].match(etype))
        if not matches:
            continue

        # The list is created lazily, on the first matching pattern.
        if row['megate_feature_threshold'] is None:
            row['megate_feature_threshold'] = []
        row['megate_feature_threshold'].append(
            pattern_dict['megate_feature_threshold'])

    return row
def check_opt_scores(emodel, scores):
    """Check if opt_scores match with unrepaired exemplar runs.

    Only rows with the given e-model name, is_exemplar == 1 and
    is_repaired == 0 are checked. Rows whose 'scores' entry decodes to None
    are ignored.

    Args:
        emodel: e-model name
        scores: pandas.DataFrame with scores; uses the columns 'emodel',
            'is_exemplar', 'is_repaired', 'opt_scores' and 'scores', where the
            last two hold JSON strings

    Raises:
        Exception:
            - if the keys of the opt_scores do not match the unrepaired
              exemplar runs,
            - if the scores values of the opt_scores do not match the scores
              of the unrepaired exemplar runs.
    """
    test_rows = scores[(scores.emodel == emodel) &
                       (scores.is_exemplar == 1) &
                       (scores.is_repaired == 0)]

    for _, row in test_rows.iterrows():
        opt_score = json.loads(row['opt_scores'])
        bluepymm_score = json.loads(row['scores'])

        if bluepymm_score is None:
            continue

        if sorted(opt_score.keys()) != sorted(bluepymm_score.keys()):
            # Adjacent literals are concatenated: each one needs its own
            # trailing space to keep the words of the message apart.
            raise Exception(
                'Difference detected in score keys between optimisation '
                'score and score calculated by bluepymm for emodel %s !:'
                '\n%s\n%s' %
                (emodel, opt_score, bluepymm_score))

        for feature in opt_score:
            if opt_score[feature] != bluepymm_score[feature]:
                raise Exception(
                    'Difference detected in optimisation score and score '
                    'calculated by bluepymm for emodel %s !:\n%s\n%s' %
                    (emodel, opt_score, bluepymm_score))
def _apply_megating(emodel_mtype_etype_thresholds, emodel_score_values, exemplar_row, to_skip_patterns, skip_repaired_exemplar): """Compare score values to applicable feature thresholds.""" # Add a column with the thresholds emodel_score_values_thresholds = pandas.concat( [emodel_mtype_etype_thresholds['megate_feature_threshold'], emodel_score_values], axis=1) # Apply the thresholds # Creates a table show which columns (objectives) pass for each combo non_skipped_columns = [ column for column in emodel_score_values_thresholds.columns if not any(pattern.match(column) for pattern in to_skip_patterns)] emodel_megate_pass = \ emodel_score_values_thresholds[non_skipped_columns].apply( _row_transform, args=( exemplar_row, to_skip_patterns, skip_repaired_exemplar), axis=1) # Remove the threshold column del emodel_megate_pass['megate_feature_threshold'] # Detect which rows (combos) pass in all columns emodel_megate_pass['Passed all'] = emodel_megate_pass.all(axis=1) return emodel_megate_pass def _create_extneurondb_rows(selected_combinations): """Prepare rows for database based on selected combinations.""" # 1. select relevant columns from db with successful combinations emodel_ext_neurondb = selected_combinations.loc[:, ('morph_name', 'layer', 'fullmtype', 'etype', 'emodel', 'extra_values')].copy() # 2. create additional columns: combo_name, threshold current, and # holding current if len(emodel_ext_neurondb) > 0: emodel_ext_neurondb['combo_name'] = emodel_ext_neurondb.apply( lambda x: '%s_%s_%s_%s' % (x['emodel'], x['fullmtype'], x['layer'], x['morph_name']), axis=1) emodel_ext_neurondb['threshold_current'] = None emodel_ext_neurondb['holding_current'] = None emodel_ext_neurondb = emodel_ext_neurondb.apply( convert_extra_values, axis=1) del emodel_ext_neurondb['extra_values'] return emodel_ext_neurondb
def select_passed_combos(
        emodel,
        emodel_combos,
        emodel_megate_pass,
        emodel_megate_scores,
        select_perc_best=None):
    """Select which combos pass.

    Args:
        emodel: e-model name, only used in the warning message
        emodel_combos: pandas.DataFrame with columns 'etype' and 'fullmtype'
        emodel_megate_pass: pandas.DataFrame with a column 'Passed all'; only
            read when select_perc_best is None
        emodel_megate_scores: pandas.DataFrame with a column 'median_score',
            indexed like emodel_combos; only read when select_perc_best is
            not None
        select_perc_best: None, or the fraction of combos to keep per me-type

    Returns:
        The rows of emodel_combos that were selected:
        - select_perc_best is None: the rows for which 'Passed all' is True;
        - otherwise, for every me-type (etype + fullmtype), the best
          ceil(select_perc_best * n) rows by ascending 'median_score', where
          n counts the rows of that me-type that have no NaN and a median
          score below 250.
    """
    if select_perc_best is None:
        passed = emodel_megate_pass[
            emodel_megate_pass['Passed all'] == True]  # NOQA
        return emodel_combos.loc[passed.index]

    metype_inds = emodel_combos['etype'] + emodel_combos['fullmtype']

    # Index labels are collected in a plain list: DataFrame.append, which the
    # accumulation used to rely on, no longer exists in pandas >= 2.0.
    passed_labels = []
    for metype in metype_inds.unique():
        metype_scores = emodel_megate_scores.loc[
            metype_inds[metype_inds.values == metype].index]

        candidates = metype_scores.dropna(axis=0)
        candidates = candidates[candidates['median_score'] < 250.0]
        candidates = candidates.sort_values('median_score')

        n_of_combos = len(candidates.index)
        n_of_best = int(math.ceil(select_perc_best * n_of_combos))
        best = candidates.head(n_of_best)

        # Warn per me-type: checking the accumulated selection instead would
        # hide every empty me-type that comes after a non-empty one.
        if len(best) == 0:
            print(
                'WARNING: no combos for me-type %s in emodel %s' %
                (metype, emodel))

        passed_labels.extend(best.index)

    return emodel_combos.loc[passed_labels]
def calc_median_scores(emodel_score_values, to_skip_patterns):
    """Calculate the median score for every me-combo.

    Args:
        emodel_score_values: pandas.DataFrame with one row per me-combo and
            one column per score
        to_skip_patterns: objects with a ``match`` method (e.g. compiled
            regular expressions); columns whose name matches any of them are
            left out of the median

    Returns:
        pandas.DataFrame with a single column 'median_score': the row-wise
        median over the remaining columns, ignoring NaN values.
    """
    non_skipped_columns = [
        column
        for column in emodel_score_values.columns
        if not any(pattern.match(column) for pattern in to_skip_patterns)]

    row_medians = emodel_score_values[non_skipped_columns].median(
        axis=1, skipna=True)

    return row_medians.to_frame('median_score')
def process_emodels(emodels,
                    scores,
                    score_values,
                    to_skip_patterns,
                    megate_patterns,
                    skip_repaired_exemplar,
                    enable_check_opt_scores,
                    select_perc_best,
                    n_processes=None):
    """Run process_emodel for every e-model and collect the results.

    Args:
        emodels: iterable of e-model names
        scores, score_values, to_skip_patterns, megate_patterns,
        skip_repaired_exemplar, enable_check_opt_scores, select_perc_best:
            passed on unchanged, together with the e-model name, as the
            argument tuple of process_emodel
        n_processes: 1 processes the e-models one after the other in this
            process; any other value hands them to a multiprocessing.Pool
            created with processes=n_processes

    Returns:
        dict that maps every e-model name to the second element of the pair
        returned by process_emodel.
    """
    arg_list = [(emodel,
                 scores,
                 score_values,
                 to_skip_patterns,
                 megate_patterns,
                 skip_repaired_exemplar,
                 enable_check_opt_scores,
                 select_perc_best)
                for emodel in emodels]

    emodel_infos = {}

    if n_processes == 1:
        for args in arg_list:
            emodel, emodel_info = process_emodel(args)
            emodel_infos[emodel] = emodel_info
    else:
        print('Parallelising selection processing of e-models')
        pool = multiprocessing.Pool(maxtasksperchild=1, processes=n_processes)
        try:
            for emodel, emodel_info in pool.imap(
                    process_emodel, arg_list, chunksize=1):
                print('Received processed info from e-model %s' % emodel)
                emodel_infos[emodel] = emodel_info
        finally:
            # Also reached when a worker raised, so that the worker processes
            # never outlive a failed run.
            pool.terminate()
            pool.join()

    return emodel_infos
def process_emodel(args):
    """Process scores and score values for indicated e-model and return data
    on the e-model performance as well as the selected combinations.

    Args:
        args: a single 8-tuple (one argument, so that the function can be
            mapped over a list by a process pool), holding in this order:
            - emodel: e-model name
            - combos: pandas.DataFrame with combo data
            - score_values: pandas.DataFrame with score values; it is
              filtered with boolean masks built from combos
            - to_skip_patterns: list of compiled regular expressions
            - megate_patterns: list of dictionaries with megate patterns
            - skip_repaired_exemplar: boolean
            - enable_check_opt_scores: boolean
            - select_perc_best: passed on to select_passed_combos

    Returns:
        Pair (emodel, info). info is a 7-tuple with megate results for the
        e-model:
        - emodel_ext_neurondb: pandas.DataFrame with database rows
        - emodel_megate_pass: pandas.DataFrame with megate fail/success
        - emodel_score_values: pandas.DataFrame with score values
        - mtypes: pandas.Series with tested m-types
        - emodel_megate_passed_all: the 'Passed all' column of
          emodel_megate_pass, as a pandas.DataFrame
        - emodel_median_scores: pandas.DataFrame with median scores
        - passed_combos: pandas.DataFrame with the selected combinations
        info is None:
        - if boolean skip_repaired_exemplar is set to False, and no repaired
          exemplars are available
        - if the e-model was not run on any released morphology

    Raises:
        Exception, skip_repaired_exemplar is set to False and more than one
        exemplars are found.
    """
    emodel, combos, score_values, to_skip_patterns, megate_patterns, \
        skip_repaired_exemplar, enable_check_opt_scores, select_perc_best \
        = args

    print('Processing e-model %s' % emodel)

    # check if opt_scores match with unrepaired exemplar runs
    if enable_check_opt_scores:
        check_opt_scores(emodel, combos)

    # if applicable, look up the repaired exemplar of this e-model
    exemplar_row = None
    if not skip_repaired_exemplar:
        exemplar_morph = combos[combos.emodel == emodel].morph_name.values[0]
        exemplar_score_values = score_values[
            (combos.emodel == emodel) &
            (combos.is_exemplar == 1) &
            (combos.is_repaired == 1) &
            (combos.is_original == 0) &
            (combos.morph_name == exemplar_morph)]

        if len(exemplar_score_values) > 1:
            raise Exception('Too many exemplars found for e-model %s: %s' %
                            (emodel, exemplar_score_values))

        exemplar_score_values = exemplar_score_values.head(1).copy()
        exemplar_score_values.dropna(axis=1, how='all', inplace=True)

        if len(exemplar_score_values) == 0:
            print('Skipping e-model %s: no repaired exemplars' % emodel)
            # Same pair shape as every other exit, so that callers can always
            # unpack the result as (emodel, info).
            return (emodel, None)

        exemplar_row = exemplar_score_values.iloc[0].to_dict()

    # rows of this e-model that are not exemplar runs
    non_exemplar_mask = (combos.emodel == emodel) & (combos.is_exemplar == 0)

    # identify relevant me-gate feature thresholds for each row
    emodel_mtype_etypes = combos[non_exemplar_mask].copy()
    if len(emodel_mtype_etypes) == 0:
        print('Skipping e-model %s: was not run on any released morphology'
              % emodel)
        return (emodel, None)

    emodel_mtype_etype_thresholds = emodel_mtype_etypes.loc[
        :, ['emodel', 'fullmtype', 'etype']]
    emodel_mtype_etype_thresholds['megate_feature_threshold'] = None

    print('Getting megating thresholds for emodel %s' % emodel)
    emodel_mtype_etype_thresholds = emodel_mtype_etype_thresholds.apply(
        lambda row: row_threshold_transform(row, megate_patterns), axis=1)

    # select score values relevant to this e-model
    emodel_score_values = score_values[non_exemplar_mask].copy()
    emodel_score_values.dropna(axis=1, how='all', inplace=True)

    print('Applying megating to emodel %s' % emodel)
    # me-gating: compare score values to applicable feature thresholds
    emodel_megate_pass = _apply_megating(
        emodel_mtype_etype_thresholds,
        emodel_score_values,
        exemplar_row,
        to_skip_patterns,
        skip_repaired_exemplar)

    print('Calculating median scores for emodel %s' % emodel)
    emodel_median_scores = calc_median_scores(
        emodel_score_values, to_skip_patterns)

    emodel_combos = combos[non_exemplar_mask].copy()

    # identify combinations that passed the me-gating step
    passed_combos = select_passed_combos(
        emodel,
        emodel_combos,
        emodel_megate_pass,
        emodel_median_scores,
        select_perc_best)

    emodel_megate_passed_all = emodel_megate_pass[['Passed all']]

    # sanity check: every selected row must belong to this e-model
    if len(passed_combos[passed_combos['emodel'] != emodel]) > 0:
        raise Exception('Something went wrong during row indexing in megating')

    # prepare database rows for this e-model
    emodel_ext_neurondb = _create_extneurondb_rows(passed_combos)

    # identify m-types that were tested for this e-model
    mtypes = combos[non_exemplar_mask].loc[:, 'fullmtype']

    return emodel, (emodel_ext_neurondb,
                    emodel_megate_pass,
                    emodel_score_values,
                    mtypes,
                    emodel_megate_passed_all,
                    emodel_median_scores,
                    passed_combos)
def process_combo_name(data, log_filename):
    """Make value corresponding to key 'combo_name' compliant with NEURON
    rules for template names. A log file is written out in csv format.

    The column data['combo_name'] is replaced in place by the output of
    tools.get_neuron_compliant_template_name. The log file has the columns
    'original_combo_name' and 'neuron_compliant_combo_name' and no index
    column.

    Args:
        data: pandas.DataFrame with key 'combo_name'
        log_filename: path to log file
    """
    log_data = pandas.DataFrame()
    log_data['original_combo_name'] = data['combo_name'].copy()

    # Only the 'combo_name' value of each row is used, so the conversion is
    # applied to that column directly; this also keeps an empty table a valid
    # input.
    data['combo_name'] = data['combo_name'].apply(
        tools.get_neuron_compliant_template_name)

    log_data['neuron_compliant_combo_name'] = data['combo_name'].copy()
    log_data.to_csv(log_filename, index=False)