# Source code for bluepymm.select_combos.reporting

"""Functions for BluePyMM reporting."""

"""
Copyright (c) 2018, EPFL/Blue Brain Project

 This file is part of BluePyMM <https://github.com/BlueBrain/BluePyMM>

 This library is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License version 3.0 as published
 by the Free Software Foundation.

 This library is distributed in the hope that it will be useful, but WITHOUT
 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 details.

 You should have received a copy of the GNU Lesser General Public License
 along with this library; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""


# pylint: disable=R0914, C0325, W0640, W0633
# pylama: ignore=E402

import os

import pandas
import numpy

import matplotlib
matplotlib.use('Agg')
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
plt.style.use('ggplot')

from . import table_processing
from bluepymm import tools


# Matplotlib 'CN' colour specs: each one selects the N-th colour of the active
# style's colour cycle rather than a fixed colour.
# NOTE(review): the names assume the 'ggplot' style selected above, where the
# cycle is expected to give C1 = blue, C0 = red and C4 = yellow -- confirm if
# the style is ever changed.
BLUE = 'C1'
RED = 'C0'
YELLOW = 'C4'
# Figure size passed as `figsize` by the plotting functions of this module.
FIGSIZE = (15, 10)


def pdf_file(pdf_filename):
    """Create and return a PDF file.

    The directory part of `pdf_filename` is handed to `tools.makedirs` before
    the PDF is opened. A bare file name (no directory part) is opened as is.

    Args:
        pdf_filename: path to PDF file

    Returns:
        A multi-page PDF file.
    """
    parent_dir = os.path.dirname(pdf_filename)
    # os.path.dirname() returns '' for a bare file name. There is no
    # directory to create in that case, and an empty string is not a valid
    # path to hand to a directory-creation helper.
    if parent_dir:
        tools.makedirs(parent_dir)
    return PdfPages(pdf_filename)
def add_plot_to_report(pp, plot_function, *args):
    """Add a plot to a given report.

    The figure returned by `plot_function` is saved as a new page of `pp` and
    then closed, whether or not saving succeeded.

    Args:
        pp: pdf file
        plot_function: function that returns figure
        args: arguments to plot_function
    """
    fig = plot_function(*args)
    try:
        pp.savefig(fig, bbox_inches='tight')
    finally:
        # Close the figure that was produced for this page explicitly, so the
        # right figure is released even if it is not the current one or if
        # saving raised.
        plt.close(fig)
def plot_dict(dict_data, title):
    """Render the items of a collection as a one-column table.

    Args:
        dict_data: collection to display. Iterating over it yields one table
            row per item (for a dictionary these are its keys). No table is
            drawn when it is empty or None.
        title: string with plot title

    Returns:
        Figure with the table and title, axes switched off
    """
    figure = plt.figure(figsize=FIGSIZE)
    plt.axis('off')

    if dict_data:
        rows = [[entry] for entry in dict_data]
        plt.table(cellText=rows, loc='center')

    plt.title(title)
    plt.tight_layout()
    return figure
def plot_stacked_bars(
        data, xlabel, ylabel, title, color_map, log=False, yticksize=None):
    """Draw a horizontal stacked bar chart from a data frame.

    Args:
        data: a pandas.DataFrame, plotted with one bar per row and one stacked
            segment per column
        xlabel: string with label for x-axis
        ylabel: string with label for y-axis
        title: string with plot title
        color_map: list of colors, forwarded as `color` to the plot call
        log: forwarded as `log` to the plot call. When true the lower x-limit
            is set to 0.1, otherwise the x-axis ticks are restricted to
            integers.
        yticksize: font size for the y tick labels; left untouched when None

    Returns:
        Figure with plot of stacked bars
    """
    axes = data.plot(
        kind='barh',
        figsize=FIGSIZE,
        stacked=True,
        color=color_map,
        log=log)

    if log:
        plt.xlim(xmin=0.1)
    else:
        integer_ticks = matplotlib.ticker.MaxNLocator(integer=True)
        axes.get_xaxis().set_major_locator(integer_ticks)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if yticksize is not None:
        plt.yticks(fontsize=yticksize)

    plt.title(title)
    plt.tight_layout()
    plt.legend(loc='upper right')

    return axes.get_figure()
def plot_morphs_per_feature_for_emodel(emodel, megate_scores,
                                       emodel_score_values):
    """Plot, for one e-model, how many morphologies passed/failed per feature.

    Args:
        emodel: string representing e-model, used for plot title
        megate_scores: pandas.DataFrame with megate scores, one entry per run
            combo. Its columns are summed to obtain the 'passed' counts.
        emodel_score_values: pandas.DataFrame with score values, one entry per
            run combo. Only its length is used, as the total per feature.

    Returns:
        Figure with plot of stacked bars. Passed and failed simulations are
        colored blue and red, respectively.
    """
    passed_per_feature = megate_scores.sum(axis=0)
    nb_tested = len(emodel_score_values)

    counts = passed_per_feature.to_frame(name='passed')
    counts['failed'] = nb_tested - counts['passed']

    plot_title = '{}: number of tested morphologies per feature'.format(emodel)
    return plot_stacked_bars(
        counts, '# morphologies', '', plot_title, [BLUE, RED])
def plot_morphs_per_mtype_for_emodel(emodel, fullmtypes, megate_scores):
    """Plot, for one e-model, how many morphologies passed/failed per m-type.

    Args:
        emodel: string representing e-model, used for plot title
        fullmtypes: m-types, one entry per run combo. It is used through
            `.unique()` and an element-wise comparison that masks the rows of
            `megate_scores`.
        megate_scores: pandas.DataFrame with megate scores, one entry per run
            combo. Rows count as passed when column 'Passed all' is true.

    Returns:
        Figure with plot of stacked bars. Passed and failed simulations are
        colored blue and red, respectively.
    """
    counts = pandas.DataFrame()

    for mtype in fullmtypes.unique():
        scores_of_mtype = megate_scores[fullmtypes == mtype]
        passed_rows = scores_of_mtype[scores_of_mtype['Passed all']]

        counts.loc[mtype, 'passed'] = len(passed_rows)
        # The 'passed' value is read back from the table, not from a local.
        counts.loc[mtype, 'failed'] = (
            len(scores_of_mtype) - counts.loc[mtype, 'passed'])

    plot_title = '{}: number of tested morphologies per m-type'.format(emodel)
    return plot_stacked_bars(
        counts, '# morphologies', '', plot_title, [BLUE, RED])
def create_morphology_label(data_frame):
    """Build a display label for a morphology from the first row of a frame.

    Args:
        data_frame: pandas.DataFrame with columns 'morph_name', 'fullmtype',
            and 'etype'. Only its first row is read.

    Returns:
        A label (string) of the form: <morph_name> (<fullmtype>, <etype>).
    """
    first_row = data_frame.iloc[0]
    return '{} ({}, {})'.format(
        first_row['morph_name'], first_row['fullmtype'], first_row['etype'])
def plot_emodels_per_morphology(data, final_db):
    """Plot the outcome of the tested e-models for every morphology.

    Only rows of `data` with 'is_exemplar' equal to 0 are considered. For each
    morphology name the selected combos are counted in `final_db`, the combos
    with a non-null 'exception' count as errors, and the remainder as failed.

    Args:
        data: pandas.DataFrame with data on run combos
        final_db: pandas.DataFrame with data on selected combos

    Returns:
        Figure with plot of stacked bars. Simulations that passed, threw an
        error, and failed are colored blue, yellow and red, respectively.
    """
    non_exemplars = data[data['is_exemplar'] == 0]
    counts = pandas.DataFrame()

    for morph_name in non_exemplars['morph_name'].unique():
        morph_combos = non_exemplars[
            non_exemplars['morph_name'] == morph_name]
        selected = final_db[final_db['morph_name'] == morph_name]
        errored = morph_combos[morph_combos['exception'].notnull()]

        nb_selected = len(selected)
        nb_errored = len(errored)
        nb_total = len(morph_combos)

        row_label = create_morphology_label(morph_combos)
        counts.loc[row_label, 'passed'] = nb_selected
        counts.loc[row_label, 'error'] = nb_errored
        counts.loc[row_label, 'failed'] = nb_total - nb_selected - nb_errored

    return plot_stacked_bars(
        counts,
        '# tested e-models',
        'Morphology name',
        'Number of tested e-models for each morphology',
        [BLUE, YELLOW, RED])
def _metype_labels(frame):
    """Return a Series of me-type labels aligned with the rows of `frame`."""
    # Plain iteration over the two columns (instead of a row-wise
    # DataFrame.apply) always produces a Series, also for a frame that has
    # the columns but zero rows.
    labels = [
        create_metype({'etype': etype, 'fullmtype': fullmtype})
        for etype, fullmtype in zip(frame['etype'], frame['fullmtype'])]
    return pandas.Series(labels, index=frame.index, dtype=object)


def plot_emodels_per_metype(data, final_db):
    """Display result of tested e-model / morphology combinations per me-type.

    Only rows of `data` with 'is_exemplar' equal to 0 are considered. For each
    me-type the selected combos are counted in `final_db`, the combos with a
    non-null 'exception' count as errors, and the remainder as failed.

    Neither `data` nor `final_db` is modified: the me-type labels are kept in
    local Series instead of a temporary 'metype' column on the inputs.

    Args:
        data: pandas.DataFrame with data on run combos; columns 'is_exemplar',
            'etype', 'fullmtype' and 'exception' are read
        final_db: pandas.DataFrame with data on selected combos; columns
            'etype' and 'fullmtype' are read

    Returns:
        Figure with plot of stacked bars. Simulations that passed, threw an
        error, and failed are colored blue, yellow and red, respectively.
    """
    non_exemplars = data[data['is_exemplar'] == 0]

    combo_metypes = _metype_labels(non_exemplars)
    selected_metypes = _metype_labels(final_db)
    has_error = non_exemplars['exception'].notnull()

    sums = pandas.DataFrame()
    for metype in combo_metypes.unique():
        in_metype = combo_metypes == metype

        # int() keeps the stored values plain Python integers.
        nb_matches = int((selected_metypes == metype).sum())
        nb_errors = int((in_metype & has_error).sum())
        nb_combos = int(in_metype.sum())

        sums.loc[metype, 'passed'] = nb_matches
        sums.loc[metype, 'error'] = nb_errors
        sums.loc[metype, 'failed'] = nb_combos - nb_matches - nb_errors

    return plot_stacked_bars(
        sums,
        '# tested (e-model, morphology) combinations',
        'me-type',
        'Number of tested (e-model, morphology) combinations per me-type',
        [BLUE, YELLOW, RED],
        log=True,
        yticksize=3)
def create_metype(x):
    """Create me-type from m-type and e-type

    Args:
        x: mapping-like object (e.g. a data frame row) with entries 'etype'
            and 'fullmtype'

    Returns:
        String of the form <etype>_<fullmtype>
    """
    etype = x['etype']
    fullmtype = x['fullmtype']
    return '%s_%s' % (etype, fullmtype)
def plot_median_per_metype(combos, passed_median_scores, csv_path):
    """Display result median score per me-type

    The median scores are joined with `combos`, reduced to one median per
    ('mtype', 'etype') pair, and arranged as an m-type x e-type table. That
    table is written to `csv_path` and drawn as a colour map on the current
    figure.

    Args:
        combos: pandas.DataFrame joined onto `passed_median_scores`; must
            provide columns 'mtype' and 'etype'
        passed_median_scores: pandas.DataFrame with column 'median_score'
        csv_path: path of the CSV file the me-type table is written to

    Returns:
        The current figure, holding the colour map
    """
    joined = passed_median_scores.join(combos)
    per_combo = joined[['mtype', 'etype', 'median_score']]
    per_metype = per_combo.groupby(['mtype', 'etype']).median().reset_index()
    metype_medians = per_metype.pivot(
        index='mtype', columns='etype', values='median_score')

    metype_medians.to_csv(csv_path)
    print('Wrote me-type median scores to %s' % os.path.abspath(csv_path))

    plt.pcolor(metype_medians)
    axes = plt.gca()

    colorbar = plt.colorbar()
    colorbar.ax.text(
        4, .5, 'median Z score', rotation=270, verticalalignment='center')

    plt.xlabel('e-type')
    plt.ylabel('m-type')

    # Ticks sit at the centre of each cell, hence the half-cell offset.
    nb_mtypes, nb_etypes = metype_medians.shape
    axes.set_xticks(numpy.arange(nb_etypes) + 0.5, minor=False)
    axes.set_yticks(numpy.arange(nb_mtypes) + 0.5, minor=False)
    axes.set_xticklabels(metype_medians.columns, rotation=90)
    axes.set_yticklabels(metype_medians.index)

    plt.tight_layout()
    plt.title('Median Z scores of me-types')

    return plt.gcf()
# TODO: can this function be split into processing and reporting?
def create_final_db_and_write_report(pdf_filename,
                                     to_skip_features,
                                     to_skip_patterns,
                                     megate_thresholds,
                                     megate_patterns,
                                     skip_repaired_exemplar,
                                     check_opt_scores,
                                     scores, score_values,
                                     enable_plot_emodels_per_morphology,
                                     output_dir,
                                     select_perc_best,
                                     n_processes=None):
    """Create the final output files and report

    Processes every e-model through `table_processing.process_emodels`,
    accumulates the per-e-model results, writes a multi-page PDF report and
    three CSV files with median scores in `<output_dir>/extra_data`.

    Args:
        pdf_filename: path of the PDF report, opened through `pdf_file`
        to_skip_features: shown on the 'Ignored feature patterns' report page
        to_skip_patterns: forwarded to `table_processing.process_emodels`
        megate_thresholds: shown on the 'MEGating thresholds' report page
        megate_patterns: forwarded to `table_processing.process_emodels`
        skip_repaired_exemplar: forwarded to
            `table_processing.process_emodels`
        check_opt_scores: forwarded to `table_processing.process_emodels`
        scores: pandas.DataFrame; columns 'is_original', 'emodel',
            'is_exemplar', 'fullmtype' and 'etype' are read here, and the
            frame is passed on to the processing and plotting functions
        score_values: forwarded to `table_processing.process_emodels`
        enable_plot_emodels_per_morphology: when true, the per-morphology
            page is added to the report
        output_dir: directory in which the 'extra_data' sub-directory with
            the CSV files is created
        select_perc_best: forwarded to `table_processing.process_emodels`
        n_processes: forwarded to `table_processing.process_emodels`

    Returns:
        pandas.DataFrame with the concatenated `emodel_ext_neurondb_rows` of
        all e-models for which processing returned information.
    """

    # Accumulators, extended with the results of every processed e-model.
    ext_neurondb = pandas.DataFrame()
    emodel_infos = None
    # NOTE(review): megate_passed_all is accumulated below but not read again
    # within this function.
    megate_passed_all = pandas.DataFrame()
    median_scores = pandas.DataFrame()
    passed_combos = pandas.DataFrame()
    # The bare string below is disabled code kept as a no-op expression.
    ''' median_scores = score_values.median( axis=1, skipna=True).to_frame(name='median_score') '''
    with pdf_file(pdf_filename) as pp:
        # Plot input configuration details
        add_plot_to_report(pp, plot_dict, to_skip_features,
                           'Ignored feature patterns')
        add_plot_to_report(pp, plot_dict, megate_thresholds,
                           'MEGating thresholds (last match counts)')

        # Process all the e-models
        # Only e-models of rows with is_original == 0 are processed, in
        # sorted order.
        emodels = sorted(scores[scores.is_original == 0].emodel.unique())
        emodel_infos = table_processing.process_emodels(emodels,
                                                        scores,
                                                        score_values,
                                                        to_skip_patterns,
                                                        megate_patterns,
                                                        skip_repaired_exemplar,
                                                        check_opt_scores,
                                                        select_perc_best,
                                                        n_processes=n_processes
                                                        )
        print("All emodels processed, generating output files")
        for emodel, emodel_info in emodel_infos.items():
            if emodel_info is not None:
                # Each info entry is a 7-tuple, unpacked in this order.
                emodel_ext_neurondb_rows, \
                    megate_scores, emodel_score_values, fullmtypes, \
                    emodel_megate_passed_all, emodel_median_scores, \
                    emodel_passed_combos = \
                    emodel_info
                # NOTE(review): every accumulator is re-indexed to 0..n-1
                # after each concatenation, so the original row labels of
                # the per-e-model frames are discarded -- confirm that the
                # index-based lookups and joins further down are meant to be
                # positional.
                ext_neurondb = pandas.concat(
                    [ext_neurondb, emodel_ext_neurondb_rows], axis=0
                ).reset_index(drop=True)
                megate_passed_all = pandas.concat(
                    [megate_passed_all, emodel_megate_passed_all], axis=0
                ).reset_index(drop=True)
                median_scores = pandas.concat(
                    [median_scores, emodel_median_scores], axis=0
                ).reset_index(drop=True)
                passed_combos = pandas.concat(
                    [passed_combos, emodel_passed_combos], axis=0
                ).reset_index(drop=True)

                # Reporting per e-model
                add_plot_to_report(
                    pp,
                    plot_morphs_per_feature_for_emodel,
                    emodel,
                    megate_scores,
                    emodel_score_values)
                add_plot_to_report(
                    pp,
                    plot_morphs_per_mtype_for_emodel,
                    emodel,
                    fullmtypes,
                    megate_scores)
                # Release any figure still open after this e-model's pages.
                plt.close('all')
            else:
                print('WARNING: no info for emodel %s, skipping !' % emodel)

        # Get median score for every passed combo
        passed_median_scores = median_scores.loc[passed_combos.index]

        extra_data_dir = os.path.join(output_dir, 'extra_data')
        if not os.path.exists(extra_data_dir):
            os.makedirs(extra_data_dir)

        # Median scores of all non-exemplar rows of `scores`.
        all_median_csv_path = os.path.join(
            extra_data_dir, 'all_median_scores.csv')
        scores[scores['is_exemplar'] == 0].join(median_scores)[
            ['fullmtype', 'etype', 'emodel',
             'median_score']].to_csv(all_median_csv_path)

        # Right join: only rows present in passed_median_scores are kept.
        passed_median_csv_path = os.path.join(
            extra_data_dir, 'passed_median_scores.csv')
        scores[
            scores['is_exemplar'] == 0].join(
            passed_median_scores, how='right')[
            ['fullmtype', 'etype', 'emodel', 'median_score']].to_csv(
            passed_median_csv_path)

        # The me-type CSV itself is written inside plot_median_per_metype.
        metype_median_csv_path = os.path.join(
            extra_data_dir, 'metype_median_scores.csv')
        add_plot_to_report(
            pp,
            plot_median_per_metype,
            scores,
            passed_median_scores,
            metype_median_csv_path)

        # More reporting
        if enable_plot_emodels_per_morphology:
            add_plot_to_report(pp, plot_emodels_per_morphology, scores,
                               ext_neurondb)
        add_plot_to_report(pp, plot_emodels_per_metype, scores, ext_neurondb)

    return ext_neurondb