# Initialize Notebook
# NOTE(review): `Image` imported here is rebound further down by
# `from PIL import Image`, and `HTML` is imported again from IPython.display.
from IPython.core.display import HTML,Image
#%run ../library/v1.0.5/init.ipy
# Inject a small script plus a "Toggle Code" form button: `code_toggle` hides
# or shows every `div.input` element and is also run once when the document is
# ready. The script relies on `$` being available in the page (presumably the
# jQuery shipped with the classic notebook front end -- confirm).
HTML('''<script> code_show=true;  function code_toggle() {  if (code_show){  $('div.input').hide();  } else {  $('div.input').show();  }  code_show = !code_show }  $( document ).ready(code_toggle); </script> <form action="javascript:code_toggle()"><input type="submit" value="Toggle Code"></form>''')

import gc, argparse, sys, os, errno
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#sns.set()
#sns.set_style('whitegrid')
import h5py
from PIL import Image
import os
from tqdm import tqdm_notebook as tqdm
import scipy
import sklearn
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')
from scipy.io import loadmat

Populating the interactive namespace from numpy and matplotlib

from matplotlib.backends.backend_pdf import PdfPages, PdfFile
from IPython.display import HTML, display, FileLink
from base64 import b64encode, b64decode
from io import StringIO, BytesIO
from contextlib import contextmanager

def display_dataframe(df, filename=None, encoding='utf-8', format='csv', type='button',gradientfunc=False, **kwargs):
    """Render a DataFrame in the notebook and offer its contents for download.

    The frame is displayed through ``df.style`` with ``filename`` as caption.
    It is then serialised with ``DataFrame.to_csv`` and embedded in the page
    as a base64 ``data:`` URI, exposed either as a hyperlink or as a
    "Download" button.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to display and export.
    filename : str, optional
        Caption of the rendered table and base name of the downloaded file
        (the extension is appended here). The download falls back to
        ``"dataframe"`` when omitted; the caption is then left unset.
    encoding : str
        Text encoding used for the exported bytes.
    format : {'csv', 'tsv'}
        ``'csv'`` exports comma-separated text as ``<filename>.csv``;
        ``'tsv'`` exports tab-separated text as ``<filename>.txt``.
    type : {'button', 'hyperlink'}
        Kind of download control to render. Any other value renders the
        table only.
    gradientfunc : bool
        When true, every cell is formatted with the module-level
        ``gradient_func`` before display.
    **kwargs
        Forwarded to ``DataFrame.to_csv``.

    Raises
    ------
    ValueError
        If ``format`` is neither ``'csv'`` nor ``'tsv'``.
    """
    from html import escape  # local import: keeps this block self-contained

    styler = df.style
    if gradientfunc:
        # gradient_func is looked up at call time, so it may be defined later
        # in the notebook than this function.
        styler = styler.format(gradient_func)
    display(styler.set_caption(filename))

    if filename is None:
        filename = "dataframe"
    if format == 'csv':
        data = df.to_csv(**kwargs)
        mime_type = 'text/csv'
        filename = filename + '.csv'
    elif format == 'tsv':
        # Default to a tab separator (the original emitted commas here), while
        # still letting an explicit ``sep=`` from the caller win.
        data = df.to_csv(**{'sep': '\t', **kwargs})
        mime_type = 'text/plain'
        filename = filename + '.txt'
    else:
        raise ValueError('unknown file format: {}'.format(format))

    # Base64 output is pure ASCII, so decode it as such regardless of the
    # encoding chosen for the payload itself.
    payload = b64encode(data.encode(encoding)).decode('ascii')
    data = 'data:{mime_type};base64,'.format(mime_type=mime_type) + payload

    if type == 'hyperlink':
        # The link must point at the data URI, and the attribute values must be
        # quoted/escaped so file names containing spaces or quotes still work.
        display(HTML('<a href="{data}" download="{filename}" target="_blank">{filename}</a>'.format(
            filename=escape(filename), data=data)))
    elif type == 'button':
        # Random id so several download buttons can coexist in one notebook.
        button_id = 'button_{}'.format(np.random.randint(1000000000))
        display(HTML(r'<input type="button" id="{0}" value="Download">'.format(button_id)))
        # On click, a temporary hidden <a download> is created and clicked to
        # trigger the browser download of the embedded data URI.
        display(HTML('''<script>
    document.getElementById("{button_id}").addEventListener("click", function(event){{
        var filename = "{filename}";
        var data = "{data}";
        const element = document.createElement('a');
        element.setAttribute('href', data);
        element.setAttribute('download', filename);
        element.style.display = 'none';
        document.body.appendChild(element);
        element.click();
        document.body.removeChild(element);
    }});
</script>'''.format(button_id=button_id, filename=filename, data=data)))

def MSE_pcc(A,B,ax=None):
    """Score array ``A`` against reference array ``B``.

    Returns a ``(mse, pcc)`` tuple:

    * ``mse`` -- mean of the squared element-wise differences, each divided by
      the variance of ``B``;
    * ``pcc`` -- Pearson correlation coefficient between the flattened arrays.

    ``ax`` is accepted for call compatibility but is not used.
    """
    squared_error = (A - B) ** 2
    normalised_mse = np.mean(squared_error / B.var())
    correlation, _p_value = pearsonr(A.ravel(), B.ravel())
    return normalised_mse, correlation
def analyze(predict,GT_STFT_test_spkr):
    """Score every prediction against its ground truth and plot histograms.

    ``predict[k]`` is compared with ``GT_STFT_test_spkr[k]`` through
    ``MSE_pcc`` for each index along the first axis of ``predict``. Two
    histograms are drawn side by side (MSE with 25 bins, PCC with 50 bins),
    each titled with the mean and standard deviation rounded to 3 decimals.

    Returns the per-item ``(mse, pcc)`` arrays.
    """
    n_items = predict.shape[0]
    mse = np.zeros([n_items])
    pcc = np.zeros([n_items])
    for idx in range(n_items):
        mse[idx], pcc[idx] = MSE_pcc(predict[idx], GT_STFT_test_spkr[idx])

    fig, ax = plt.subplots(1, 2, figsize=(16, 4))
    panels = (
        (ax[0], mse, 25, 'b', 'MSE: %g(%g)'),
        (ax[1], pcc, 50, 'g', 'PCC: %g(%g)'),
    )
    for axis, scores, n_bins, colour, title in panels:
        axis.hist(scores, bins=n_bins, color=colour)
        axis.set_title(title % (np.round(scores.mean(), 3), np.round(scores.std(), 3)))
    return mse, pcc

# Mean MSE / PCC per subject and electrode selection, computed over the first
# 180 items of each stored ground-truth / predicted spectrogram pair.
mse = {}
pcc = {}
for subject in ['NY717', 'NY742', 'NY749']:
    mse[subject] = {}
    pcc[subject] = {}
    for area in ['All_grid', '4areas', 'Aud', 'IFG', 'Motor', 'Sensory']:
        folder = 'multitask_multicortex/'+subject+'/'+area+'/'
        spec_gt = loadmat(folder+'spectrogram_GT.mat')['GT_STFT_test_spkr']
        spec_pred = loadmat(folder+'spectrogram_prediction.mat')['pred_STFT_test']
        # One (mse, pcc) pair per item, then split into two 1-D arrays.
        per_item = [MSE_pcc(spec_pred[t], spec_gt[t], ax=None) for t in range(180)]
        mse_tmp, pcc_tmp = (np.array(column) for column in zip(*per_item))
        mse[subject][area] = np.mean(mse_tmp)
        pcc[subject][area] = np.mean(pcc_tmp)

# Flatten the nested {subject: {area: score}} dicts in insertion order, fold
# them into one row per subject (3 rows), round to 3 decimals and transpose so
# that rows are areas and columns are subjects.
mse_new = np.round(
    np.array([mse[subject][area]
              for subject in mse
              for area in mse[subject]]).reshape(3, -1),
    3).T
pcc_new = np.round(
    np.array([pcc[subject][area]
              for subject in pcc
              for area in pcc[subject]]).reshape(3, -1),
    3).T

df = pd.read_excel('performance.xlsx',index_col=0)
from copy import deepcopy
# Work on a string-typed copy of the spreadsheet layout so that each cell can
# hold an "<mse>/<pcc>" pair.
df_new = deepcopy(df).astype('str')
for row in range(6):
    # Columns 0-2: the per-subject scores of this row.
    for col in range(3):
        df_new.iloc[row, col] = '/'.join([str(mse_new[row, col]), str(pcc_new[row, col])])
    # Column 3: mean of the three (already rounded) scores of the row.
    row_mse = np.round(np.mean(mse_new[row, :]), 3)
    row_pcc = np.round(np.mean(pcc_new[row, :]), 3)
    df_new.iloc[row, 3] = '/'.join([str(row_mse), str(row_pcc)])

result table¶

We explored the speech decoding architecture across multiple language tasks (audio repetition, audio naming, sentence completion, word reading and picture naming). The brain signal is collected with a hybrid-density ECoG array: an electrode grid with an overall spacing of 10 mm, with sub-grid electrodes at 5 mm spacing inserted in particular regions. The subject is instructed to pronounce English words after the corresponding auditory or visual stimuli. We then decode each pronounced word from a 1000 ms signal window starting 256 ms before the onset of each production. This table reports the average Mean Squared Error (MSE) / Pearson Correlation Coefficient (PCC) of the decoded produced speech over all the tasks. Each column reports the performance on the data of one subject (NY717, NY742, NY749), and the last column reports the average over the three. In the 3rd to 6th rows, we use the signal of a single brain area for training and testing (namely, superior temporal gyrus (STG), precentral gyrus (motor), postcentral gyrus (sensory) and inferior frontal gyrus (IFG)). When an individual area is selected, the data of the other ECoG electrodes are zeroed. The second and first rows report the performance of combining the four areas and of using all the areas covered by the ECoG array, respectively.

Each cell in the table hyperlinks to a subpage that reports the decoded results and a visualization of the model.

MSE|PCC

df = df_new
# Companion frame with the same labels as df whose cells hold their own
# "row|col" position; gradient_func uses that key to look the value up in df.
n_rows, n_cols = df.shape
cell_keys = np.array([[str(r)+'|'+str(c) for c in range(n_cols)]
                      for r in range(n_rows)])
df_index = pd.DataFrame(cell_keys, index=df.index, columns=df.columns)

def gradient_func(val):
    """Build the HTML shown for one table cell.

    ``val`` is a ``"row|col"`` key. The matching cell of the module-level
    ``df`` is expected to hold ``"<mse>/<pcc>"``. The result is a ``<span>``
    showing ``mse|pcc`` with 3 decimals on top of a horizontal gradient bar
    whose width is ``pcc * 100`` percent. For every column except index 3
    (presumably the average column) the span is wrapped in a link to
    ``multitask_multicortex/<column>/<row>/result_<column>.<row>.html``.
    """
    row, col = (int(part) for part in val.split('|'))
    mse_text, pcc_text = df.iloc[row, col].split('/')
    mse_value, pcc_value = float(mse_text), float(pcc_text)
    bar_width = pcc_value * 100

    bar = '<span style="background: linear-gradient(90deg, rgba(61,164,166,1) {format_use}%, transparent 0%)">{split1:.3f}|{split2:.3f}</span>'.format(
        split1=mse_value, split2=pcc_value, format_use=bar_width)
    if col == 3:
        return bar

    link_open = '<a href="multitask_multicortex/{sample_id}/{area_id}/result_{sample_id}.{area_id}.html" style="color: {color}">'.format(
        sample_id=df.columns[col], area_id=df.index[row], color='black')
    return link_open + bar + '</a>'

# Render the results table: with gradientfunc=True every "row|col" key of
# df_index is turned into HTML by gradient_func; the default type='button'
# also adds a "Download" button.
# NOTE(review): display_dataframe serialises the frame it is given, so the
# download contains the "row|col" keys of df_index rather than the MSE/PCC
# strings held in df.
#df_index.style.format(style_func)
display_dataframe(df_index,gradientfunc=True,filename='overall performance')

Metrics summary¶

# Metric arrays per subject and electrode selection, loaded from
# multitask_multicortex/<subject>/<area>/.
pcc, mse, stoi, mcd = {}, {}, {}, {}
metric_sources = [
    (pcc, 'multitask_multicortex/{sample}/{area}/pcc.npy'),
    (mse, 'multitask_multicortex/{sample}/{area}/mse.npy'),
    (stoi, 'multitask_multicortex/{sample}/{area}/stois.npy'),
    (mcd, 'multitask_multicortex/{sample}/{area}/mcd_distances.npy'),
]
for subject in ['NY717', 'NY742', 'NY749']:
    for store, _ in metric_sources:
        store[subject] = {}
    for area in ['All_grid', '4areas', 'Aud', 'IFG', 'Motor', 'Sensory']:
        for store, template in metric_sources:
            store[subject][area] = np.load(template.format(sample=subject, area=area))

metrics_map = {'pcc':pcc,'mse':mse,'stoi':stoi,'mcd':mcd}
def boxplot_metric(metric='pcc'):
    """Draw grouped box plots of one metric for every subject and area.

    Parameters
    ----------
    metric : {'pcc', 'mse', 'stoi', 'mcd'}
        Key into ``metrics_map`` selecting the nested
        ``{subject: {area: array}}`` dict to plot.

    Raises
    ------
    KeyError
        If ``metric`` is not a key of ``metrics_map``.

    The values are gathered into a long-form frame with columns ``value``,
    ``sample`` (subject id) and ``area``, then plotted with one group per
    subject on the x-axis and one box per area.
    """
    arr = metrics_map[metric]

    # Build the label columns from the actual length of each array instead of
    # assuming a fixed number of entries, so values and labels always line up.
    values, sample_name, area_name = [], [], []
    for subject in ['NY717', 'NY742', 'NY749']:
        for area in ['All_grid', '4areas', 'Aud', 'IFG', 'Motor', 'Sensory']:
            scores = np.asarray(arr[subject][area], dtype=float).ravel()
            values.append(scores)
            sample_name.extend([subject] * scores.size)
            area_name.extend([area] * scores.size)

    # Constructing from a dict keeps `value` numeric and the labels as strings,
    # with no string round-trip of the numbers.
    long_df = pd.DataFrame({
        'value': np.concatenate(values),
        'sample': sample_name,
        'area': area_name,
    })

    fig,ax=plt.subplots(1,figsize=(20,10))
    b = sns.boxplot(ax=ax,data=long_df,y='value',x='sample',hue='area') #the middle line is median
    # Legend placed just outside the right edge of the axes.
    ax.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.,fontsize=20)
    b.axes.set_title(metric.upper(),fontsize=30)
    add_text = ''
    if metric=='mcd':
        add_text = '( dB)'
    b.set_xlabel("Sample",fontsize=20)
    b.set_ylabel(metric.upper()+add_text,fontsize=20)
    b.tick_params(labelsize=20)

PCC¶

# Box plots of the loaded PCC values, grouped by subject with one box per area.
boxplot_metric(metric='pcc')

MSE¶

# Box plots of the loaded MSE values, grouped by subject with one box per area.
boxplot_metric(metric='mse')

STOI¶

# Box plots of the loaded STOI values, grouped by subject with one box per area.
boxplot_metric(metric='stoi')

MCD¶

# Box plots of the loaded MCD values (y-axis labelled in dB), grouped by
# subject with one box per area.
boxplot_metric(metric='mcd')

	NY717	NY742	NY749	AVG
Patient
All_grid	0.844\|0.470	0.624\|0.649	0.614\|0.684	0.694\|0.601
4areas	0.862\|0.453	0.593\|0.664	0.599\|0.689	0.685\|0.602
Aud	0.887\|0.431	0.634\|0.643	0.633\|0.666	0.718\|0.580
IFG	0.977\|0.375	0.886\|0.476	0.701\|0.628	0.855\|0.493
Motor	0.907\|0.417	0.746\|0.570	0.728\|0.602	0.794\|0.530
Sensory	0.901\|0.417	0.699\|0.597	0.792\|0.543	0.797\|0.519