# Initialize Notebook
from IPython.core.display import HTML,Image
#%run ../library/v1.0.5/init.ipy
HTML('''<script> code_show=true; function code_toggle() { if (code_show){ $('div.input').hide(); } else { $('div.input').show(); } code_show = !code_show } $( document ).ready(code_toggle); </script> <form action="javascript:code_toggle()"><input type="submit" value="Toggle Code"></form>''')
import gc, argparse, sys, os, errno
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#sns.set()
#sns.set_style('whitegrid')
import h5py
from PIL import Image
import os
from tqdm import tqdm_notebook as tqdm
import scipy
import sklearn
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')
from scipy.io import loadmat
from matplotlib.backends.backend_pdf import PdfPages, PdfFile
from IPython.display import HTML, display, FileLink
from base64 import b64encode, b64decode
from io import StringIO, BytesIO
from contextlib import contextmanager
def display_dataframe(df, filename=None, encoding='utf-8', format='csv', type='button', gradientfunc=False, **kwargs):
    """Render ``df`` in the notebook and offer its contents as a download.

    The frame is shown through ``df.style``; the serialised table is embedded
    in the page as a base64 ``data:`` URI, so no file is written to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to display and export.
    filename : str, optional
        Caption of the rendered table and base name of the download. The
        caption uses the value exactly as passed (possibly ``None``); the
        download name falls back to ``"dataframe"``.
    encoding : str
        Encoding used to turn the serialised text into bytes before base64.
    format : {'csv', 'tsv'}
        ``'csv'`` produces ``<filename>.csv``; ``'tsv'`` produces a
        tab-separated ``<filename>.txt``.
    type : {'button', 'hyperlink'}
        Widget used to trigger the download. Any other value renders the
        table only.
    gradientfunc : bool
        When true, every cell is formatted with the module-level
        ``gradient_func`` defined elsewhere in this file.
    **kwargs
        Forwarded to ``DataFrame.to_csv``.

    Raises
    ------
    ValueError
        If ``format`` is neither ``'csv'`` nor ``'tsv'``.
    """
    styler = df.style
    if gradientfunc:
        styler = styler.format(gradient_func)
    display(styler.set_caption(filename))

    if filename is None:
        filename = "dataframe"
    if format == 'csv':
        data = df.to_csv(**kwargs)
        mime_type = 'text/csv'
        filename = filename + '.csv'
    elif format == 'tsv':
        # Default to a tab separator (the original wrote commas here); an
        # explicit ``sep`` supplied by the caller still takes precedence.
        data = df.to_csv(**{'sep': '\t', **kwargs})
        mime_type = 'text/plain'
        filename = filename + '.txt'
    else:
        raise ValueError('unknown file format: {}'.format(format))

    # Base64 keeps the payload free of quotes and newlines, so it can be
    # dropped into an HTML attribute or a JavaScript string literal.
    payload = str(b64encode(bytes(data, encoding=encoding)), encoding=encoding)
    data = 'data:{mime_type};base64,'.format(mime_type=mime_type) + payload

    if type == 'hyperlink':
        # The link must point at the data URI, and the download name has to
        # be quoted because file names may contain spaces.
        display(HTML('<a href="{data}" download="{filename}" target="_blank">{filename}</a>'.format(
            filename=filename, data=data)))
    elif type == 'button':
        # Random id so several download buttons can coexist in one notebook.
        button_id = 'button_{}'.format(np.random.randint(1000000000))
        display(HTML(r'<input type="button" id="{0}" value="Download">'.format(button_id)))
        display(HTML('''<script>
document.getElementById("{button_id}").addEventListener("click", function(event){{
var filename = "{filename}";
var data = "{data}";
const element = document.createElement('a');
element.setAttribute('href', data);
element.setAttribute('download', filename);
element.style.display = 'none';
document.body.appendChild(element);
element.click();
document.body.removeChild(element);
}});
</script>'''.format(button_id=button_id, filename=filename, data=data)))
def MSE_pcc(A, B, ax=None):
    """Return the variance-normalised MSE and Pearson correlation of A vs B.

    The squared error is divided by the variance of ``B`` before averaging.
    The correlation is computed on the flattened arrays. ``ax`` is accepted
    but not used.
    """
    squared_error = (A - B) ** 2
    normalised_mse = np.mean(squared_error / B.var())
    flat_a = A.ravel()
    flat_b = B.ravel()
    correlation = pearsonr(flat_a, flat_b)[0]
    return normalised_mse, correlation
def analyze(predict, GT_STFT_test_spkr):
    """Score each sample with ``MSE_pcc`` and plot histograms of both metrics.

    Iterates over the first axis of ``predict``, pairing each entry with the
    entry at the same index of ``GT_STFT_test_spkr``. Draws a two-panel
    figure (MSE on the left, PCC on the right) whose titles show
    ``mean(std)`` rounded to three decimals.

    Returns the per-sample ``(mse, pcc)`` arrays.
    """
    n_samples = predict.shape[0]
    mse_scores = np.zeros([n_samples])
    pcc_scores = np.zeros([n_samples])
    for idx in range(n_samples):
        mse_scores[idx], pcc_scores[idx] = MSE_pcc(predict[idx], GT_STFT_test_spkr[idx])

    fig, axes = plt.subplots(1, 2, figsize=(16, 4))
    mse_axis, pcc_axis = axes

    mse_axis.hist(mse_scores, bins=25, color='b')
    mse_summary = (np.round(mse_scores.mean(), 3), np.round(mse_scores.std(), 3))
    mse_axis.set_title('MSE: %g(%g)' % mse_summary)

    pcc_axis.hist(pcc_scores, bins=50, color='g')
    pcc_summary = (np.round(pcc_scores.mean(), 3), np.round(pcc_scores.std(), 3))
    pcc_axis.set_title('PCC: %g(%g)' % pcc_summary)

    return mse_scores, pcc_scores
# Mean MSE / PCC per subject and electrode selection, computed from the saved
# ground-truth and predicted spectrograms. Only the first 180 entries of each
# file are scored.
pcc = {}
mse = {}
n_trials = 180
for subject in ['NY717', 'NY742', 'NY749']:
    mse[subject] = {}
    pcc[subject] = {}
    for region in ['All_grid', '4areas', 'Aud', 'IFG', 'Motor', 'Sensory']:
        folder = 'multitask_multicortex/' + subject + '/' + region + '/'
        spec_gt = loadmat(folder + 'spectrogram_GT.mat')['GT_STFT_test_spkr']
        spec_pred = loadmat(folder + 'spectrogram_prediction.mat')['pred_STFT_test']
        mse_tmp = np.zeros([n_trials])
        pcc_tmp = np.zeros([n_trials])
        for t in range(n_trials):
            mse_tmp[t], pcc_tmp[t] = MSE_pcc(spec_pred[t], spec_gt[t], ax=None)
        mse[subject][region] = np.mean(mse_tmp)
        pcc[subject][region] = np.mean(pcc_tmp)
# Flatten the nested {subject: {region: score}} dicts into arrays with one row
# per region and one column per subject, rounded to three decimals.
mse_new = np.round(
    np.array([[mse[subject][region] for region in mse[subject]] for subject in mse]),
    3,
).T
pcc_new = np.round(
    np.array([[pcc[subject][region] for region in pcc[subject]] for subject in pcc]),
    3,
).T
# Use the spreadsheet as a template for row/column labels, then overwrite the
# cells with "MSE/PCC" strings.
df = pd.read_excel('performance.xlsx', index_col=0)
from copy import deepcopy
df_new = deepcopy(df).astype('str')
for row in range(6):
    # Columns 0-2 hold the per-subject scores.
    for col in range(3):
        df_new.iloc[row, col] = '/'.join((str(mse_new[row, col]), str(pcc_new[row, col])))
    # Column 3 holds the mean over the three subjects.
    row_mse = np.round(np.mean(mse_new[row, :]), 3)
    row_pcc = np.round(np.mean(pcc_new[row, :]), 3)
    df_new.iloc[row, 3] = '/'.join((str(row_mse), str(row_pcc)))
We explored the speech decoding architecture during multiple language tasks (audio repetition, audio naming, sentence completion, word reading and picture naming). The brain signal is collected by a hybrid-density ECoG array: an electrode grid with an overall spacing of 10 mm, with sub-grid electrodes at 5 mm spacing inserted in particular regions. The subject is instructed to pronounce English words after the corresponding auditory or visual stimuli. We then decode each pronounced word from a 1000 ms signal window starting 256 ms before the onset of each production. This table reports the average Mean Squared Error (MSE) / Pearson Correlation Coefficient (PCC) of the decoded produced speech across all tasks. Each column reports the performance on the data of one subject (NY717, NY742, NY749), and the last column reports the average over the three subjects. In the 3rd to 6th rows, we use the signal of a single brain area for training and testing (namely, superior temporal gyrus (STG), precentral gyrus (motor), postcentral gyrus (sensory) and inferior frontal gyrus (IFG)). When an individual area is selected, the data of the other ECoG electrodes are zeroed. The second and first rows report, respectively, the performance of combining all 4 areas and of using all the areas covered by the ECoG array.
Each cell in the table hyperlinks to a subpage that reports the decoded results and a visualization of the model.
# Build a frame shaped like the results table whose cells are "row|col"
# position keys.
df = df_new
n_rows, n_cols = df.shape
cell_keys = np.array(
    ['{}|{}'.format(r, c) for r in range(n_rows) for c in range(n_cols)]
)
df_index = pd.DataFrame(
    cell_keys.reshape(n_rows, -1),
    index=df.index,
    columns=df.columns,
)
def gradient_func(val):
    """Turn a ``"row|col"`` key into the HTML for that cell of ``df``.

    The key addresses a cell of the module-level ``df``, which must hold a
    ``"a/b"`` string (in this notebook, MSE/PCC). Both numbers are printed
    with three decimals, separated by ``|``. The second number times 100
    sets the width, in percent, of a coloured background bar.

    Every column except index 3 is wrapped in a link to the result page
    built from the column and row labels.
    """
    row_text, col_text = val.split('|')
    row = int(row_text)
    col = int(col_text)

    first_text, second_text = df.iloc[row, col].split('/')
    first_value = float(first_text)
    second_value = float(second_text)
    format_use = second_value * 100

    if col == 3:
        # Column 3 has no result page of its own, so emit the bar only.
        return '<span style="background: linear-gradient(90deg, rgba(61,164,166,1) {format_use}%, transparent 0%)">{split1:.3f}|{split2:.3f}</span>'.format(
            split1=first_value, split2=second_value, format_use=format_use)

    return '<a href="multitask_multicortex/{sample_id}/{area_id}/result_{sample_id}.{area_id}.html" style="color: {color}"><span style="background: linear-gradient(90deg, rgba(61,164,166,1) {format_use}%, transparent 0%)">{split1:.3f}|{split2:.3f}</span></a>'.format(
        sample_id=df.columns[col],
        area_id=df.index[row],
        color='black',
        split1=first_value,
        split2=second_value,
        format_use=format_use)
# Render the key table; with gradientfunc=True each "row|col" key is expanded
# into its formatted cell by gradient_func.
display_dataframe(df_index, filename='overall performance', gradientfunc=True)
# Load the saved metric arrays for every subject and electrode selection.
pcc = {}
mse = {}
stoi = {}
mcd = {}
# Each store is paired with the path template of the file that fills it;
# files are read in this order for every subject/area combination.
metric_sources = (
    (pcc, 'multitask_multicortex/{sample}/{area}/pcc.npy'),
    (mse, 'multitask_multicortex/{sample}/{area}/mse.npy'),
    (stoi, 'multitask_multicortex/{sample}/{area}/stois.npy'),
    (mcd, 'multitask_multicortex/{sample}/{area}/mcd_distances.npy'),
)
for subject in ['NY717', 'NY742', 'NY749']:
    for store, _ in metric_sources:
        store[subject] = {}
    for region in ['All_grid', '4areas', 'Aud', 'IFG', 'Motor', 'Sensory']:
        for store, template in metric_sources:
            store[subject][region] = np.load(template.format(sample=subject, area=region))
metrics_map = {'pcc': pcc, 'mse': mse, 'stoi': stoi, 'mcd': mcd}
def boxplot_metric(metric='pcc'):
    """Draw a grouped box plot of one metric for every subject and area.

    Parameters
    ----------
    metric : str
        Key into the module-level ``metrics_map`` (``'pcc'``, ``'mse'``,
        ``'stoi'`` or ``'mcd'``). The selected entry is indexed as
        ``[subject][area]`` and must yield a 1-D sequence of scores.

    The subject and area labels are repeated to the actual length of each
    score array, rather than a fixed 180. Arrays of any length, including
    unequal ones, are therefore labelled correctly.
    """
    arr = metrics_map[metric]
    samples = ['NY717', 'NY742', 'NY749']
    areas = ['All_grid', '4areas', 'Aud', 'IFG', 'Motor', 'Sensory']

    score_chunks = []
    sample_name = []
    area_name = []
    for sample in samples:
        for area in areas:
            scores = arr[sample][area]
            score_chunks.append(scores)
            sample_name.extend([sample] * len(scores))
            area_name.extend([area] * len(scores))

    # Build the long-form table directly so 'value' stays numeric.
    frame = pd.DataFrame({
        'value': np.concatenate(score_chunks),
        'sample': sample_name,
        'area': area_name,
    })

    fig, ax = plt.subplots(1, figsize=(20, 10))
    # The middle line of each box is the median.
    b = sns.boxplot(ax=ax, data=frame, y='value', x='sample', hue='area')
    plt.legend(bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0., fontsize=20)
    b.axes.set_title(metric.upper(), fontsize=30)
    add_text = '( dB)' if metric == 'mcd' else ''
    b.set_xlabel("Sample", fontsize=20)
    b.set_ylabel(metric.upper() + add_text, fontsize=20)
    b.tick_params(labelsize=20)
# One box plot per metric, in the same order as before.
for metric_name in ('pcc', 'mse', 'stoi', 'mcd'):
    boxplot_metric(metric=metric_name)