In [230]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

%config InlineBackend.figure_format = 'svg'
In [231]:
# Load the benchmark results for the two scenarios.
# NOTE(review): pd.read_table defaults to sep='\t', so despite the .csv
# extension these files are parsed as tab-separated -- confirm they really
# are TSV; otherwise switch to pd.read_csv.
single_client = pd.read_table('single-client.csv')
multi_client = pd.read_table('multi-client.csv')
In [232]:
def label_config(df):
    """
    Filter the experiment rows and attach a short ``config`` label to the
    most meaningful configurations.

    Rows matching none of the known configurations are dropped. The input
    frame is left untouched; a labeled copy is returned.
    """
    labeled = df.copy()
    on_patched = labeled['branch'].str.contains('patched')
    # Ordered rules: the first matching rule wins (np.select semantics).
    rules = [
        ('master', labeled['branch'] == 'master'),
        ('prefetch off', on_patched & (labeled['prefetch'] == 'off')),
        ('prefetch 1', on_patched & (labeled['prefetch'] == 'on') & (labeled['limit'] == 1)),
        ('prefetch on', on_patched & (labeled['prefetch'] == 'on') & (labeled['limit'] == 0)),
    ]
    labels = [name for name, _ in rules]
    masks = [mask for _, mask in rules]
    labeled['config'] = np.select(masks, labels, default='other')
    # Keep only the rows that matched one of the known configurations.
    return labeled[labeled['config'] != 'other']
def compute_stats(df, group_cols=None, baseline='master'):
    """
    Compute per-configuration timing statistics for the filtered experiments.

    Parameters
    ----------
    df : pd.DataFrame
        Raw experiment rows; must contain the columns used by
        ``label_config`` plus a numeric ``timing`` column.
    group_cols : sequence of str, optional
        Columns to group by in addition to the derived ``config`` label.
        Defaults to ``['data', 'io', 'iomethod']``.
    baseline : str
        Config label used as the reference for ``diff``/``pct_change``.

    Returns
    -------
    dict
        'means', 'stds', 'counts' : DataFrames with one column per config;
        'diff' : mean minus the baseline mean;
        'pct_change' : diff expressed as a percentage of the baseline mean.
    """
    # Avoid a mutable default argument; copy so a caller's list is never aliased.
    group_cols = ['data', 'io', 'iomethod'] if group_cols is None else list(group_cols)

    labeled = label_config(df)

    # Per-(group, config) summary statistics computed from the raw timings.
    g = labeled.groupby(group_cols + ['config'])['timing'].agg(['mean', 'std', 'count'])

    # Pivot each statistic so config becomes the columns.
    means = g['mean'].unstack('config')
    stds = g['std'].unstack('config')
    counts = g['count'].unstack('config')

    # Absolute and relative difference of the means vs the baseline config.
    diff = means.sub(means[baseline], axis=0)
    pct_change = diff.div(means[baseline], axis=0) * 100

    return {
        'means': means,
        'stds': stds,
        'counts': counts,
        'diff': diff,
        'pct_change': pct_change
    }
In [233]:
# Multi-client runs: % change of mean run time per config vs master.
multi_stats = compute_stats(multi_client)
print(multi_stats['pct_change'].drop(columns=['master']))
config                        prefetch 1  prefetch off  prefetch on
data       io       iomethod                                       
random     buffered io_uring    0.218599      0.002864   -23.910641
                    worker     91.768793      0.550272    57.890540
           direct   io_uring   -2.024426      0.112157   -84.855778
                    worker      8.916191      0.126356   -69.612002
sequential buffered io_uring    5.222741      1.874322    -1.418104
                    worker     32.703880      1.664899    -5.901672
           direct   io_uring  -14.925524      0.788549   -74.587668
                    worker     -8.362331      0.805051   -75.135987
In [234]:
# Single-client runs: % change of mean run time per config vs master.
single_stats = compute_stats(single_client)
print(single_stats['pct_change'].drop(columns=['master']))
config                        prefetch 1  prefetch off  prefetch on
data       io       iomethod                                       
random     buffered io_uring    2.377503      0.278389   -46.679616
                    worker     61.289192      0.840742   -52.338995
           direct   io_uring   -1.031931      0.378272   -88.859544
                    worker      4.973114      0.179462   -88.258811
sequential buffered io_uring    5.121254      2.031143    -3.545643
                    worker      5.339434      1.263939   -11.506386
           direct   io_uring  -27.580837      0.366973   -50.304248
                    worker    -28.087094      1.178066   -49.851895
In [235]:
def _forest_data(s, baseline='master'):
    '''
    Compute the data behind the forest plot.

    Given the statistics dict produced by ``compute_stats`` (keys
    'means', 'stds', 'counts'; one column per config), build, for each
    group and each non-baseline config, the percentage difference of
    the mean run time vs the baseline together with a 95% confidence
    interval of that difference (Welch approximation), both normalized
    by the baseline mean.

    Returns a tuple ``(df, idx_names)`` where ``df`` has the columns
    ``idx_names + ['config', 'effect', 'ci']`` and ``idx_names`` are the
    grouping-index level names.
    '''
    means, stds, counts = s['means'], s['stds'], s['counts']
    # Configs to compare against the baseline (the baseline vs itself is trivially 0).
    configs = [c for c in means.columns if c != baseline]
    idx_names = list(means.index.names)
    
    # Standard error of each (group, config) mean.
    se = stds / np.sqrt(counts)
    # Baseline columns reshaped to (n_groups, 1) so they broadcast across all configs.
    baseline_se, baseline_n = se[baseline].values.reshape(-1,1), counts[baseline].values.reshape(-1,1)
    # Standard error of the difference of two independent sample means.
    se_diff = np.sqrt(se**2 + baseline_se**2)
    # Welch-Satterthwaite degrees of freedom (unequal variances / sample sizes).
    df_welch = (se**2 + baseline_se**2)**2 / ((se**4/(counts-1)) + (baseline_se**4/(baseline_n-1)))
    # Two-sided 95% critical value of the t distribution at those dof.
    t_crit = stats.t.ppf(0.975, df_welch)
    
    # Normalize both the difference and its CI half-width by the baseline mean.
    baseline_mean = means[baseline].values.reshape(-1, 1)
    diff_pct = means.sub(means[baseline], axis=0) / baseline_mean * 100
    ci_pct = t_crit * se_diff / baseline_mean * 100
    
    # Flatten to long format: one row per (group index, config); single-level
    # indices are wrapped in a 1-tuple so zip() works for both index shapes.
    rows = [dict(zip(idx_names, idx if isinstance(idx, tuple) else (idx,)), 
                 config=cfg, effect=diff_pct.loc[idx, cfg], ci=ci_pct.loc[idx, cfg])
            for idx in diff_pct.index for cfg in configs]
    return pd.DataFrame(rows).sort_values(idx_names + ['config']).reset_index(drop=True), idx_names
In [236]:
def _forest_render(df, idx_names, title):
    '''
    Render a dataframe of confidence intervals as a forest plot.

    ``df`` is the long-format frame from ``_forest_data`` (columns
    ``idx_names + ['config', 'effect', 'ci']``). Repeated index labels
    are blanked out so each group value is printed only on its first row,
    which makes the y-axis read like a grouped table.

    Returns the ``(fig, ax)`` pair of the created figure.
    '''

    # Build one fixed-width, monospace-aligned label per row; ``prev``
    # remembers the last printed value per column so duplicates are blanked.
    prev, labels = {}, []
    # Pad every column to its widest string so the labels line up.
    widths = {c: df[c].astype(str).str.len().max() for c in idx_names + ['config']}
    for _, r in df.iterrows():
        changed, parts = False, []
        for c in idx_names + ['config']:
            # Once an outer column changes, all inner columns are reprinted
            # too (``changed`` stays True for the rest of the row).
            if r[c] != prev.get(c) or changed:
                parts.append(str(r[c]).ljust(widths[c]))
                changed = changed or r[c] != prev.get(c)
                prev[c] = r[c]
            else:
                parts.append(' ' * widths[c])
        labels.append('  '.join(parts))
    
    # Scale the figure height with the number of rows.
    fig, ax = plt.subplots(figsize=(8, len(df) * 0.32 + 1))
    for i, (_, r) in enumerate(df.iterrows()):
        # Green for a negative change, red for a positive one; the marker is
        # faded (alpha 0.4) when the CI half-width exceeds |effect|, i.e. the
        # interval includes zero.
        ax.errorbar(r['effect'], i, xerr=r['ci'], fmt='o', capsize=3, markersize=6,
          color='green' if r['effect'] < 0 else 'red', alpha=1 if abs(r['effect']) > r['ci'] else 0.4)
    
    # Vertical reference line at "no change".
    ax.axvline(0, color='black', linestyle='--')
    ax.set_yticks(range(len(df)))
    ax.set_yticklabels(labels, fontfamily='monospace', fontsize=9)
    ax.set_xlabel('% Change vs Master')
    ax.set_title(title)
    # Top-to-bottom order matches the dataframe's row order.
    ax.invert_yaxis()
    ax.grid(True, axis='y')
    plt.tight_layout()
    return fig, ax


def forest_plot(s, title='Effect Size vs Master', baseline='master'):
    """
    Draw a forest plot of per-config effect sizes vs a baseline config.

    Parameters
    ----------
    s : dict
        Statistics dict produced by ``compute_stats``.
    title : str
        Plot title.
    baseline : str
        Config label used as the reference; previously hard-coded to
        'master', now forwarded to ``_forest_data`` (default unchanged,
        so existing callers behave identically).

    Returns
    -------
    (fig, ax) from ``_forest_render``.
    """
    df, idx_names = _forest_data(s, baseline=baseline)
    return _forest_render(df, idx_names, title)
In [237]:
# Forest plot for the single-client runs. The trailing ';' suppresses the
# (fig, ax) text repr; the inline figure still renders.
s_single = compute_stats(single_client)
forest_plot(s_single, title='Single-Client: Effect Size vs Master');
No description has been provided for this image
In [238]:
# Forest plot for the multi-client runs. group_cols is spelled out here but
# matches compute_stats' default grouping.
s_multi = compute_stats(multi_client, group_cols=['data', 'io', 'iomethod'])
forest_plot(s_multi, title='Multi-Client: Effect Size vs Master');
No description has been provided for this image
In [ ]: