flexcv.plot

This module provides functions for plotting and for logging the resulting figures to Neptune.

flexcv.plot.permutation_importance(model, model_name, X, y, features)

Calculates and plots the permutation importance of a model.

Parameters:

Name         Type               Description                                              Default
model        object             The model to calculate the permutation importance for.  required
model_name   str                The name of the model.                                   required
X            array-like         The features.                                            required
y            array-like         The target.                                              required
features     array-like | list  The feature names.                                       required

Returns:

Type                      Description
tuple[Figure, DataFrame]  A tuple containing the figure and the permutation importance DataFrame.

Source code in flexcv/plot.py
def permutation_importance(
    model, model_name, X, y, features
) -> tuple[plt.Figure, pd.DataFrame]:
    """
    Calculates and plots the permutation importance of a model.
    Args:
        model (object): The model to calculate the permutation importance for.
        model_name (str): The name of the model.
        X (array-like): The features.
        y (array-like): The target.
        features (array-like | list): The feature names.

    Returns:
        (tuple[plt.Figure, pd.DataFrame]): A tuple containing the figure and the permutation importance dataframe.
    """
    fig = plt.figure()
    perm_importance = sk_permutation_importance(
        model, X, y, n_repeats=10, random_state=42, n_jobs=-1
    )
    features = np.array(features)
    sorted_idx = perm_importance.importances_mean.argsort()
    plt.barh(features[sorted_idx], perm_importance.importances_mean[sorted_idx])
    plt.xlabel(f"{model_name} Permutation Importance")
    df = pd.DataFrame(
        {
            "feature": features[sorted_idx],
            "importance": perm_importance.importances_mean[sorted_idx],
        }
    )
    return fig, df
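
Example usage (a minimal sketch; the generated data, column names, and RandomForestRegressor below are illustrative, and any fitted scikit-learn-style estimator should work):

import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

from flexcv.plot import permutation_importance

# Illustrative data and model (hypothetical feature names x0..x4).
X_arr, y_arr = make_regression(n_samples=200, n_features=5, random_state=42)
X = pd.DataFrame(X_arr, columns=[f"x{i}" for i in range(5)])
y = pd.Series(y_arr, name="y")
model = RandomForestRegressor(random_state=42).fit(X, y)

# Returns the bar chart figure and a DataFrame sorted by mean importance.
fig, importance_df = permutation_importance(
    model, "RandomForest", X, y, features=X.columns
)
fig.savefig("rf_permutation_importance.png")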

flexcv.plot.plot_merf_training_stats(run, model, model_name, num_clusters_to_plot=5)

Plots training statistics of a trained MERF model and logs the figures to a Neptune run. The plots show:

  • generalized log-likelihood (GLL) across iterations
  • trace and determinant of Sigma_b across iterations
  • sigma_e across iterations
  • b_i for num_clusters_to_plot example clusters across iterations
  • a histogram of the final learned b_i

Parameters:

Name                  Type  Description                                                      Default
run                   Run   Neptune run to log the figures to.                               required
model                 MERF  Trained MERF model.                                              required
model_name            str   Name of the model, used as a prefix for the Neptune log paths.   required
num_clusters_to_plot  int   Number of example b_i's to plot across iterations.               5

Returns:

Type  Description
None  The figures are logged to the Neptune run and closed; nothing is returned.

Source code in flexcv/plot.py
def plot_merf_training_stats(run, model, model_name, num_clusters_to_plot=5) -> None:
    """
    * Generalized log-likelihood across iterations
    * trace and determinant of Sigma_b across iterations
    * sigma_e across iterations
    * bi for num_clusters_to_plot across iterations
    * a histogram of the final learned bi

    Args:
        model (MERF): trained MERF model
        num_clusters_to_plot (int): number of example bi's to plot across iterations
        meta_string (string): A string for use as additional info in filename.

    Returns:
        (matplotlib.pyplot.fig): figure. Also draws to display.
    """
    # get number of columns of model.trained_b

    dpi = matplotlib_settings()
    fig, axs = plt.subplots(nrows=2, ncols=2, dpi=dpi)

    # Plot GLL
    axs[0, 0].plot(model.gll_history)
    axs[0, 0].grid(True)
    axs[0, 0].set_ylabel("GLL")
    axs[0, 0].set_title("GLL")
    set_axes_params(axs[0, 0])

    # Plot trace and determinant of Sigma_b (covariance matrix)
    det_sigmaB_history = [np.linalg.det(x) for x in model.D_hat_history]
    trace_sigmaB_history = [np.trace(x) for x in model.D_hat_history]
    axs[0, 1].plot(det_sigmaB_history, label=r"$det(\sigma_b)$")
    axs[0, 1].plot(trace_sigmaB_history, label=r"$trace(\sigma_b)$")
    axs[0, 1].grid(True)
    axs[0, 1].legend()
    axs[0, 1].set_title(r"Trace and Determinant of $\sigma_b$")
    set_axes_params(axs[0, 1])

    # Plot sigma_e across iterations
    axs[1, 0].plot(model.sigma2_hat_history)
    axs[1, 0].grid(True)
    axs[1, 0].set_ylabel(r"$\hat\sigma_e$")
    axs[1, 0].set_xlabel("Iteration")
    axs[1, 0].set_title(r"$\hat\sigma_e$ vs iterations")
    set_axes_params(axs[1, 0])

    # Plot bi across iterations
    b_hat_history_df = model.get_bhat_history_df()
    for cluster_id in model.cluster_counts.index[0:num_clusters_to_plot]:
        axs[1, 1].plot(
            b_hat_history_df.xs(cluster_id, level="cluster"), label=cluster_id
        )
    axs[1, 1].grid(True)
    axs[1, 1].set_ylabel(r"$\hat{b}$")
    axs[1, 1].set_xlabel("Iteration")
    tmp_title = "$b_i$ vs iterations\n" + f"({num_clusters_to_plot} clusters shown)"
    axs[1, 1].set_title(tmp_title)
    set_axes_params(axs[1, 1])
    run[f"{model_name}/Plots/Training_Stats"].append(fig)
    plt.close(fig)

    num_random_effects = model.trained_b.shape[1]
    dpi = matplotlib_settings()
    fig, axs = plt.subplots(nrows=1, ncols=num_random_effects, dpi=dpi)
    model.trained_b.hist(bins=100, ax=axs)
    if num_random_effects == 1:
        axs.set_xlabel("$b_i$")
        axs.set_title("Distribution of $b_i$ for Random Intercepts")
    else:
        try:
            for i in range(num_random_effects):
                axs[i].set_xlabel("$b_i$")
                axs[i].set_title(
                    "Distribution of $b_i$ for Random Effect " + str(i + 1)
                )

        except ValueError:
            print("Plotting Error:")
            print(
                "hist method requires numerical or datetime columns, nothing to plot."
            )
            run[
                "Errors"
            ] = "Error in plot_merf_training_stats: model.trained_b.hist(bins=100, ax=ax)"
    plt.tight_layout()
    run[f"{model_name}/Plots/Training_Hists"].append(fig)
    plt.close(fig)

    return None
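
Example usage (a minimal sketch; the Neptune project name is a placeholder, and the merf package, its fit(X, Z, clusters, y) signature, and the toy data are assumptions used only for illustration):

import neptune
import numpy as np
import pandas as pd
from merf import MERF  # assumption: standalone merf package providing the MERF class

from flexcv.plot import plot_merf_training_stats

run = neptune.init_run(project="workspace/project")  # placeholder project name

# Tiny illustrative clustered data set.
rng = np.random.default_rng(42)
n, n_clusters = 200, 5
clusters = pd.Series(rng.integers(0, n_clusters, n), name="cluster")
X = pd.DataFrame(rng.normal(size=(n, 3)), columns=["x0", "x1", "x2"])
Z = pd.DataFrame(np.ones((n, 1)))  # random-intercept design matrix
y = X["x0"] + 0.5 * clusters + rng.normal(size=n)

model = MERF(max_iterations=5)
model.fit(X, Z, clusters, y)  # assumption: merf's fit(X, Z, clusters, y) signature

plot_merf_training_stats(run, model, "MERF", num_clusters_to_plot=3)
run.stop()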

flexcv.plot.plot_qq(y, yhat, run=None, model_name='LM', log_destination='LM_Plots/QQ/')

Creates a QQ plot of the residuals (y - yhat) and logs it to a Neptune run.

Source code in flexcv/plot.py
def plot_qq(
    y: pd.Series,
    yhat: pd.Series,
    run=None,
    model_name: str = "LM",
    log_destination: str = "LM_Plots/QQ/",
):
    """Creates QQ plot and logs it to a Neptune Run."""
    plt.close()
    plt.cla()
    fig = sm.qqplot(y - yhat, line="r")
    plt.title(f"QQ plot of residuals - {model_name}")
    run[f"{log_destination}{model_name}_QQ"].append(fig)
    del fig
    return None
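
Example usage (a minimal sketch; the Neptune project name is a placeholder and the values are toy numbers):

import neptune
import pandas as pd

from flexcv.plot import plot_qq

run = neptune.init_run(project="workspace/project")  # placeholder project name

y = pd.Series([2.3, 1.8, 3.1, 2.9, 2.2])     # observed values
yhat = pd.Series([2.1, 2.0, 3.0, 2.7, 2.4])  # model predictions

# Logs the residual QQ plot under "LM_Plots/QQ/LM_QQ" in the run.
plot_qq(y, yhat, run=run, model_name="LM", log_destination="LM_Plots/QQ/")
run.stop()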

flexcv.plot.plot_shap(shap_values, X, run=None, log_destination='SHAP/', dependency=True, k_features=None)

Creates a SHAP summary (beeswarm) plot and, if dependency is True, SHAP dependence plots for each feature in k_features, and logs them to a Neptune run.

Source code in flexcv/plot.py
def plot_shap(
    shap_values,
    X: pd.DataFrame,
    run=None,
    log_destination="SHAP/",
    dependency: bool = True,
    k_features: pd.Series = None,
):
    """Creates SHAP summary beeswarm and dependency plots (if set to True) and logs them to a Neptune Run."""
    plt.close()
    plt.cla()
    shap.summary_plot(shap_values, X, show=False)
    matplotlib_settings()
    f = plt.gcf()
    run[f"{log_destination}Importance"].append(f)
    del f

    if dependency:
        for col_name in k_features:
            plt.close()
            plt.cla()
            shap.dependence_plot(col_name, shap_values, X, show=False, alpha=0.5)
            f = plt.gcf()
            run[f"{log_destination}Dependency"].append(f)
            del f

    return None
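
Example usage (a minimal sketch; the Neptune project name is a placeholder, and the data, model, and shap.TreeExplainer call are illustrative assumptions). Note that k_features must be provided when dependency=True:

import neptune
import pandas as pd
import shap
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

from flexcv.plot import plot_shap

run = neptune.init_run(project="workspace/project")  # placeholder project name

# Illustrative data and model (hypothetical feature names x0..x4).
X_arr, y_arr = make_regression(n_samples=200, n_features=5, random_state=42)
X = pd.DataFrame(X_arr, columns=[f"x{i}" for i in range(5)])
model = RandomForestRegressor(random_state=42).fit(X, y_arr)

# SHAP values for a tree-based model.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# k_features selects the columns that get a dependence plot.
plot_shap(shap_values, X, run=run, dependency=True, k_features=X.columns[:2])
run.stop()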