{
"distribution_profiles": [
{
"column": str,
"sample_size": int,
"skewness": float,
"kurtosis": float,
"best_fit_distribution": str | None,
"best_fit_distribution_parameters": [{"name": str, "value": float}] | None,
"best_fit_sample_size": int,
"shapiro_wilk_p_value": float, # present when Shapiro-Wilk was computed
"shapiro_wilk_reason_not_computed": str, # present instead when Shapiro-Wilk was not computed
"shapiro_wilk_sample_size": int,
"modality": "unimodal" | "bimodal" | "multimodal",
"peaks": [
{"location": float, "prominence": float},
# ... up to the 20 most prominent peaks, ordered by descending prominence
],
},
# ... numeric columns only
],
"fitted_distributions": [
{
"column": str,
"sample_size": int,
"reason_not_computed": str, # present when column has fewer than 2 observed values
"distributions": [
{
"distribution_name": str,
"bayesian_information_criterion": float, # absent when fitting failed for this distribution
"parameters": [{"name": str, "value": float}], # absent when fitting failed for this distribution
"goodness_of_fit_p_value": float, # absent when fitting failed for this distribution
"reason_not_computed": str, # present when fitting failed for this distribution
},
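# ... one entry per candidate distribution, ranked by descending BIC — NOTE(review): a lower BIC normally indicates a better fit; confirm this order is intended and not ascending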
],
},
# ... numeric columns only
],
"numeric_histograms": [
{
"column": str,
"plot_data": [
{
"left": float,
"center": float,
"right": float,
"percentage": float,
},
# ... 30 entries
],
},
# ... numeric columns only
],
"categorical_histograms": [
{
"column": str,
"plot_data": [
{
"category": str,
"percentage": float,
},
# ... one entry per unique category, ordered by descending percentage
],
},
# ... categorical columns with ≤ 10 unique categories only
],
"kernel_density_estimates": [
{
"column": str,
"bandwidth": float,
"plot_data": [
{
"sample_point": float,
"density_estimate": float,
},
# ... 100 entries
],
},
# ... numeric columns only
],
}