Files
Lab1/Funzioni/Statistica.ipynb

635 lines
18 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "e1f82c70",
"metadata": {},
"source": [
"# Import librerie"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "b2b806a7",
"metadata": {},
"outputs": [],
"source": [
"# Heavy lifting\n",
"import numpy as np\n",
"import pandas as pd\n",
"from scipy import stats\n",
"\n",
"# Mostrare i dati\n",
"import ipysheet\n",
"from IPython.display import display\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Calcolo simbolico\n",
"import sympy as sp"
]
},
{
"cell_type": "markdown",
"id": "96b0e93d",
"metadata": {},
"source": [
"# Compatibilità"
]
},
{
"cell_type": "markdown",
"id": "e50c6d18",
"metadata": {},
"source": [
"**compat( x1, x2, u1, u2 )**\n",
"\n",
"*input:*\n",
" - x1, x2: quantità\n",
" - u1, u2: incertezze associate\n",
"\n",
"*output:*\n",
" - k: fattore di compatibilità (scarto normalizzato)\n",
" - diff: differenza x1 - x2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "d07a554d",
"metadata": {},
"outputs": [],
"source": [
"def compat(x1, x2, u1,u2):\n",
" \n",
" diff = x1 - x2\n",
" k = np.abs(diff) / np.sqrt(u1**2 + u2**2)\n",
" return k, diff"
]
},
{
"cell_type": "markdown",
"id": "bc3e1539",
"metadata": {},
"source": [
"# Classe Data\n",
"\n",
"*attributi:*\n",
"\n",
"- campione: dati grezzi\n",
"- analisi_stat:\n",
" - valori medi\n",
" - incertezze strumentali, statistiche e complessive\n",
" - lunghezza campione\n",
" - outlier\n",
" - residui con incertezza\n",
"- param_reg:\n",
" - A, B\n",
" - uA, uB, covAB\n",
" - chi², P"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2649eb4e",
"metadata": {},
"outputs": [],
"source": [
"class Data:\n",
"\n",
" def __init__(self,\n",
" campione,\n",
" analisi_stat=None, param_reg=None):\n",
" \n",
" self.campione = campione\n",
" self.analisi_stat = analisi_stat\n",
" self.param_reg = param_reg"
]
},
{
"cell_type": "markdown",
"id": "a21e942b",
"metadata": {},
"source": [
"## Analisi con criterio di Chauvenet"
]
},
{
"cell_type": "markdown",
"id": "050285f6",
"metadata": {},
"source": [
"**data.stat_chauv( prefissi, err_strumentale )**\n",
"\n",
"*input data.campione:*\n",
" - prefix: nome/lista di nomi delle variabili da analizzare\n",
" - err_strumentale: incertezza/lista di incertezze da risoluzione strumentale\n",
"\n",
"*output data.analisi_stat:* \n",
" - \\<prefix>: media del campione\n",
" - u\\<prefix>_strum: incertezzaa da risoluzione strumentale\n",
" - u\\<prefix>_stat: incertezza statistica (s/sqrt(N))\n",
" - u\\<prefix>: incertezza complessiva\n",
" - n\\<prexix>: lunghezza del campione\n",
" - out\\<prefix>: lista outlier"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "91931bb9",
"metadata": {},
"outputs": [],
"source": [
"# Probabilità\n",
"def p_t_student(valori, err_strumentale):\n",
"\n",
" n = len(valori)\n",
" GdL = n - 1\n",
"\n",
" media = valori.mean()\n",
" s = valori.std(ddof=1)\n",
"\n",
" s = np.sqrt(s**2 + err_strumentale**2)\n",
" return 2 * (1 - stats.t.cdf(np.abs(valori - media) / s, df=GdL))\n",
"\n",
"\n",
"# Indice del peggiore outlier o None\n",
"def trova_outlier(valori, err_strumentale):\n",
"\n",
" n = len(valori)\n",
" soglia = 1 / (2 * n)\n",
" p = p_t_student(valori, err_strumentale)\n",
" idx_min = np.argmin(p)\n",
"\n",
" if p[idx_min] < soglia:\n",
" return idx_min\n",
" \n",
" return None\n",
"\n",
"\n",
"# Rimozione outlier\n",
"def rimuovi_outlier(valori, err_strumentale):\n",
" \n",
" rimossi = []\n",
" campione = valori.copy()\n",
"\n",
" while len(campione) > 2:\n",
" idx = trova_outlier(campione, err_strumentale)\n",
"\n",
" if idx is None: # nessun outlier: stop\n",
" break\n",
"\n",
" rimossi.append(campione[idx])\n",
" campione = np.delete(campione, idx)\n",
"\n",
" return campione, rimossi"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "70c37b24",
"metadata": {},
"outputs": [],
"source": [
"# Analisi statistica con criterio di Chauvenet\n",
"def stat_chauv(self, prefissi, err_strumentale):\n",
"\n",
" analisi_stat = self.analisi_stat\n",
" campione = self.campione\n",
" \n",
" \n",
" if isinstance(prefissi, str):\n",
" prefissi = [prefissi]\n",
" if isinstance(err_strumentale, (int, float)):\n",
" err_strumentale = [err_strumentale] * len(prefissi)\n",
"\n",
" for prefix, err_strum in zip(prefissi, err_strumentale):\n",
" cols = [col for col in campione.columns if col.startswith(prefix)]\n",
"\n",
" for i, row in campione.iterrows():\n",
" valori = row[cols].dropna().values.astype(float)\n",
" campione, rimossi = rimuovi_outlier(valori, err_strum)\n",
"\n",
" n = len(campione)\n",
" s = campione.std(ddof=1)\n",
" s = s / np.sqrt(n)\n",
" u = np.sqrt(s**2 + err_strum**2)\n",
"\n",
" analisi_stat.at[i, prefix] = campione.mean()\n",
" analisi_stat.at[i, f\"u{prefix}_strum\"] = err_strum\n",
" analisi_stat.at[i, f\"u{prefix}_stat\"] = s\n",
" analisi_stat.at[i, f\"u{prefix}\"] = u\n",
" analisi_stat.at[i, f\"n{prefix}\"] = n\n",
" analisi_stat.at[i, f\"out{prefix}\"] = rimossi\n",
"\n",
" return self\n",
"\n",
"\n",
"# Aggiunta a classe Data\n",
"Data.stat_chauv = stat_chauv"
]
},
{
"cell_type": "markdown",
"id": "86fcfbe5",
"metadata": {},
"source": [
"## Regressione lineare\n",
"y = Ax + By"
]
},
{
"cell_type": "markdown",
"id": "8e6a2c2e",
"metadata": {},
"source": [
"**data.reg_lin( stampa_param=True, plot_regressione=True, residui=True,** \n",
"&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&nbsp;**x_label=\"\", y_label=\"\", r_label=\"\",** \n",
"&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&nbsp;**titolo_reg=\"\", titolo_residui=\"\",** \n",
"&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&nbsp;**cd_A=4, cd_B=4, scala_barre=1 )** \n",
"\n",
"*input data.analisi_stat:*\n",
" - x, y: campioni x e y\n",
" - ux, uy: relative incertezze\n",
"\n",
"*input:*\n",
" - stampa_param: opzione stampa parametri regressione\n",
" - plot_regressione: opzione plot regressione lineare\n",
" - residui: opzione calcolo e plot residui\n",
" - x_label, y_label, r_label: etichette assi plot regressione e residui\n",
" - titolo_reg, titolo_residui: titolo plot regressione e residui\n",
" - cd_A, cd_B: cifre decimali visualizzazione parametri\n",
" - scala barre: scala ingrandimento barre di errore nella regressione\n",
"\n",
"*output param_reg:*\n",
" - A, B: parametri regressione\n",
" - uA, uB, covAB: relative incertezze e covarianze\n",
" - chi², P: chi quadro e relativa probabilità\n",
"\n",
"*output analisi_stat:*\n",
" - r, ur: residui e relativa incertezza"
]
},
{
"cell_type": "markdown",
"id": "6ccd1e25",
"metadata": {},
"source": [
"### Residui"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "7c815197",
"metadata": {},
"outputs": [],
"source": [
"# Plot residui\n",
" \n",
"def plot_residui(x, r,\n",
" ux, ur,\n",
" x_label, r_label, titolo):\n",
" \n",
" fig, ax = plt.subplots(figsize=(8, 5))\n",
"\n",
" # Residui con barre d'errore\n",
" ax.errorbar(\n",
" x, r,\n",
" xerr=ux, yerr=ur,\n",
" fmt='o', color=sns.color_palette()[0],\n",
" ecolor='gray', elinewidth=1, capsize=3,\n",
" markersize=5, label=\"Residui\"\n",
" )\n",
"\n",
" # Linea dello zero\n",
" ax.axhline(0, color='red', linestyle='--', linewidth=1)\n",
"\n",
" # Estetica\n",
" sns.despine(ax=ax)\n",
" ax.set_xlabel(x_label)\n",
" ax.set_ylabel(r_label)\n",
" ax.set_title(titolo)\n",
" ax.legend()\n",
" ax.grid(True, linestyle=':', linewidth=0.5, alpha=0.7)\n",
"\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "53ba0123",
"metadata": {},
"outputs": [],
"source": [
"# Calcolo residui\n",
"\n",
"def residui(analisi_stat,\n",
" A_num, B_num,\n",
" uA_num, uB_num, covAB_num):\n",
"\n",
" # Variabili simboliche\n",
" A, B, x, y = sp.symbols('A B x y', real=True)\n",
" uA, uB, ux, uy = sp.symbols('uA uB ux uy', positive=True)\n",
" covAB = sp.symbols('covAB', real=True)\n",
"\n",
" # Residuo: r = y - (Ax + B)\n",
" r = y - (A*x + B)\n",
"\n",
" # Propagazione errore (con covarianza A,B)\n",
" dr_dA = sp.diff(r, A)\n",
" dr_dB = sp.diff(r, B)\n",
" dr_dx = sp.diff(r, x)\n",
" dr_dy = sp.diff(r, y)\n",
"\n",
" u_r = sp.sqrt(\n",
" (dr_dA * uA)**2 +\n",
" (dr_dB * uB)**2 +\n",
" (dr_dx * ux)**2 +\n",
" (dr_dy * uy)**2 +\n",
" 2 * dr_dA * dr_dB * covAB\n",
" )\n",
"\n",
" # Funzioni numeriche\n",
" r_fn = sp.lambdify((x , y , A , B ), r, 'numpy')\n",
" u_r_fn = sp.lambdify(\n",
" (x , y , ux , uy , A , B , uA , uB , covAB ),\n",
" u_r , 'numpy'\n",
" )\n",
"\n",
" # Calcolo numerico\n",
" analisi_stat[\"r\"] = r_fn(\n",
" analisi_stat[\"x\"],\n",
" analisi_stat[\"y\"],\n",
" A_num,\n",
" B_num\n",
" )\n",
"\n",
" analisi_stat[\"ur\"] = u_r_fn(\n",
" analisi_stat[\"x\"], analisi_stat[\"y\"],\n",
" analisi_stat[\"ux\"], analisi_stat[\"uy\"],\n",
" A_num, B_num,\n",
" uA_num, uB_num, covAB_num\n",
" )\n",
"\n",
" return analisi_stat"
]
},
{
"cell_type": "markdown",
"id": "20b61f26",
"metadata": {},
"source": [
"### Regressione"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "299faa92",
"metadata": {},
"outputs": [],
"source": [
"# Plot regressione\n",
"\n",
"def plot_reg(x, y, ux, uy,\n",
" A, B, uA, uB, P,\n",
" x_label, y_label, titolo,\n",
" cd_A=4, cd_B=4, scala_barre=1):\n",
"\n",
" fig, ax = plt.subplots(figsize=(8, 5))\n",
"\n",
" x_fit = np.linspace(x.min(), x.max(), 300)\n",
" y_fit = A * x_fit + B\n",
"\n",
" ax.errorbar(\n",
" x, y,\n",
" xerr = scala_barre * ux,\n",
" yerr = scala_barre * uy,\n",
" fmt='o', color=sns.color_palette()[0],\n",
" ecolor='gray', elinewidth=1, capsize=3,\n",
" markersize=5, label=f\"Dati (barre errore x{scala_barre})\"\n",
" )\n",
" ax.plot(\n",
" x_fit, y_fit,\n",
" color='red', linewidth=1.5,\n",
" label = f\"$A={A:.{cd_A}f}\\\\pm{uA:.{cd_A}f}$\\n\"\n",
" f\"$B={B:.{cd_B}f}\\\\pm{uB:.{cd_B}f}$\\n\"\n",
" f\"$P(chi², ∞)={P:.4f}$\\n\"\n",
" )\n",
"\n",
" sns.despine(ax=ax)\n",
" ax.set_xlabel(x_label)\n",
" ax.set_ylabel(y_label)\n",
" ax.set_title(titolo)\n",
" ax.legend(fontsize=9)\n",
" ax.grid(True, linestyle=':', linewidth=0.5, alpha=0.7)\n",
"\n",
" plt.tight_layout()\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "9d3f0378",
"metadata": {},
"outputs": [],
"source": [
"# Funzioni regressione\n",
"\n",
"def uy_equiv(x, y, ux, uy):\n",
" \n",
" # Stima iniziale di A con sola uy\n",
" sy2 = uy**2\n",
" Sw = np.sum(1 / sy2)\n",
" Sx = np.sum(x / sy2)\n",
" Sxx = np.sum(x**2 / sy2)\n",
" Sy = np.sum(y / sy2)\n",
" Sxy = np.sum(x * y / sy2)\n",
" delta = Sxx * Sw - Sx**2\n",
" A_est = (Sxy * Sw - Sx * Sy) / delta\n",
"\n",
" # Propagazione\n",
" u_eq = np.sqrt(uy**2 + A_est**2 * ux**2)\n",
" return u_eq\n",
"\n",
"\n",
"def reg_lin(self,\n",
" stampa_param=True, plot_regressione=True, residui=True,\n",
" x_label=\"\", y_label=\"\", r_label=\"\",\n",
" titolo_reg=\"\", titolo_residui=\"\",\n",
" cd_A=4, cd_B=4, scala_barre=1):\n",
" \n",
" x = np.asarray(self.analisi_stat[\"x\"], dtype=float)\n",
" y = np.asarray(self.analisi_stat[\"y\"], dtype=float)\n",
" ux = np.asarray(self.analisi_stat[\"ux\"], dtype=float)\n",
" uy = np.asarray(self.analisi_stat[\"uy\"], dtype=float)\n",
"\n",
" # Propagazione errore x → y\n",
" uy_eq = uy_equiv(x, y, ux, uy)\n",
"\n",
" # Somme pesate\n",
" w = 1.0 / uy_eq**2\n",
" Sw = np.sum(w)\n",
" Sx = np.sum(w * x)\n",
" Sxx = np.sum(w * x**2)\n",
" Sy = np.sum(w * y)\n",
" Sxy = np.sum(w * x * y)\n",
" delta = Sxx * Sw - Sx**2\n",
"\n",
" # Parametri\n",
" A = (Sxy * Sw - Sx * Sy) / delta\n",
" B = (Sxx * Sy - Sxy * Sx) / delta\n",
" uA = np.sqrt(Sw / delta)\n",
" uB = np.sqrt(Sxx / delta)\n",
" covAB = -Sx / delta\n",
"\n",
" # Chi quadro\n",
" x2 = np.sum((y - A * x - B)**2 / uy_eq**2)\n",
" dof = len(x) - 2\n",
" P = stats.chi2.sf(x2, dof)\n",
"\n",
" # Raccolta parametri\n",
" self.param_reg[\"A\"] = A\n",
" self.param_reg[\"B\"] = B\n",
" self.param_reg[\"uA\"] = uA\n",
" self.param_reg[\"uB\"] = uB\n",
" self.param_reg[\"covAB\"] = covAB\n",
" self.param_reg[\"x2\"] = x2\n",
" self.param_reg[\"P\"] = P\n",
"\n",
" # Stampa\n",
" if stampa_param == True:\n",
" print(\"Ax + B : \")\n",
" print(f\"A = {A:.{cd_A}f} ± {uA:.{cd_A}f}\")\n",
" print(f\"B = {B:.{cd_B}f} ± {uB:.{cd_B}f}\")\n",
" print(f\"covAB = {covAB:.6f}\")\n",
" print(f\"chi² = {x2:.2f}\")\n",
" print(f\"P(chi², ∞) = {P:.2f}\")\n",
" \n",
" # Plot\n",
" if plot_regressione == True:\n",
" plot_reg(x, y, ux, uy,\n",
" A, B, uA, uB, P,\n",
" x_label, y_label, titolo_reg,\n",
" cd_A, cd_B, scala_barre)\n",
" \n",
" # Residui\n",
" if residui == True:\n",
" self = self.residui(A, B, uA, uB, covAB)\n",
" plot_residui(x, self.analisi_stat[\"r\"],\n",
" ux, self.analisi_stat[\"ur\"],\n",
" x_label, r_label, titolo_residui)\n",
"\n",
" return self\n",
"\n",
"\n",
"# Aggiunta a classe Data\n",
"Data.reg_lin = reg_lin"
]
},
{
"cell_type": "markdown",
"id": "b311d9e2",
"metadata": {},
"source": [
"# Plot gaussiane"
]
},
{
"cell_type": "markdown",
"id": "170849f1",
"metadata": {},
"source": [
"**plot_gauss( gaussiane )**\n",
"\n",
"*input gaussiane:* \n",
"[ (mi, sigma, colore label),... ]\n",
" - mi: valore medio (misura)\n",
" - sigma: deviazione standard (incertezza)\n",
" - colore: indice colore nella palette\n",
" - label: etichetta per la didascalia"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "f515a2bb",
"metadata": {},
"outputs": [],
"source": [
"def plot_gauss(gaussiane):\n",
" \n",
" # Creazione figura\n",
" plt.figure(figsize=(12, 7))\n",
"\n",
" # Creazione asse x\n",
" xMin = float('inf')\n",
" xMax = float('-inf')\n",
"\n",
" for mu, sigma, _, _ in gaussiane:\n",
" minimoLocale = mu - 4 * sigma\n",
" massimoLocale = mu + 4 * sigma\n",
" \n",
" if minimoLocale < xMin:\n",
" xMin = minimoLocale\n",
" \n",
" if massimoLocale > xMax:\n",
" xMax = massimoLocale\n",
" \n",
" x = np.linspace(xMin, xMax, 1000)\n",
"\n",
" # Ciclo gaussiane\n",
" for mu, sigma, colore, etichetta in gaussiane:\n",
" y = stats.norm.pdf(x, mu, sigma)\n",
" plt.plot(x, y, color=sns.color_palette()[colore], linewidth=1, label=etichetta)\n",
" \n",
" puntiLinee = [mu - sigma, mu, mu + sigma]\n",
" for px in puntiLinee:\n",
" py = stats.norm.pdf(px, mu, sigma) \n",
" plt.vlines(x=px,\n",
" ymin=0, ymax=py,\n",
" colors=sns.color_palette()[colore],\n",
" linestyles='dashed', linewidth=1)\n",
" \n",
" # Dettagli estetici finali\n",
" plt.ylim(bottom=0)\n",
" plt.title('Confronto dati')\n",
" plt.xlabel('k')\n",
" plt.legend()\n",
"\n",
" # Mostriamo il grafico\n",
" plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}