Files
Lab1/funzioni.ipynb

356 lines
9.0 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "17aa372a",
"metadata": {},
"source": [
"# Tutte le funzioni utili ordinate e nominate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7f6f9db",
"metadata": {},
"outputs": [],
"source": [
"pip install matplotlib numpy pandas scipy"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e31f5b72",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import scipy as sc"
]
},
{
"cell_type": "markdown",
"id": "d8797256",
"metadata": {},
"source": [
"### Calcolo di compatibilità\n",
"\n",
"input:\n",
" - x1, x2: i valori numerici delle quantità\n",
" - u1, u2: le incertezze associate\n",
"\n",
"output:\n",
" - k: scarto normalizzato\n",
" - diff: differenza x1 - x2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a88eb843",
"metadata": {},
"outputs": [],
"source": [
"def compatibilita(x1, x2, u1, u2):\n",
" diff = x1 - x2\n",
" k = np.abs(diff) / np.sqrt(u1**2 + u2**2)\n",
" return k, diff\n",
"\n",
"print(compatibilita(1, 2, 0.5, 0.7))"
]
},
{
"cell_type": "markdown",
"id": "72995c62",
"metadata": {},
"source": [
"### Calcola la media e l'incertezza campionaria\n",
"\n",
"input:\n",
" - x: array numpy (anche con NaN)\n",
" - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n",
"\n",
"output:\n",
" - media\n",
" - uA: deviazione standarrd campionaria (s/sqrt(N))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d561eb0",
"metadata": {},
"outputs": [],
"source": [
"def media(x, dim = 0):\n",
" uA = np.nanstd(x, axis=dim, ddof=1)\n",
" media = np.nanmean(x, axis=dim)\n",
"\n",
" N = np.sum(~np.isnan(x), axis=dim)\n",
"\n",
" uA = uA / np.sqrt(N)\n",
"\n",
" return media, uA\n",
"\n",
"\n",
"df = pd.DataFrame({\n",
" \"a\": [1, 2, np.nan],\n",
" \"b\": [2, 3, 4]\n",
"})\n",
"media, uA = media(df)\n",
"print(media)\n",
"print(uA)"
]
},
{
"cell_type": "markdown",
"id": "f4e8604e",
"metadata": {},
"source": [
"### Media pesata\n",
"\n",
"input:\n",
" - x: valori\n",
" - sigmaX: sigma della x\n",
" - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n",
"\n",
"output:\n",
" - media: media pesata\n",
" - sigma: incertezza sulla media pesata"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffdeea69",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def mediaPesata(x, ux, dim = 0):\n",
" x_arr = x.to_numpy() if isinstance(x, (pd.Series, pd.DataFrame)) else np.asarray(x)\n",
" sigma_arr = ux.to_numpy() if isinstance(ux, (pd.Series, pd.DataFrame)) else np.asarray(ux)\n",
"\n",
" w = 1.0 / (sigma_arr ** 2)\n",
"\n",
" num = np.nansum(w * x_arr, axis=dim)\n",
" den = np.nansum(w, axis=dim)\n",
"\n",
" media = num / den\n",
" sigma = np.sqrt(1.0 / den)\n",
"\n",
" return media, sigma\n",
"\n",
"df = pd.DataFrame({\n",
" \"x\": [10, 12, 11, np.nan],\n",
" \"sx\": [0.3, 0.4, 0.2, np.nan]\n",
"})\n",
"\n",
"media, sigma = mediaPesata(df[\"x\"], df[\"sx\"], dim=0)\n",
"\n",
"print(\"media pesata:\", media)\n",
"print(\"sigma:\", sigma)"
]
},
{
"cell_type": "markdown",
"id": "c5441c1c",
"metadata": {},
"source": [
"### Identificazione autlier con t di Student\n",
"Questo sistema permette di calcolare la probabilità che un valore faccia parte di un dataset\n",
"input:\n",
" - x: dati\n",
" - ux: incertezze\n",
" - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n",
"\n",
"outpu:\n",
" - i: maschera degli outlier\n",
" - xi: maschera dei valori outlier\n",
" - NP: numero atteso di valori"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2c4561c9",
"metadata": {},
"outputs": [],
"source": [
"def outlier_t(x, ux, dim=0):\n",
" # Conversione posizionale\n",
" x_arr = x.to_numpy() if isinstance(x, (pd.Series, pd.DataFrame)) else np.asarray(x)\n",
" u_arr = ux.to_numpy() if isinstance(ux, (pd.Series, pd.DataFrame)) else np.asarray(ux)\n",
"\n",
" # Deviazione standard campionaria\n",
" s = np.nanstd(x_arr, axis=dim, ddof=1)\n",
" mi = np.nanmean(x_arr, axis=dim)\n",
" \n",
" N = np.sum(~np.isnan(x_arr), axis=dim)\n",
"\n",
" GdL = N - 1\n",
"\n",
" # Reshape magico (funziona)\n",
" shape = list(x_arr.shape)\n",
" shape[dim] = 1\n",
" s = s.reshape(shape)\n",
" mi = mi.reshape(shape)\n",
" N = N.reshape(shape)\n",
" GdL = GdL.reshape(shape)\n",
" if u_arr.ndim == 0 or u_arr.size == 1:\n",
" u_arr = float(u_arr.flat[0])\n",
"\n",
" # Statistica t\n",
" sigma_tot = np.sqrt(s**2 + u_arr**2)\n",
" t = np.abs(x_arr - mi) / sigma_tot\n",
" tail_p = 2 * sc.stats.t.sf(t, GdL)\n",
"\n",
" # Numero atteso\n",
" NP = N * tail_p\n",
"\n",
" # Outlier\n",
" i = NP < 0.5\n",
" xi = x_arr[i]\n",
"\n",
" return i, xi, NP\n",
"\n",
"\n",
"df = pd.DataFrame({\n",
" \"x\": [10, 12, 50, 11, np.nan, 13, 9, 7, 2],\n",
" \"u\": [1, 1, 1, 1, np.nan, 0.3, 1, 8, 3]\n",
"})\n",
"\n",
"i, xi, NP = outlier_t(df[\"x\"], df[\"u\"], dim = 0)\n",
"\n",
"print(\"Maschera:\", i)\n",
"print(\"Outlier:\", xi)\n",
"print(\"NP:\", NP)\n"
]
},
{
"cell_type": "markdown",
"id": "07d4fc7d",
"metadata": {},
"source": [
"### Regressione lineare pesata\n",
"input:\n",
" - x: dati sulla x\n",
" - y: dati sulla y\n",
" - ux: sigma x\n",
" - uy: sigma y\n",
"\n",
"output:\n",
" - A, B: Ax + B\n",
" - uA, uB: incertezze\n",
" - cov_AB: covarianza\n",
" - chi: chi^2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "78c30380",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A = 1.9900 +- 0.0892\n",
"B = 0.0500 +- 0.2959\n",
"cov_AB = -0.023880\n",
"p-value chi² = 0.7187\n"
]
}
],
"source": [
"def sigma_y_equiv(x, y, sigma_x, sigma_y):\n",
" # Stima iniziale di A con sola sigma_y\n",
" sy2 = sigma_y**2\n",
" Sw = np.sum(1 / sy2)\n",
" Sx = np.sum(x / sy2)\n",
" Sxx = np.sum(x**2 / sy2)\n",
" Sy = np.sum(y / sy2)\n",
" Sxy = np.sum(x * y / sy2)\n",
" delta = Sxx * Sw - Sx**2\n",
" A_est = (Sxy * Sw - Sx * Sy) / delta\n",
"\n",
" # Propagazione\n",
" sigma_eq = np.sqrt(sigma_y**2 + A_est**2 * sigma_x**2)\n",
" return sigma_eq\n",
"\n",
"\n",
"def reg_lin(x, y, sigma_x, sigma_y):\n",
" x = np.asarray(x, dtype=float)\n",
" y = np.asarray(y, dtype=float)\n",
" sigma_x = np.asarray(sigma_x, dtype=float)\n",
" sigma_y = np.asarray(sigma_y, dtype=float)\n",
"\n",
" # Propagazione errore x → y\n",
" sigma_y_eq = sigma_y_equiv(x, y, sigma_x, sigma_y)\n",
"\n",
" # Somme pesate\n",
" w = 1.0 / sigma_y_eq**2\n",
" Sw = np.sum(w)\n",
" Sx = np.sum(w * x)\n",
" Sxx = np.sum(w * x**2)\n",
" Sy = np.sum(w * y)\n",
" Sxy = np.sum(w * x * y)\n",
" delta = Sxx * Sw - Sx**2\n",
"\n",
" # Parametri\n",
" A = (Sxy * Sw - Sx * Sy) / delta\n",
" B = (Sxx * Sy - Sxy * Sx) / delta\n",
" sigma_A = np.sqrt(Sw / delta)\n",
" sigma_B = np.sqrt(Sxx / delta)\n",
" cov_AB = -Sx / delta\n",
"\n",
" # Chi quadro\n",
" x2 = np.sum((y - A * x - B)**2 / sigma_y_eq**2)\n",
" dof = len(x) - 2\n",
" chi = sc.stats.chi2.sf(x2, dof) # P(X² > x2)\n",
"\n",
" return A, B, sigma_A, sigma_B, cov_AB, chi\n",
"\n",
"df = pd.DataFrame({\n",
" \"x\":[1.0, 2.0, 3.0, 4.0, 5.0],\n",
" \"y\":[2.1, 3.9, 6.2, 7.8, 10.1],\n",
" \"ux\":[0.1, 0.1, 0.1, 0.1, 0.1],\n",
" \"uy\":[0.2, 0.2, 0.2, 0.2, 0.2]\n",
" })\n",
"\n",
"A, B, sA, sB, covAB, chi = reg_lin(df[\"x\"], df[\"y\"], df[\"ux\"], df[\"uy\"])\n",
"print(f\"A = {A:.4f} +- {sA:.4f}\")\n",
"print(f\"B = {B:.4f} +- {sB:.4f}\")\n",
"print(f\"cov_AB = {covAB:.6f}\")\n",
"print(f\"p-value chi² = {chi:.4f}\")\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}