369 lines
11 KiB
Plaintext
369 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "17aa372a",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Tutte le funzioni utili ordinate e nominate"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e7f6f9db",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# pip install matplotlib numpy pandas scipy"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "e31f5b72",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import scipy as sc"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d8797256",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Calcolo di compatibilità\n",
|
|
"\n",
|
|
"input:\n",
|
|
" - x1, x2: i valori numerici delle quantità\n",
|
|
" - u1, u2: le incertezze associate\n",
|
|
"\n",
|
|
"output:\n",
|
|
" - k: scarto normalizzato\n",
|
|
" - diff: differenza x1 - x2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a88eb843",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def compatibilita(x1, x2, u1, u2):\n",
|
|
" diff = x1 - x2\n",
|
|
" k = np.abs(diff) / np.sqrt(u1**2 + u2**2)\n",
|
|
" return k, diff\n",
|
|
"\n",
|
|
"print(compatibilita(1, 2, 0.5, 0.7))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "72995c62",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Calcola la media e l'incertezza campionaria\n",
|
|
"\n",
|
|
"input:\n",
|
|
" - x: array numpy (anche con NaN)\n",
|
|
" - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n",
|
|
"\n",
|
|
"output:\n",
|
|
" - media\n",
|
|
" - uA: deviazione standarrd campionaria (s/sqrt(N))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "3d561eb0",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[1.5 3. ]\n",
|
|
"a 0.50000\n",
|
|
"b 0.57735\n",
|
|
"dtype: float64\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def media(x, dim = 0):\n",
|
|
" uA = np.nanstd(x, axis=dim, ddof=1)\n",
|
|
" media = np.nanmean(x, axis=dim)\n",
|
|
"\n",
|
|
" N = np.sum(~np.isnan(x), axis=dim)\n",
|
|
"\n",
|
|
" uA = uA / np.sqrt(N)\n",
|
|
"\n",
|
|
" return media, uA\n",
|
|
"\n",
|
|
"\n",
|
|
"df = pd.DataFrame({\n",
|
|
" \"a\": [1, 2, np.nan],\n",
|
|
" \"b\": [2, 3, 4]\n",
|
|
"})\n",
|
|
"media, uA = media(df)\n",
|
|
"print(media)\n",
|
|
"print(uA)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "f4e8604e",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Media pesata\n",
|
|
"\n",
|
|
"input:\n",
|
|
" - x: valori\n",
|
|
" - sigmaX: sigma della x\n",
|
|
" - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n",
|
|
"\n",
|
|
"output:\n",
|
|
" - media: media pesata\n",
|
|
" - sigma: incertezza sulla media pesata"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ffdeea69",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"def mediaPesata(x, ux, dim = 0):\n",
|
|
" x_arr = x.to_numpy() if isinstance(x, (pd.Series, pd.DataFrame)) else np.asarray(x)\n",
|
|
" sigma_arr = ux.to_numpy() if isinstance(ux, (pd.Series, pd.DataFrame)) else np.asarray(ux)\n",
|
|
"\n",
|
|
" w = 1.0 / (sigma_arr ** 2)\n",
|
|
"\n",
|
|
" num = np.nansum(w * x_arr, axis=dim)\n",
|
|
" den = np.nansum(w, axis=dim)\n",
|
|
"\n",
|
|
" media = num / den\n",
|
|
" sigma = np.sqrt(1.0 / den)\n",
|
|
"\n",
|
|
" return media, sigma\n",
|
|
"\n",
|
|
"df = pd.DataFrame({\n",
|
|
" \"x\": [10, 12, 11, np.nan],\n",
|
|
" \"sx\": [0.3, 0.4, 0.2, np.nan]\n",
|
|
"})\n",
|
|
"\n",
|
|
"media, sigma = mediaPesata(df[\"x\"], df[\"sx\"], dim=0)\n",
|
|
"\n",
|
|
"print(\"media pesata:\", media)\n",
|
|
"print(\"sigma:\", sigma)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c5441c1c",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Identificazione autlier con t di Student\n",
|
|
"Questo sistema permette di calcolare la probabilità che un valore faccia parte di un dataset\n",
|
|
"input:\n",
|
|
" - x: dati\n",
|
|
" - ux: incertezze\n",
|
|
" - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n",
|
|
"\n",
|
|
"outpu:\n",
|
|
" - i: maschera degli outlier\n",
|
|
" - xi: maschera dei valori outlier\n",
|
|
" - NP: numero atteso di valori"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2c4561c9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def outlier_t(x, ux, dim=0):\n",
|
|
" # Conversione posizionale\n",
|
|
" x_arr = x.to_numpy() if isinstance(x, (pd.Series, pd.DataFrame)) else np.asarray(x)\n",
|
|
" u_arr = ux.to_numpy() if isinstance(ux, (pd.Series, pd.DataFrame)) else np.asarray(ux)\n",
|
|
"\n",
|
|
" # Deviazione standard campionaria\n",
|
|
" s = np.nanstd(x_arr, axis=dim, ddof=1)\n",
|
|
" mi = np.nanmean(x_arr, axis=dim)\n",
|
|
" \n",
|
|
" N = np.sum(~np.isnan(x_arr), axis=dim)\n",
|
|
"\n",
|
|
" GdL = N - 1\n",
|
|
"\n",
|
|
" # Reshape magico (funziona)\n",
|
|
" shape = list(x_arr.shape)\n",
|
|
" shape[dim] = 1\n",
|
|
" s = s.reshape(shape)\n",
|
|
" mi = mi.reshape(shape)\n",
|
|
" N = N.reshape(shape)\n",
|
|
" GdL = GdL.reshape(shape)\n",
|
|
" if u_arr.ndim == 0 or u_arr.size == 1:\n",
|
|
" u_arr = float(u_arr.flat[0])\n",
|
|
"\n",
|
|
" # Statistica t\n",
|
|
" sigma_tot = np.sqrt(s**2 + u_arr**2)\n",
|
|
" t = np.abs(x_arr - mi) / sigma_tot\n",
|
|
" tail_p = 2 * sc.stats.t.sf(t, GdL)\n",
|
|
"\n",
|
|
" # Numero atteso\n",
|
|
" NP = N * tail_p\n",
|
|
"\n",
|
|
" # Outlier\n",
|
|
" i = NP < 0.5\n",
|
|
" xi = x_arr[i]\n",
|
|
"\n",
|
|
" return i, xi, NP\n",
|
|
"\n",
|
|
"\n",
|
|
"df = pd.DataFrame({\n",
|
|
" \"x\": [10, 12, 50, 11, np.nan, 13, 9, 7, 2],\n",
|
|
" \"u\": [1, 1, 1, 1, np.nan, 0.3, 1, 8, 3]\n",
|
|
"})\n",
|
|
"\n",
|
|
"i, xi, NP = outlier_t(df[\"x\"], df[\"u\"], dim = 0)\n",
|
|
"\n",
|
|
"print(\"Maschera:\", i)\n",
|
|
"print(\"Outlier:\", xi)\n",
|
|
"print(\"NP:\", NP)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "07d4fc7d",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Regressione lineare pesata\n",
|
|
"input:\n",
|
|
" - x: dati sulla x\n",
|
|
" - y: dati sulla y\n",
|
|
" - ux: sigma x\n",
|
|
" - uy: sigma y\n",
|
|
"\n",
|
|
"output:\n",
|
|
" - A, B: Ax + B\n",
|
|
" - uA, uB: incertezze\n",
|
|
" - cov_AB: covarianza\n",
|
|
" - chi: chi^2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "78c30380",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "NameError",
|
|
"evalue": "name 'pd' is not defined",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
|
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 49\u001b[39m\n\u001b[32m 45\u001b[39m chi = sc.stats.chi2.cdf(x2, dof) \u001b[38;5;66;03m# P(X² > x2)\u001b[39;00m\n\u001b[32m 47\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m A, B, sigma_A, sigma_B, cov_AB, chi\n\u001b[32m---> \u001b[39m\u001b[32m49\u001b[39m df = \u001b[43mpd\u001b[49m.DataFrame({\n\u001b[32m 50\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mx\u001b[39m\u001b[33m\"\u001b[39m:[\u001b[32m1.0\u001b[39m, \u001b[32m2.0\u001b[39m, \u001b[32m3.0\u001b[39m, \u001b[32m4.0\u001b[39m, \u001b[32m5.0\u001b[39m],\n\u001b[32m 51\u001b[39m \u001b[33m\"\u001b[39m\u001b[33my\u001b[39m\u001b[33m\"\u001b[39m:[\u001b[32m2.1\u001b[39m, \u001b[32m3.9\u001b[39m, \u001b[32m6.2\u001b[39m, \u001b[32m7.8\u001b[39m, \u001b[32m10.1\u001b[39m],\n\u001b[32m 52\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mux\u001b[39m\u001b[33m\"\u001b[39m:[\u001b[32m0.1\u001b[39m, \u001b[32m0.1\u001b[39m, \u001b[32m0.1\u001b[39m, \u001b[32m0.1\u001b[39m, \u001b[32m0.1\u001b[39m],\n\u001b[32m 53\u001b[39m \u001b[33m\"\u001b[39m\u001b[33muy\u001b[39m\u001b[33m\"\u001b[39m:[\u001b[32m0.2\u001b[39m, \u001b[32m0.2\u001b[39m, \u001b[32m0.2\u001b[39m, \u001b[32m0.2\u001b[39m, \u001b[32m0.2\u001b[39m]\n\u001b[32m 54\u001b[39m })\n\u001b[32m 56\u001b[39m A, B, sA, sB, covAB, chi = reg_lin(df[\u001b[33m\"\u001b[39m\u001b[33mx\u001b[39m\u001b[33m\"\u001b[39m], df[\u001b[33m\"\u001b[39m\u001b[33my\u001b[39m\u001b[33m\"\u001b[39m], df[\u001b[33m\"\u001b[39m\u001b[33mux\u001b[39m\u001b[33m\"\u001b[39m], df[\u001b[33m\"\u001b[39m\u001b[33muy\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m 57\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mAx + B : \u001b[39m\u001b[33m\"\u001b[39m)\n",
|
|
"\u001b[31mNameError\u001b[39m: name 'pd' is not defined"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def sigma_y_equiv(x, y, sigma_x, sigma_y):\n",
|
|
" # Stima iniziale di A con sola sigma_y\n",
|
|
" sy2 = sigma_y**2\n",
|
|
" Sw = np.sum(1 / sy2)\n",
|
|
" Sx = np.sum(x / sy2)\n",
|
|
" Sxx = np.sum(x**2 / sy2)\n",
|
|
" Sy = np.sum(y / sy2)\n",
|
|
" Sxy = np.sum(x * y / sy2)\n",
|
|
" delta = Sxx * Sw - Sx**2\n",
|
|
" A_est = (Sxy * Sw - Sx * Sy) / delta\n",
|
|
"\n",
|
|
" # Propagazione\n",
|
|
" sigma_eq = np.sqrt(sigma_y**2 + A_est**2 * sigma_x**2)\n",
|
|
" return sigma_eq\n",
|
|
"\n",
|
|
"\n",
|
|
"def reg_lin(x, y, sigma_x, sigma_y):\n",
|
|
" x = np.asarray(x, dtype=float)\n",
|
|
" y = np.asarray(y, dtype=float)\n",
|
|
" sigma_x = np.asarray(sigma_x, dtype=float)\n",
|
|
" sigma_y = np.asarray(sigma_y, dtype=float)\n",
|
|
"\n",
|
|
" # Propagazione errore x → y\n",
|
|
" sigma_y_eq = sigma_y_equiv(x, y, sigma_x, sigma_y)\n",
|
|
"\n",
|
|
" # Somme pesate\n",
|
|
" w = 1.0 / sigma_y_eq**2\n",
|
|
" Sw = np.sum(w)\n",
|
|
" Sx = np.sum(w * x)\n",
|
|
" Sxx = np.sum(w * x**2)\n",
|
|
" Sy = np.sum(w * y)\n",
|
|
" Sxy = np.sum(w * x * y)\n",
|
|
" delta = Sxx * Sw - Sx**2\n",
|
|
"\n",
|
|
" # Parametri\n",
|
|
" A = (Sxy * Sw - Sx * Sy) / delta\n",
|
|
" B = (Sxx * Sy - Sxy * Sx) / delta\n",
|
|
" sigma_A = np.sqrt(Sw / delta)\n",
|
|
" sigma_B = np.sqrt(Sxx / delta)\n",
|
|
" cov_AB = -Sx / delta\n",
|
|
"\n",
|
|
" # Chi quadro\n",
|
|
" x2 = np.sum((y - A * x - B)**2 / sigma_y_eq**2)\n",
|
|
" dof = len(x) - 2\n",
|
|
" chi = sc.stats.chi2.cdf(x2, dof) # P(X² > x2)\n",
|
|
"\n",
|
|
" return A, B, sigma_A, sigma_B, cov_AB, chi\n",
|
|
"\n",
|
|
"df = pd.DataFrame({\n",
|
|
" \"x\":[1.0, 2.0, 3.0, 4.0, 5.0],\n",
|
|
" \"y\":[2.1, 3.9, 6.2, 7.8, 10.1],\n",
|
|
" \"ux\":[0.1, 0.1, 0.1, 0.1, 0.1],\n",
|
|
" \"uy\":[0.2, 0.2, 0.2, 0.2, 0.2]\n",
|
|
" })\n",
|
|
"\n",
|
|
"A, B, sA, sB, covAB, chi = reg_lin(df[\"x\"], df[\"y\"], df[\"ux\"], df[\"uy\"])\n",
|
|
"print(\"Ax + B : \")\n",
|
|
"print(f\"A = {A:.4f} +- {sA:.4f}\")\n",
|
|
"print(f\"B = {B:.4f} +- {sB:.4f}\")\n",
|
|
"print(f\"cov_AB = {covAB:.6f}\")\n",
|
|
"print(f\"p-value chi² = {chi:.4f}\")\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|