{ "cells": [ { "cell_type": "markdown", "id": "17aa372a", "metadata": {}, "source": [ "# Tutte le funzioni utili ordinate e nominate" ] }, { "cell_type": "code", "execution_count": 2, "id": "e7f6f9db", "metadata": {}, "outputs": [], "source": [ "# pip install matplotlib numpy pandas scipy" ] }, { "cell_type": "code", "execution_count": 3, "id": "e31f5b72", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import scipy as sc" ] }, { "cell_type": "markdown", "id": "d8797256", "metadata": {}, "source": [ "### Calcolo di compatibilità\n", "\n", "input:\n", " - x1, x2: i valori numerici delle quantità\n", " - u1, u2: le incertezze associate\n", "\n", "output:\n", " - k: scarto normalizzato\n", " - diff: differenza x1 - x2" ] }, { "cell_type": "code", "execution_count": 4, "id": "a88eb843", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(np.float64(1.162476387438193), -1)\n" ] } ], "source": [ "def compatibilita(x1, x2, u1, u2):\n", " diff = x1 - x2\n", " k = np.abs(diff) / np.sqrt(u1**2 + u2**2)\n", " return k, diff\n", "\n", "print(compatibilita(1, 2, 0.5, 0.7))" ] }, { "cell_type": "markdown", "id": "72995c62", "metadata": {}, "source": [ "### Calcola la media e l'incertezza campionaria\n", "\n", "input:\n", " - x: array numpy (anche con NaN)\n", " - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n", "\n", "output:\n", " - media\n", " - uA: deviazione standarrd campionaria (s/sqrt(N))" ] }, { "cell_type": "code", "execution_count": 14, "id": "3d561eb0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1.5 3. ]\n", "a 0.50000\n", "b 0.57735\n", "dtype: float64\n" ] } ], "source": [ "def media(x, dim = 0):\n", " uA = np.nanstd(x, axis=dim, ddof=1)\n", " media = np.nanmean(x, axis=dim)\n", "\n", " N = np.sum(~np.isnan(x), axis=dim)\n", "\n", " uA = uA / np.sqrt(N)\n", "\n", " return media, uA\n", "\n", "\n", "df = pd.DataFrame({\n", " \"a\": [1, 2, np.nan],\n", " \"b\": [2, 3, 4]\n", "})\n", "media, uA = media(df)\n", "print(media)\n", "print(uA)" ] }, { "cell_type": "markdown", "id": "f4e8604e", "metadata": {}, "source": [ "### Media pesata\n", "\n", "input:\n", " - x: valori\n", " - sigmaX: sigma della x\n", " - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n", "\n", "output:\n", " - media: media pesata\n", " - sigma: incertezza sulla media pesata" ] }, { "cell_type": "code", "execution_count": 6, "id": "ffdeea69", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "media pesata: 10.885245901639344\n", "sigma: 0.15364425591947517\n" ] } ], "source": [ "\n", "def mediaPesata(x, ux, dim = 0):\n", " x_arr = x.to_numpy() if isinstance(x, (pd.Series, pd.DataFrame)) else np.asarray(x)\n", " sigma_arr = ux.to_numpy() if isinstance(ux, (pd.Series, pd.DataFrame)) else np.asarray(ux)\n", "\n", " w = 1.0 / (sigma_arr ** 2)\n", "\n", " num = np.nansum(w * x_arr, axis=dim)\n", " den = np.nansum(w, axis=dim)\n", "\n", " media = num / den\n", " sigma = np.sqrt(1.0 / den)\n", "\n", " return media, sigma\n", "\n", "df = pd.DataFrame({\n", " \"x\": [10, 12, 11, np.nan],\n", " \"sx\": [0.3, 0.4, 0.2, np.nan]\n", "})\n", "\n", "media, sigma = mediaPesata(df[\"x\"], df[\"sx\"], dim=0)\n", "\n", "print(\"media pesata:\", media)\n", "print(\"sigma:\", sigma)" ] }, { "cell_type": "markdown", "id": "c5441c1c", "metadata": {}, "source": [ "### Identificazione autlier con t di Student\n", "Questo sistema permette di calcolare la probabilità che un valore faccia parte di un dataset\n", "input:\n", " - x: dati\n", " - ux: incertezze\n", " - dim: dimensione in cui calcolare (0:righe / 1:colonne) [dafault 0]\n", "\n", "outpu:\n", " - i: maschera degli outlier\n", " - xi: maschera dei valori outlier\n", " - NP: numero atteso di valori" ] }, { "cell_type": "code", "execution_count": 7, "id": "2c4561c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Maschera: [False False True False False False False False False]\n", "Outlier: [50.]\n", "NP: [6.26782319 7.07278344 0.37856532 6.66695415 nan 7.48227073\n", " 5.87724449 5.44192616 3.56273523]\n" ] } ], "source": [ "def outlier_t(x, ux, dim=0):\n", " # Conversione posizionale\n", " x_arr = x.to_numpy() if isinstance(x, (pd.Series, pd.DataFrame)) else np.asarray(x)\n", " u_arr = ux.to_numpy() if isinstance(ux, (pd.Series, pd.DataFrame)) else np.asarray(ux)\n", "\n", " # Deviazione standard campionaria\n", " s = np.nanstd(x_arr, axis=dim, ddof=1)\n", " mi = np.nanmean(x_arr, axis=dim)\n", " \n", " N = np.sum(~np.isnan(x_arr), axis=dim)\n", "\n", " GdL = N - 1\n", "\n", " # Reshape magico (funziona)\n", " shape = list(x_arr.shape)\n", " shape[dim] = 1\n", " s = s.reshape(shape)\n", " mi = mi.reshape(shape)\n", " N = N.reshape(shape)\n", " GdL = GdL.reshape(shape)\n", " if u_arr.ndim == 0 or u_arr.size == 1:\n", " u_arr = float(u_arr.flat[0])\n", "\n", " # Statistica t\n", " sigma_tot = np.sqrt(s**2 + u_arr**2)\n", " t = np.abs(x_arr - mi) / sigma_tot\n", " tail_p = 2 * sc.stats.t.sf(t, GdL)\n", "\n", " # Numero atteso\n", " NP = N * tail_p\n", "\n", " # Outlier\n", " i = NP < 0.5\n", " xi = x_arr[i]\n", "\n", " return i, xi, NP\n", "\n", "\n", "df = pd.DataFrame({\n", " \"x\": [10, 12, 50, 11, np.nan, 13, 9, 7, 2],\n", " \"u\": [1, 1, 1, 1, np.nan, 0.3, 1, 8, 3]\n", "})\n", "\n", "i, xi, NP = outlier_t(df[\"x\"], df[\"u\"], dim = 0)\n", "\n", "print(\"Maschera:\", i)\n", "print(\"Outlier:\", xi)\n", "print(\"NP:\", NP)\n" ] }, { "cell_type": "markdown", "id": "07d4fc7d", "metadata": {}, "source": [ "### Regressione lineare pesata\n", "input:\n", " - x: dati sulla x\n", " - y: dati sulla y\n", " - ux: sigma x\n", " - uy: sigma y\n", "\n", "output:\n", " - A, B: Ax + B\n", " - uA, uB: incertezze\n", " - cov_AB: covarianza\n", " - chi: chi^2" ] }, { "cell_type": "code", "execution_count": 8, "id": "78c30380", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Ax + B : \n", "A = 1.9900 +- 0.0892\n", "B = 0.0500 +- 0.2959\n", "cov_AB = -0.023880\n", "p-value chi² = 0.2813\n" ] } ], "source": [ "def sigma_y_equiv(x, y, sigma_x, sigma_y):\n", " # Stima iniziale di A con sola sigma_y\n", " sy2 = sigma_y**2\n", " Sw = np.sum(1 / sy2)\n", " Sx = np.sum(x / sy2)\n", " Sxx = np.sum(x**2 / sy2)\n", " Sy = np.sum(y / sy2)\n", " Sxy = np.sum(x * y / sy2)\n", " delta = Sxx * Sw - Sx**2\n", " A_est = (Sxy * Sw - Sx * Sy) / delta\n", "\n", " # Propagazione\n", " sigma_eq = np.sqrt(sigma_y**2 + A_est**2 * sigma_x**2)\n", " return sigma_eq\n", "\n", "\n", "def reg_lin(x, y, sigma_x, sigma_y):\n", " x = np.asarray(x, dtype=float)\n", " y = np.asarray(y, dtype=float)\n", " sigma_x = np.asarray(sigma_x, dtype=float)\n", " sigma_y = np.asarray(sigma_y, dtype=float)\n", "\n", " # Propagazione errore x → y\n", " sigma_y_eq = sigma_y_equiv(x, y, sigma_x, sigma_y)\n", "\n", " # Somme pesate\n", " w = 1.0 / sigma_y_eq**2\n", " Sw = np.sum(w)\n", " Sx = np.sum(w * x)\n", " Sxx = np.sum(w * x**2)\n", " Sy = np.sum(w * y)\n", " Sxy = np.sum(w * x * y)\n", " delta = Sxx * Sw - Sx**2\n", "\n", " # Parametri\n", " A = (Sxy * Sw - Sx * Sy) / delta\n", " B = (Sxx * Sy - Sxy * Sx) / delta\n", " sigma_A = np.sqrt(Sw / delta)\n", " sigma_B = np.sqrt(Sxx / delta)\n", " cov_AB = -Sx / delta\n", "\n", " # Chi quadro\n", " x2 = np.sum((y - A * x - B)**2 / sigma_y_eq**2)\n", " dof = len(x) - 2\n", " chi = sc.stats.chi2.cdf(x2, dof) # P(X² > x2)\n", "\n", " return A, B, sigma_A, sigma_B, cov_AB, chi\n", "\n", "df = pd.DataFrame({\n", " \"x\":[1.0, 2.0, 3.0, 4.0, 5.0],\n", " \"y\":[2.1, 3.9, 6.2, 7.8, 10.1],\n", " \"ux\":[0.1, 0.1, 0.1, 0.1, 0.1],\n", " \"uy\":[0.2, 0.2, 0.2, 0.2, 0.2]\n", " })\n", "\n", "A, B, sA, sB, covAB, chi = reg_lin(df[\"x\"], df[\"y\"], df[\"ux\"], df[\"uy\"])\n", "print(\"Ax + B : \")\n", "print(f\"A = {A:.4f} +- {sA:.4f}\")\n", "print(f\"B = {B:.4f} +- {sB:.4f}\")\n", "print(f\"cov_AB = {covAB:.6f}\")\n", "print(f\"p-value chi² = {chi:.4f}\")\n", "\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.11" } }, "nbformat": 4, "nbformat_minor": 5 }