Merged PR 5371: Flagging w. random predictor + DWH connection improvements + restructure
Connecting to DWH: * Any existing notebook (AB, Flagging & Template) now have an initial simplified block to connect to the DWH. This is done to handle the DRY, as we're going to start adding more and more experiment notebooks very soon (and we have already 4 notebooks). * This reads from a new `utils/dwh_utils.py` in which we handle the connection and test it accordingly. * This also requires an optional `settings.json` path configuration to avoid warnings (not errors) when reading from `dwh_utils`. Flagging: * All flagging notebooks now go within the folder `data_driven_risk_assessment`. The already existing notebook `flagging_performance_monitoring` has also been moved here. * There's a new `experiments` folder to store the different experiments on flagging. * A new notebook has been added containing a straight-forward baseline: a random predictor, which randomly flags as risk bookings on a test set based on the observed booking claim rate on a previous train dataset. I confirm that all existing notebooks work well after the connection changes. Once merged, or to review, you will need to re-install requirements.txt as I added sklearn. Related work items: #30804
This commit is contained in:
parent
2662f994f0
commit
38f63afbf7
8 changed files with 634 additions and 533 deletions
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"python.analysis.extraPaths": ["./data-jupyter-notebooks/utils"]
|
||||
}
|
||||
|
|
@ -12,89 +12,39 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This script connects to a Data Warehouse (DWH) using PostgreSQL. \n",
|
||||
"# This should be common for all Notebooks, but you might need to adjust the path to the `dwh_utils` module.\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(\"./utils\")) # Adjust path if needed\n",
|
||||
"\n",
|
||||
"from dwh_utils import read_credentials, create_postgres_engine, query_to_dataframe, test_connection\n",
|
||||
"\n",
|
||||
"# --- Connect to DWH ---\n",
|
||||
"creds = read_credentials()\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds)\n",
|
||||
"\n",
|
||||
"# --- Test Query ---\n",
|
||||
"test_connection()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pathlib\n",
|
||||
"import yaml\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sqlalchemy import create_engine\n",
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from statsmodels.stats.proportion import proportions_ztest\n",
|
||||
"from scipy import stats\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/uri/.superhog-dwh/credentials.yml\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"CREDS_FILEPATH = pathlib.Path.home() / \".superhog-dwh\" / \"credentials.yml\"\n",
|
||||
"print(CREDS_FILEPATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prepare connection to DWH\n",
|
||||
"# Function to read credentials from the YAML file\n",
|
||||
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
|
||||
" with open(yaml_path, \"r\") as file:\n",
|
||||
" credentials = yaml.safe_load(file)\n",
|
||||
" return credentials[\"envs\"][env]\n",
|
||||
"# Function to create a PostgreSQL connection string\n",
|
||||
"def create_postgres_engine(creds: dict):\n",
|
||||
" user = creds[\"user\"]\n",
|
||||
" password = creds[\"password\"]\n",
|
||||
" host = creds[\"host\"]\n",
|
||||
" port = creds[\"port\"]\n",
|
||||
" database = creds[\"database\"]\n",
|
||||
" # Create the connection string for SQLAlchemy\n",
|
||||
" connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
|
||||
" engine = create_engine(connection_string)\n",
|
||||
" return engine\n",
|
||||
"# Function to execute a query and return the result as a pandas DataFrame\n",
|
||||
"def query_to_dataframe(engine, query: str):\n",
|
||||
" with engine.connect() as connection:\n",
|
||||
" df = pd.read_sql(query, connection)\n",
|
||||
" return df\n",
|
||||
"dwh_creds = read_credentials(yaml_path=CREDS_FILEPATH, env=\"prd\")\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds=dwh_creds)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ?column?\n",
|
||||
"0 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Silly query to test things out\n",
|
||||
"test_df = query_to_dataframe(engine=dwh_pg_engine, query=\"SELECT 1;\")\n",
|
||||
"print(test_df.head())"
|
||||
"from scipy import stats"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -135,21 +85,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Empty DataFrame\n",
|
||||
"Columns: [ab_test_name, variation, last_update, guest_journeys_count, guest_journey_started_count, guest_journey_completed_count, guest_journey_with_responses_count, guest_journey_with_payment_count, guest_revenue_count, deposit_count, waiver_count, check_in_cover_count, guest_revenue_sum, deposit_sum, waiver_sum, check_in_cover_sum, guest_revenue_avg_per_guest_journey, guest_revenue_sdv_per_guest_journey, deposit_avg_per_guest_journey, deposit_sdv_per_guest_journey, waiver_avg_per_guest_journey, waiver_sdv_per_guest_journey, check_in_cover_avg_per_guest_journey, check_in_cover_sdv_per_guest_journey, csat_avg_per_guest_journey_with_response, csat_sdv_per_guest_journey_with_response]\n",
|
||||
"Index: []\n",
|
||||
"\n",
|
||||
"[0 rows x 26 columns]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Query to extract data\n",
|
||||
"data_extraction_query = \"\"\"\n",
|
||||
|
|
@ -213,24 +151,9 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "IndexError",
|
||||
"evalue": "single positional indexer is out-of-bounds",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[19], line 9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Find the total count and other metadata\u001b[39;00m\n\u001b[1;32m 8\u001b[0m total_count \u001b[38;5;241m=\u001b[39m grouped_data\u001b[38;5;241m.\u001b[39msum()\n\u001b[0;32m----> 9\u001b[0m ab_test_name \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mab_test_name\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miloc\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;66;03m# Assuming all rows are for the same A/B test\u001b[39;00m\n\u001b[1;32m 10\u001b[0m last_update \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlast_update\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mmax()\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# Create a pie chart using Seaborn styling\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/Workspace/data-jupyter-notebooks/venv/lib/python3.12/site-packages/pandas/core/indexing.py:1191\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1189\u001b[0m maybe_callable \u001b[38;5;241m=\u001b[39m com\u001b[38;5;241m.\u001b[39mapply_if_callable(key, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj)\n\u001b[1;32m 1190\u001b[0m maybe_callable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_deprecated_callable_usage(key, maybe_callable)\n\u001b[0;32m-> 1191\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_getitem_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmaybe_callable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Workspace/data-jupyter-notebooks/venv/lib/python3.12/site-packages/pandas/core/indexing.py:1752\u001b[0m, in \u001b[0;36m_iLocIndexer._getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot index by location index with a non-integer key\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1751\u001b[0m \u001b[38;5;66;03m# validate the location\u001b[39;00m\n\u001b[0;32m-> 1752\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_integer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1754\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_ixs(key, axis\u001b[38;5;241m=\u001b[39maxis)\n",
|
||||
"File \u001b[0;32m~/Workspace/data-jupyter-notebooks/venv/lib/python3.12/site-packages/pandas/core/indexing.py:1685\u001b[0m, in \u001b[0;36m_iLocIndexer._validate_integer\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1683\u001b[0m len_axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj\u001b[38;5;241m.\u001b[39m_get_axis(axis))\n\u001b[1;32m 1684\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m len_axis \u001b[38;5;129;01mor\u001b[39;00m key \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m-\u001b[39mlen_axis:\n\u001b[0;32m-> 1685\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msingle positional indexer is out-of-bounds\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"\u001b[0;31mIndexError\u001b[0m: single positional indexer is out-of-bounds"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Ensure Seaborn styling\n",
|
||||
"sns.set_theme(style=\"whitegrid\")\n",
|
||||
|
|
@ -531,25 +454,7 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" metric relative_difference p_value\n",
|
||||
"0 conversion_rate -0.022089 0.732507\n",
|
||||
"1 payment_rate -0.019231 0.843783\n",
|
||||
"2 waiver_payment_rate -0.109637 0.344132\n",
|
||||
"3 deposit_payment_rate 0.358974 0.297872\n",
|
||||
"4 CIH_payment_rate -0.235577 0.722020\n",
|
||||
"5 avg_guest_revenue_per_gj -0.087267 0.434288\n",
|
||||
"6 avg_waiver_revenue_per_gj -0.104328 0.391617\n",
|
||||
"7 avg_deposit_revenue_per_gj 0.264787 0.459148\n",
|
||||
"8 avg_CIH_revenue_per_gj -0.191473 0.779274\n",
|
||||
"9 avg_csat_per_gj_with_response 0.133721 0.051334\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Call the function to calculate the Z-test for each metric and aggregate the results\n",
|
||||
"z_test_results_df = run_z_tests(df, z_stat_metric_definition=z_stat_metric_definition, variations=variations)\n",
|
||||
|
|
@ -579,36 +484,7 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"WelcomePageDestinationContext results (last updated at 2025-05-27)\n",
|
||||
"\n",
|
||||
"Total Guest Journeys affected by this A/B test: 420 - Total Guest Revenue: 5280 GBP.\n",
|
||||
" Variation GenericImageAndCopy: Guest Journeys 212 (50.5%) - Guest Revenue: 2786 GBP (52.8%).\n",
|
||||
" Variation ContextSpecificImageAndCopy: Guest Journeys 208 (49.5%) - Guest Revenue: 2494 GBP (47.2%).\n",
|
||||
"\n",
|
||||
"Main Metrics - Comparing ContextSpecificImageAndCopy vs. GenericImageAndCopy.\n",
|
||||
"\n",
|
||||
"CONVERSION RATE (not significant): 68.3% vs. 69.8% (-1.5% ppts.| -2.2%).\n",
|
||||
"PAYMENT RATE (not significant): 49.0% vs. 50.0% (-1.0% ppts.| -1.9%).\n",
|
||||
"AVG GUEST REVENUE PER GJ (not significant): 11.99 vs. 13.14 (-1.15 ppts.| -8.7%).\n",
|
||||
"\n",
|
||||
"Other Metrics\n",
|
||||
"\n",
|
||||
"WAIVER PAYMENT RATE (not significant): 36.5% vs. 41.0% (-4.5% ppts.| -11.0%).\n",
|
||||
"DEPOSIT PAYMENT RATE (not significant): 11.5% vs. 8.5% (3.0% ppts.| 35.9%).\n",
|
||||
"CIH PAYMENT RATE (not significant): 1.4% vs. 1.9% (-0.4% ppts.| -23.6%).\n",
|
||||
"AVG WAIVER REVENUE PER GJ (not significant): 11.05 vs. 12.34 (-1.29 ppts.| -10.4%).\n",
|
||||
"AVG DEPOSIT REVENUE PER GJ (not significant): 0.82 vs. 0.65 (0.17 ppts.| 26.5%).\n",
|
||||
"AVG CIH REVENUE PER GJ (not significant): 0.13 vs. 0.16 (-0.03 ppts.| -19.1%).\n",
|
||||
"AVG CSAT PER GJ WITH RESPONSE (not significant): 3.75 vs. 3.31 (0.44 ppts.| 13.4%).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('\\n{} results (last updated at {})\\n'.format(ab_test_name, last_update))\n",
|
||||
"\n",
|
||||
|
|
|
|||
336
data_driven_risk_assessment/experiments/random_predictor.ipynb
Normal file
336
data_driven_risk_assessment/experiments/random_predictor.ipynb
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84dcd475",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DDRA - Random Predictor\n",
|
||||
"\n",
|
||||
"## Initial setup\n",
|
||||
"This first section just ensures that the connection to DWH works correctly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "12368ce1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This script connects to a Data Warehouse (DWH) using PostgreSQL. \n",
|
||||
"# This should be common for all Notebooks, but you might need to adjust the path to the `dwh_utils` module.\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(\"../../utils\")) # Adjust path if needed\n",
|
||||
"\n",
|
||||
"from dwh_utils import read_credentials, create_postgres_engine, query_to_dataframe, test_connection\n",
|
||||
"\n",
|
||||
"# --- Connect to DWH ---\n",
|
||||
"creds = read_credentials()\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds)\n",
|
||||
"\n",
|
||||
"# --- Test Query ---\n",
|
||||
"test_connection()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c86f94f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Extraction\n",
|
||||
"In this section we extract the data for our random predictor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3e3ed391",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Initialise all imports needed for the Notebook\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from datetime import date\n",
|
||||
"from sklearn.metrics import (\n",
|
||||
" precision_score,\n",
|
||||
" recall_score,\n",
|
||||
" fbeta_score,\n",
|
||||
" confusion_matrix\n",
|
||||
")\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db5e3098",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Query to extract data\n",
|
||||
"data_extraction_query = \"\"\"\n",
|
||||
"select \n",
|
||||
" id_booking,\n",
|
||||
" booking_created_date_utc,\n",
|
||||
" has_resolution_incident\n",
|
||||
"from intermediate.int_booking_summary\n",
|
||||
"where \n",
|
||||
" -- 1. Bookings from New Dash users with Id Deal\n",
|
||||
" is_user_in_new_dash = True and \n",
|
||||
" is_missing_id_deal = False and\n",
|
||||
" -- 2. Protected Bookings with a Protection or a Deposit Management service\n",
|
||||
" (has_protection_service_business_type or \n",
|
||||
" has_deposit_management_service_business_type) and\n",
|
||||
" -- 3. Bookings with flagging categorisation (this excludes Cancelled/Incomplete/Rejected bookings)\n",
|
||||
" is_booking_flagged_as_risk is not null and \n",
|
||||
" -- 4. Booking is completed\n",
|
||||
" is_booking_past_completion_date = True \n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Retrieve Data from Query\n",
|
||||
"df = query_to_dataframe(engine=dwh_pg_engine, query=data_extraction_query)\n",
|
||||
"print(df.head())\n",
|
||||
"print(f\"Total Bookings: {len(df):,}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d36c9276",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Processing\n",
|
||||
"\n",
|
||||
"We implement a very simple processing:\n",
|
||||
"1. We split the dataset between Train and Test. The strategy followed is just to apply a cutoff date on booking creation.\n",
|
||||
"2. We retrieve from the Train dataset the actual distribution of Bookings that raise a Resolution Incident (or Claim).\n",
|
||||
"3. We randomly flag in the Test dataset bookings according to the actual distribution observed from the Train set.\n",
|
||||
"4. This random flagging is stored in a new column called \"is_flagged_as_risk\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "943ef7d6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Ensure booking_created_date_utc is datetime\n",
|
||||
"df[\"booking_created_date_utc\"] = pd.to_datetime(df[\"booking_created_date_utc\"])\n",
|
||||
"\n",
|
||||
"# Split data into train and test\n",
|
||||
"cutoff_date = pd.Timestamp(\"2025-04-01\")\n",
|
||||
"train_df = df[df[\"booking_created_date_utc\"] < cutoff_date]\n",
|
||||
"test_df = df[df[\"booking_created_date_utc\"] >= cutoff_date].copy() # Copy for modification\n",
|
||||
"print(f\"Train set size: {len(train_df):,} bookings\")\n",
|
||||
"print(f\"Test set size : {len(test_df):,} bookings\")\n",
|
||||
"\n",
|
||||
"# Get the distribution from train set\n",
|
||||
"positive_rate = train_df[\"has_resolution_incident\"].mean()\n",
|
||||
"print(f\"Positive rate (has_resolution_incident = True) in train set: {positive_rate:.2%}\")\n",
|
||||
"\n",
|
||||
"# Apply random prediction to test set using that distribution\n",
|
||||
"np.random.seed(123) # For reproducibility\n",
|
||||
"test_df[\"is_flagged_as_risk\"] = np.random.rand(len(test_df)) < positive_rate\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc2fcc89",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Evaluation\n",
|
||||
"This section aims to evaluate the Test set on the performance of our random predictor vs. the actual Resolution Incidents.\n",
|
||||
"We start by computing the standard classification scores. This is later stored in a dataframe to be used in an adapted version of the confusion matrix, which is heavily inspired from the flagging_performance_monitoring notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f7f3b80e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Actual and predicted\n",
|
||||
"y_true = test_df[\"has_resolution_incident\"]\n",
|
||||
"y_pred = test_df[\"is_flagged_as_risk\"]\n",
|
||||
"\n",
|
||||
"# Compute confusion matrix: [ [TN, FP], [FN, TP] ]\n",
|
||||
"tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()\n",
|
||||
"\n",
|
||||
"# Total predictions\n",
|
||||
"total = tp + tn + fp + fn\n",
|
||||
"\n",
|
||||
"# Compute all requested metrics\n",
|
||||
"recall = recall_score(y_true, y_pred)\n",
|
||||
"precision = precision_score(y_true, y_pred)\n",
|
||||
"f1 = fbeta_score(y_true, y_pred, beta=1)\n",
|
||||
"f2 = fbeta_score(y_true, y_pred, beta=2)\n",
|
||||
"fpr = fp / (fp + tn) if (fp + tn) != 0 else 0\n",
|
||||
"\n",
|
||||
"# Scores relative to total\n",
|
||||
"tp_score = tp / total\n",
|
||||
"tn_score = tn / total\n",
|
||||
"fp_score = fp / total\n",
|
||||
"fn_score = fn / total\n",
|
||||
"\n",
|
||||
"# Create DataFrame\n",
|
||||
"summary_df = pd.DataFrame([{\n",
|
||||
" \"flagging_analysis_type\": \"RISK_VS_CLAIM\",\n",
|
||||
" \"count_total\": total,\n",
|
||||
" \"count_true_positive\": tp,\n",
|
||||
" \"count_true_negative\": tn,\n",
|
||||
" \"count_false_positive\": fp,\n",
|
||||
" \"count_false_negative\": fn,\n",
|
||||
" \"true_positive_score\": tp_score,\n",
|
||||
" \"true_negative_score\": tn_score,\n",
|
||||
" \"false_positive_score\": fp_score,\n",
|
||||
" \"false_negative_score\": fn_score,\n",
|
||||
" \"recall_score\": recall,\n",
|
||||
" \"precision_score\": precision,\n",
|
||||
" \"false_positive_rate_score\": fpr,\n",
|
||||
" \"f1_score\": f1,\n",
|
||||
" \"f2_score\": f2\n",
|
||||
"}])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db3bf8e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Function to compute the confusion matrix from the summary dataframe:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8885dc39",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_confusion_matrix_from_df(df, flagging_analysis_type):\n",
|
||||
"\n",
|
||||
" # Subset - just retrieve one row depending on the flagging_analysis_type\n",
|
||||
" row = df[df['flagging_analysis_type'] == flagging_analysis_type].iloc[0]\n",
|
||||
"\n",
|
||||
" # Define custom x-axis labels and wording\n",
|
||||
" if flagging_analysis_type == 'RISK_VS_CLAIM':\n",
|
||||
" x_labels = ['With Submitted Claim', 'Without Submitted Claim']\n",
|
||||
" outcome_label = \"submitted claim\"\n",
|
||||
" elif flagging_analysis_type == 'RISK_VS_SUBMITTED_PAYOUT':\n",
|
||||
" x_labels = ['With Submitted Payout', 'Without Submitted Payout']\n",
|
||||
" outcome_label = \"submitted payout\"\n",
|
||||
" else:\n",
|
||||
" x_labels = ['Actual Positive', 'Actual Negative'] \n",
|
||||
" outcome_label = \"outcome\"\n",
|
||||
"\n",
|
||||
" # Confusion matrix structure\n",
|
||||
" cm = np.array([\n",
|
||||
" [row['count_true_positive'], row['count_false_positive']],\n",
|
||||
" [row['count_false_negative'], row['count_true_negative']]\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
" # Create annotations for the confusion matrix\n",
|
||||
" labels = [['True Positives', 'False Positives'], ['False Negatives', 'True Negatives']]\n",
|
||||
" counts = [[f\"{v:,}\" for v in [row['count_true_positive'], row['count_false_positive']]],\n",
|
||||
" [f\"{v:,}\" for v in [row['count_false_negative'], row['count_true_negative']]]]\n",
|
||||
" percentages = [[f\"{round(100*v,2):,}\" for v in [row['true_positive_score'], row['false_positive_score']]],\n",
|
||||
" [f\"{round(100*v,2):,}\" for v in [row['false_negative_score'], row['true_negative_score']]]]\n",
|
||||
" annot = [[f\"{labels[i][j]}\\n{counts[i][j]} ({percentages[i][j]}%)\" for j in range(2)] for i in range(2)]\n",
|
||||
"\n",
|
||||
" # Scores formatted as percentages\n",
|
||||
" recall = row['recall_score'] * 100\n",
|
||||
" precision = row['precision_score'] * 100\n",
|
||||
" f1 = row['f1_score'] * 100\n",
|
||||
" f2 = row['f2_score'] * 100\n",
|
||||
"\n",
|
||||
" # Set up figure and axes manually for precise control\n",
|
||||
" fig = plt.figure(figsize=(9, 8))\n",
|
||||
" grid = fig.add_gridspec(nrows=4, height_ratios=[2, 2, 15, 2])\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" ax_main_title = fig.add_subplot(grid[0])\n",
|
||||
" ax_main_title.axis('off')\n",
|
||||
" ax_main_title.set_title(f\"Random Predictor - Flagged as Risk vs. {outcome_label.title()}\", fontsize=14, weight='bold')\n",
|
||||
" \n",
|
||||
" # Business explanation text\n",
|
||||
" ax_text = fig.add_subplot(grid[1])\n",
|
||||
" ax_text.axis('off')\n",
|
||||
" business_text = (\n",
|
||||
" f\"Random Predictor:\\n\\n\"\n",
|
||||
" f\"- We split bookings between train and test, considering as train those created before {cutoff_date.strftime('%Y-%m-%d')}.\\n\"\n",
|
||||
" f\"- We retrieve the actual distribution of incidents from the train set of {len(train_df):,} bookings, which is {positive_rate:.2%}.\\n\"\n",
|
||||
" f\"- We flag as risk randomly {positive_rate:.2%} bookings in the test set, which has a total of {len(test_df):,} bookings.\\n\"\n",
|
||||
" )\n",
|
||||
" ax_text.text(0.0, 0.0, business_text, fontsize=10.5, ha='left', va='bottom', wrap=False, linespacing=1.5)\n",
|
||||
"\n",
|
||||
" # Heatmap\n",
|
||||
" ax_heatmap = fig.add_subplot(grid[2])\n",
|
||||
" ax_heatmap.set_title(f\"Confusion Matrix – Risk vs. {outcome_label.title()}\", fontsize=12, weight='bold', ha='center', va='center', wrap=False)\n",
|
||||
"\n",
|
||||
" cmap = sns.light_palette(\"#315584\", as_cmap=True)\n",
|
||||
"\n",
|
||||
" sns.heatmap(cm, annot=annot, fmt='', cmap=cmap, cbar=False,\n",
|
||||
" xticklabels=x_labels,\n",
|
||||
" yticklabels=['Flagged as Risk', 'Flagged as No Risk'],\n",
|
||||
" ax=ax_heatmap,\n",
|
||||
" linewidths=1.0,\n",
|
||||
" annot_kws={'fontsize': 10, 'linespacing': 1.2})\n",
|
||||
" ax_heatmap.set_xlabel(\"Resolution Outcome (Actual)\", fontsize=11, labelpad=10)\n",
|
||||
" ax_heatmap.set_ylabel(\"Flagging (Prediction)\", fontsize=11, labelpad=10)\n",
|
||||
" \n",
|
||||
" # Make borders visible\n",
|
||||
" for _, spine in ax_heatmap.spines.items():\n",
|
||||
" spine.set_visible(True)\n",
|
||||
"\n",
|
||||
" # Footer with metrics and date\n",
|
||||
" ax_footer = fig.add_subplot(grid[3])\n",
|
||||
" ax_footer.axis('off')\n",
|
||||
" metrics_text = f\"Total Booking Count: {row['count_total']} | Recall: {recall:.2f}% | Precision: {precision:.2f}% | F1 Score: {f1:.2f}% | F2 Score: {f2:.2f}%\"\n",
|
||||
" date_text = f\"Generated on {date.today().strftime('%B %d, %Y')}\"\n",
|
||||
" ax_footer.text(0.5, 0.7, metrics_text, ha='center', fontsize=9)\n",
|
||||
" ax_footer.text(0.5, 0.1, date_text, ha='center', fontsize=8, color='gray')\n",
|
||||
"\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9cd5e165",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Plot confusion matrix for claim scenario\n",
|
||||
"plot_confusion_matrix_from_df(summary_df, 'RISK_VS_CLAIM')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
|
@ -0,0 +1,203 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Flagging Performance Monitoring\n",
|
||||
"\n",
|
||||
"## Initial setup\n",
|
||||
"This first section just ensures that the connection to DWH works correctly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This script connects to a Data Warehouse (DWH) using PostgreSQL. \n",
|
||||
"# This should be common for all Notebooks, but you might need to adjust the path to the `dwh_utils` module.\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(\"../utils\")) # Adjust path if needed\n",
|
||||
"\n",
|
||||
"from dwh_utils import read_credentials, create_postgres_engine, query_to_dataframe, test_connection\n",
|
||||
"\n",
|
||||
"# --- Connect to DWH ---\n",
|
||||
"creds = read_credentials()\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds)\n",
|
||||
"\n",
|
||||
"# --- Test Query ---\n",
|
||||
"test_connection()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Extraction\n",
|
||||
"In this section we extract the data from the Flagging Performance Analysis within DWH."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"import numpy as np\n",
|
||||
"from datetime import date"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Query to extract data\n",
|
||||
"data_extraction_query = \"\"\"\n",
|
||||
"select *\n",
|
||||
"from intermediate.int_flagging_performance_analysis \n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Retrieve Data from Query\n",
|
||||
"df = query_to_dataframe(engine=dwh_pg_engine, query=data_extraction_query)\n",
|
||||
"print(df.head())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
def plot_confusion_matrix_from_df(df, flagging_analysis_type):
    """Plot an annotated confusion matrix for one flagging analysis type.

    Parameters
    ----------
    df : pandas.DataFrame
        Expected to hold one row per ``flagging_analysis_type`` with the
        ``count_*`` and ``*_score`` columns read below — TODO confirm against
        the monitoring query that produces it.
    flagging_analysis_type : str
        One of 'RISK_VS_CLAIM' or 'RISK_VS_SUBMITTED_PAYOUT'; any other value
        falls back to generic labels.

    Raises
    ------
    ValueError
        If no row in *df* matches *flagging_analysis_type* (instead of the
        opaque IndexError that ``.iloc[0]`` on an empty subset would raise).
    """
    # Subset - just retrieve one row depending on the flagging_analysis_type
    subset = df[df['flagging_analysis_type'] == flagging_analysis_type]
    if subset.empty:
        raise ValueError(f"No row found for flagging_analysis_type={flagging_analysis_type!r}")
    row = subset.iloc[0]

    # Define custom x-axis labels and wording
    if flagging_analysis_type == 'RISK_VS_CLAIM':
        x_labels = ['With Submitted Claim', 'Without Submitted Claim']
        outcome_label = "submitted claim"
    elif flagging_analysis_type == 'RISK_VS_SUBMITTED_PAYOUT':
        x_labels = ['With Submitted Payout', 'Without Submitted Payout']
        outcome_label = "submitted payout"
    else:
        x_labels = ['Actual Positive', 'Actual Negative']
        outcome_label = "outcome"

    # Confusion matrix structure: rows = prediction, columns = actual outcome
    cm = np.array([
        [row['count_true_positive'], row['count_false_positive']],
        [row['count_false_negative'], row['count_true_negative']]
    ])

    # Create annotations for the confusion matrix (label + count + percentage per cell)
    labels = [['True Positives', 'False Positives'], ['False Negatives', 'True Negatives']]
    counts = [[f"{v:,}" for v in [row['count_true_positive'], row['count_false_positive']]],
              [f"{v:,}" for v in [row['count_false_negative'], row['count_true_negative']]]]
    percentages = [[f"{round(100*v,2):,}" for v in [row['true_positive_score'], row['false_positive_score']]],
                   [f"{round(100*v,2):,}" for v in [row['false_negative_score'], row['true_negative_score']]]]
    annot = [[f"{labels[i][j]}\n{counts[i][j]} ({percentages[i][j]}%)" for j in range(2)] for i in range(2)]

    # Scores formatted as percentages
    recall = row['recall_score'] * 100
    precision = row['precision_score'] * 100
    f1 = row['f1_score'] * 100
    f2 = row['f2_score'] * 100

    # Set up figure and axes manually for precise control
    fig = plt.figure(figsize=(9, 8))
    grid = fig.add_gridspec(nrows=4, height_ratios=[2, 3, 15, 2])

    ax_main_title = fig.add_subplot(grid[0])
    ax_main_title.axis('off')
    ax_main_title.set_title(f"Flagged as Risk vs. {outcome_label.title()}", fontsize=14, weight='bold')

    # Business explanation text
    ax_text = fig.add_subplot(grid[1])
    ax_text.axis('off')
    business_text = (
        f"Flagging performance analysis:\n\n"
        f"- Of all the bookings we flagged as at Risk, {precision:.2f}% actually turned into a {outcome_label}.\n"
        f"- Of all the bookings that resulted in a {outcome_label}, we correctly flagged {recall:.2f}% of them.\n"
        f"- The pure balance between these two is summarized by a score of {f1:.2f}%.\n"
        f"- If we prioritise better probability of detection of a {outcome_label}, the balanced score is {f2:.2f}%.\n"
    )
    ax_text.text(0.0, 0.0, business_text, fontsize=10.5, ha='left', va='bottom', wrap=False, linespacing=1.5)

    # Heatmap
    ax_heatmap = fig.add_subplot(grid[2])
    ax_heatmap.set_title(f"Confusion Matrix – Risk vs. {outcome_label.title()}", fontsize=12, weight='bold', ha='center', va='center', wrap=False)

    cmap = sns.light_palette("#318450", as_cmap=True)

    sns.heatmap(cm, annot=annot, fmt='', cmap=cmap, cbar=False,
                xticklabels=x_labels,
                yticklabels=['Flagged as Risk', 'Flagged as No Risk'],
                ax=ax_heatmap,
                linewidths=1.0,
                annot_kws={'fontsize': 10, 'linespacing': 1.2})
    ax_heatmap.set_xlabel("Resolution Outcome (Actual)", fontsize=11, labelpad=10)
    ax_heatmap.set_ylabel("Booking Status (Prediction)", fontsize=11, labelpad=10)

    # Make borders visible
    for _, spine in ax_heatmap.spines.items():
        spine.set_visible(True)

    # Footer with metrics and date
    ax_footer = fig.add_subplot(grid[3])
    ax_footer.axis('off')
    metrics_text = f"Total Booking Count: {row['count_total']} | Recall: {recall:.2f}% | Precision: {precision:.2f}% | F1 Score: {f1:.2f}% | F2 Score: {f2:.2f}%"
    date_text = f"Generated on {date.today().strftime('%B %d, %Y')}"
    ax_footer.text(0.5, 0.7, metrics_text, ha='center', fontsize=9)
    ax_footer.text(0.5, 0.1, date_text, ha='center', fontsize=8, color='gray')

    plt.tight_layout()
    plt.show()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Plot confusion matrix for claim scenario\n",
|
||||
"plot_confusion_matrix_from_df(df, 'RISK_VS_CLAIM')\n",
|
||||
"\n",
|
||||
"# Plot confusion matrix for submitted payout scenario\n",
|
||||
"plot_confusion_matrix_from_df(df, 'RISK_VS_SUBMITTED_PAYOUT')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -5,3 +5,4 @@ sqlalchemy
|
|||
psycopg2-binary
|
||||
seaborn
|
||||
statsmodels
|
||||
scikit-learn
|
||||
|
|
@ -9,85 +9,25 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pathlib\n",
|
||||
"import yaml\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sqlalchemy import create_engine\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/uri/.superhog-dwh/credentials.yml\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"CREDS_FILEPATH = pathlib.Path.home() / \".superhog-dwh\" / \"credentials.yml\"\n",
|
||||
"print(CREDS_FILEPATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prepare connection to DWH\n",
|
||||
"# Function to read credentials from the YAML file\n",
|
||||
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
|
||||
" with open(yaml_path, \"r\") as file:\n",
|
||||
" credentials = yaml.safe_load(file)\n",
|
||||
" return credentials[\"envs\"][env]\n",
|
||||
"# Function to create a PostgreSQL connection string\n",
|
||||
"def create_postgres_engine(creds: dict):\n",
|
||||
" user = creds[\"user\"]\n",
|
||||
" password = creds[\"password\"]\n",
|
||||
" host = creds[\"host\"]\n",
|
||||
" port = creds[\"port\"]\n",
|
||||
" database = creds[\"database\"]\n",
|
||||
" # Create the connection string for SQLAlchemy\n",
|
||||
" connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
|
||||
" engine = create_engine(connection_string)\n",
|
||||
" return engine\n",
|
||||
"# Function to execute a query and return the result as a pandas DataFrame\n",
|
||||
"def query_to_dataframe(engine, query: str):\n",
|
||||
" with engine.connect() as connection:\n",
|
||||
" df = pd.read_sql(query, connection)\n",
|
||||
" return df\n",
|
||||
"dwh_creds = read_credentials(yaml_path=CREDS_FILEPATH, env=\"prd\")\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds=dwh_creds)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ?column?\n",
|
||||
"0 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Silly query to test things out\n",
|
||||
"test_df = query_to_dataframe(engine=dwh_pg_engine, query=\"SELECT 1;\")\n",
|
||||
"print(test_df.head())"
|
||||
"# This script connects to a Data Warehouse (DWH) using PostgreSQL. \n",
|
||||
"# This should be common for all Notebooks, but you might need to adjust the path to the `dwh_utils` module.\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(\"./utils\")) # Adjust path if needed\n",
|
||||
"\n",
|
||||
"from dwh_utils import read_credentials, create_postgres_engine, query_to_dataframe, test_connection\n",
|
||||
"\n",
|
||||
"# --- Connect to DWH ---\n",
|
||||
"creds = read_credentials()\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds)\n",
|
||||
"\n",
|
||||
"# --- Test Query ---\n",
|
||||
"test_connection()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
|
|||
43
utils/dwh_utils.py
Normal file
43
utils/dwh_utils.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import pathlib
|
||||
import yaml
|
||||
import pandas as pd
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
# Path to the credentials YAML file shared by all notebooks.
CREDS_FILEPATH = pathlib.Path.home() / ".superhog-dwh" / "credentials.yml"


def read_credentials(yaml_path=CREDS_FILEPATH, env: str = "prd") -> dict:
    """Load DWH credentials for environment *env* from *yaml_path*.

    The YAML file is expected to have an ``envs`` mapping keyed by
    environment name (e.g. ``prd``).

    Raises FileNotFoundError if the file is missing and KeyError if
    *env* is not defined in it.
    """
    # Explicit encoding so the creds file parses identically on any host locale.
    with open(yaml_path, "r", encoding="utf-8") as file:
        credentials = yaml.safe_load(file)
    return credentials["envs"][env]
|
||||
|
||||
def create_postgres_engine(creds: dict):
|
||||
user = creds["user"]
|
||||
password = creds["password"]
|
||||
host = creds["host"]
|
||||
port = creds["port"]
|
||||
database = creds["database"]
|
||||
connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
|
||||
return create_engine(connection_string)
|
||||
|
||||
def query_to_dataframe(engine, query: str):
    """Execute *query* against *engine* and return the result as a DataFrame.

    A fresh connection is opened just for this query and released when
    the ``with`` block exits.
    """
    with engine.connect() as conn:
        return pd.read_sql(query, conn)
|
||||
|
||||
# Optional smoke test to validate the credentials -> engine -> query path end to end.
def test_connection() -> bool:
    """Run a trivial query against the DWH and report the outcome.

    Returns True on success and False on any failure; the exception is
    printed rather than raised so notebook runs are not interrupted.
    (Backward compatible: existing callers ignore the return value.)
    """
    print(f"🔌 Testing connection using credentials at: {CREDS_FILEPATH}")
    try:
        creds = read_credentials()
        engine = create_postgres_engine(creds)
        df = query_to_dataframe(engine, "SELECT 1;")
        # Use the result: an empty frame means the round trip silently failed.
        if df.empty:
            raise RuntimeError("Test query returned no rows.")
        print("✅ Connection successful.")
        return True
    except Exception as e:  # boundary handler: report, don't crash the notebook
        print("❌ Connection failed:")
        print(e)
        return False
|
||||
|
||||
# Run the connectivity smoke test only when this module is executed
# directly (importing it from a notebook stays side-effect free).
if __name__ == "__main__":
    test_connection()
|
||||
Loading…
Add table
Add a link
Reference in a new issue