data-jupyter-notebooks/ab_test_guest_journey_monitoring.ipynb


{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A/B test monitoring\n",
"\n",
"## Initial setup\n",
"This first section just ensures that the connection to DWH works correctly."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import yaml\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sqlalchemy import create_engine\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from statsmodels.stats.proportion import proportions_ztest\n",
"from scipy import stats\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/uri/.superhog-dwh/credentials.yml\n"
]
}
],
"source": [
"CREDS_FILEPATH = pathlib.Path.home() / \".superhog-dwh\" / \"credentials.yml\"\n",
"print(CREDS_FILEPATH)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Prepare connection to DWH\n",
"# Function to read credentials from the YAML file\n",
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
" with open(yaml_path, \"r\") as file:\n",
" credentials = yaml.safe_load(file)\n",
" return credentials[\"envs\"][env]\n",
"# Function to create a PostgreSQL connection string\n",
"def create_postgres_engine(creds: dict):\n",
" user = creds[\"user\"]\n",
" password = creds[\"password\"]\n",
" host = creds[\"host\"]\n",
" port = creds[\"port\"]\n",
" database = creds[\"database\"]\n",
" # Create the connection string for SQLAlchemy\n",
" connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
" engine = create_engine(connection_string)\n",
" return engine\n",
"# Function to execute a query and return the result as a pandas DataFrame\n",
"def query_to_dataframe(engine, query: str):\n",
" with engine.connect() as connection:\n",
" df = pd.read_sql(query, connection)\n",
" return df\n",
"dwh_creds = read_credentials(yaml_path=CREDS_FILEPATH, env=\"prd\")\n",
"dwh_pg_engine = create_postgres_engine(creds=dwh_creds)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ?column?\n",
"0 1\n"
]
}
],
"source": [
"# Silly query to test things out\n",
"test_df = query_to_dataframe(engine=dwh_pg_engine, query=\"SELECT 1;\")\n",
"print(test_df.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Extraction\n",
"In this section we extract the data from the Guest Journey monitoring within DWH by configuring which A/B test we want to measure. Here we already handle the basic aggregations that will be needed in the future, directly in SQL."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ab_test_name variation last_update guest_journeys_count \\\n",
"0 AAVariantTest A 2024-12-04 470 \n",
"1 AAVariantTest B 2024-12-04 478 \n",
"\n",
" guest_journey_started_count guest_journey_completed_count \\\n",
"0 470 284 \n",
"1 478 270 \n",
"\n",
" guest_journey_with_responses_count guest_journey_with_payment_count \\\n",
"0 75 146 \n",
"1 72 143 \n",
"\n",
" guest_revenue_count deposit_count ... \\\n",
"0 146 29 ... \n",
"1 143 24 ... \n",
"\n",
" guest_revenue_avg_per_guest_journey guest_revenue_sdv_per_guest_journey \\\n",
"0 7.836050 13.857327 \n",
"1 7.631339 13.202289 \n",
"\n",
" deposit_avg_per_guest_journey deposit_sdv_per_guest_journey \\\n",
"0 0.496000 2.104774 \n",
"1 0.367761 1.715542 \n",
"\n",
" waiver_avg_per_guest_journey waiver_sdv_per_guest_journey \\\n",
"0 7.180958 13.653492 \n",
"1 7.090684 12.993245 \n",
"\n",
" check_in_cover_avg_per_guest_journey check_in_cover_sdv_per_guest_journey \\\n",
"0 0.159091 1.210487 \n",
"1 0.172894 1.253337 \n",
"\n",
" csat_avg_per_guest_journey_with_response \\\n",
"0 3.853333 \n",
"1 3.972222 \n",
"\n",
" csat_sdv_per_guest_journey_with_response \n",
"0 1.204646 \n",
"1 1.020525 \n",
"\n",
"[2 rows x 26 columns]\n"
]
}
],
"source": [
"# A/B test name to measure\n",
"ab_test_name = \"AAVariantTest\"\n",
"\n",
"# Query to extract data\n",
"data_extraction_query = \"\"\"\n",
"select \n",
"\tab_test_name,\n",
"\tvariation,\n",
"\tmax(first_appearance_date_utc) as last_update,\n",
" \n",
" -- SIMPLE COUNTS --\n",
"\tcount(id_verification_request) as guest_journeys_count,\n",
"\tcount(verification_started_date_utc) as guest_journey_started_count,\n",
"\tcount(verification_completed_date_utc) as guest_journey_completed_count,\n",
"\tcount(experience_rating) as guest_journey_with_responses_count,\n",
"\tcount(last_payment_paid_date_utc) as guest_journey_with_payment_count,\n",
"\tcount(guest_revenue_without_taxes_in_gbp) as guest_revenue_count,\n",
"\tcount(deposit_fees_without_taxes_in_gbp) as deposit_count,\n",
"\tcount(waiver_fees_without_taxes_in_gbp) as waiver_count,\n",
"\tcount(check_in_cover_fees_without_taxes_in_gbp) as check_in_cover_count,\n",
" \n",
" -- SIMPLE SUMS --\n",
"\tsum(guest_revenue_without_taxes_in_gbp) as guest_revenue_sum,\n",
"\tsum(deposit_fees_without_taxes_in_gbp) as deposit_sum,\n",
"\tsum(waiver_fees_without_taxes_in_gbp) as waiver_sum,\n",
"\tsum(check_in_cover_fees_without_taxes_in_gbp) as check_in_cover_sum,\n",
" \n",
" -- AVGs/SDVs PER GUEST JOURNEY (ANY GJ APPEARING IN THE A/B TEST) --\n",
" -- NOTE THE COALESCE HERE. THIS IS IMPORTANT FOR THE T-TEST COMPUTATION --\n",
" avg(coalesce(guest_revenue_without_taxes_in_gbp,0)) as guest_revenue_avg_per_guest_journey,\n",
" stddev(coalesce(guest_revenue_without_taxes_in_gbp,0)) as guest_revenue_sdv_per_guest_journey,\n",
" avg(coalesce(deposit_fees_without_taxes_in_gbp,0)) as deposit_avg_per_guest_journey,\n",
" stddev(coalesce(deposit_fees_without_taxes_in_gbp,0)) as deposit_sdv_per_guest_journey,\n",
" avg(coalesce(waiver_fees_without_taxes_in_gbp,0)) as waiver_avg_per_guest_journey,\n",
" stddev(coalesce(waiver_fees_without_taxes_in_gbp,0)) as waiver_sdv_per_guest_journey,\n",
" avg(coalesce(check_in_cover_fees_without_taxes_in_gbp,0)) as check_in_cover_avg_per_guest_journey,\n",
" stddev(coalesce(check_in_cover_fees_without_taxes_in_gbp,0)) as check_in_cover_sdv_per_guest_journey,\n",
" \n",
" -- AVGs/SDVs PER GUEST JOURNEY WITH CSAT RESPONSE --\n",
" -- NOTE THAT THERE'S NO COALESCE HERE. THIS IS IMPORTANT FOR THE T-TEST COMPUTATION --\n",
" avg(experience_rating) as csat_avg_per_guest_journey_with_response,\n",
" stddev(experience_rating) as csat_sdv_per_guest_journey_with_response\n",
" \n",
"from\n",
"\tintermediate.int_core__ab_test_monitoring_guest_journey\n",
"where\n",
"\tab_test_name = '{}'\n",
"group by\n",
"\t1,2\n",
"\"\"\".format(ab_test_name)\n",
"\n",
"# Retrieve Data from Query\n",
"df = query_to_dataframe(engine=dwh_pg_engine, query=data_extraction_query)\n",
"print(df.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check A/B test Allocation to Variation"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAf0AAAIYCAYAAABnrTUkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABv7ElEQVR4nO3dd3hUZdoG8Htaeu+VNJJAEkLvvSMiClhYBVRUPlfR3dVdUVfRta8KrouFFRRFELBQLKD0HgiBQEIgCZBCCOkhjSRTz/fHmJFhkpBAkjPl/l1XLuBMOc9khrnPW857JIIgCCAiIiKrJxW7ACIiIuoaDH0iIiIbwdAnIiKyEQx9IiIiG8HQJyIishEMfSIiIhvB0CciIrIRDH0iIiIbwdAnIiKyEQz9djp69CheeuklTJ06FQMHDkR8fDwGDx6Mu+++G6+99hoOHz4MW1vkcOPGjYiNjcW4cePELoVa8MADDyA2Nhbvv/9+m+7/xhtvIDY2Fo899lin1vX8888jNjYWGzdu7NT9XCs2NhaxsbFdtr+OMH36dMTGxiIhIQFXrlxp9b4t/U6b/p8+//zznVmqKMaNG4fY2FhcunRJ7FLMHkO/jSorK/HII49g3rx5+O6771BXV4d+/fphypQp6NOnDyorK7F27Vo8/PDDmDlzptjltsvRo0cRGxuLuXPnil0KdZK7774bALB582ZotdpW76tSqfDTTz8ZPc5SzJ07F7GxsTh69KjYpXSYtLQ0ZGVlAQDUajV+/PFHkSvqWmIcGFozudgFWIKamhrcf//9yM3NRWRkJF555RUMGTLE5H7Z2dn48ssvsXXrVhGqJGrZlClT8MYbb6CsrAz79+/H2LFjW7zvrl27UFVVBS8vr07vvXnmmWfw2GOPwc/Pr1P3cy1L+//5/fffAwD8/f1RUlKC77//Hg8++KDIVZmXL7/8Emq1Gv7+/mKXYvbY0m+D119/Hbm5uQgNDcX69eubDXwAiImJwVtvvYXVq1d3cYVErXN0dMTtt98OADdsMTXdPn36dCgUik6ty8/PD1FRUXB1de3U/VwrKioKUVFRXba/W9HQ0IBffvkFAPDuu+/CyckJ2dnZSEtLE7ky89KtWzdERUV1+ufVGjD0b+DixYv4+eefAQAvvPAC3N3db/iYxMREk203GnO6URdWUlISFi5ciBEjRiAhIQFDhw7Fk08+idTU1Gbvn5eXhxdeeAHjxo1DQkIC+vbti7Fjx2LBggX44YcfDPebO3cu5s2bBwBITk42jHd25Bh9cXExXn/9dUyaNAm9evVC//79MXv2bKxfv77ZruZly5YhNjYWy5Yta/b5WhqOuHZ7Q0MDPvzwQ9x2223o3bu34bVcO65ZX1+PJUuWYOLEiUhISMDw4cOxaNEilJSUtPhaSkpK8Pbbbxuet2/fvpg1axbWrFkDjUZjdN85c+YgNjbW8PlpzooVKxAbG4u//OUvLd6nozR11e/ZsweVlZXN3qekpASHDh0yun9lZSVWr16Nxx57DOPGjUNiYiL69euHmTNn4rPPPoNSqWz2ua4dO//hhx9w3333oX///kb/D1r63NfV1eHbb7/FwoULMWnSJPTp0wd9+vTBHXfcgQ8++AA1NTVG929675OTkwEA8+bNM/osX/v8rY3pV1VVYenSpbj99tsN7+/MmTOxYsUKNDY2mtz/2s+cWq3GZ599httvvx2JiYkYPHgwFi5ciAsXLjS7r7b49ddfUVdXh5iYGAwZMgRTp04F8EfrvyOlpaXhL3/5i9F3zOOPP274PLQkKSkJTz/9NEaNGoWEhAQMGTIEs2bNwn//+1+j+QdqtRpbtmzBs88+iylTpqBfv35ITEzE5MmT8cYbb5j8v7t06RJiY2OxadMmAPrv32vf02u/H1r7fm1oaMBnn32GGTNmoG/fvujduzduv/12fPDBB6iurja5f9N+x40bB0EQsGHDBsycORN9+vRB//79MX/+/Ba/dy0Bu/dvYM+ePdDpdHB3d2+1S7Qz/fvf/8YXX3wBqVSKhIQE9O/fH0VFRdi1axf27NmD119/HbNmzTLcPzs7G3/6059QV1eHiIgIjB07FlKpFCUlJTh27BhKSkoM9x85ciTs7Oxw8OBB+Pj4YOTIkYbn8fT0vOXa09LS8Nhjj6GqqgpBQUGYMGECamtrkZycjNTUVOzYsQOffvop7OzsbnlfTZRKJebOnYsLFy5gwIAB6NGjB6qqqozuU1tbi9mzZ6OoqAj9+/dHdHQ0Tp48ic2bN+PYsWPYsmWLSevz2LFjePLJJ1FdXY3g4GAMGzYMKpUK6enpeP3117Fnzx4sX77c0NqYN28ejh07hjVr1mDatGkmdep0Oqxbtw6A/gChsyUmJiImJgbZ2dn48ccf8dBDD5ncZ9OmTdBqtejduzeio6MBAAcOHMCbb74Jf39/hIWFGeawnDp1CkuWLMHu3buxevXqFt/D119/Hd988w369u2LMWPGoKCgABKJpNVaMzMz8fLLL8PLywsRERGIj49HTU0NTp8+jeXLl2Pbtm3YsGGD4TPq4+ODGTNm4MCBAygvL8eIESPg6+treL5u3brd8PdTUFCABx98EIWFhfDy8sLo0aOhVqtx9OhRvP/++9i2bRtWrVrV7IG/Wq3GggULkJqaigEDBiAqKgppaWnYsWMHjh49ik2bNiEkJOSGNVyvKdyb/r/OmjUL33//PbZu3YoXX3wRDg4O7X7O5nz77bd45ZVXoNPpEBcXh8GDB6OwsBB79uzBnj178NRTT2HhwoUmj3vjjTfw9ddfAwB69uyJAQMGoLa2Frm5ufj4448xePBgDB48GABQUVGB5557Dq6uroiKikJsbCwaGhpw9uxZfP311/jll1+wfv16hIWFAQCcnJwwY8YMHD9+HBcvXkS/fv0MtzXt70aqqqrw0EMP4ezZs3BxccGQIUOgUCiQnJyM5cuX4+eff8ZXX33V4nvzwgsv4Oeff0b//v0xZswYnD17FocOHTL8v+7du3e7f9eiE6hV//jHP4SYmBjhwQcfvKXnGTt2rBATEyMUFBQ0e/uiRYuEmJgY4YcffjDavmHDBiEmJkaYOHGicPbsWaPbkpOThb59+wrx8fFCbm6uYfvzzz8vxMTECJ988onJfhoaGoTk5GSjbUeOHBFiYmKEOXPm3NRr++GHH4SYmBhh7NixRtuVSqXhdS9evFhQqVSG2y5evGi4benSpUaP++9//yvExMQI//3vf5vdX0v1Nm2PiYkR7rjjDqG0tLTFWmNiYoT58+cLtbW1htuqqqqEO++8U4iJiRGWL19u9LjS0lJh0KBBQmxsrLB27VpBq9UabqusrBTmzZsnxMTECMuWLTNs12g0hteYkZFhUsvu3bsNtXaVL7/8UoiJiRGmTZvW7O2TJk0SYmJihA0bNhi2nT9/XkhNTTW5b1VVlTB//nwhJiZGWLFihcntTb/nfv36Nft4QWj5c19UVCQcPnzY6PcsCIJQX18vPPfcc0JMTIzw6quvmjzfnDlzhJiYGOHIkSPN7u/au
q53zz33CDExMcLjjz8uXL161bC9oqJCmDFjhhATEyM888wzRo+59jN31113GX3mGhsbDb+fl19+ucV6WpKTkyPExMQI8fHxQkVFhWH7lClThJiYGGHTpk3NPq6l32nTZ3/RokVG2zMzM4W4uDghNjbW5Dn37t0rxMfHCzExMcLBgweNblu9erUQExMjDBo0SEhKSjKp49SpU8Lly5cN/66trRV27twpKJVKo/upVCphyZIlQkxMjPDYY4+1+fVcq6Xv17/+9a9CTEyMcM899wiVlZWG7XV1dcKjjz4qxMTECPfdd5/RYwoKCgzv6dixY4WcnBzDbRqNRnjhhRcM3x+WiN37N9DUPeXl5dXs7ZmZmXj++edNflJSUm553zqdztCFtXTpUvTo0cPo9oEDB+KJJ56AWq3Ghg0bDNsrKioAAKNHjzZ5TgcHBwwcOPCWa2uLbdu2obCwEH5+fvjnP/9pNN4WGhqKRYsWAQC+/vrrFruIb9bixYuNWnrXc3Jywttvvw0XFxfDNnd3dyxYsAA
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Ensure Seaborn styling\n",
"sns.set_theme(style=\"whitegrid\")\n",
"\n",
"# Calculate the total guest_journeys_count per variation\n",
"grouped_data = df.groupby('variation')['guest_journeys_count'].sum()\n",
"\n",
"# Find the total count and other metadata\n",
"total_count = grouped_data.sum()\n",
"ab_test_name = df['ab_test_name'].iloc[0] # Assuming all rows are for the same A/B test\n",
"last_update = df['last_update'].max()\n",
"\n",
"# Create a pie chart using Seaborn styling\n",
"plt.figure(figsize=(8, 6))\n",
"colors = sns.color_palette(\"pastel\") # Seaborn pastel colors\n",
"\n",
"# Pie chart with labels inside each sector\n",
"plt.pie(\n",
" grouped_data, \n",
" labels=[f\"{var}\\n{count} ({count/total_count:.1%})\" for var, count in grouped_data.items()],\n",
" autopct=None, \n",
" colors=colors, \n",
" startangle=90,\n",
" wedgeprops={'edgecolor': 'none'}, # Remove edges around sectors\n",
" pctdistance=0.70, # Places the labels closer to the center (inside)\n",
" labeldistance=0.2 # Ensure labels are positioned inside the sectors\n",
")\n",
"\n",
"# Add title\n",
"plt.title(\"Guest Journey - Variation Allocation\", fontsize=16)\n",
"\n",
"# Add total count to the bottom-left\n",
"plt.text(-1.4, -1.3, f\"Total Count: {total_count}\", fontsize=10, ha='left', color='black')\n",
"\n",
"# Add A/B test name and last update to the bottom-right\n",
"plt.text(1.2, -1.3, f\"A/B Test: {ab_test_name}\", fontsize=8, ha='right', color='gray')\n",
"plt.text(1.2, -1.4, f\"Last Update: {last_update}\", fontsize=8, ha='right', color='gray')\n",
"\n",
"plt.show()"
]
},
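{
"cell_type": "markdown",
"metadata": {},
"source": [
"The pie chart alone does not tell us whether a small imbalance matters. The sketch below adds a sample-ratio-mismatch (SRM) check, testing the observed split against an assumed 50/50 target allocation with the `proportions_ztest` already imported above. If the test was configured with a different target split, adjust `value` accordingly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sample-ratio-mismatch (SRM) check -- a sketch assuming a two-variation\n",
"# test with an intended 50/50 allocation.\n",
"counts = grouped_data.values  # guest_journeys_count per variation\n",
"srm_stat, srm_p = proportions_ztest(count=counts[0], nobs=counts.sum(), value=0.5)\n",
"if srm_p < 0.05:\n",
"    print(f\"WARNING: possible sample-ratio mismatch (p={srm_p:.4f})\")\n",
"else:\n",
"    print(f\"Observed split is consistent with 50/50 (p={srm_p:.4f})\")"
]
},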
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Statistical Analysis\n",
"In this section we compute the metrics needed for monitoring as well as check if there's any statistical difference between the different variations."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Z-test for Proportion Metrics (Rates)\n",
"This section defines the functions used to compute Z-test Proportion analysis"
]
},
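{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, with $x_A, x_B$ successes out of $n_A, n_B$ journeys, `proportions_ztest` computes the pooled two-proportion Z-statistic\n",
"\n",
"$$z = \\frac{\\hat{p}_A - \\hat{p}_B}{\\sqrt{\\hat{p}\\,(1-\\hat{p})\\left(\\frac{1}{n_A} + \\frac{1}{n_B}\\right)}}, \\qquad \\hat{p} = \\frac{x_A + x_B}{n_A + n_B},$$\n",
"\n",
"and the two-sided p-value from the standard normal distribution (the sign of $z$ does not affect the two-sided p-value)."
]
},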
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Generalized function to calculate Z-test for any metric\n",
"def calculate_z_test(df, metric_name, variation_a, variation_b, success_counts, total_counts):\n",
"\n",
" # Aggregate the success counts (numerator) and total counts (denominator) for each variation\n",
" success_a = df[df['variation'] == variation_a][success_counts].sum()\n",
" success_b = df[df['variation'] == variation_b][success_counts].sum()\n",
"\n",
" total_a = df[df['variation'] == variation_a][total_counts].sum()\n",
" total_b = df[df['variation'] == variation_b][total_counts].sum()\n",
"\n",
" # Calculate conversion rates for each variation\n",
" value_A = success_a / total_a if total_a != 0 else 0\n",
" value_B = success_b / total_b if total_b != 0 else 0\n",
"\n",
" # Absolute difference (B - A)\n",
" abs_diff = value_B - value_A\n",
"\n",
" # Relative difference (B - A) / A\n",
" rel_diff = (value_B - value_A) / value_A if value_A != 0 else 0\n",
"\n",
" # Perform the z-test for proportions\n",
" count = [success_a, success_b] # Success counts for A and B\n",
" nobs = [total_a, total_b] # Total counts for A and B\n",
" \n",
" # Calculate z-stat and p-value\n",
" z_stat, p_value = proportions_ztest(count, nobs)\n",
" \n",
" # Flag for significance at 95% level (p-value < 0.05)\n",
" is_significant = p_value < 0.05\n",
"\n",
" # Return the result as a dictionary\n",
" return {\n",
" 'metric': metric_name,\n",
" 'variation_A_name': variation_a,\n",
" 'variation_B_name': variation_b,\n",
" 'variation_A_value': value_A,\n",
" 'variation_B_value': value_B,\n",
" 'absolute_difference': abs_diff,\n",
" 'relative_difference': rel_diff,\n",
" 'statistic': z_stat,\n",
" 'p_value': p_value,\n",
" 'is_significant_95': is_significant\n",
" }\n",
"\n",
"# Function to run Z-tests for multiple metrics and aggregate results into a DataFrame\n",
"def run_z_tests(df, z_stat_metric_definition, variations):\n",
" results = []\n",
" \n",
" # Loop over all metrics in z_stat_metric_definition\n",
" for metric_name, metric_definition in z_stat_metric_definition.items():\n",
" success_counts = metric_definition['success_counts']\n",
" total_counts = metric_definition['total_counts']\n",
" \n",
" # Run the Z-test for each metric\n",
" result = calculate_z_test(df, metric_name, variation_a=variations[0], variation_b=variations[1], \n",
" success_counts=success_counts, total_counts=total_counts)\n",
" results.append(result)\n",
" \n",
" # Create a DataFrame from the results\n",
" results_df = pd.DataFrame(results)\n",
" \n",
" return results_df"
]
},
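{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of `calculate_z_test` on synthetic counts (the numbers are made up so that the gap should be detectable): 120/400 vs. 90/400 successes is a 7.5pp difference, which the Z-test should flag at the 95% level."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check on synthetic counts: 30.0% vs. 22.5% should be significant.\n",
"toy_df = pd.DataFrame({\n",
"    'variation': ['A', 'B'],\n",
"    'successes': [120, 90],\n",
"    'totals': [400, 400],\n",
"})\n",
"toy_result = calculate_z_test(toy_df, 'toy_rate', variation_a='A', variation_b='B',\n",
"                              success_counts='successes', total_counts='totals')\n",
"print(round(toy_result['p_value'], 4), toy_result['is_significant_95'])"
]
},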
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### T-test for non-proportion metrics\n",
"This section defines the functions used to compute T-tests for metrics outside of the proportion scope, mostly Revenue-related metrics."
]
},
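{
"cell_type": "markdown",
"metadata": {},
"source": [
"The functions below work from the per-variation means and standard deviations already computed in SQL, using the unpooled two-sample statistic\n",
"\n",
"$$t = \\frac{\\bar{x}_A - \\bar{x}_B}{\\sqrt{\\frac{s_A^2}{n_A} + \\frac{s_B^2}{n_B}}},$$\n",
"\n",
"with a deliberately conservative degrees-of-freedom choice of $\\min(n_A, n_B) - 1$ instead of the Welch-Satterthwaite approximation."
]
},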
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Generalized function to calculate T-test for revenue-related metrics\n",
"def calculate_t_test(df, metric_name, variation_a, variation_b, metric_avg_column, metric_sdv_column, total_counts):\n",
" # Aggregate the avgs and standard deviations for each variation\n",
" mean_a = df[df['variation'] == variation_a][metric_avg_column].mean() # Assuming the avg is calculated for each group\n",
" mean_b = df[df['variation'] == variation_b][metric_avg_column].mean() # Assuming the avg is calculated for each group\n",
" \n",
" sdv_a = df[df['variation'] == variation_a][metric_sdv_column].mean() # Assuming the stddev is calculated for each group\n",
" sdv_b = df[df['variation'] == variation_b][metric_sdv_column].mean() # Assuming the stddev is calculated for each group\n",
" \n",
" total_a = df[df['variation'] == variation_a][total_counts].sum()\n",
" total_b = df[df['variation'] == variation_b][total_counts].sum()\n",
"\n",
" # Absolute difference (B - A)\n",
" abs_diff = mean_b - mean_a\n",
"\n",
" # Relative difference (B - A) / A\n",
" rel_diff = (mean_b - mean_a) / mean_a if mean_a != 0 else 0\n",
"\n",
" # Calculate the T-statistic and p-value using the formula for two-sample T-test\n",
" se_a = sdv_a / (total_a ** 0.5) if total_a != 0 else 0\n",
" se_b = sdv_b / (total_b ** 0.5) if total_b != 0 else 0\n",
"\n",
" # Standard error of the difference between the means\n",
" se_diff = (se_a ** 2 + se_b ** 2) ** 0.5\n",
" \n",
" # T-statistic formula\n",
" if se_diff != 0:\n",
" t_stat = (mean_a - mean_b) / se_diff\n",
" else:\n",
" t_stat = 0\n",
" \n",
" # Degrees of freedom (for independent samples)\n",
" df_degrees = min(total_a - 1, total_b - 1) # Using the smaller of the two sample sizes minus 1\n",
" \n",
" # P-value from the T-distribution\n",
" p_value = stats.t.sf(abs(t_stat), df_degrees) * 2 # Two-tailed test\n",
" \n",
" # Flag for significance at 95% level (p-value < 0.05)\n",
" is_significant = p_value < 0.05\n",
"\n",
" # Return the result as a dictionary\n",
" return {\n",
" 'metric': metric_name,\n",
" 'variation_A_name': variation_a,\n",
" 'variation_B_name': variation_b,\n",
" 'variation_A_value': mean_a,\n",
" 'variation_B_value': mean_b,\n",
" 'absolute_difference': abs_diff,\n",
" 'relative_difference': rel_diff,\n",
" 'statistic': t_stat,\n",
" 'p_value': p_value,\n",
" 'is_significant_95': is_significant\n",
" }\n",
"\n",
"# Function to run T-tests for multiple revenue metrics and aggregate results into a DataFrame\n",
"def run_t_tests(df, t_stat_metric_definition, variations):\n",
" results = []\n",
" \n",
" # Loop over all metrics in t_stat_metric_definition\n",
" for metric_name, metric_definition in t_stat_metric_definition.items():\n",
" metric_avg_column = metric_definition['metric_avg_column']\n",
" metric_sdv_column = metric_definition['metric_sdv_column']\n",
" total_counts = metric_definition['total_counts']\n",
" \n",
" # Run the T-test for each metric\n",
" result = calculate_t_test(df, metric_name, variation_a=variations[0], variation_b=variations[1], \n",
" metric_avg_column=metric_avg_column, metric_sdv_column=metric_sdv_column, \n",
" total_counts=total_counts)\n",
" results.append(result)\n",
" \n",
" # Create a DataFrame from the results\n",
" results_df = pd.DataFrame(results)\n",
" \n",
" return results_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Specify the metric definition for Z-stat and T-stat tests"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Define the variations in which we want to run the tests\n",
"var_A = 'A'\n",
"var_B = 'B'\n",
"variations = [var_A, var_B]\n",
"\n",
"# Define the Z-test metric definitions (with both success_counts and total_counts)\n",
"z_stat_metric_definition = {\n",
" 'conversion_rate': {\n",
" 'success_counts': 'guest_journey_completed_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'payment_rate': {\n",
" 'success_counts': 'guest_journey_with_payment_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'waiver_payment_rate': {\n",
" 'success_counts': 'waiver_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'deposit_payment_rate': {\n",
" 'success_counts': 'deposit_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'CIH_payment_rate': {\n",
" 'success_counts': 'check_in_cover_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" }\n",
"}\n",
"\n",
"# Define the T-test metric definitions (with both metric_avg_column and metric_sdv_column)\n",
"t_stat_metric_definition = {\n",
" 'avg_guest_revenue_per_gj': {\n",
" 'metric_avg_column': 'guest_revenue_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'guest_revenue_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_waiver_revenue_per_gj': {\n",
" 'metric_avg_column': 'waiver_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'waiver_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_deposit_revenue_per_gj': {\n",
" 'metric_avg_column': 'deposit_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'deposit_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_CIH_revenue_per_gj': {\n",
" 'metric_avg_column': 'check_in_cover_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'check_in_cover_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_csat_per_gj_with_response': {\n",
" 'metric_avg_column': 'csat_avg_per_guest_journey_with_response',\n",
" 'metric_sdv_column': 'csat_sdv_per_guest_journey_with_response',\n",
" 'total_counts': 'guest_journey_with_responses_count'\n",
" }\n",
"\n",
"}\n",
"\n",
"# Define the metrics that will be the main ones for this A/B test:\n",
"main_metrics = ['avg_guest_revenue_per_gj', 'conversion_rate', 'payment_rate']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Run the computation of the metrics and statistical significance"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" metric relative_difference is_significant_95\n",
"0 conversion_rate -0.065207 False\n",
"1 payment_rate -0.036940 False\n",
"2 waiver_payment_rate 0.000072 False\n",
"3 deposit_payment_rate -0.186265 False\n",
"4 CIH_payment_rate 0.106172 False\n",
"5 avg_guest_revenue_per_gj -0.026124 False\n",
"6 avg_waiver_revenue_per_gj -0.012571 False\n",
"7 avg_deposit_revenue_per_gj -0.258547 False\n",
"8 avg_CIH_revenue_per_gj 0.086757 False\n",
"9 avg_csat_per_gj_with_response 0.030854 False\n"
]
}
],
"source": [
"# Call the function to calculate the Z-test for each metric and aggregate the results\n",
"z_test_results_df = run_z_tests(df, z_stat_metric_definition=z_stat_metric_definition, variations=variations)\n",
"\n",
"# Call the function to calculate the T-test for each metric and aggregate the results\n",
"t_test_results_df = run_t_tests(df, t_stat_metric_definition=t_stat_metric_definition, variations=variations)\n",
"\n",
"# Add a new column to identify whether it's from Z-test or T-test\n",
"z_test_results_df['test_type'] = 'Z-test'\n",
"t_test_results_df['test_type'] = 'T-test'\n",
"\n",
"# Combine the dataframes after adding the 'test_type' column\n",
"combined_results_df = pd.concat([z_test_results_df, t_test_results_df], ignore_index=True)\n",
"\n",
"# Print the main aggregated DataFrame\n",
"print(combined_results_df[['metric','relative_difference','is_significant_95']])"
]
},
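{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a cross-check of the hand-rolled T-test, the sketch below recomputes one metric with SciPy's Welch test from the same summary statistics. Because `stats.ttest_ind_from_stats` uses the Welch-Satterthwaite degrees of freedom rather than the conservative $\\min(n_A, n_B) - 1$, its p-value may differ slightly from the one in `combined_results_df`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Welch T-test cross-check for avg_guest_revenue_per_gj (sketch).\n",
"row_a = df[df['variation'] == var_A].iloc[0]\n",
"row_b = df[df['variation'] == var_B].iloc[0]\n",
"t_check, p_check = stats.ttest_ind_from_stats(\n",
"    mean1=row_a['guest_revenue_avg_per_guest_journey'],\n",
"    std1=row_a['guest_revenue_sdv_per_guest_journey'],\n",
"    nobs1=row_a['guest_journeys_count'],\n",
"    mean2=row_b['guest_revenue_avg_per_guest_journey'],\n",
"    std2=row_b['guest_revenue_sdv_per_guest_journey'],\n",
"    nobs2=row_b['guest_journeys_count'],\n",
"    equal_var=False,  # Welch's test\n",
")\n",
"print(f\"Welch cross-check: t={t_check:.3f}, p={p_check:.3f}\")"
]
},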
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Results\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"AAVariantTest results (last updated at 2024-12-04)\n",
"\n",
"Total Guest Journeys affected by this A/B test: 948 - Total Guest Revenue: 7330 GBP.\n",
" Variation A: Guest Journeys 470 (49.6%) - Guest Revenue: 3682 GBP (50.2%).\n",
" Variation B: Guest Journeys 478 (50.4%) - Guest Revenue: 3647 GBP (49.8%).\n",
"\n",
"Main Metrics - Comparing B vs. A.\n",
"\n",
"CONVERSION RATE (not significant): 56.5% vs. 60.4% (-3.9% | -6.5%).\n",
"PAYMENT RATE (not significant): 29.9% vs. 31.1% (-1.1% | -3.7%).\n",
"AVG GUEST REVENUE PER GJ (not significant): 7.63 vs. 7.84 (-0.2 | -2.6%).\n",
"\n",
"Other Metrics\n",
"\n",
"WAIVER PAYMENT RATE (not significant): 24.9% vs. 24.9% (0.0% | 0.0%).\n",
"DEPOSIT PAYMENT RATE (not significant): 5.0% vs. 6.2% (-1.1% | -18.6%).\n",
"CIH PAYMENT RATE (not significant): 1.9% vs. 1.7% (0.2% | 10.6%).\n",
"AVG WAIVER REVENUE PER GJ (not significant): 7.09 vs. 7.18 (-0.09 | -1.3%).\n",
"AVG DEPOSIT REVENUE PER GJ (not significant): 0.37 vs. 0.5 (-0.13 | -25.9%).\n",
"AVG CIH REVENUE PER GJ (not significant): 0.17 vs. 0.16 (0.01 | 8.7%).\n",
"AVG CSAT PER GJ WITH RESPONSE (not significant): 3.97 vs. 3.85 (0.12 | 3.1%).\n"
]
}
],
"source": [
"print('\\n{} results (last updated at {})\\n'.format(ab_test_name, last_update))\n",
"\n",
"# Get main volume indicators per variation\n",
"grouped_data = df.groupby('variation')[[\"guest_journeys_count\",\"guest_revenue_sum\"]].sum()\n",
"\n",
"# Find the totals over any variation\n",
"total_count = grouped_data.sum()\n",
"\n",
"# Print overall indicators for volumes\n",
"print('Total Guest Journeys affected by this A/B test: {} - Total Guest Revenue: {} GBP.'.format(int(total_count.loc[\"guest_journeys_count\"]), \n",
" int(total_count.loc[\"guest_revenue_sum\"])))\n",
"for var in variations:\n",
" print(' Variation {}: Guest Journeys {} ({}%) - Guest Revenue: {} GBP ({}%).'.format(\n",
" var, \n",
" int(grouped_data.loc[var,'guest_journeys_count']), \n",
" round(100*(grouped_data.loc[var,'guest_journeys_count']/total_count.loc[\"guest_journeys_count\"]),1),\n",
" int(grouped_data.loc[var,'guest_revenue_sum']),\n",
" round(100*(grouped_data.loc[var,'guest_revenue_sum']/total_count.loc[\"guest_revenue_sum\"]),1)\n",
" ))\n",
"\n",
"# Split results whether the metrics are main metrics or not\n",
"main_metrics_rows = combined_results_df[combined_results_df['metric'].isin(main_metrics)]\n",
"other_metrics_rows = combined_results_df[~combined_results_df['metric'].isin(main_metrics)]\n",
"\n",
"def print_metrics(df, header=None):\n",
" if header:\n",
" print(f'\\n{header}\\n')\n",
"\n",
" for row in df.iterrows():\n",
" metric = row[1]['metric'].upper().replace('_', ' ')\n",
" if row[1]['test_type'] == 'Z-test':\n",
" value_a = str(round(100 * row[1]['variation_A_value'], 1)) + '%'\n",
" value_b = str(round(100 * row[1]['variation_B_value'], 1)) + '%'\n",
" abs_diff = str(round(100 * row[1]['absolute_difference'], 1)) + '%'\n",
" else:\n",
" value_a = str(round(row[1]['variation_A_value'], 2))\n",
" value_b = str(round(row[1]['variation_B_value'], 2))\n",
" abs_diff = str(round(row[1]['absolute_difference'], 2))\n",
" rel_diff = str(round(100 * row[1]['relative_difference'], 1)) + '%'\n",
" stat_sign = row[1]['is_significant_95']\n",
"\n",
" if stat_sign:\n",
" print(f\"{metric} - SIGNIFICANT RESULT: {value_b} vs. {value_a} ({abs_diff} | {rel_diff}).\")\n",
" else:\n",
" print(f\"{metric} (not significant): {value_b} vs. {value_a} ({abs_diff} | {rel_diff}).\")\n",
"\n",
"# Print main metrics\n",
"print_metrics(main_metrics_rows, header=\"Main Metrics - Comparing {} vs. {}.\".format(var_B, var_A))\n",
"\n",
"# Print other metrics\n",
"print_metrics(other_metrics_rows, header=\"Other Metrics\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}