data-jupyter-notebooks/ab_test_guest_journey_monitoring.ipynb


{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A/B test monitoring\n",
"\n",
"## Initial setup\n",
"This first section just ensures that the connection to DWH works correctly."
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import yaml\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sqlalchemy import create_engine\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from statsmodels.stats.proportion import proportions_ztest\n",
"from scipy import stats\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/uri/.superhog-dwh/credentials.yml\n"
]
}
],
"source": [
"CREDS_FILEPATH = pathlib.Path.home() / \".superhog-dwh\" / \"credentials.yml\"\n",
"print(CREDS_FILEPATH)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# Prepare connection to DWH\n",
"# Function to read credentials from the YAML file\n",
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
" with open(yaml_path, \"r\") as file:\n",
" credentials = yaml.safe_load(file)\n",
" return credentials[\"envs\"][env]\n",
"# Function to create a PostgreSQL connection string\n",
"def create_postgres_engine(creds: dict):\n",
" user = creds[\"user\"]\n",
" password = creds[\"password\"]\n",
" host = creds[\"host\"]\n",
" port = creds[\"port\"]\n",
" database = creds[\"database\"]\n",
" # Create the connection string for SQLAlchemy\n",
" connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
" engine = create_engine(connection_string)\n",
" return engine\n",
"# Function to execute a query and return the result as a pandas DataFrame\n",
"def query_to_dataframe(engine, query: str):\n",
" with engine.connect() as connection:\n",
" df = pd.read_sql(query, connection)\n",
" return df\n",
"dwh_creds = read_credentials(yaml_path=CREDS_FILEPATH, env=\"prd\")\n",
"dwh_pg_engine = create_postgres_engine(creds=dwh_creds)"
]
},
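{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, `read_credentials` expects `credentials.yml` to follow the layout sketched below. Only the key structure (`envs` -> environment name -> connection fields) is implied by the code above; all values here are illustrative placeholders, not real credentials.\n",
"\n",
"```yaml\n",
"envs:\n",
"  prd:\n",
"    user: dwh_user\n",
"    password: \"********\"\n",
"    host: dwh.example.com\n",
"    port: 5432\n",
"    database: analytics\n",
"```"
]
},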
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ?column?\n",
"0 1\n"
]
}
],
"source": [
"# Silly query to test things out\n",
"test_df = query_to_dataframe(engine=dwh_pg_engine, query=\"SELECT 1;\")\n",
"print(test_df.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Extraction\n",
"In this section we extract the data from the Guest Journey monitoring within DWH by configuring which A/B test we want to measure. Here we already handle the basic aggregations that will be needed in the future, directly in SQL."
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ab_test_name variation last_update \\\n",
"0 VerificationProductSelectionButtonPosition Fixed 2024-12-11 \n",
"1 VerificationProductSelectionButtonPosition Relative 2024-12-11 \n",
"\n",
" guest_journeys_count guest_journey_started_count \\\n",
"0 18 18 \n",
"1 24 24 \n",
"\n",
" guest_journey_completed_count guest_journey_with_responses_count \\\n",
"0 3 2 \n",
"1 10 3 \n",
"\n",
" guest_journey_with_payment_count guest_revenue_count deposit_count ... \\\n",
"0 4 4 1 ... \n",
"1 5 5 1 ... \n",
"\n",
" guest_revenue_avg_per_guest_journey guest_revenue_sdv_per_guest_journey \\\n",
"0 5.800667 13.402593 \n",
"1 5.902642 13.025999 \n",
"\n",
" deposit_avg_per_guest_journey deposit_sdv_per_guest_journey \\\n",
"0 0.367500 1.559170 \n",
"1 0.261454 1.280859 \n",
"\n",
" waiver_avg_per_guest_journey waiver_sdv_per_guest_journey \\\n",
"0 4.910261 11.769242 \n",
"1 5.641188 13.081060 \n",
"\n",
" check_in_cover_avg_per_guest_journey check_in_cover_sdv_per_guest_journey \\\n",
"0 0.522906 2.2185 \n",
"1 0.000000 0.0000 \n",
"\n",
" csat_avg_per_guest_journey_with_response \\\n",
"0 4.0 \n",
"1 3.0 \n",
"\n",
" csat_sdv_per_guest_journey_with_response \n",
"0 1.414214 \n",
"1 1.732051 \n",
"\n",
"[2 rows x 26 columns]\n"
]
}
],
"source": [
"# A/B test name to measure\n",
"#ab_test_name = \"AAVariantTest\"\n",
"ab_test_name = \"VerificationProductSelectionButtonPosition\"\n",
"\n",
"# Query to extract data\n",
"data_extraction_query = \"\"\"\n",
"select \n",
"\tab_test_name,\n",
"\tvariation,\n",
"\tmax(first_appearance_date_utc) as last_update,\n",
" \n",
" -- SIMPLE COUNTS --\n",
"\tcount(id_verification_request) as guest_journeys_count,\n",
"\tcount(verification_started_date_utc) as guest_journey_started_count,\n",
"\tcount(verification_completed_date_utc) as guest_journey_completed_count,\n",
"\tcount(experience_rating) as guest_journey_with_responses_count,\n",
"\tcount(last_payment_paid_date_utc) as guest_journey_with_payment_count,\n",
"\tcount(guest_revenue_without_taxes_in_gbp) as guest_revenue_count,\n",
"\tcount(deposit_fees_without_taxes_in_gbp) as deposit_count,\n",
"\tcount(waiver_fees_without_taxes_in_gbp) as waiver_count,\n",
"\tcount(check_in_cover_fees_without_taxes_in_gbp) as check_in_cover_count,\n",
" \n",
" -- SIMPLE SUMS --\n",
"\tsum(guest_revenue_without_taxes_in_gbp) as guest_revenue_sum,\n",
"\tsum(deposit_fees_without_taxes_in_gbp) as deposit_sum,\n",
"\tsum(waiver_fees_without_taxes_in_gbp) as waiver_sum,\n",
"\tsum(check_in_cover_fees_without_taxes_in_gbp) as check_in_cover_sum,\n",
" \n",
" -- AVGs/SDVs PER GUEST JOURNEY (ANY GJ APPEARING IN THE A/B TEST) --\n",
" -- NOTE THE COALESCE HERE. THIS IS IMPORTANT FOR THE T-TEST COMPUTATION --\n",
" avg(coalesce(guest_revenue_without_taxes_in_gbp,0)) as guest_revenue_avg_per_guest_journey,\n",
" stddev(coalesce(guest_revenue_without_taxes_in_gbp,0)) as guest_revenue_sdv_per_guest_journey,\n",
" avg(coalesce(deposit_fees_without_taxes_in_gbp,0)) as deposit_avg_per_guest_journey,\n",
" stddev(coalesce(deposit_fees_without_taxes_in_gbp,0)) as deposit_sdv_per_guest_journey,\n",
" avg(coalesce(waiver_fees_without_taxes_in_gbp,0)) as waiver_avg_per_guest_journey,\n",
" stddev(coalesce(waiver_fees_without_taxes_in_gbp,0)) as waiver_sdv_per_guest_journey,\n",
" avg(coalesce(check_in_cover_fees_without_taxes_in_gbp,0)) as check_in_cover_avg_per_guest_journey,\n",
" stddev(coalesce(check_in_cover_fees_without_taxes_in_gbp,0)) as check_in_cover_sdv_per_guest_journey,\n",
" \n",
" -- AVGs/SDVs PER GUEST JOURNEY WITH CSAT RESPONSE --\n",
" -- NOTE THAT THERE'S NO COALESCE HERE. THIS IS IMPORTANT FOR THE T-TEST COMPUTATION --\n",
" avg(experience_rating) as csat_avg_per_guest_journey_with_response,\n",
" stddev(experience_rating) as csat_sdv_per_guest_journey_with_response\n",
" \n",
"from\n",
"\tintermediate.int_core__ab_test_monitoring_guest_journey\n",
"where\n",
"\tab_test_name = '{}'\n",
"group by\n",
"\t1,2\n",
"\"\"\".format(ab_test_name)\n",
"\n",
"# Retrieve Data from Query\n",
"df = query_to_dataframe(engine=dwh_pg_engine, query=data_extraction_query)\n",
"print(df.head())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check A/B test Allocation to Variation"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAf0AAAIYCAYAAABnrTUkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB52UlEQVR4nO3dd3gUdf4H8PeW9N5DeiEJkBAg9F6kg4gURaUoKocKnKf3s52Hnnqn3tk9lcMCIqKgNEVAivSeEAiEBAJJSO+9bZ3fH2tWlnRSZjf7fj1PHsju7MxnS/Y93zIzEkEQBBAREVG3JxW7ACIiIuoaDH0iIiIzwdAnIiIyEwx9IiIiM8HQJyIiMhMMfSIiIjPB0CciIjITDH0iIiIzwdAnIiIyEwz9Njpz5gxefvllTJ8+HYMHD0ZkZCSGDh2KefPm4bXXXsPJkydhbic53LZtGyIiIjBhwgSxS6EmPPTQQ4iIiMA777zTquXfeOMNRERE4PHHH+/Uul544QVERERg27ZtnbqdW0VERCAiIqLLttcRZs2ahYiICERFRaG0tLTZZZt6Tev/Tl944YXOLFUUEyZMQEREBLKyssQuxegx9FuppKQEjz76KBYvXowffvgBVVVViImJwdSpU9G/f3+UlJTg22+/xSOPPII5c+aIXW6bnDlzBhEREVi0aJHYpVAnmTdvHgBgx44d0Gg0zS6rVCrx888/GzzOVCxatAgRERE4c+aM2KV0mISEBFy9ehUAoFKp8NNPP4lcUdcSY8ewO5OLXYApqKiowIMPPoi0tDSEhITglVdewbBhwxosd+3aNaxfvx67d+8WoUqipk2dOhVvvPEGCgsLcfToUYwfP77JZQ8ePIiysjK4urp2eu/NM888g8cffxyenp6dup1bmdrf548//ggA8PLyQn5+Pn788UcsWbJE5KqMy/r166FSqeDl5SV2KUaPLf1WeP3115GWlgZ/f398//33jQY+AISHh+Nf//oXNmzY0MUVEjXPxsYGM2bMAIAWW0z198+aNQsWFhadWpenpydCQ0Ph4ODQqdu5VWhoKEJDQ7tse+1RW1uLX375BQDw73//G7a2trh27RoSEhJErsy4BAQEIDQ0tNM/r90BQ78FGRkZ2LVrFwDgxRdfhJOTU4uPiY6ObnBbS2NOLXVhnTp1CitWrMCoUaMQFRWF4cOH46mnnkJ8fHyjy6enp+PFF1/EhAkTEBUVhQEDBmD8+PFYtmwZtm7dql9u0aJFWLx4MQDg7Nmz+vHOjhyjz8vLw+uvv47Jkyejb9++GDhwIBYsWIDvv/++0a7mjz/+GBEREfj4448bXV9TwxG33l5bW4sPP/wQ06ZNQ79+/fTP5dZxzZqaGrz77ruYNGkSoqKiMHLkSDz//PPIz89v8rnk5+fjzTff1K93wIABmDt3LjZu3Ai1Wm2w7MKFCxEREaH//DTm888/R0REBP785z83uUxHqe+qP3ToEEpKShpdJj8/HydOnDBYvqSkBBs2bMDjjz+OCRMmIDo6GjExMZgzZw7Wrl0LhULR6LpuHTvfunUr7r//fgwcONDg76Cpz31VVRW2bNmCFStWYPLkyejfvz/69++Pu+++G++//z4qKioMlq9/78+ePQsAWLx4scFn+db1NzemX1ZWhvfeew8zZszQv79z5szB559/jrq6ugbL3/qZU6lUWLt2LWbMmIHo6GgMHToUK1aswI0bNxrdVmvs3bsXVVVVCA8Px7BhwzB9+nQAf7T+O1JCQgL+/Oc/G3zHLF++XP95aMqpU6ewatUqjBkzBlFRURg2bBjmzp2Ljz76yGD+gUqlws6dO/Hss89i6tSpiImJQXR0NKZMmYI33nijwd9dVlYWIiIisH37dgC6799b39Nbvx+a+36tra3F2rVrce+992LAgAHo168fZsyYgffffx/l5eUNlq/f7oQJEyAIAjZv3ow5c+agf//+GDhwIJYuXdrk964pYPd+Cw4dOgStVgsnJ6dmu0Q709tvv42vvvoKUqkUUVFRGDhwIHJzc3Hw4EEcOnQIr7/+OubOnatf/tq1a3jggQdQVVWF4OBgjB8/HlKpFPn5+Th37hzy8/P1y48ePRqWlpY4fvw43N3dMXr0aP16XFxc2l17QkICHn/8cZSVlcHHxwcTJ05EZWUlzp49i/j4eOzfvx+fffYZLC0t272tegqFAosWLcKNGzcwaNAg9OrVC2VlZQbLVFZWYsGCBcjNzcXAgQMRFhaGCxcuYMeOHTh37hx27tzZoPV57tw5PPXUUygvL4evry9GjBgBpVKJS5cu4fXXX8ehQ4ewZs0afWtj8eLFOHfuHDZu3IiZM2c2qFOr1eK7774DoNtB6GzR0dEIDw/HtWvX8NNPP+Hhhx9usMz27duh0WjQr18/hIWFAQCOHTuGf/7zn/Dy8kJgYKB+DsvFixfx7rvv4rfffsOGDRuafA9ff/11bNq0CQMGDMC4ceOQmZkJiUTSbK3Jycn4+9//DldXVwQHByMyMhIVFRW4fPky1qxZgz179mDz5s36z6i7uzvuvfdeHDt2DEVFRRg1ahQ8PDz06wsICGjx9cnMzMSSJUuQnZ0NV1dXjB07FiqVCmfOnME777yDPXv2YN26dY3u+KtUKixbtgzx8fEYNGgQQkNDkZCQgP379+PMmTPYvn07/Pz8WqzhdvXhXv/3OnfuXPz444/YvXs3XnrpJVhbW7d5nY3ZsmULXnnlFWi1WvTp0wdDhw5FdnY2Dh06hEOHDmHlypVYsWJFg8e98cYb+OabbwAAvXv3xqBBg1BZWYm0tDR88sknGDp0KIYOHQoAKC4uxnPPPQcHBweEhoYiIiICtbW1SEpKwjfffINffvkF33//PQIDAwEAtra2uPfeexEXF4eMjAzExMTo76vfXkvKysrw8MMPIykpCfb29hg2bBgsLCxw9uxZrFmzBrt27cLXX3/d5Hvz4osvYteuXRg4cCDGjRuHpKQknDhxQv933a9fvza/1qITqFn/93//J4SHhwtLlixp13rGjx8vhIeHC5mZmY3e//zzzwvh4eHC1q1bDW7fvHmzEB4eLkyaNElISkoyuO/s2bPCgAEDhMjISCEtLU1/+wsvvCCEh4cLn376aYPt1NbWCmfPnjW47fTp00J4eLiwcOHCO3puW7duFcLDw4Xx48cb3K5QKPTPe/Xq1YJSqdTfl5GRob/vvffeM3jcRx99JISHhwsfffRRo9trqt7628PDw4W7775bKCgoaLLW8PBwYenSpUJlZaX+vrKyMuGee+4RwsPDhTVr1hg8rqCgQBgyZIgQEREhfPvtt4JGo9HfV1JSIixevFgIDw8XPv74Y/3tarVa/xwTExMb1PLbb7/pa+0q69evF8LDw4WZM2c2ev/kyZOF8PBwYfPmzfrbrl+/LsTHxzdYtqysTFi6dKkQHh4ufP755w3ur3+dY2JiGn28IDT9uc/NzRVOnjxp8DoLgiDU1NQIzz33nBAeHi68+uqrDda3cOFCITw8XDh9+nSj27u1rtvNnz9fCA8PF
5YvXy5UV1frby8uLhbuvfdeITw8XHjmmWcMHnPrZ2727NkGn7m6ujr96/P3v/+9yXqakpqaKoSHhwuRkZFCcXGx/vapU6cK4eHhwvbt2xt9XFOvaf1n//nnnze4PTk5WejTp48QERHRYJ2HDx8WIiMjhfDwcOH48eMG923YsEEIDw8XhgwZIpw6dapBHRcvXhRycnL0v1dWVgoHDhwQFAqFwXJKpVJ49913hfDwcOHxxx9v9fO5VVPfr08//bQQHh4uzJ8/XygpKdHfXlVVJTz22GNCeHi4cP/99xs8JjMzU/+ejh8/XkhNTdXfp1arhRdffFH//WGK2L3fgvruKVdX10bvT05OxgsvvNDgJzY2tt3b1mq1+i6s9957D7169TK4f/DgwXjyySehUqmwefNm/e3FxcUAgLFjxzZYp7W1NQYPHtzu2lpjz549yM7OhqenJ/72t78ZjLf5+/vj+eefBwB88803TXYR36nVq1cbtPRuZ2trizfffBP29vb625ycnLBs2TIAwMmTJw2W//r
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Ensure Seaborn styling\n",
"sns.set_theme(style=\"whitegrid\")\n",
"\n",
"# Calculate the total guest_journeys_count per variation\n",
"grouped_data = df.groupby('variation')['guest_journeys_count'].sum()\n",
"\n",
"# Find the total count and other metadata\n",
"total_count = grouped_data.sum()\n",
"ab_test_name = df['ab_test_name'].iloc[0] # Assuming all rows are for the same A/B test\n",
"last_update = df['last_update'].max()\n",
"\n",
"# Create a pie chart using Seaborn styling\n",
"plt.figure(figsize=(8, 6))\n",
"colors = sns.color_palette(\"pastel\") # Seaborn pastel colors\n",
"\n",
"# Pie chart with labels inside each sector\n",
"plt.pie(\n",
" grouped_data, \n",
" labels=[f\"{var}\\n{count} ({count/total_count:.1%})\" for var, count in grouped_data.items()],\n",
" autopct=None, \n",
" colors=colors, \n",
" startangle=90,\n",
" wedgeprops={'edgecolor': 'none'}, # Remove edges around sectors\n",
" pctdistance=0.70, # Places the labels closer to the center (inside)\n",
" labeldistance=0.2 # Ensure labels are positioned inside the sectors\n",
")\n",
"\n",
"# Add title\n",
"plt.title(\"Guest Journey - Variation Allocation\", fontsize=16)\n",
"\n",
"# Add total count to the bottom-left\n",
"plt.text(-1.4, -1.3, f\"Total Count: {total_count}\", fontsize=10, ha='left', color='black')\n",
"\n",
"# Add A/B test name and last update to the bottom-right\n",
"plt.text(1.2, -1.3, f\"A/B Test: {ab_test_name}\", fontsize=8, ha='right', color='gray')\n",
"plt.text(1.2, -1.4, f\"Last Update: {last_update}\", fontsize=8, ha='right', color='gray')\n",
"\n",
"plt.show()"
]
},
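{
"cell_type": "markdown",
"metadata": {},
"source": [
"Beyond eyeballing the pie chart, a quick sample-ratio-mismatch (SRM) check can flag a broken allocation. The cell below is an optional sketch, assuming the intended split is uniform across variations (adjust the expected counts if the test uses different weights); it reuses `grouped_data` from above and the chi-square goodness-of-fit test from `scipy.stats`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional SRM check: compare observed allocation against a uniform expected split.\n",
"# ASSUMPTION: the test was configured for an even split across variations.\n",
"observed = grouped_data.values.astype(float)\n",
"expected = np.full_like(observed, observed.sum() / len(observed))\n",
"\n",
"chi2_stat, srm_p_value = stats.chisquare(f_obs=observed, f_exp=expected)\n",
"print(f\"SRM check: chi2={chi2_stat:.3f}, p-value={srm_p_value:.4f}\")\n",
"if srm_p_value < 0.01:\n",
"    print(\"WARNING: possible sample ratio mismatch - check the allocation mechanism.\")"
]
},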
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Statistical Analysis\n",
"In this section we compute the metrics needed for monitoring as well as check if there's any statistical difference between the different variations."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Z-test for Proportion Metrics (Rates)\n",
"This section defines the functions used to compute Z-test Proportion analysis"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# Generalized function to calculate Z-test for any metric\n",
"def calculate_z_test(df, metric_name, variation_a, variation_b, success_counts, total_counts):\n",
"\n",
" # Aggregate the success counts (numerator) and total counts (denominator) for each variation\n",
" success_a = df[df['variation'] == variation_a][success_counts].sum()\n",
" success_b = df[df['variation'] == variation_b][success_counts].sum()\n",
"\n",
" total_a = df[df['variation'] == variation_a][total_counts].sum()\n",
" total_b = df[df['variation'] == variation_b][total_counts].sum()\n",
"\n",
" # Calculate conversion rates for each variation\n",
" value_A = success_a / total_a if total_a != 0 else 0\n",
" value_B = success_b / total_b if total_b != 0 else 0\n",
"\n",
" # Absolute difference (B - A)\n",
" abs_diff = value_B - value_A\n",
"\n",
" # Relative difference (B - A) / A\n",
" rel_diff = (value_B - value_A) / value_A if value_A != 0 else 0\n",
"\n",
" # Perform the z-test for proportions\n",
" count = [success_a, success_b] # Success counts for A and B\n",
" nobs = [total_a, total_b] # Total counts for A and B\n",
" \n",
" # Calculate z-stat and p-value\n",
" z_stat, p_value = proportions_ztest(count, nobs)\n",
" \n",
" # Flag for significance at 95% level (p-value < 0.05)\n",
" is_significant = p_value < 0.05\n",
"\n",
" # Return the result as a dictionary\n",
" return {\n",
" 'metric': metric_name,\n",
" 'variation_A_name': variation_a,\n",
" 'variation_B_name': variation_b,\n",
" 'variation_A_value': value_A,\n",
" 'variation_B_value': value_B,\n",
" 'absolute_difference': abs_diff,\n",
" 'relative_difference': rel_diff,\n",
" 'statistic': z_stat,\n",
" 'p_value': p_value,\n",
" 'is_significant_95': is_significant\n",
" }\n",
"\n",
"# Function to run Z-tests for multiple metrics and aggregate results into a DataFrame\n",
"def run_z_tests(df, z_stat_metric_definition, variations):\n",
" results = []\n",
" \n",
" # Loop over all metrics in z_stat_metric_definition\n",
" for metric_name, metric_definition in z_stat_metric_definition.items():\n",
" success_counts = metric_definition['success_counts']\n",
" total_counts = metric_definition['total_counts']\n",
" \n",
" # Run the Z-test for each metric\n",
" result = calculate_z_test(df, metric_name, variation_a=variations[0], variation_b=variations[1], \n",
" success_counts=success_counts, total_counts=total_counts)\n",
" results.append(result)\n",
" \n",
" # Create a DataFrame from the results\n",
" results_df = pd.DataFrame(results)\n",
" \n",
" return results_df"
]
},
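{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check of the helper above, the toy example below calls `proportions_ztest` directly on made-up counts (hypothetical numbers, not DWH data): a four-point conversion gap at 1,000 journeys per arm should come out significant."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy sanity check for the two-proportion Z-test (made-up counts, not DWH data)\n",
"toy_successes = [120, 160]  # e.g. completed journeys in A and B\n",
"toy_totals = [1000, 1000]   # journeys allocated to A and B\n",
"\n",
"toy_z, toy_p = proportions_ztest(toy_successes, toy_totals)\n",
"print(f\"rates: A={toy_successes[0]/toy_totals[0]:.1%} vs. B={toy_successes[1]/toy_totals[1]:.1%}\")\n",
"print(f\"z={toy_z:.3f}, p={toy_p:.4f}, significant at 95%: {toy_p < 0.05}\")"
]
},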
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### T-test for non-proportion metrics\n",
"This section defines the functions used to compute T-tests for metrics outside of the proportion scope, mostly Revenue-related metrics."
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Generalized function to calculate T-test for revenue-related metrics\n",
"def calculate_t_test(df, metric_name, variation_a, variation_b, metric_avg_column, metric_sdv_column, total_counts):\n",
" # Aggregate the avgs and standard deviations for each variation\n",
" mean_a = df[df['variation'] == variation_a][metric_avg_column].mean() # Assuming the avg is calculated for each group\n",
" mean_b = df[df['variation'] == variation_b][metric_avg_column].mean() # Assuming the avg is calculated for each group\n",
" \n",
" sdv_a = df[df['variation'] == variation_a][metric_sdv_column].mean() # Assuming the stddev is calculated for each group\n",
" sdv_b = df[df['variation'] == variation_b][metric_sdv_column].mean() # Assuming the stddev is calculated for each group\n",
" \n",
" total_a = df[df['variation'] == variation_a][total_counts].sum()\n",
" total_b = df[df['variation'] == variation_b][total_counts].sum()\n",
"\n",
" # Absolute difference (B - A)\n",
" abs_diff = mean_b - mean_a\n",
"\n",
" # Relative difference (B - A) / A\n",
" rel_diff = (mean_b - mean_a) / mean_a if mean_a != 0 else 0\n",
"\n",
" # Calculate the T-statistic and p-value using the formula for two-sample T-test\n",
" se_a = sdv_a / (total_a ** 0.5) if total_a != 0 else 0\n",
" se_b = sdv_b / (total_b ** 0.5) if total_b != 0 else 0\n",
"\n",
" # Standard error of the difference between the means\n",
" se_diff = (se_a ** 2 + se_b ** 2) ** 0.5\n",
" \n",
" # T-statistic formula\n",
" if se_diff != 0:\n",
" t_stat = (mean_a - mean_b) / se_diff\n",
" else:\n",
" t_stat = 0\n",
" \n",
" # Degrees of freedom (for independent samples)\n",
" df_degrees = min(total_a - 1, total_b - 1) # Using the smaller of the two sample sizes minus 1\n",
" \n",
" # P-value from the T-distribution\n",
" p_value = stats.t.sf(abs(t_stat), df_degrees) * 2 # Two-tailed test\n",
" \n",
" # Flag for significance at 95% level (p-value < 0.05)\n",
" is_significant = p_value < 0.05\n",
"\n",
" # Return the result as a dictionary\n",
" return {\n",
" 'metric': metric_name,\n",
" 'variation_A_name': variation_a,\n",
" 'variation_B_name': variation_b,\n",
" 'variation_A_value': mean_a,\n",
" 'variation_B_value': mean_b,\n",
" 'absolute_difference': abs_diff,\n",
" 'relative_difference': rel_diff,\n",
" 'statistic': t_stat,\n",
" 'p_value': p_value,\n",
" 'is_significant_95': is_significant\n",
" }\n",
"\n",
"# Function to run T-tests for multiple revenue metrics and aggregate results into a DataFrame\n",
"def run_t_tests(df, t_stat_metric_definition, variations):\n",
" results = []\n",
" \n",
" # Loop over all metrics in t_stat_metric_definition\n",
" for metric_name, metric_definition in t_stat_metric_definition.items():\n",
" metric_avg_column = metric_definition['metric_avg_column']\n",
" metric_sdv_column = metric_definition['metric_sdv_column']\n",
" total_counts = metric_definition['total_counts']\n",
" \n",
" # Run the T-test for each metric\n",
" result = calculate_t_test(df, metric_name, variation_a=variations[0], variation_b=variations[1], \n",
" metric_avg_column=metric_avg_column, metric_sdv_column=metric_sdv_column, \n",
" total_counts=total_counts)\n",
" results.append(result)\n",
" \n",
" # Create a DataFrame from the results\n",
" results_df = pd.DataFrame(results)\n",
" \n",
" return results_df"
]
},
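{
"cell_type": "markdown",
"metadata": {},
"source": [
"The helper above assembles the T-statistic by hand from the pre-aggregated means and standard deviations. As a cross-check, `scipy.stats.ttest_ind_from_stats` computes a two-sample test from the same kind of summary statistics; the numbers below are illustrative (taken from the sample output earlier in this notebook). With `equal_var=False` (Welch's test) the statistic should match the hand-rolled one, while the p-value differs slightly because our helper uses a more conservative degrees-of-freedom choice."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-check the hand-rolled T-test against scipy (illustrative summary stats)\n",
"toy_mean_a, toy_sdv_a, toy_n_a = 5.8, 13.4, 18\n",
"toy_mean_b, toy_sdv_b, toy_n_b = 5.9, 13.0, 24\n",
"\n",
"toy_t, toy_p = stats.ttest_ind_from_stats(\n",
"    mean1=toy_mean_b, std1=toy_sdv_b, nobs1=toy_n_b,\n",
"    mean2=toy_mean_a, std2=toy_sdv_a, nobs2=toy_n_a,\n",
"    equal_var=False  # Welch's T-test on unequal variances\n",
")\n",
"print(f\"scipy Welch T-test: t={toy_t:.4f}, p={toy_p:.4f}\")"
]
},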
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Specify the metric definition for Z-stat and T-stat tests"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# Define the variations in which we want to run the tests\n",
"var_A = 'Fixed'\n",
"var_B = 'Relative'\n",
"variations = [var_A, var_B]\n",
"\n",
"# Define the Z-test metric definitions (with both success_counts and total_counts)\n",
"z_stat_metric_definition = {\n",
" 'conversion_rate': {\n",
" 'success_counts': 'guest_journey_completed_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'payment_rate': {\n",
" 'success_counts': 'guest_journey_with_payment_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'waiver_payment_rate': {\n",
" 'success_counts': 'waiver_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'deposit_payment_rate': {\n",
" 'success_counts': 'deposit_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'CIH_payment_rate': {\n",
" 'success_counts': 'check_in_cover_count',\n",
" 'total_counts': 'guest_journeys_count'\n",
" }\n",
"}\n",
"\n",
"# Define the T-test metric definitions (with both metric_avg_column and metric_sdv_column)\n",
"t_stat_metric_definition = {\n",
" 'avg_guest_revenue_per_gj': {\n",
" 'metric_avg_column': 'guest_revenue_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'guest_revenue_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_waiver_revenue_per_gj': {\n",
" 'metric_avg_column': 'waiver_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'waiver_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_deposit_revenue_per_gj': {\n",
" 'metric_avg_column': 'deposit_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'deposit_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_CIH_revenue_per_gj': {\n",
" 'metric_avg_column': 'check_in_cover_avg_per_guest_journey',\n",
" 'metric_sdv_column': 'check_in_cover_sdv_per_guest_journey',\n",
" 'total_counts': 'guest_journeys_count'\n",
" },\n",
" 'avg_csat_per_gj_with_response': {\n",
" 'metric_avg_column': 'csat_avg_per_guest_journey_with_response',\n",
" 'metric_sdv_column': 'csat_sdv_per_guest_journey_with_response',\n",
" 'total_counts': 'guest_journey_with_responses_count'\n",
" }\n",
"\n",
"}\n",
"\n",
"# Define the metrics that will be the main ones for this A/B test:\n",
"main_metrics = ['avg_guest_revenue_per_gj', 'conversion_rate', 'payment_rate']"
]
},
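{
"cell_type": "markdown",
"metadata": {},
"source": [
"The definitions above double as a template: adding a metric is just a new entry pointing at columns already selected in the extraction query. For example, a hypothetical started-rate metric could be registered like this (not run here):\n",
"\n",
"```python\n",
"# Hypothetical extra Z-test metric, reusing columns already present in df\n",
"z_stat_metric_definition['started_rate'] = {\n",
"    'success_counts': 'guest_journey_started_count',\n",
"    'total_counts': 'guest_journeys_count'\n",
"}\n",
"```"
]
},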
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Run the computation of the metrics and statistical significance"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" metric relative_difference p_value\n",
"0 conversion_rate 1.500000 0.082857\n",
"1 payment_rate -0.062500 0.913554\n",
"2 waiver_payment_rate 0.000000 1.000000\n",
"3 deposit_payment_rate -0.250000 0.834319\n",
"4 CIH_payment_rate -1.000000 0.242526\n",
"5 avg_guest_revenue_per_gj 0.017580 0.980584\n",
"6 avg_waiver_revenue_per_gj 0.148857 0.851687\n",
"7 avg_deposit_revenue_per_gj -0.288560 0.816919\n",
"8 avg_CIH_revenue_per_gj -1.000000 0.331333\n",
"9 avg_csat_per_gj_with_response -0.250000 0.608173\n"
]
}
],
"source": [
"# Call the function to calculate the Z-test for each metric and aggregate the results\n",
"z_test_results_df = run_z_tests(df, z_stat_metric_definition=z_stat_metric_definition, variations=variations)\n",
"\n",
"# Call the function to calculate the T-test for each metric and aggregate the results\n",
"t_test_results_df = run_t_tests(df, t_stat_metric_definition=t_stat_metric_definition, variations=variations)\n",
"\n",
"# Add a new column to identify whether it's from Z-test or T-test\n",
"z_test_results_df['test_type'] = 'Z-test'\n",
"t_test_results_df['test_type'] = 'T-test'\n",
"\n",
"# Combine the dataframes after adding the 'test_type' column\n",
"combined_results_df = pd.concat([z_test_results_df, t_test_results_df], ignore_index=True)\n",
"\n",
"# Print the main aggregated DataFrame\n",
"print(combined_results_df[['metric','relative_difference','p_value']])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Results\n"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"VerificationProductSelectionButtonPosition results (last updated at 2024-12-11)\n",
"\n",
"Total Guest Journeys affected by this A/B test: 42 - Total Guest Revenue: 246 GBP.\n",
" Variation Fixed: Guest Journeys 18 (42.9%) - Guest Revenue: 104 GBP (42.4%).\n",
" Variation Relative: Guest Journeys 24 (57.1%) - Guest Revenue: 141 GBP (57.6%).\n",
"\n",
"Main Metrics - Comparing Relative vs. Fixed.\n",
"\n",
"CONVERSION RATE (not significant): 41.7% vs. 16.7% (25.0% | 150.0%).\n",
"PAYMENT RATE (not significant): 20.8% vs. 22.2% (-1.4% | -6.2%).\n",
"AVG GUEST REVENUE PER GJ (not significant): 5.9 vs. 5.8 (0.1 | 1.8%).\n",
"\n",
"Other Metrics\n",
"\n",
"WAIVER PAYMENT RATE (not significant): 16.7% vs. 16.7% (0.0% | 0.0%).\n",
"DEPOSIT PAYMENT RATE (not significant): 4.2% vs. 5.6% (-1.4% | -25.0%).\n",
"CIH PAYMENT RATE (not significant): 0.0% vs. 5.6% (-5.6% | -100.0%).\n",
"AVG WAIVER REVENUE PER GJ (not significant): 5.64 vs. 4.91 (0.73 | 14.9%).\n",
"AVG DEPOSIT REVENUE PER GJ (not significant): 0.26 vs. 0.37 (-0.11 | -28.9%).\n",
"AVG CIH REVENUE PER GJ (not significant): 0.0 vs. 0.52 (-0.52 | -100.0%).\n",
"AVG CSAT PER GJ WITH RESPONSE (not significant): 3.0 vs. 4.0 (-1.0 | -25.0%).\n"
]
}
],
"source": [
"print('\\n{} results (last updated at {})\\n'.format(ab_test_name, last_update))\n",
"\n",
"# Get main volume indicators per variation\n",
"grouped_data = df.groupby('variation')[[\"guest_journeys_count\",\"guest_revenue_sum\"]].sum()\n",
"\n",
"# Find the totals over any variation\n",
"total_count = grouped_data.sum()\n",
"\n",
"# Print overall indicators for volumes\n",
"print('Total Guest Journeys affected by this A/B test: {} - Total Guest Revenue: {} GBP.'.format(int(total_count.loc[\"guest_journeys_count\"]), \n",
" int(total_count.loc[\"guest_revenue_sum\"])))\n",
"for var in variations:\n",
" print(' Variation {}: Guest Journeys {} ({}%) - Guest Revenue: {} GBP ({}%).'.format(\n",
" var, \n",
" int(grouped_data.loc[var,'guest_journeys_count']), \n",
" round(100*(grouped_data.loc[var,'guest_journeys_count']/total_count.loc[\"guest_journeys_count\"]),1),\n",
" int(grouped_data.loc[var,'guest_revenue_sum']),\n",
" round(100*(grouped_data.loc[var,'guest_revenue_sum']/total_count.loc[\"guest_revenue_sum\"]),1)\n",
" ))\n",
"\n",
"# Split results whether the metrics are main metrics or not\n",
"main_metrics_rows = combined_results_df[combined_results_df['metric'].isin(main_metrics)]\n",
"other_metrics_rows = combined_results_df[~combined_results_df['metric'].isin(main_metrics)]\n",
"\n",
"def print_metrics(df, header=None):\n",
" if header:\n",
" print(f'\\n{header}\\n')\n",
"\n",
" for row in df.iterrows():\n",
" metric = row[1]['metric'].upper().replace('_', ' ')\n",
" if row[1]['test_type'] == 'Z-test':\n",
" value_a = str(round(100 * row[1]['variation_A_value'], 1)) + '%'\n",
" value_b = str(round(100 * row[1]['variation_B_value'], 1)) + '%'\n",
" abs_diff = str(round(100 * row[1]['absolute_difference'], 1)) + '%'\n",
" else:\n",
" value_a = str(round(row[1]['variation_A_value'], 2))\n",
" value_b = str(round(row[1]['variation_B_value'], 2))\n",
" abs_diff = str(round(row[1]['absolute_difference'], 2))\n",
" rel_diff = str(round(100 * row[1]['relative_difference'], 1)) + '%'\n",
" stat_sign = row[1]['is_significant_95']\n",
"\n",
" if stat_sign:\n",
" print(f\"{metric} - SIGNIFICANT RESULT: {value_b} vs. {value_a} ({abs_diff} | {rel_diff}).\")\n",
" else:\n",
" print(f\"{metric} (not significant): {value_b} vs. {value_a} ({abs_diff} | {rel_diff}).\")\n",
"\n",
"# Print main metrics\n",
"print_metrics(main_metrics_rows, header=\"Main Metrics - Comparing {} vs. {}.\".format(var_B, var_A))\n",
"\n",
"# Print other metrics\n",
"print_metrics(other_metrics_rows, header=\"Other Metrics\")\n"
]
},
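{
"cell_type": "markdown",
"metadata": {},
"source": [
"None of the metrics is significant yet, which raises the usual monitoring question: how many guest journeys would be needed to detect a difference of a given size? The optional sketch below estimates the required sample size per variation for a proportion metric using statsmodels' power utilities; the baseline and target rates are illustrative assumptions, not figures from the data above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rough sample-size estimate for a proportion metric (illustrative rates)\n",
"from statsmodels.stats.power import NormalIndPower\n",
"from statsmodels.stats.proportion import proportion_effectsize\n",
"\n",
"baseline_rate = 0.17  # e.g. a conversion rate close to the Fixed variation's\n",
"target_rate = 0.25    # smallest lift worth detecting\n",
"\n",
"effect_size = proportion_effectsize(target_rate, baseline_rate)\n",
"n_per_variation = NormalIndPower().solve_power(\n",
"    effect_size=effect_size, alpha=0.05, power=0.8, ratio=1.0, alternative='two-sided'\n",
")\n",
"print(f\"~{n_per_variation:.0f} guest journeys per variation needed to detect \"\n",
"      f\"{baseline_rate:.0%} -> {target_rate:.0%} at 80% power\")"
]
},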
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}