Merged PR 5371: Flagging w. random predictor + DWH connection improvements + restructure
Connecting to DWH: * Every existing notebook (AB, Flagging & Template) now has an initial simplified block to connect to the DWH. This keeps things DRY, as we're going to start adding more and more experiment notebooks very soon (we already have 4 notebooks). * This reads from a new `utils/dwh_utils.py`, in which we handle the connection and test it accordingly. * This also requires an optional `settings.json` path configuration to avoid warnings (not errors) when reading from `dwh_utils`. Flagging: * All flagging notebooks now live within the folder `data_driven_risk_assessment`. The already existing notebook `flagging_performance_monitoring` has also been moved here. * There's a new `experiments` folder to store the different experiments on flagging. * A new notebook has been added containing a straightforward baseline: a random predictor, which randomly flags bookings in a test set as risky, based on the booking claim rate observed in a previous training dataset. I confirm that all existing notebooks work well after the connection changes. Once merged, or to review, you will need to re-install requirements.txt, as I added sklearn. Related work items: #30804
This commit is contained in:
parent
2662f994f0
commit
38f63afbf7
8 changed files with 634 additions and 533 deletions
|
|
@ -9,85 +9,25 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pathlib\n",
|
||||
"import yaml\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sqlalchemy import create_engine\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/uri/.superhog-dwh/credentials.yml\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"CREDS_FILEPATH = pathlib.Path.home() / \".superhog-dwh\" / \"credentials.yml\"\n",
|
||||
"print(CREDS_FILEPATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prepare connection to DWH\n",
|
||||
"# Function to read credentials from the YAML file\n",
|
||||
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
|
||||
" with open(yaml_path, \"r\") as file:\n",
|
||||
" credentials = yaml.safe_load(file)\n",
|
||||
" return credentials[\"envs\"][env]\n",
|
||||
"# Function to create a PostgreSQL connection string\n",
|
||||
"def create_postgres_engine(creds: dict):\n",
|
||||
" user = creds[\"user\"]\n",
|
||||
" password = creds[\"password\"]\n",
|
||||
" host = creds[\"host\"]\n",
|
||||
" port = creds[\"port\"]\n",
|
||||
" database = creds[\"database\"]\n",
|
||||
" # Create the connection string for SQLAlchemy\n",
|
||||
" connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
|
||||
" engine = create_engine(connection_string)\n",
|
||||
" return engine\n",
|
||||
"# Function to execute a query and return the result as a pandas DataFrame\n",
|
||||
"def query_to_dataframe(engine, query: str):\n",
|
||||
" with engine.connect() as connection:\n",
|
||||
" df = pd.read_sql(query, connection)\n",
|
||||
" return df\n",
|
||||
"dwh_creds = read_credentials(yaml_path=CREDS_FILEPATH, env=\"prd\")\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds=dwh_creds)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ?column?\n",
|
||||
"0 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Silly query to test things out\n",
|
||||
"test_df = query_to_dataframe(engine=dwh_pg_engine, query=\"SELECT 1;\")\n",
|
||||
"print(test_df.head())"
|
||||
"# This script connects to a Data Warehouse (DWH) using PostgreSQL. \n",
|
||||
"# This should be common for all Notebooks, but you might need to adjust the path to the `dwh_utils` module.\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(\"./utils\")) # Adjust path if needed\n",
|
||||
"\n",
|
||||
"from dwh_utils import read_credentials, create_postgres_engine, query_to_dataframe, test_connection\n",
|
||||
"\n",
|
||||
"# --- Connect to DWH ---\n",
|
||||
"creds = read_credentials()\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds)\n",
|
||||
"\n",
|
||||
"# --- Test Query ---\n",
|
||||
"test_connection()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue