data-jupyter-notebooks/template.ipynb

116 lines
2.9 KiB
Text
Raw Normal View History

2024-11-21 11:36:30 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
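"This is a template for general Jupyter notebooks. It reads the DWH credentials from a YAML file in the user's home directory, creates a SQLAlchemy engine for PostgreSQL, and runs a trivial query to check the connection."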
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import yaml\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sqlalchemy import create_engine\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/uri/.superhog-dwh/credentials.yml\n"
]
}
],
"source": [
"# The credentials file lives in a hidden folder inside the user's home directory.\n",
"CREDS_FILEPATH = pathlib.Path.home().joinpath(\".superhog-dwh\", \"credentials.yml\")\n",
"print(CREDS_FILEPATH)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Prepare connection to DWH\n",
"\n",
"\n",
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
"    \"\"\"Read the credentials of one environment from a YAML file.\n",
"\n",
"    Args:\n",
"        yaml_path: Path of the credentials file; it is passed straight to open().\n",
"        env: Key to look up under the top-level \"envs\" entry of the file.\n",
"\n",
"    Returns:\n",
"        The value stored at credentials[\"envs\"][env].\n",
"    \"\"\"\n",
"    with open(yaml_path, \"r\") as file:\n",
"        # safe_load only builds plain Python objects from the YAML content\n",
"        credentials = yaml.safe_load(file)\n",
"    return credentials[\"envs\"][env]\n",
"\n",
"\n",
"def create_postgres_engine(creds: dict):\n",
"    \"\"\"Create a SQLAlchemy engine for PostgreSQL from a credentials mapping.\n",
"\n",
"    Args:\n",
"        creds: Mapping with the keys \"user\", \"password\", \"host\", \"port\"\n",
"            and \"database\".\n",
"\n",
"    Returns:\n",
"        The engine returned by create_engine for the assembled URL.\n",
"    \"\"\"\n",
"    # Local import keeps this cell runnable without touching the imports cell\n",
"    from urllib.parse import quote\n",
"\n",
"    # Percent-encode user and password: characters like @, : or / would\n",
"    # otherwise be read as URL delimiters and the URL would be parsed wrongly\n",
"    user = quote(str(creds[\"user\"]), safe=\"\")\n",
"    password = quote(str(creds[\"password\"]), safe=\"\")\n",
"    host = creds[\"host\"]\n",
"    port = creds[\"port\"]\n",
"    database = creds[\"database\"]\n",
"    # Create the connection string for SQLAlchemy\n",
"    connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
"    engine = create_engine(connection_string)\n",
"    return engine\n",
"\n",
"\n",
"def query_to_dataframe(engine, query: str):\n",
"    \"\"\"Execute a query and return the result as a pandas DataFrame.\n",
"\n",
"    Args:\n",
"        engine: Object whose connect() result is usable as a context manager\n",
"            and accepted by pd.read_sql.\n",
"        query: SQL text handed to pd.read_sql.\n",
"\n",
"    Returns:\n",
"        The DataFrame produced by pd.read_sql.\n",
"    \"\"\"\n",
"    # The connection is only held for the duration of the read\n",
"    with engine.connect() as connection:\n",
"        df = pd.read_sql(query, connection)\n",
"    return df\n",
"\n",
"\n",
"# Load the \"prd\" credentials and build the engine used by the cells below\n",
"dwh_creds = read_credentials(CREDS_FILEPATH, env=\"prd\")\n",
"dwh_pg_engine = create_postgres_engine(dwh_creds)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ?column?\n",
"0 1\n"
]
}
],
"source": [
"# Trivial round-trip query to confirm that the DWH connection works\n",
"test_df = query_to_dataframe(query=\"SELECT 1;\", engine=dwh_pg_engine)\n",
"print(test_df.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}