First commit
This commit is contained in:
parent
1af838b661
commit
fb36843b4f
5 changed files with 314 additions and 14 deletions
162
.gitignore
vendored
Normal file
162
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
38
README.md
38
README.md
|
|
@ -1,20 +1,30 @@
|
|||
# Introduction
|
||||
TODO: Give a short introduction of your project. Let this section explain the objectives or the motivation behind this project.
|
||||
Small repository to save and share Jupyter Notebooks within the Data Team.
|
||||
|
||||
# Getting Started
|
||||
TODO: Guide users through getting your code up and running on their own system. In this section you can talk about:
|
||||
1. Installation process
|
||||
2. Software dependencies
|
||||
3. Latest releases
|
||||
4. API references
|
||||
|
||||
# Build and Test
|
||||
TODO: Describe and show how to build your code and run the tests.
|
||||
### Basics
|
||||
|
||||
# Contribute
|
||||
TODO: Explain how other users and developers can contribute to make your code better.
|
||||
- Pre-requisites
|
||||
- You need a Unix-like environment. That can be Linux, macOS or WSL.
|
||||
- You need to have Python `>=3.10` installed.
|
||||
- All docs will assume you are using VSCode.
|
||||
- Also install the following VSCode Python extension: ms-python.python
|
||||
- Set up
|
||||
- Create a virtual environment for the project with `python3 -m venv venv`.
|
||||
- It's recommended that you set up the new `venv` as your default interpreter for VSCode. To do this, press Ctrl+Shift+P, and look for the `Python: Select Interpreter` option. Choose the new `venv`.
|
||||
- Ensure that VSCode is using this virtual environment. In a terminal, you can activate it by running `source venv/bin/activate`.
|
||||
- Activate the virtual environment and run `pip install -r requirements.txt`
|
||||
- Lastly, you need to install the following extension to ensure VSCode can render the notebooks:
|
||||
https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
|
||||
|
||||
If you want to learn more about creating good readme files then refer the following [guidelines](https://docs.microsoft.com/en-us/azure/devops/repos/git/create-a-readme?view=azure-devops). You can also seek inspiration from the below readme files:
|
||||
- [ASP.NET Core](https://github.com/aspnet/Home)
|
||||
- [Visual Studio Code](https://github.com/Microsoft/vscode)
|
||||
- [Chakra Core](https://github.com/Microsoft/ChakraCore)
|
||||
### DWH connection
|
||||
To connect to the DWH, you will need to create a local file with the credentials. You can use the file `credentials_example.yml` as a template. Remember to fill in the user and password.
|
||||
|
||||
Once done, save the credentials file at the following path (the notebook looks for it in the `.superhog-dwh` folder inside your home directory):
|
||||
`/home/{your_user}/.superhog-dwh/credentials.yml`
|
||||
|
||||
Since this file contains credentials, you need to secure it so that only your user can read and write it. To do so, run:
|
||||
`chmod 600 /home/{your_user}/.superhog-dwh/credentials.yml`
|
||||
|
||||
Once you've completed the previous steps, try running the code in the `template.ipynb` file. If it runs without errors, your setup is working. If not, check with someone in the Data Team.
|
||||
|
|
|
|||
7
credentials_example.yml
Normal file
7
credentials_example.yml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
envs:
|
||||
prd:
|
||||
user: <dwh-user>
|
||||
password: <dwh-password>
|
||||
host: superhog-dwh-prd.postgres.database.azure.com
|
||||
port: 5432
|
||||
database: dwh
|
||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
jupyter
|
||||
pandas
|
||||
pyyaml
|
||||
sqlalchemy
|
||||
psycopg2-binary
|
||||
seaborn
|
||||
115
template.ipynb
Normal file
115
template.ipynb
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is a template for general jupyter notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pathlib\n",
|
||||
"import yaml\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sqlalchemy import create_engine\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/uri/.superhog-dwh/credentials.yml\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"CREDS_FILEPATH = pathlib.Path.home() / \".superhog-dwh\" / \"credentials.yml\"\n",
|
||||
"print(CREDS_FILEPATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prepare connection to DWH\n",
|
||||
"# Function to read credentials from the YAML file\n",
|
||||
"def read_credentials(yaml_path: str, env: str = \"prd\"):\n",
|
||||
" with open(yaml_path, \"r\") as file:\n",
|
||||
" credentials = yaml.safe_load(file)\n",
|
||||
" return credentials[\"envs\"][env]\n",
|
||||
"# Function to create a SQLAlchemy engine for the PostgreSQL DWH\n",
|
||||
"def create_postgres_engine(creds: dict):\n",
|
||||
" user = creds[\"user\"]\n",
|
||||
" password = creds[\"password\"]\n",
|
||||
" host = creds[\"host\"]\n",
|
||||
" port = creds[\"port\"]\n",
|
||||
" database = creds[\"database\"]\n",
|
||||
" # Create the connection string for SQLAlchemy\n",
|
||||
" connection_string = f\"postgresql://{user}:{password}@{host}:{port}/{database}\"\n",
|
||||
" engine = create_engine(connection_string)\n",
|
||||
" return engine\n",
|
||||
"# Function to execute a query and return the result as a pandas DataFrame\n",
|
||||
"def query_to_dataframe(engine, query: str):\n",
|
||||
" with engine.connect() as connection:\n",
|
||||
" df = pd.read_sql(query, connection)\n",
|
||||
" return df\n",
|
||||
"dwh_creds = read_credentials(yaml_path=CREDS_FILEPATH, env=\"prd\")\n",
|
||||
"dwh_pg_engine = create_postgres_engine(creds=dwh_creds)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ?column?\n",
|
||||
"0 1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Silly query to test things out\n",
|
||||
"test_df = query_to_dataframe(engine=dwh_pg_engine, query=\"SELECT 1;\")\n",
|
||||
"print(test_df.head())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue