Initial commit.

This commit is contained in:
Pablo Martin 2022-07-21 11:56:41 +02:00
commit 347d3a969d
7 changed files with 350 additions and 0 deletions

169
.gitignore vendored Normal file
View file

@ -0,0 +1,169 @@
# Specific to this project
## This way you can have a config to mess around in the repo but not commit it accidentally with secrets.
config.json
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

11
cli.py Normal file
View file

@ -0,0 +1,11 @@
import json
import click
from query_performance_gauge import main
@click.command()
@click.option(
    "--config",
    required=True,
    # JSON is UTF-8 by specification; do not depend on the platform's default
    # text encoding when reading the config file.
    type=click.File(encoding="utf-8"),
    help="JSON config file with the connection details and the queries to measure.",
)
def measure_performance(config):
    """Run the queries listed in the config file and print how long each one took.

    ``config`` is the already-opened file object that click builds from the
    ``--config`` option; its JSON content is parsed and handed to ``main``.
    """
    main(json.load(config))

22
example-config.json Normal file
View file

@ -0,0 +1,22 @@
{
"connection_details": {
"engine": "trino",
"host": "trino.mercadao.pt",
"port": "443",
"user": "your user goes here",
"password": "your password goes here",
"http_scheme": "https",
"catalog": "app_lm_mysql",
"schema": "comprea"
},
"queries_to_measure": [
{
"name": "Fast Smoke Test",
"query_string": "SELECT * FROM system.runtime.nodes"
},
{
"name": "Carts on a day",
"query_string": "select * from app_lm_mysql.comprea.cart c where c.status = 'delivered' and c.date_delivered >= to_unixtime(date('2022-05-24'))"
}
]
}

View file

@ -0,0 +1,93 @@
import time
import traceback
from typing import Union, Callable
import trino.dbapi
from trino.dbapi import connect
from trino.auth import BasicAuthentication
import mysql.connector
def main(config: dict) -> None:
    """Run every configured query in sequence and print how long each one took.

    Args:
        config: Parsed configuration. ``config["connection_details"]`` is used
            to open the connection and ``config["queries_to_measure"]`` is a
            list of entries with a ``"name"`` and a ``"query_string"``.

    A failing query does not stop the session: its traceback is printed and
    the next query is attempted. The connection is closed when the session
    ends, whether it finished normally or not.
    """
    print("Starting the measuring session.")
    connection = get_connection(config)
    try:
        for query_config in config["queries_to_measure"]:
            try:
                query = TestableQuery(
                    name=query_config["name"],
                    query_string=query_config["query_string"],
                )
                measure_query_runtime(connection, query)
            except Exception:
                # Best effort by design: report the failure and keep going.
                # .get() keeps the report itself from raising when the entry
                # that failed is the one missing its "name" key.
                query_name = query_config.get("name", "<unnamed>")
                print(f"Something went wrong with query {query_name}.")
                print(traceback.format_exc())
    finally:
        # Release the connection even if the session is interrupted.
        connection.close()
    print("Finished the measuring session.")
class TestableQuery:
    """A named SQL statement whose runtime is going to be measured."""

    def __init__(self, name: str, query_string: str):
        # name: label used when reporting on the query.
        # query_string: the SQL text, stored exactly as given.
        self.name, self.query_string = name, query_string
def measure_query_runtime(connection: trino.dbapi.Connection, query: TestableQuery) -> None:
    """Execute ``query`` on ``connection``, fetch all rows and print the elapsed time.

    Args:
        connection: An open DB-API connection; only ``cursor()`` is used.
        query: The query to run; ``query_string`` is executed and ``name`` is
            used in the printed report.

    The measured interval covers cursor creation, execution and fetching the
    complete result set. The elapsed time is reported in whole seconds
    (truncated). Any exception raised while executing or fetching propagates
    to the caller; the cursor is closed either way.
    """
    # perf_counter() is a monotonic clock, so the measurement cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    cur = connection.cursor()
    try:
        cur.execute(query.query_string)
        # The rows themselves are not needed; fetching them is part of what
        # is being timed.
        cur.fetchall()
        elapsed = time.perf_counter() - start_time
    finally:
        cur.close()
    print(f"Query '{query.name}' took {int(elapsed)} seconds to run.")
def get_connection(config: dict) -> Union[trino.dbapi.Connection]:
    """Open a connection using the builder registered for the configured engine.

    The engine name is read from ``config["connection_details"]["engine"]``;
    the full ``config`` is then passed to the selected builder and whatever it
    returns is handed back to the caller.
    """
    engine_name = config["connection_details"]["engine"]
    build_connection = pick_connection_builder(engine_name)
    return build_connection(config)
def get_possible_connection_builders() -> dict:
    """Return the mapping from engine name to the function that connects to it."""
    builders_by_engine = dict(
        trino=get_connection_to_trino,
        mysql=get_connection_to_mysql,
    )
    return builders_by_engine
def pick_connection_builder(connection_engine_name: str) -> Callable:
    """Look up the connection builder registered for ``connection_engine_name``.

    Args:
        connection_engine_name: Engine identifier taken from the config.

    Returns:
        The builder function registered for that engine.

    Raises:
        ValueError: If no builder is registered under that name.
    """
    possible_connection_builders = get_possible_connection_builders()
    try:
        return possible_connection_builders[connection_engine_name]
    except KeyError:
        # The KeyError is only an artifact of the dict lookup. Suppress it so
        # the user sees a single, config-oriented error instead of a chained
        # "During handling of the above exception..." traceback.
        raise ValueError(
            f"Connection type {connection_engine_name} is unknown. Please review config."
        ) from None
def get_connection_to_trino(config):
    """Build a Trino connection from ``config["connection_details"]``.

    Reads the ``host``, ``port``, ``user``, ``password``, ``http_scheme``,
    ``catalog`` and ``schema`` keys. The user and password are wrapped in
    ``BasicAuthentication`` and passed as the ``auth`` argument.
    """
    details = config["connection_details"]
    host = details["host"]
    port = details["port"]
    user = details["user"]
    credentials = BasicAuthentication(user, details["password"])
    return connect(
        host=host,
        port=port,
        user=user,
        auth=credentials,
        http_scheme=details["http_scheme"],
        catalog=details["catalog"],
        schema=details["schema"],
    )
def get_connection_to_mysql(config) -> mysql.connector.connection.MySQLConnection:
    """Build a MySQL connection from ``config["connection_details"]``.

    Reads the ``host``, ``port``, ``user``, ``password`` and ``schema`` keys;
    ``schema`` is passed to the connector as the ``database`` argument.
    """
    details = config["connection_details"]
    return mysql.connector.connect(
        host=details["host"],
        port=details["port"],
        user=details["user"],
        password=details["password"],
        database=details["schema"],
    )

39
readme.md Normal file
View file

@ -0,0 +1,39 @@
# Query Performance Gauge
This is a little script to measure the performance of queries against a Trino or MySQL instance. You can use it to run
several queries and measure how long it takes for results to come back to your local machine.
## How to use
1. First, you need to install the package in your Python installation or a virtual environment. If you have our Google
Drive Shared Drive replicated locally, you can do it like this:
```
pip install "g:\shared drives\data drive\90 useful\trino_query_performance_gauge"
```
2. Next, create a config file. See below for details on how to compose one.
3. Once you have your config file ready, run the following command from the terminal.
```commandline
measure_query_performance --config my_config_file.json
```
## Composing a config file
You can take a look at the `example-config.json` in this repository.
A few notes:
- The valid engines are `"trino"` and `"mysql"`.
- You can place as many queries as you would like in the `queries_to_measure` list.
- I advise you to make the first query a trivial, fast query such as `SELECT 1` to validate your connection and
quickly confirm that everything is set up properly.
## A few more details
- Queries are run sequentially, as in the second query will only start after the first query is finished.
- For this to work, your local machine must have access and permission to the connection you are targeting, so
remember to set up VPNs and other necessary configs properly.

BIN
requirements.txt Normal file

Binary file not shown.

16
setup.py Normal file
View file

@ -0,0 +1,16 @@
from setuptools import setup, find_packages
# The README is committed as lowercase "readme.md"; opening "README.md" fails
# on case-sensitive filesystems. The context manager also makes sure the file
# handle is closed once the text has been read.
with open("readme.md", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="query_performance_gauge",
    packages=find_packages(),
    # cli.py is a top-level module, which find_packages() does not collect.
    # It must be listed explicitly or the console script below cannot import
    # it after a regular (non-editable) install.
    # NOTE(review): if query_performance_gauge is a single-file module rather
    # than a package directory, it needs to be listed here as well - confirm.
    py_modules=["cli"],
    description="Measure how long queries take.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    python_requires=">=3.7",
    entry_points={
        "console_scripts": [
            "measure_query_performance = cli:measure_performance",
        ],
    },
)