From 347d3a969deb949cbaa8cf58d3478608aa0c35cc Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 21 Jul 2022 11:56:41 +0200 Subject: [PATCH] Initial commit. --- .gitignore | 169 +++++++++++++++++++++++++++++++++++++ cli.py | 11 +++ example-config.json | 22 +++++ query_performance_gauge.py | 93 ++++++++++++++++++++ readme.md | 39 +++++++++ requirements.txt | Bin 0 -> 412 bytes setup.py | 16 ++++ 7 files changed, 350 insertions(+) create mode 100644 .gitignore create mode 100644 cli.py create mode 100644 example-config.json create mode 100644 query_performance_gauge.py create mode 100644 readme.md create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f8c66f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,169 @@ +# Specific to this project +## This way you can have a config to mess around in the repo but not commit it accidentally with secrets. +config.json + + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
@click.command()
@click.option(
    "--config",
    required=True,
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding (e.g. cp1252 on Windows) when reading the config.
    type=click.File(encoding="utf-8"),
    help="Path to the JSON config file describing the connection and the queries.",
)
def measure_performance(config):
    """Measure how long each query listed in the config file takes to run."""
    # click has already opened the file; parse it and hand the resulting dict
    # to the measuring session.
    main(json.load(config))
def main(config: dict) -> None:
    """Run every configured query once and print how long each one took.

    Args:
        config: Parsed config holding a ``connection_details`` mapping and a
            ``queries_to_measure`` list whose entries carry ``name`` and
            ``query_string``.

    Raises:
        ValueError: If the configured engine has no connection builder.

    A failing query is reported together with its traceback and does not stop
    the remaining queries. The connection is closed when the session ends.
    """
    print("Starting the measuring session.")

    connection = get_connection(config)

    try:
        for query_config in config["queries_to_measure"]:
            # Resolve the label up front so the error report below cannot
            # itself fail with a KeyError when an entry has no "name".
            query_name = query_config.get("name", "<unnamed>")
            try:
                query = TestableQuery(
                    name=query_config["name"],
                    query_string=query_config["query_string"],
                )
                measure_query_runtime(connection, query)
            except Exception:
                # Deliberate best effort: report and move on to the next query.
                print(f"Something went wrong with query {query_name}.")
                print(traceback.format_exc())
    finally:
        connection.close()

    print("Finished the measuring session.")


class TestableQuery:
    """A named SQL statement whose runtime should be measured."""

    # The "Test" prefix would otherwise make pytest try to collect this class.
    __test__ = False

    def __init__(self, name: str, query_string: str):
        self.name = name  # label used in the printed report
        self.query_string = query_string  # SQL text passed to cursor.execute()

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}"
            f"(name={self.name!r}, query_string={self.query_string!r})"
        )


def measure_query_runtime(
    connection: "trino.dbapi.Connection", query: TestableQuery
) -> float:
    """Execute *query*, fetch all of its rows and print the elapsed time.

    Args:
        connection: Object exposing ``cursor()``; the cursor must provide
            ``execute()``, ``fetchall()`` and ``close()``.
        query: The statement to run.

    Returns:
        Elapsed seconds from cursor creation until all rows were fetched.
    """
    # perf_counter is monotonic, so the interval cannot be skewed by wall-clock
    # adjustments happening while the query runs.
    start_time = time.perf_counter()
    cur = connection.cursor()
    try:
        cur.execute(query.query_string)
        # The rows themselves are not needed; fetching them is part of what
        # is being timed.
        cur.fetchall()
        elapsed = time.perf_counter() - start_time
    finally:
        cur.close()

    # Keep the fraction: truncating to int reports fast queries as 0 seconds.
    print(f"Query '{query.name}' took {elapsed:.2f} seconds to run.")
    return elapsed


def get_connection(
    config: dict,
) -> "Union[trino.dbapi.Connection, mysql.connector.connection.MySQLConnection]":
    """Open a connection for the engine named in ``config["connection_details"]``.

    Raises:
        ValueError: If the engine name is not a known connection type.
    """
    connection_builder = pick_connection_builder(config["connection_details"]["engine"])
    return connection_builder(config)


def get_possible_connection_builders() -> dict:
    """Return the mapping from engine name to its connection builder function."""
    return {
        "trino": get_connection_to_trino,
        "mysql": get_connection_to_mysql,
    }


def pick_connection_builder(connection_engine_name: str) -> Callable:
    """Return the connection builder registered for *connection_engine_name*.

    Raises:
        ValueError: If no builder is registered under that name.
    """
    possible_connection_builders = get_possible_connection_builders()

    try:
        return possible_connection_builders[connection_engine_name]
    except KeyError:
        # "from None": the internal KeyError adds nothing to the message the
        # user needs to see.
        raise ValueError(
            f"Connection type {connection_engine_name} is unknown. Please review config."
        ) from None


def get_connection_to_trino(config: dict) -> "trino.dbapi.Connection":
    """Build a Trino connection using basic authentication.

    Reads ``host``, ``port``, ``user``, ``password``, ``http_scheme``,
    ``catalog`` and ``schema`` from ``config["connection_details"]``.
    """
    # Local import: the Trino driver is only needed when this builder is used.
    from trino.auth import BasicAuthentication
    from trino.dbapi import connect

    details = config["connection_details"]
    return connect(
        host=details["host"],
        # The JSON config may carry the port as a string such as "443".
        port=int(details["port"]),
        user=details["user"],
        auth=BasicAuthentication(details["user"], details["password"]),
        http_scheme=details["http_scheme"],
        catalog=details["catalog"],
        schema=details["schema"],
    )


def get_connection_to_mysql(config: dict) -> "mysql.connector.connection.MySQLConnection":
    """Build a MySQL connection.

    Reads ``host``, ``port``, ``user`` and ``password`` from
    ``config["connection_details"]``; its ``schema`` value is used as the
    MySQL database.
    """
    # Local import: the MySQL driver is only needed when this builder is used.
    import mysql.connector

    details = config["connection_details"]
    return mysql.connector.connect(
        host=details["host"],
        # The JSON config may carry the port as a string such as "3306".
        port=int(details["port"]),
        user=details["user"],
        password=details["password"],
        database=details["schema"],
    )
from pathlib import Path

from setuptools import find_packages, setup

# Resolve the readme relative to this file so the build works from any working
# directory. The committed file is named "readme.md" (lower case); opening
# "README.md" fails on case-sensitive file systems.
long_description = (Path(__file__).parent / "readme.md").read_text(encoding="utf-8")

setup(
    name="query_performance_gauge",
    packages=find_packages(),
    # The code lives in top-level modules, which find_packages() does not pick
    # up; without this the console-script entry points cannot import "cli".
    py_modules=["cli", "query_performance_gauge"],
    description="Measure how long queries take.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    python_requires=">=3.7",
    # Third-party packages imported by cli.py and query_performance_gauge.py.
    install_requires=[
        "click",
        "trino",
        "mysql-connector-python",
    ],
    entry_points={
        "console_scripts": [
            "measure_query_performance = cli:measure_performance",
            # Alias matching the command name shown in the readme.
            "trino_measure_query_performance = cli:measure_performance",
        ],
    },
)