Initial commit.

2022-07-21 11:56:41 +02:00 · 2022-07-21 11:56:41 +02:00 · 347d3a969d
commit 347d3a969d
7 changed files with 350 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,169 @@
+# Specific to this project
+## This way you can have a config to mess around in the repo but not commit it accidentally with secrets.
+config.json
+
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+
+
--- a/cli.py
+++ b/cli.py
@ -0,0 +1,11 @@
+import json
+
+import click
+
+from query_performance_gauge import main
+
+
+@click.command()
+@click.option("--config", required=True, type=click.File())
+def measure_performance(config):
+    main(json.load(config))
--- a/example-config.json
+++ b/example-config.json
@ -0,0 +1,22 @@
+{
+  "connection_details": {
+    "engine": "trino",
+    "host": "trino.mercadao.pt",
+    "port": "443",
+    "user": "your user goes here",
+    "password": "your password goes here",
+    "http_scheme": "https",
+    "catalog": "app_lm_mysql",
+    "schema": "comprea"
+  },
+  "queries_to_measure": [
+    {
+      "name": "Fast Smoke Test",
+      "query_string": "SELECT * FROM system.runtime.nodes"
+    },
+    {
+      "name": "Carts on a day",
+      "query_string": "select * from app_lm_mysql.comprea.cart c where c.status = 'delivered' and c.date_delivered >= to_unixtime(date('2022-05-24'))"
+    }
+  ]
+}
--- a/query_performance_gauge.py
+++ b/query_performance_gauge.py
@ -0,0 +1,93 @@
+import time
+import traceback
+from typing import Union, Callable
+
+import trino.dbapi
+from trino.dbapi import connect
+from trino.auth import BasicAuthentication
+import mysql.connector
+
+
+def main(config: dict) -> None:
+    print("Starting the measuring session.")
+
+    connection = get_connection(config)
+
+    for query_config in config["queries_to_measure"]:
+        try:
+            query = TestableQuery(
+                name=query_config["name"], query_string=query_config["query_string"]
+            )
+            measure_query_runtime(connection, query)
+        except Exception as e:
+            print(f"""Something went wrong with query {query_config["name"]}.""")
+            print(f"{traceback.format_exc()}")
+
+    print("Finished the measuring session.")
+
+
+class TestableQuery:
+    def __init__(self, name: str, query_string: str):
+        self.name = name
+        self.query_string = query_string
+
+
+def measure_query_runtime(connection: trino.dbapi.Connection, query: TestableQuery):
+    start_time = time.time()
+    cur = connection.cursor()
+    cur.execute(query.query_string)
+    rows = cur.fetchall()
+    print(f"Query '{query.name}' took {int(time.time() - start_time)} seconds to run.")
+
+
+def get_connection(config: dict) -> Union[trino.dbapi.Connection]:
+    connection_builder = pick_connection_builder(config["connection_details"]["engine"])
+    connection = connection_builder(config)
+    return connection
+
+
+def get_possible_connection_builders() -> dict:
+    return {
+        "trino": get_connection_to_trino,
+        "mysql": get_connection_to_mysql,
+    }
+
+
+def pick_connection_builder(connection_engine_name: str) -> Callable:
+    possible_connection_builders = get_possible_connection_builders()
+
+    try:
+        connection_builder = possible_connection_builders[connection_engine_name]
+    except KeyError:
+        raise ValueError(
+            f"Connection type {connection_engine_name} is unknown. Please review config."
+        )
+
+    return connection_builder
+
+
+def get_connection_to_trino(config):
+    return connect(
+        host=config["connection_details"]["host"],
+        port=config["connection_details"]["port"],
+        user=config["connection_details"]["user"],
+        auth=BasicAuthentication(
+            config["connection_details"]["user"],
+            config["connection_details"]["password"],
+        ),
+        http_scheme=config["connection_details"]["http_scheme"],
+        catalog=config["connection_details"]["catalog"],
+        schema=config["connection_details"]["schema"],
+    )
+
+
+def get_connection_to_mysql(config) -> mysql.connector.connection.MySQLConnection:
+    connection = mysql.connector.connect(
+        host=config["connection_details"]["host"],
+        port=config["connection_details"]["port"],
+        user=config["connection_details"]["user"],
+        password=config["connection_details"]["password"],
+        database=config["connection_details"]["schema"],
+    )
+
+    return connection
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,39 @@
+# Query Performance Gauge
+
+This is a little script to measure the performance of queries against a Trino or MySQL instance. You can use it to run
+several queries and measure how long it takes for results to come back to your local machine.
+
+## How to use
+
+1. First, you need to install the package in your Python installation or a virtual environment. If you have our Google 
+Drive Shared Drive replicated locally, you can do it like this:
+
+```
+pip install "g:\shared drives\data drive\90 useful\trino_query_performance_gauge"
+```
+
+2. Next, create a config file. See the details below on how to compose one.
+
+3. Once you have your config file ready, run the following command from the terminal.
+
+```commandline
+measure_query_performance --config my_config_file.json
+```
+
+## Composing a config file
+
+You can take a look at the `example-config.json` in this repository.
+
+A few notes:
+- The valid engines are `"trino"` and `"mysql"`.
+- You can place as many queries as you would like in the `queries_to_measure` list.
+- I advise you to make the first query a silly, fast query such as `SELECT 1` to validate your connection and 
+  quickly confirm that everything is set up properly.
+
+
+## A few more details
+
+- Queries are run sequentially, as in the second query will only start after the first query is finished.
+- For this to work, your local machine must have access and permission to the connection you are targeting, so 
+  remember to set up VPNs and other necessary configs properly.
--- a/requirements.txt
+++ b/requirements.txt
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,16 @@
+from setuptools import setup, find_packages
+
+
+setup(
+    name="query_performance_gauge",
+    packages=find_packages(),
+    description="Measure how long queries take.",
+    long_description=open("README.md").read(),
+    long_description_content_type="text/markdown",
+    python_requires=">=3.7",
+    entry_points={
+        "console_scripts": [
+            "measure_query_performance = cli:measure_performance",
+        ],
+    },
+)