import json

import click

from query_performance_gauge import run_measuring_session
from utils import compose_config


# CLI entry point. Every option is optional at the click level and arrives
# either as an already-opened file handle or as None; deciding which
# combination is acceptable is delegated to compose_config.
# NOTE: intentionally no docstring here -- click would surface it as the
# command's --help text, which would change the program's output.
@click.command()
@click.option("--credentials", type=click.File())
@click.option("--queries", type=click.File())
@click.option("--config", type=click.File())
def measure_performance(credentials, queries, config):
    # Build one session configuration out of whichever files were supplied...
    session_config = compose_config(config, credentials, queries)
    # ...and hand it over to the measuring session.
    run_measuring_session(session_config)
a/config_examples/mysql_credentials_config.json b/config_examples/mysql_credentials_config.json new file mode 100644 index 0000000..31990dd --- /dev/null +++ b/config_examples/mysql_credentials_config.json @@ -0,0 +1,10 @@ +{ + "connection_details": { + "engine": "mysql", + "host": "the-sql-host", + "port": 3306, + "user": "your_user", + "password": "your_password", + "schema": "comprea" + } +} \ No newline at end of file diff --git a/config_examples/mysql_credentials_with_tunnel_config.json b/config_examples/mysql_credentials_with_tunnel_config.json new file mode 100644 index 0000000..5c3a911 --- /dev/null +++ b/config_examples/mysql_credentials_with_tunnel_config.json @@ -0,0 +1,18 @@ +{ + "connection_details": { + "engine": "mysql", + "host": "the-actual-host", + "port": 3306, + "user": "your_user", + "password": "your_password", + "schema": "comprea", + "ssh_tunneling": { + "use_tunnel": true, + "ssh_host": "the_ssh_tunnel_host", + "ssh_username": "the_ssh_tunnel_user", + "ssh_port": 22, + "path_to_key": "G:\\path\\to\\ssh\\key.pem", + "ssh_private_key_password": "my_keys_password" + } + } +} \ No newline at end of file diff --git a/config_examples/mysql_with_tunnel_config.json b/config_examples/mysql_with_tunnel_config.json deleted file mode 100644 index 63fd975..0000000 --- a/config_examples/mysql_with_tunnel_config.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "connection_details": { - "engine": "mysql", - "host": "the-actual-host", - "port": 3306, - "user": "your_user", - "password": "your_password", - "schema": "comprea", - "ssh_tunneling": { - "use_tunnel": true, - "ssh_host": "the_ssh_tunnel_host", - "ssh_username": "the_ssh_tunnel_user", - "ssh_port": 22, - "path_to_key": "G:\\path\\to\\ssh\\key.pem" - } - }, - "queries_to_measure": [ - { - "name": "Fast Smoke Test", - "query_string": "SELECT 1" - }, - { - "name": "Delivered carts on a day", - "query_string": "select * from comprea.cart c where c.status = 'delivered' and c.date_delivered >= 
UNIX_TIMESTAMP(date('2022-05-24'))" - } - ] -} \ No newline at end of file diff --git a/config_examples/query_config_example.json b/config_examples/query_config_example.json new file mode 100644 index 0000000..bd256cf --- /dev/null +++ b/config_examples/query_config_example.json @@ -0,0 +1,12 @@ +{ + "queries_to_measure": [ + { + "name": "Fast Smoke Test", + "query_string": "SELECT 1" + }, + { + "name": "Delivered carts on a day", + "query_string": "select * from comprea.cart c where c.status = 'delivered' and c.date_delivered >= UNIX_TIMESTAMP(date('2022-05-24'))" + } + ] +} \ No newline at end of file diff --git a/config_examples/trino_config.json b/config_examples/trino_config.json deleted file mode 100644 index eaac0c0..0000000 --- a/config_examples/trino_config.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "connection_details": { - "engine": "trino", - "host": "the_trino_host", - "port": "443", - "user": "your_user", - "password": "your_password", - "http_scheme": "https", - "catalog": "app_lm_mysql", - "schema": "comprea", - "ssh_tunneling": { - "use_tunnel": false - } - }, - "queries_to_measure": [ - { - "name": "Fast Smoke Test", - "query_string": "SELECT * FROM system.runtime.nodes" - }, - { - "name": "Delivered carts on a day", - "query_string": "select * from app_lm_mysql.comprea.cart c where c.status = 'delivered' and c.date_delivered >= to_unixtime(date('2022-05-24'))" - } - ] -} \ No newline at end of file diff --git a/config_examples/trino_credentials_config.json b/config_examples/trino_credentials_config.json new file mode 100644 index 0000000..68d1ab7 --- /dev/null +++ b/config_examples/trino_credentials_config.json @@ -0,0 +1,12 @@ +{ + "connection_details": { + "engine": "trino", + "host": "the_trino_host", + "port": "443", + "user": "your_user", + "password": "your_password", + "http_scheme": "https", + "catalog": "app_lm_mysql", + "schema": "comprea" + } +} \ No newline at end of file diff --git a/connections.py b/connections.py index 
67edb91..b05810f 100644 --- a/connections.py +++ b/connections.py @@ -21,15 +21,22 @@ def singleton(class_): @singleton class MySSHTunnel: def __init__( - self, ssh_host, ssh_port, ssh_username, ssh_pkey, remote_host, remote_port + self, + ssh_host, + ssh_port, + ssh_username, + ssh_pkey, + remote_host, + remote_port, + ssh_private_key_password=None, ): - self.tunnel = SSHTunnelForwarder( ssh_host=(ssh_host, ssh_port), ssh_username=ssh_username, ssh_pkey=ssh_pkey, remote_bind_address=(remote_host, remote_port), local_bind_address=("127.0.0.1", remote_port), + ssh_private_key_password=ssh_private_key_password, ) def start(self): @@ -39,9 +46,9 @@ class MySSHTunnel: self.tunnel.stop() -def get_connection(connection_config: dict) -> Union[trino.dbapi.Connection]: +def get_connection(connection_config: dict) -> Union[trino.dbapi.Connection, mysql.connector.MySQLConnection]: """ - Pick the right way to build a connection and pass it the connection details. + Pick the right way to build a connection and connect. :param connection_config: confi :return: @@ -115,12 +122,13 @@ def get_connection_to_mysql( :param connection_config: specifies host, port, etc. 
:return: the connection object """ - if connection_config["ssh_tunneling"]["use_tunnel"]: + mysql_connection_host = connection_config["host"] + + if connection_config.get("ssh_tunneling", {}).get("use_tunnel", None): open_ssh_tunnel(connection_config) mysql_connection_host = "127.0.0.1" - - if not connection_config["ssh_tunneling"]["use_tunnel"]: - mysql_connection_host = connection_config["host"] + # If we open an SSH tunnel, we reference the local bind instead of the + # actual host connection = mysql.connector.connect( host=mysql_connection_host, @@ -162,6 +170,10 @@ def open_ssh_tunnel(connection_config: dict) -> None: ssh_pkey=connection_config["ssh_tunneling"]["path_to_key"], remote_host=connection_config["host"], remote_port=connection_config["port"], + ssh_private_key_password=connection_config["ssh_tunneling"].get( + "ssh_private_key_password", + None, # Since password is optional, we need a safe default + ), ).start() print("SSH tunnel is now open.") diff --git a/query_performance_gauge.py b/query_performance_gauge.py index 5d00fdf..353fc45 100644 --- a/query_performance_gauge.py +++ b/query_performance_gauge.py @@ -21,7 +21,25 @@ def run_measuring_session(config: dict) -> None: connection = get_connection(config["connection_details"]) - for query_config in config["queries_to_measure"]: + measure_queries(config["queries_to_measure"], connection) + + print("Finished the measuring session.") + + clean_up_connection(config["connection_details"]) + + +def measure_queries( + queries_config: dict, + connection: Union[trino.dbapi.Connection, mysql.connector.MySQLConnection], +) -> None: + """ + Measure several queries through a connection. + + :param queries_config: the configuration for the queries to measure. + :param connection: the connection to the queriable server. 
+ :return: None + """ + for query_config in queries_config: try: query = TestableQuery( name=query_config["name"], query_string=query_config["query_string"] @@ -31,10 +49,6 @@ def run_measuring_session(config: dict) -> None: print(f"""Something went wrong with query {query_config["name"]}.""") print(f"{traceback.format_exc()}") - print("Finished the measuring session.") - - clean_up_connection(config["connection_details"]) - class TestableQuery: """ diff --git a/readme.md b/readme.md index e9b5652..714d9b1 100644 --- a/readme.md +++ b/readme.md @@ -9,41 +9,53 @@ several queries and measure how long it takes for results to come back to your l Drive Shared Drive replicated locally, you can do it like this: ```commandline -pip install "git+file:///g:\shared drives\data drive\90 useful\10 query_performance_gauge@master"``` +pip install "file:///g:\shared drives\data drive\90 useful\10 query_performance_gauge" ``` -If not, you simply need to clone the repo somewhere in your machine and replace the path in the previous command. +You will install whatever version is in the shared drive at that point. Depending on what you want to achieve, you +might want to instead make a copy of the repository in your own local machine and install from there. That way, you +won't be affected by someone making `git checkout` in the shared drive. -2. Afterwards, you need to make a config file. See below details on how to compose one. +2. Afterwards, you need to make a credentials and a query config file. See below details on how to compose one. 3. Once you have your config file ready, run the following command from the terminal. ```commandline -measure_query_performance --config my_config_file.json +measure_query_performance --credentials my_credentials.json --queries my_queries.json ``` 4. Results will be printed in your console as they are available. If instead you would like to store them in a file, a quick and easy hack is to redirect output in Powershell to a file. 
You can do it like this: ```commandline -measure_query_performance --config my_config_file.json | Out-File - FilePath my_results.txt +measure_query_performance --credentials my_credentials.json --queries my_queries.json | Out-File -FilePath my_results.txt ``` -## Composing a config file +## Composing config files + +The application takes two config files: one for the credentials and connection details, one for the queries to run. +You can mix and match several of both (as in, you can run the same query set at different dbs, or you can have +multiple query sets run on the same database). You can take a look at examples for different setups in `config_examples`. If you want to make a new config file, it -will probably be easier for you to start from one of those templates. +will probably be easier for you to start from one of those templates. The legacy examples should be ignored unless +you want to understand outdated config files. A few notes: - - The valid engines are `"trino"` and `"mysql"`. - You can place as many queries as you would like in the `queries_to_measure` list. - I advise you to make the first query a silly, fast query such as `SELECT 1` to validate your connection and quickly confirm that everything is set up properly. +## Other features +- The connection to the database can be made through an SSH tunnel. See the examples in `config_examples` to + understand how to configure it. + ## A few more details - Queries are run sequentially, as in the second query will only start after the first query is finished. +- The script will run all queries, even if there is an exception when running one or more of them. If one query + fails, the error traceback will be printed so you can debug and the script will move on to the next query. - For this to work, your local machine must have access and permission to the connection you are targeting, so remember to set up VPNs and other necessary configs properly.
import json
import warnings
from io import TextIOWrapper
from typing import Union


def compose_config(
    config: Union[TextIOWrapper, None],
    credentials: Union[TextIOWrapper, None],
    queries: Union[TextIOWrapper, None],
) -> dict:
    """
    Receive the CLI arguments and compose a session config.

    If a full config file is given it takes precedence: it is parsed and
    returned as-is, and the credentials and queries files are not read.
    Otherwise the credentials and queries files are both parsed and merged
    into a single dict.

    :param config: file pointer to a full config file.
    :param credentials: file pointer to a credentials file.
    :param queries: file pointer to a queries file.
    :return: a dict with the composed configuration.
    :raises ValueError: if no full config file is given and either the
        credentials or the queries file is missing.
    """

    if config is not None:
        # Actually emit the warning: merely instantiating DeprecationWarning
        # (as was done before) creates an exception object and throws it
        # away, so the user was never notified.
        # NOTE: Python's default warning filters hide DeprecationWarning
        # unless it is attributed to code in __main__, so it may only be
        # visible when warnings are enabled (e.g. `python -W default`).
        warnings.warn(
            "Usage of a full config file will be deprecated. Instead, use the credentials and queries arguments.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller, not this helper
        )
        return json.load(config)

    if credentials is None or queries is None:
        raise ValueError(
            "You need to provide both --credentials and --queries arguments."
        )

    # Merge both into one dict so it follows the same structure as a full config file.
    # On a top-level key collision the value from the queries file wins.
    return {**json.load(credentials), **json.load(queries)}