stuff
This commit is contained in:
parent
34496c3128
commit
74df79d380
2 changed files with 11 additions and 14 deletions
|
|
@ -1,21 +1,12 @@
|
||||||
import datetime
|
|
||||||
import json
|
|
||||||
|
|
||||||
from psycopg2.sql import SQL
|
from psycopg2.sql import SQL
|
||||||
|
|
||||||
from anaxi.config import (
|
from anaxi.config import (
|
||||||
get_cosmos_database_config_from_file,
|
get_cosmos_database_config_from_file,
|
||||||
get_postgres_database_config_from_file,
|
get_postgres_database_config_from_file,
|
||||||
get_stream_config_from_file,
|
|
||||||
)
|
)
|
||||||
from anaxi.constants import (
|
from anaxi.cosmos_tools import create_cosmos_client_from_config
|
||||||
ASSUMED_PREHISTORICAL_DATETIME,
|
|
||||||
DESTINATION_COLUMN_NAME,
|
|
||||||
PATHS,
|
|
||||||
)
|
|
||||||
from anaxi.cosmos_tools import create_cosmos_client_from_config, get_container_client
|
|
||||||
from anaxi.logging import get_anaxi_logger
|
from anaxi.logging import get_anaxi_logger
|
||||||
from anaxi.postgres_tools import create_postgres_connection_from_config, simply_query
|
from anaxi.postgres_tools import simply_query
|
||||||
from anaxi.sync import SyncJob
|
from anaxi.sync import SyncJob
|
||||||
|
|
||||||
logger = get_anaxi_logger(__name__)
|
logger = get_anaxi_logger(__name__)
|
||||||
|
|
@ -46,7 +37,7 @@ def run_postgres_healthcheck_process(postgres_database: str) -> None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def run_sync_process(stream_id: str):
|
def run_sync_process(stream_id: str) -> None:
|
||||||
|
|
||||||
logger.info("Preparing sync job...")
|
logger.info("Preparing sync job...")
|
||||||
sync_job = SyncJob(stream_id=stream_id)
|
sync_job = SyncJob(stream_id=stream_id)
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ class SyncJob:
|
||||||
self,
|
self,
|
||||||
stream_id: str,
|
stream_id: str,
|
||||||
batch_size: int = DEFAULT_BATCH_SIZE,
|
batch_size: int = DEFAULT_BATCH_SIZE,
|
||||||
) -> None:
|
) -> "SyncJob":
|
||||||
|
|
||||||
self.stream_config = get_stream_config_from_file(stream_id)
|
self.stream_config = get_stream_config_from_file(stream_id)
|
||||||
self.cosmos_db_config = get_cosmos_database_config_from_file(
|
self.cosmos_db_config = get_cosmos_database_config_from_file(
|
||||||
|
|
@ -48,6 +48,9 @@ class SyncJob:
|
||||||
config=self.postgres_db_config
|
config=self.postgres_db_config
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.read_record_count = 0
|
||||||
|
self.committed_record_count = 0
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_checkpoint(self):
|
def current_checkpoint(self):
|
||||||
return self.checkpoint_manager.highest_synced_timestamp
|
return self.checkpoint_manager.highest_synced_timestamp
|
||||||
|
|
@ -59,6 +62,7 @@ class SyncJob:
|
||||||
|
|
||||||
docs_batch = []
|
docs_batch = []
|
||||||
for doc in changed_feed_response:
|
for doc in changed_feed_response:
|
||||||
|
self.read_record_count += 1
|
||||||
docs_batch.append(doc)
|
docs_batch.append(doc)
|
||||||
if len(docs_batch) < self._batch_size:
|
if len(docs_batch) < self._batch_size:
|
||||||
continue
|
continue
|
||||||
|
|
@ -84,6 +88,8 @@ class SyncJob:
|
||||||
logger.info("Committed final batch.")
|
logger.info("Committed final batch.")
|
||||||
|
|
||||||
logger.info("Finished sync job.")
|
logger.info("Finished sync job.")
|
||||||
|
logger.info(f"Total documents read: {self.read_record_count}.")
|
||||||
|
logger.info(f"Total documents written: {self.committed_record_count}.")
|
||||||
|
|
||||||
def commit_batch(self, docs_batch, cursor):
|
def commit_batch(self, docs_batch, cursor):
|
||||||
self.checkpoint_manager.set_new_goal(
|
self.checkpoint_manager.set_new_goal(
|
||||||
|
|
@ -111,12 +117,12 @@ class SyncJob:
|
||||||
raise e
|
raise e
|
||||||
else:
|
else:
|
||||||
self.checkpoint_manager.commit_goal()
|
self.checkpoint_manager.commit_goal()
|
||||||
|
self.committed_record_count += len(docs_batch)
|
||||||
|
|
||||||
def _prepare_source_and_sink(self):
|
def _prepare_source_and_sink(self):
|
||||||
changed_feed_response = self.cosmos_container_client.query_items_change_feed(
|
changed_feed_response = self.cosmos_container_client.query_items_change_feed(
|
||||||
start_time=self.checkpoint_manager.highest_synced_timestamp
|
start_time=self.checkpoint_manager.highest_synced_timestamp
|
||||||
)
|
)
|
||||||
|
|
||||||
cursor = self.postgres_connection.cursor()
|
cursor = self.postgres_connection.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
f"""
|
f"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue