some refactors
This commit is contained in:
parent
74df79d380
commit
58e99a1869
1 changed files with 12 additions and 8 deletions
|
|
@ -37,7 +37,6 @@ class SyncJob:
|
||||||
self.postgres_db_config = get_postgres_database_config_from_file(
|
self.postgres_db_config = get_postgres_database_config_from_file(
|
||||||
self.stream_config.postgres_database
|
self.stream_config.postgres_database
|
||||||
)
|
)
|
||||||
self._batch_size = batch_size
|
|
||||||
|
|
||||||
self.checkpoint_manager = self._create_checkpoint_manager()
|
self.checkpoint_manager = self._create_checkpoint_manager()
|
||||||
self.cosmos_container_client = get_container_client(
|
self.cosmos_container_client = get_container_client(
|
||||||
|
|
@ -50,6 +49,7 @@ class SyncJob:
|
||||||
|
|
||||||
self.read_record_count = 0
|
self.read_record_count = 0
|
||||||
self.committed_record_count = 0
|
self.committed_record_count = 0
|
||||||
|
self._batch_size = batch_size
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def current_checkpoint(self):
|
def current_checkpoint(self):
|
||||||
|
|
@ -58,10 +58,11 @@ class SyncJob:
|
||||||
def run_sync(self):
|
def run_sync(self):
|
||||||
logger.info("Starting sync.")
|
logger.info("Starting sync.")
|
||||||
|
|
||||||
changed_feed_response, cursor = self._prepare_source_and_sink()
|
change_feed_response = self._prepare_cosmos_reader()
|
||||||
|
postgres_cursor = self._prepare_postgres_writer()
|
||||||
|
|
||||||
docs_batch = []
|
docs_batch = []
|
||||||
for doc in changed_feed_response:
|
for doc in change_feed_response:
|
||||||
self.read_record_count += 1
|
self.read_record_count += 1
|
||||||
docs_batch.append(doc)
|
docs_batch.append(doc)
|
||||||
if len(docs_batch) < self._batch_size:
|
if len(docs_batch) < self._batch_size:
|
||||||
|
|
@ -70,7 +71,7 @@ class SyncJob:
|
||||||
logger.info("Reached batch size, trying to commit to database...")
|
logger.info("Reached batch size, trying to commit to database...")
|
||||||
self.commit_batch(
|
self.commit_batch(
|
||||||
docs_batch,
|
docs_batch,
|
||||||
cursor,
|
postgres_cursor,
|
||||||
)
|
)
|
||||||
logger.info("Committed batch.")
|
logger.info("Committed batch.")
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -83,7 +84,7 @@ class SyncJob:
|
||||||
logger.info(f"Committing dangling documents ({len(docs_batch)})")
|
logger.info(f"Committing dangling documents ({len(docs_batch)})")
|
||||||
self.commit_batch(
|
self.commit_batch(
|
||||||
docs_batch,
|
docs_batch,
|
||||||
cursor,
|
postgres_cursor,
|
||||||
)
|
)
|
||||||
logger.info("Committed final batch.")
|
logger.info("Committed final batch.")
|
||||||
|
|
||||||
|
|
@ -119,10 +120,13 @@ class SyncJob:
|
||||||
self.checkpoint_manager.commit_goal()
|
self.checkpoint_manager.commit_goal()
|
||||||
self.committed_record_count += len(docs_batch)
|
self.committed_record_count += len(docs_batch)
|
||||||
|
|
||||||
def _prepare_source_and_sink(self):
|
def _prepare_cosmos_reader(self):
|
||||||
changed_feed_response = self.cosmos_container_client.query_items_change_feed(
|
change_feed_response = self.cosmos_container_client.query_items_change_feed(
|
||||||
start_time=self.checkpoint_manager.highest_synced_timestamp
|
start_time=self.checkpoint_manager.highest_synced_timestamp
|
||||||
)
|
)
|
||||||
|
return change_feed_response
|
||||||
|
|
||||||
|
def _prepare_postgres_writer(self):
|
||||||
cursor = self.postgres_connection.cursor()
|
cursor = self.postgres_connection.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
f"""
|
f"""
|
||||||
|
|
@ -132,7 +136,7 @@ class SyncJob:
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
self.postgres_connection.commit()
|
self.postgres_connection.commit()
|
||||||
return changed_feed_response, cursor
|
return cursor
|
||||||
|
|
||||||
def _create_checkpoint_manager(self):
|
def _create_checkpoint_manager(self):
|
||||||
checkpoint_file_path = PATHS.checkpoints_folder_path / (
|
checkpoint_file_path = PATHS.checkpoints_folder_path / (
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue