diff --git a/capturer/capturer.py b/capturer/capturer.py index 6971cfb..c6d24ed 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -1,3 +1,6 @@ +import sys + +sys.path.append("..") from time import sleep import datetime @@ -13,6 +16,7 @@ from core.throttling_utils import ( ) from refresher.refresher import Refresher from core.parsing_utils import * +from core import my_logger import logging @@ -83,12 +87,11 @@ class Capturer: self.last_try_datetime = datetime.datetime.now() task.capture() - if task.status == "Data ready": - ad_data = task.get_ad_data() - else: + if not task.status == "Data ready": logging.warning("Something went wrong, not adding data.") continue + ad_data = task.get_ad_data() self._capturas_interface.insert_captura(ad_data) task.update_status("Captura inserted") logging.info("New ad inserted.") @@ -154,18 +157,22 @@ class CapturingTask: attack.attack() if attack.success: + logging.info("URL attack successful.") self._parse_html(html=attack.get_text()) return if not attack.success: + logging.info("URL attack failed.") try: if self._is_dead_ad(attack.get_text()): self.update_status("Dead ad") + logging.info("Ad was tagged as dead.") return except AttributeError: logging.error( "Something went wrong when checking if the ad is gone" ) + logging.error(AttributeError) self.update_status("Fail {}".format(self.request_failures)) self.request_failures += 1 diff --git a/refresher/refresher.py b/refresher/refresher.py index 204967d..29881ed 100644 --- a/refresher/refresher.py +++ b/refresher/refresher.py @@ -1,16 +1,18 @@ -import sys - -sys.path.append("..") +import logging from time import sleep + +from core.config import refresher_delay from db_layer.capturas_interface import capturas_interface from db_layer.capturing_tasks_interface import capturing_interface -from core.config import refresher_delay -from core import my_logger -import logging class Refresher: - def start(self): + @staticmethod + def start() -> None: + """ + Execute main flow. + :return: None + """ while True: sleep(refresher_delay) @@ -28,12 +30,9 @@ class Refresher: :param html: HTML del anuncio en string. :return: True si esta dado de baja, False si no. """ - try: - if ":-|" in html or "El anunciante lo dio de baja" in html: - return True - else: - return False - except TypeError: + if "anunciante" in html and "baja" in html: + return True + else: return False