diff --git a/capturer/capturer.py b/capturer/capturer.py index c6d24ed..6971cfb 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -1,6 +1,3 @@ -import sys - -sys.path.append("..") from time import sleep import datetime @@ -16,7 +13,6 @@ from core.throttling_utils import ( ) from refresher.refresher import Refresher from core.parsing_utils import * -from core import my_logger import logging @@ -87,11 +83,12 @@ class Capturer: self.last_try_datetime = datetime.datetime.now() task.capture() - if not task.status == "Data ready": + if task.status == "Data ready": + ad_data = task.get_ad_data() + else: logging.warning("Something went wrong, not adding data.") continue - ad_data = task.get_ad_data() self._capturas_interface.insert_captura(ad_data) task.update_status("Captura inserted") logging.info("New ad inserted.") @@ -157,22 +154,18 @@ class CapturingTask: attack.attack() if attack.success: - logging.info("URL attack successful.") self._parse_html(html=attack.get_text()) return if not attack.success: - logging.info("URL attack failed.") try: if self._is_dead_ad(attack.get_text()): self.update_status("Dead ad") - logging.info("Ad was tagged as dead.") return except AttributeError: logging.error( "Something went wrong when checking if the ad is gone" ) - logging.error(AttributeError) self.update_status("Fail {}".format(self.request_failures)) self.request_failures += 1 diff --git a/refresher/refresher.py b/refresher/refresher.py index 29881ed..204967d 100644 --- a/refresher/refresher.py +++ b/refresher/refresher.py @@ -1,18 +1,16 @@ -import logging -from time import sleep +import sys -from core.config import refresher_delay +sys.path.append("..") +from time import sleep from db_layer.capturas_interface import capturas_interface from db_layer.capturing_tasks_interface import capturing_interface +from core.config import refresher_delay +from core import my_logger +import logging class Refresher: - @staticmethod - def start() -> None: - """ - Execute main flow. - :return: None - """ + def start(self): while True: sleep(refresher_delay) @@ -30,9 +28,12 @@ class Refresher: :param html: HTML del anuncio en string. :return: True si esta dado de baja, False si no. """ - if "anunciante" in html and "baja" in html: - return True - else: + try: + if ":-|" in html or "El anunciante lo dio de baja" in html: + return True + else: + return False + except TypeError: return False