From 639de7c60284887fbc3ed100c5c57cb93f00130f Mon Sep 17 00:00:00 2001 From: pablo Date: Mon, 4 Jan 2021 22:29:01 +0100 Subject: [PATCH 1/6] Change strings to look for in HTML. Chores. --- refresher/refresher.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/refresher/refresher.py b/refresher/refresher.py index 204967d..29881ed 100644 --- a/refresher/refresher.py +++ b/refresher/refresher.py @@ -1,16 +1,18 @@ -import sys - -sys.path.append("..") +import logging from time import sleep + +from core.config import refresher_delay from db_layer.capturas_interface import capturas_interface from db_layer.capturing_tasks_interface import capturing_interface -from core.config import refresher_delay -from core import my_logger -import logging class Refresher: - def start(self): + @staticmethod + def start() -> None: + """ + Execute main flow. + :return: None + """ while True: sleep(refresher_delay) @@ -28,12 +30,9 @@ class Refresher: :param html: HTML del anuncio en string. :return: True si esta dado de baja, False si no. """ - try: - if ":-|" in html or "El anunciante lo dio de baja" in html: - return True - else: - return False - except TypeError: + if "anunciante" in html and "baja" in html: + return True + else: return False From 50a56091b9466cd36e0b1448f7ad58900710a154 Mon Sep 17 00:00:00 2001 From: pablo Date: Wed, 6 Jan 2021 10:38:30 +0100 Subject: [PATCH 2/6] Added missing logger import. --- capturer/capturer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capturer/capturer.py b/capturer/capturer.py index 6971cfb..ce7ebe4 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -13,6 +13,7 @@ from core.throttling_utils import ( ) from refresher.refresher import Refresher from core.parsing_utils import * +from core import my_logger import logging From f10b62bfd258502a4673d321f88acb48995de5ea Mon Sep 17 00:00:00 2001 From: pablo Date: Wed, 6 Jan 2021 10:40:07 +0100 Subject: [PATCH 3/6] Reversed condition. 
--- capturer/capturer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/capturer/capturer.py b/capturer/capturer.py index ce7ebe4..ca30995 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -84,12 +84,11 @@ class Capturer: self.last_try_datetime = datetime.datetime.now() task.capture() - if task.status == "Data ready": - ad_data = task.get_ad_data() - else: + if not task.status == "Data ready": logging.warning("Something went wrong, not adding data.") continue + ad_data = task.get_ad_data() self._capturas_interface.insert_captura(ad_data) task.update_status("Captura inserted") logging.info("New ad inserted.") From 5e023edb000c51e725081ac850ba76a4ff16b838 Mon Sep 17 00:00:00 2001 From: pablo Date: Wed, 6 Jan 2021 10:43:16 +0100 Subject: [PATCH 4/6] Added a few logging points. --- capturer/capturer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capturer/capturer.py b/capturer/capturer.py index ca30995..c0cb799 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -154,10 +154,12 @@ class CapturingTask: attack.attack() if attack.success: + logging.info("URL attack successful.") self._parse_html(html=attack.get_text()) return if not attack.success: + logging.info("URL attack failed.") try: if self._is_dead_ad(attack.get_text()): self.update_status("Dead ad") @@ -166,6 +168,7 @@ class CapturingTask: logging.error( "Something went wrong when checking if the ad is gone" ) + logging.error(AttributeError) self.update_status("Fail {}".format(self.request_failures)) self.request_failures += 1 From 575dadaaff6a119360bb7f59bb4cb0dcb96f2ce2 Mon Sep 17 00:00:00 2001 From: pablo Date: Wed, 6 Jan 2021 10:45:03 +0100 Subject: [PATCH 5/6] More logging. 
--- capturer/capturer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/capturer/capturer.py b/capturer/capturer.py index c0cb799..58e485f 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -163,6 +163,7 @@ class CapturingTask: try: if self._is_dead_ad(attack.get_text()): self.update_status("Dead ad") + logging.info("Ad was tagged as dead.") return except AttributeError: logging.error( From c7ddbb035f2a79aba1e7f49fbe7549ebb9fd11b5 Mon Sep 17 00:00:00 2001 From: pablo Date: Sat, 9 Jan 2021 17:48:24 +0100 Subject: [PATCH 6/6] Added sys.path trickery to make imports work again when executing outside of PyCharm. --- capturer/capturer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/capturer/capturer.py b/capturer/capturer.py index 6971cfb..83612be 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -1,3 +1,6 @@ +import sys + +sys.path.append("..") from time import sleep import datetime