From 7a795d1fb8506ab7be6aa2eb90df507b9dcd7ef8 Mon Sep 17 00:00:00 2001 From: pablomartincalvo Date: Sun, 9 Sep 2018 19:22:21 +0200 Subject: [PATCH] Finalizado modulo de alertas. Testeado clase ExploringTask a fondo. --- core/alerts.py | 22 ++++------------- explorer/explorer.py | 56 ++++++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 48 deletions(-) diff --git a/core/alerts.py b/core/alerts.py index c91cc36..48267e0 100644 --- a/core/alerts.py +++ b/core/alerts.py @@ -2,38 +2,26 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import smtplib - -my_adress = 'drogonalerts@gmail.com' +my_address = 'drogonalerts@gmail.com' master_address = 'pablomartincalvo@gmail.com' def alert_master(header, message): - #TODO Acabar la alerta de email msg = MIMEMultipart() - message = "Thank you" + password = "noesfacilvivirsindrogon" + msg['From'] = my_address + msg['To'] = master_address + msg['Subject'] = header - # setup the parameters of the message - password = "your_password" - msg['From'] = "your_address" - msg['To'] = "to_address" - msg['Subject'] = "Subscription" - - # add in the message body msg.attach(MIMEText(message, 'plain')) - # create server server = smtplib.SMTP('smtp.gmail.com: 587') server.starttls() - # Login Credentials for sending the mail server.login(msg['From'], password) - # send the message via the server. server.sendmail(msg['From'], msg['To'], msg.as_string()) server.quit() - -print -"successfully sent email to %s:" % (msg['To']) \ No newline at end of file diff --git a/explorer/explorer.py b/explorer/explorer.py index d551ffe..327723e 100644 --- a/explorer/explorer.py +++ b/explorer/explorer.py @@ -2,21 +2,21 @@ import sys sys.path.append('..') import uuid -from datetime import datetime +import datetime from time import sleep from bs4 import BeautifulSoup import re from random import randint from core.mysql_wrapper import get_anunciosdb, get_tasksdb from core.scrapping_utils import UrlAttack -import core.alerts +from core.alerts import alert_master class Explorer(): sleep_time_no_work = 60 sleep_time_no_service = 600 - working_hours = {start: datetime.time(9, 0, 0), - end: datetime.time(18, 0, 0)} + working_hours = {'start': datetime.time(9, 0, 0), + 'end': datetime.time(18, 0, 0)} monthly_capture_target = 1000 def __init__(self): @@ -34,25 +34,26 @@ class Explorer(): while True: if not self.there_is_work(): - sleep(sleep_time_no_work) + sleep(Explorer.sleep_time_no_work) continue if not self.database_is_up(): - break + alert_master("SQL DOWN", "El explorer informa de que SQL esta caida. Actividad detenida") + self.stop(self) if not self.queue_is_up(): - break - + alert_master("REDIS DOWN", "El explorer informa de que REDIS esta caido. Actividad detenida") + self.stop(self) + current_task = ExploringTask(self.compose_listing_url) current_task.explore() continue self.stop() - - + def stop(self): - #TODO + #TODO Detener el servicio #Detener el servicio pass @@ -78,26 +79,26 @@ class Explorer(): self.db_retries = 0 return True except: - sleep(sleep_time_no_service) + sleep(Explorer.sleep_time_no_service) self.db_retries = self.db_retries + 1 return False def queue_is_up(self): - #TODO + #TODO Comprobar que Redis esta vivo while self.queue_retries <= self.max_queue_retries: try: #codigo que testea si redis esta vivo self.queue_retries = 0 return True except: - sleep(sleep_time_no_service) + sleep(Explorer.sleep_time_no_service) self.queue_retries = self.queue_retries + 1 return False def in_working_hours(self): - return working_hours['start'] <= datetime.now().time() <= working_hours['end'] + return Explorer.working_hours['start'] <= datetime.now().time() <= Explorer.working_hours['end'] def get_referencias_acquired_today(self): """ @@ -125,8 +126,8 @@ class Explorer(): cursor_result = self.anunciosdb.query(query_statement) new_referencias_last_30 = cursor_result.fetchone() - deviation = (monthly_capture_target - new_referencias_last_30) / monthly_capture_target - max_referencias = (monthly_capture_target/30) * (1 + (deviation)) + deviation = (Explorer.monthly_capture_target - new_referencias_last_30) / Explorer.monthly_capture_target + max_referencias = (Explorer.monthly_capture_target/30) * (1 + (deviation)) return max_referencias @@ -198,7 +199,6 @@ class ExploringTask(): else: self._update_status('Failure - Bad request') - def _log_in_tasksdb(self): """ Graba en la base de datos de tareas un registro con el UUID de la tarea, @@ -225,10 +225,12 @@ class ExploringTask(): for ad in ads: if not re.match(pattern, ad["data-adid"]): - #TODO Levantar marron - pass - - + alert_master("Alerta - Referencias no válidas", + """Una tarea de exploración ha considerado inválida + una referencia. El texto de la referencia era : {} + """.format(ad["data-adid"])) + break + def _extract_referencias(self, html): """ Saca referencias de HTML, descarta las que ya exiten en la base de datos @@ -265,14 +267,6 @@ class ExploringTask(): pass -def testear_exploring_task(): - url = 'https://www.idealista.com/venta-garajes/barcelona-barcelona/' - task = ExploringTask(url) - task.explore() - - print(task.referencias) - - -testear_exploring_task() +