Finalizado módulo de alertas. Testeado clase ExploringTask a fondo.

This commit is contained in:
pablomartincalvo 2018-09-09 19:22:21 +02:00
parent 5eedb037ed
commit 7a795d1fb8
2 changed files with 30 additions and 48 deletions

View file

@ -2,38 +2,26 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
import smtplib import smtplib
my_address = 'drogonalerts@gmail.com'
my_adress = 'drogonalerts@gmail.com'
master_address = 'pablomartincalvo@gmail.com' master_address = 'pablomartincalvo@gmail.com'
def alert_master(header, message): def alert_master(header, message):
#TODO Acabar la alerta de email
msg = MIMEMultipart() msg = MIMEMultipart()
message = "Thank you" password = "noesfacilvivirsindrogon"
msg['From'] = my_address
msg['To'] = master_address
msg['Subject'] = header
# setup the parameters of the message
password = "your_password"
msg['From'] = "your_address"
msg['To'] = "to_address"
msg['Subject'] = "Subscription"
# add in the message body
msg.attach(MIMEText(message, 'plain')) msg.attach(MIMEText(message, 'plain'))
# create server
server = smtplib.SMTP('smtp.gmail.com: 587') server = smtplib.SMTP('smtp.gmail.com: 587')
server.starttls() server.starttls()
# Login Credentials for sending the mail
server.login(msg['From'], password) server.login(msg['From'], password)
# send the message via the server.
server.sendmail(msg['From'], msg['To'], msg.as_string()) server.sendmail(msg['From'], msg['To'], msg.as_string())
server.quit() server.quit()
print
"successfully sent email to %s:" % (msg['To'])

View file

@ -2,21 +2,21 @@
import sys import sys
sys.path.append('..') sys.path.append('..')
import uuid import uuid
from datetime import datetime import datetime
from time import sleep from time import sleep
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
from random import randint from random import randint
from core.mysql_wrapper import get_anunciosdb, get_tasksdb from core.mysql_wrapper import get_anunciosdb, get_tasksdb
from core.scrapping_utils import UrlAttack from core.scrapping_utils import UrlAttack
import core.alerts from core.alerts import alert_master
class Explorer(): class Explorer():
sleep_time_no_work = 60 sleep_time_no_work = 60
sleep_time_no_service = 600 sleep_time_no_service = 600
working_hours = {start: datetime.time(9, 0, 0), working_hours = {'start': datetime.time(9, 0, 0),
end: datetime.time(18, 0, 0)} 'end': datetime.time(18, 0, 0)}
monthly_capture_target = 1000 monthly_capture_target = 1000
def __init__(self): def __init__(self):
@ -34,25 +34,26 @@ class Explorer():
while True: while True:
if not self.there_is_work(): if not self.there_is_work():
sleep(sleep_time_no_work) sleep(Explorer.sleep_time_no_work)
continue continue
if not self.database_is_up(): if not self.database_is_up():
break alert_master("SQL DOWN", "El explorer informa de que SQL esta caida. Actividad detenida")
self.stop(self)
if not self.queue_is_up(): if not self.queue_is_up():
break alert_master("REDIS DOWN", "El explorer informa de que REDIS esta caido. Actividad detenida")
self.stop(self)
current_task = ExploringTask(self.compose_listing_url) current_task = ExploringTask(self.compose_listing_url)
current_task.explore() current_task.explore()
continue continue
self.stop() self.stop()
def stop(self): def stop(self):
#TODO #TODO Detener el servicio
#Detener el servicio #Detener el servicio
pass pass
@ -78,26 +79,26 @@ class Explorer():
self.db_retries = 0 self.db_retries = 0
return True return True
except: except:
sleep(sleep_time_no_service) sleep(Explorer.sleep_time_no_service)
self.db_retries = self.db_retries + 1 self.db_retries = self.db_retries + 1
return False return False
def queue_is_up(self): def queue_is_up(self):
#TODO #TODO Comprobar que Redis esta vivo
while self.queue_retries <= self.max_queue_retries: while self.queue_retries <= self.max_queue_retries:
try: try:
#codigo que testea si redis esta vivo #codigo que testea si redis esta vivo
self.queue_retries = 0 self.queue_retries = 0
return True return True
except: except:
sleep(sleep_time_no_service) sleep(Explorer.sleep_time_no_service)
self.queue_retries = self.queue_retries + 1 self.queue_retries = self.queue_retries + 1
return False return False
def in_working_hours(self): def in_working_hours(self):
return working_hours['start'] <= datetime.now().time() <= working_hours['end'] return Explorer.working_hours['start'] <= datetime.now().time() <= Explorer.working_hours['end']
def get_referencias_acquired_today(self): def get_referencias_acquired_today(self):
""" """
@ -125,8 +126,8 @@ class Explorer():
cursor_result = self.anunciosdb.query(query_statement) cursor_result = self.anunciosdb.query(query_statement)
new_referencias_last_30 = cursor_result.fetchone() new_referencias_last_30 = cursor_result.fetchone()
deviation = (monthly_capture_target - new_referencias_last_30) / monthly_capture_target deviation = (Explorer.monthly_capture_target - new_referencias_last_30) / Explorer.monthly_capture_target
max_referencias = (monthly_capture_target/30) * (1 + (deviation)) max_referencias = (Explorer.monthly_capture_target/30) * (1 + (deviation))
return max_referencias return max_referencias
@ -198,7 +199,6 @@ class ExploringTask():
else: else:
self._update_status('Failure - Bad request') self._update_status('Failure - Bad request')
def _log_in_tasksdb(self): def _log_in_tasksdb(self):
""" """
Graba en la base de datos de tareas un registro con el UUID de la tarea, Graba en la base de datos de tareas un registro con el UUID de la tarea,
@ -225,10 +225,12 @@ class ExploringTask():
for ad in ads: for ad in ads:
if not re.match(pattern, ad["data-adid"]): if not re.match(pattern, ad["data-adid"]):
#TODO Levantar marron alert_master("Alerta - Referencias no válidas",
pass """Una tarea de exploración ha considerado inválida
una referencia. El texto de la referencia era : {}
""".format(ad["data-adid"]))
break
def _extract_referencias(self, html): def _extract_referencias(self, html):
""" """
Saca referencias de HTML, descarta las que ya exiten en la base de datos Saca referencias de HTML, descarta las que ya exiten en la base de datos
@ -265,14 +267,6 @@ class ExploringTask():
pass pass
def testear_exploring_task():
url = 'https://www.idealista.com/venta-garajes/barcelona-barcelona/'
task = ExploringTask(url)
task.explore()
print(task.referencias)
testear_exploring_task()