From 80d65b7a7c2e245a99bb657e7754d03d29091f87 Mon Sep 17 00:00:00 2001
From: pablomartincalvo
Date: Sat, 22 Sep 2018 23:17:49 +0200
Subject: [PATCH] Fixed small errors and a type issue after the first test of the service.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .idea/workspace.xml  | 190 ++++++++++++------------------------------
 capturer/capturer.py |   9 +-
 explorer/explorer.py |  22 +++--
 3 files changed, 68 insertions(+), 153 deletions(-)

diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 2b02185..088cc1c 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -2,9 +2,8 @@
@@ -319,7 +274,14 @@
 [workspace.xml hunk bodies not recoverable]
\ No newline at end of file

diff --git a/capturer/capturer.py b/capturer/capturer.py
index fd9d807..01f614a 100644
--- a/capturer/capturer.py
+++ b/capturer/capturer.py
@@ -5,19 +5,20 @@
 ads_root = 'https://www.idealista.com/inmueble/'
 
 
 def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
-    query_parameters = {'url': ads_root + referencia,
+    query_parameters = {'ad_url': ads_root + referencia,
                         'uuid': str(uuid.uuid4()),
                         'status': 'Pending'}
 
     if uuid_exploring is None:
         query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, url)
-                             VALUES (%(uuid)s, NOW(), %(status)s, url)"""
+                             VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
     else:
+        query_parameters['uuid_exploring'] = uuid_exploring
         query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, url, fk_uuid_exploring)
-                             VALUES (%(uuid)s, NOW(), %(status)s, url, %(uuid_exploring))s"""
-        query_parameters['uuid_exploring'] = uuid_exploring
+                             VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
+
     db_wrapper.query(query_statement, query_parameters)

diff --git a/explorer/explorer.py b/explorer/explorer.py
index 0e8da92..414419d 100644
--- a/explorer/explorer.py
+++ b/explorer/explorer.py
@@ -19,6 +19,8 @@ class Explorer():
     working_hours = {'start': datetime.time(9, 0, 0),
                      'end': datetime.time(18, 0, 0)}
     monthly_capture_target = 1000
+    ad_types = {'1': 'alquiler',
+                '2': 'venta'}
 
     def __init__(self):
         try:
@@ -42,12 +44,8 @@ class Explorer():
             if not self.database_is_up():
                 alert_master("SQL DOWN", "El explorer informa de que SQL esta caida. Actividad detenida")
                 self.stop(self)
-
-            if not self.queue_is_up():
-                alert_master("REDIS DOWN", "El explorer informa de que REDIS esta caido. Actividad detenida")
-                self.stop(self)
 
-            current_task = ExploringTask(self.compose_listing_url)
+            current_task = ExploringTask(self.compose_listing_url())
             current_task.explore()
 
             if current_task.status == 'Referencias ready':
@@ -55,7 +53,7 @@
                 for referencia in referencias:
                     create_capturing_task(referencia, self.tasksdb)
 
-                current_task._update_status(self, "Sent to queue")
+                current_task._update_status("Sent to queue")
 
                 continue
 
@@ -169,12 +167,12 @@ class Explorer():
         Genera URLs de manera aleatoria
         :return:
         """
-        raiz = 'https://www.idealista.com/'
-        tipo = randint(1,2)
-        ciudad = 'barcelona'
-        numero = randint(1,30)
-        url = raiz + tipo + '-garajes/' + ciudad + '-' + ciudad + '/' + \
-              'pagina-' + numero + '.htm'
+        root = 'https://www.idealista.com/'
+        ad_type = self.ad_types[str(randint(1, 2))]
+        city = 'barcelona'
+        page_number = str(randint(1, 30))
+        url = root + ad_type + '-garajes/' + city + '-' + city + '/' + \
+              'pagina-' + page_number + '.htm'
 
         return url
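
Note on the capturer.py hunk: the old INSERT statements interpolated the bare token url and carried a stray )s, so the ad URL and the exploring UUID were never actually bound. The corrected statements use pyformat placeholders, %(name)s, which DB-API drivers (psycopg2, mysqlclient, etc.) fill in from a parameter dict at execute time. Below is a minimal, self-contained sketch of that binding, assuming db_wrapper.query() ultimately forwards to such a driver; DemoDbWrapper is hypothetical and only prints what a real cursor would receive.

import uuid

ads_root = 'https://www.idealista.com/inmueble/'


class DemoDbWrapper:
    """Hypothetical stand-in for the real db_wrapper (illustration only)."""

    def query(self, statement, parameters):
        # A real wrapper would run cursor.execute(statement, parameters),
        # letting the driver substitute every %(name)s placeholder with the
        # matching dict value, safely escaped.
        print(statement)
        print(parameters)


def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
    query_parameters = {'ad_url': ads_root + referencia,
                        'uuid': str(uuid.uuid4()),
                        'status': 'Pending'}

    if uuid_exploring is None:
        query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, url)
                             VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
    else:
        # Every %(name)s placeholder must have a matching key in the dict.
        query_parameters['uuid_exploring'] = uuid_exploring
        query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, url, fk_uuid_exploring)
                             VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""

    db_wrapper.query(query_statement, query_parameters)


create_capturing_task('12345678', DemoDbWrapper(), uuid_exploring='demo-uuid')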
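
Note on compose_listing_url() in explorer.py: randint() returns an int, so the old string concatenation raised a TypeError, and the listing section now comes from the new ad_types mapping ('alquiler' / 'venta'). A standalone sketch of the corrected composition, assuming the method is an instance method of Explorer so the class attribute is reached through self; the task and database plumbing of the real class is omitted.

from random import randint


class Explorer:
    # Listing sections keyed by the random draw: rental vs. sale.
    ad_types = {'1': 'alquiler',
                '2': 'venta'}

    def compose_listing_url(self):
        """Builds a random garage-listing URL (Genera URLs de manera aleatoria)."""
        root = 'https://www.idealista.com/'
        ad_type = self.ad_types[str(randint(1, 2))]   # 'alquiler' or 'venta'
        city = 'barcelona'
        page_number = str(randint(1, 30))             # int -> str before concatenating
        return (root + ad_type + '-garajes/' + city + '-' + city + '/'
                + 'pagina-' + page_number + '.htm')


print(Explorer().compose_listing_url())
# e.g. https://www.idealista.com/venta-garajes/barcelona-barcelona/pagina-7.htm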
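
Note on the run loop changes: ExploringTask previously received self.compose_listing_url itself (a bound method object) rather than a URL, because the call parentheses were missing, and _update_status was passed self explicitly even though an instance-method call already supplies it. A tiny illustration of the first point, using a hypothetical Task class for demonstration only:

class Task:
    def __init__(self, url):
        self.url = url


class Demo:
    def compose_listing_url(self):
        return 'https://www.idealista.com/alquiler-garajes/barcelona-barcelona/pagina-1.htm'

    def run(self):
        broken = Task(self.compose_listing_url)    # stores the method object itself
        fixed = Task(self.compose_listing_url())   # stores the composed URL string
        return broken.url, fixed.url


print(Demo().run())
# (<bound method Demo.compose_listing_url of ...>, 'https://www.idealista.com/alquiler-garajes/...')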