diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 088cc1c..cc0d79c 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -26,25 +26,25 @@
-
+
-
+
-
-
+
+
-
-
+
+
@@ -61,34 +61,33 @@
-
-
+
+
-
-
+
+
-
+
+
+
+
+
+
+
+
-
+
-
-
-
-
-
-
-
-
-
-
-
+
+
-
+
+
@@ -103,17 +102,13 @@
-
-
-
-
-
-
+
+
-
-
+
+
-
+
@@ -157,8 +152,8 @@
-
+
@@ -281,7 +276,14 @@
1537546774036
-
+
+ 1537651070019
+
+
+
+ 1537651070019
+
+
@@ -297,17 +299,17 @@
-
-
+
+
-
+
-
+
@@ -351,7 +353,8 @@
-
+
+
@@ -373,16 +376,6 @@
-
-
-
-
-
-
-
-
-
-
@@ -390,20 +383,37 @@
+
+
+
+
+
+
+
+
+
+
-
-
+
+
-
+
+
-
-
+
+
+
+
+
+
+
+
diff --git a/capturer/capturer.py b/capturer/capturer.py
index 01f614a..a2f126d 100644
--- a/capturer/capturer.py
+++ b/capturer/capturer.py
@@ -1,7 +1,13 @@
+import sys
+sys.path.append('..')
import uuid
+from core.mysql_wrapper import get_anunciosdb, get_tasksdb
+from core.scrapping_utils import UrlAttack
ads_root = 'https://www.idealista.com/inmueble/'
+# TODO: Create the list of fields
+
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
@@ -19,6 +25,98 @@ def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
(uuid, write_time, status, url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
-
db_wrapper.query(query_statement, query_parameters)
+class CapturingTask:
+    """
+    A single ad-capturing task whose status is logged to the tasks database.
+
+    Every status change, including the initial 'Loading' status set on
+    construction, inserts one row into the ``capturing_tasks_logs`` table.
+    """
+
+    def __init__(self, parameters):
+        """
+        Build the task from ``parameters`` and log its initial status.
+
+        :param parameters: mapping with the keys 'uuid', 'ad_url' and
+            'uuid_exploring'.
+        :raises KeyError: if any of those keys is missing.
+        """
+        self.uuid = parameters['uuid']
+        self.status = 'Loading'
+        self.ad_url = parameters['ad_url']
+        self.uuid_exploring = parameters['uuid_exploring']
+        # Fallback document used by the HTML helpers below when no explicit
+        # ``html`` is passed. Nothing in this class assigns it yet (the
+        # capture flow is still a TODO), so it starts out as None.
+        self.html = None
+
+        self.tasksdb = get_tasksdb()
+
+        self._log_in_tasksdb()
+
+    def _update_status(self, new_status):
+        """
+        Set the task status to ``new_status`` and log the change.
+        """
+        self.status = new_status
+        self._log_in_tasksdb()
+
+    def _log_in_tasksdb(self):
+        """
+        Insert a row in the tasks database with the task UUID, a timestamp,
+        the current status, the ad URL and the UUID of the exploring task.
+        """
+
+        query_statement = """INSERT INTO capturing_tasks_logs
+            (uuid, write_time, status, ad_url, fk_uuid_exploring)
+            VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(fk_uuid_exploring)s)"""
+
+        query_parameters = {'uuid': self.uuid,
+                            'status': self.status,
+                            'ad_url': self.ad_url,
+                            'fk_uuid_exploring': self.uuid_exploring}
+
+        self.tasksdb.query(query_statement, query_parameters)
+
+    def capture(self):
+        """
+        Main method holding the capture flow (not implemented yet).
+        """
+        # TODO: Develop the capture flow
+
+    def _html_is_valid(self, html=None):
+        """
+        Read the HTML and apply the content validation rules.
+
+        The checks are not implemented yet, so this currently returns None.
+
+        :param html: document to validate; defaults to ``self.html``.
+        """
+        # Default arguments are evaluated when the function is defined, where
+        # ``self`` does not exist, so the fallback is resolved at call time.
+        if html is None:
+            html = self.html
+
+        # TODO: Check whether the HTML is a blocking page
+
+        # TODO: Length check
+
+    def _fields_not_present(self, field_list, html=None):
+        """
+        Read the HTML and return the names of the fields that are not present.
+
+        :param field_list: iterable of parameter mappings, each one accepted
+            by ``ScrapTargetField``.
+        :param html: document to search; defaults to ``self.html``.
+        :return: list with the ``name`` of every field whose ``exists`` check
+            fails on the HTML, in ``field_list`` order.
+        """
+        # Resolved at call time for the same reason as in _html_is_valid.
+        if html is None:
+            html = self.html
+
+        fields = (ScrapTargetField(field_parameters)
+                  for field_parameters in field_list)
+        return [field.name for field in fields if not field.exists(html)]
+
+
+class ScrapTargetField:
+    """
+    One target datum to look for in an HTML document.
+
+    Bundles the field name with the callable used to search for it and the
+    callable used to validate a value.
+    """
+
+    def __init__(self, target_parameters):
+        """
+        Store the field description.
+
+        :param target_parameters: mapping with the keys 'name',
+            'search_method' and 'validation_method'.
+        """
+        self.name = target_parameters['name']
+        self.search_method = target_parameters['search_method']
+        self.validation_method = target_parameters['validation_method']
+
+    def exists(self, html):
+        """
+        Tell whether the datum is found in the HTML.
+
+        :return: False when the search callable returns None, True otherwise.
+        """
+        return self.search_method(html) is not None
+
+    def validate_value(self, dato):
+        """
+        Check the value against this field's validation callable.
+
+        :return: whatever the validation callable returns for ``dato``.
+        """
+        verdict = self.validation_method(dato)
+        return verdict
+
+    def get_value(self, html):
+        """
+        Search the HTML for the datum.
+
+        :return: whatever the search callable returns for ``html``.
+        """
+        found = self.search_method(html)
+        return found
+
+
diff --git a/explorer/explorer.py b/explorer/explorer.py
index 414419d..1e2559b 100644
--- a/explorer/explorer.py
+++ b/explorer/explorer.py
@@ -43,7 +43,7 @@ class Explorer():
if not self.database_is_up():
alert_master("SQL DOWN", "El explorer informa de que SQL esta caida. Actividad detenida")
- self.stop(self)
+ self.stop()
current_task = ExploringTask(self.compose_listing_url())
current_task.explore()