Creado caché de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestión del geocoding al capturer.

This commit is contained in:
pablomartincalvo 2018-10-02 23:14:24 +02:00
parent 9d947f7e2b
commit 3bd8de0e02
3 changed files with 150 additions and 54 deletions

View file

@ -11,7 +11,57 @@ ads_root = 'https://www.idealista.com/inmueble/'
# TODO: fill in 'search_method' and 'validation_method' for every field;
# they are currently empty-string placeholders.

# Names of the ad fields, in the order they appear in ad_fields_parameters.
_AD_FIELD_NAMES = (
    'referencia',
    'precio',
    'tamano_categorico',
    'm2',
    'telefono',
    'texto_tipo',
    'ciudad',
    'distrito',
    'barrio',
    'calle',
    'cubierta',
    'puerta_auto',
    'ascensor',
    'alarma',
    'circuito',
    'personal',
    'texto_libre',
)

# One parameter dict per ad field. Each entry is a distinct dict object with
# the keys 'name', 'search_method' and 'validation_method', so entries can be
# filled in or mutated independently of each other.
ad_fields_parameters = [
    {'name': field_name, 'search_method': '', 'validation_method': ''}
    for field_name in _AD_FIELD_NAMES
]
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
@ -32,6 +82,7 @@ def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
db_wrapper.query(query_statement, query_parameters)
class CapturingTask:
sleep_time_failed_request = 60
@ -102,11 +153,6 @@ class CapturingTask:
#Extraer datos
self.extract_data()
#Geocodear
self.geocode()
#TODO Lidiar con el resultado del geocoding
#TODO Manejar tema cache
else:
self.request_failures += 1
@ -126,6 +172,7 @@ class CapturingTask:
"""
Lee el HTML y devuelve los campos que no esten presentes
"""
#TODO Implementar campos optativos
fields_not_present = []
for field in self.fields:
if not field.exists(html):
@ -154,7 +201,7 @@ class CapturingTask:
return self.ad_data
def geocode(self):
#TODO Hacer esta funcion bien
# Construir direccion con formato adecuado
geocode_tries = 0