Creado cache de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestión del geocoding al capturer.

This commit is contained in:
pablomartincalvo 2018-10-02 23:14:24 +02:00
parent 9d947f7e2b
commit 3bd8de0e02
3 changed files with 150 additions and 54 deletions

View file

@ -11,7 +11,57 @@ ads_root = 'https://www.idealista.com/inmueble/'
# TODO: finish the field list -- every entry still carries an empty
# 'search_method' and 'validation_method' placeholder.
# One descriptor dict per ad field, in the order the fields are listed.
ad_fields_parameters = [
    {'name': field_name, 'search_method': '', 'validation_method': ''}
    for field_name in (
        'referencia',
        'precio',
        'tamano_categorico',
        'm2',
        'telefono',
        'texto_tipo',
        'ciudad',
        'distrito',
        'barrio',
        'calle',
        'cubierta',
        'puerta_auto',
        'ascensor',
        'alarma',
        'circuito',
        'personal',
        'texto_libre',
    )
]
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
@ -32,6 +82,7 @@ def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
db_wrapper.query(query_statement, query_parameters)
class CapturingTask:
sleep_time_failed_request = 60
@ -102,11 +153,6 @@ class CapturingTask:
#Extraer datos
self.extract_data()
#Geocodear
self.geocode()
#TODO Lidiar con el resultado del geocoding
#TODO Manejar tema cache
else:
self.request_failures += 1
@ -126,6 +172,7 @@ class CapturingTask:
"""
Lee el HTML y devuelve los campos que no esten presentes
"""
#TODO Implementar campos optativos
fields_not_present = []
for field in self.fields:
if not field.exists(html):
@ -154,7 +201,7 @@ class CapturingTask:
return self.ad_data
def geocode(self):
#TODO Hacer esta funcion bien
# Construir direccion con formato adecuado
geocode_tries = 0

View file

@ -1,6 +1,46 @@
import requests
class GeocodingCache:
    """In-memory cache of geocoded addresses, newest entry first.

    Entries are stored in ``geocoded_addresses`` as dicts with the keys
    ``'address'``, ``'latitude'``, ``'longitude'`` and ``'precision'``.
    When the cache is full, the entry at the end of the list (the oldest
    one inserted) is dropped to make room for the new one.
    """

    # Number of entries at which adding a new one evicts the oldest one.
    cache_max_size = 1000

    def __init__(self):
        """Create an empty cache."""
        self.geocoded_addresses = []

    def address_in_cache(self, address):
        """Return True if ``address`` is already stored in the cache."""
        return any(entry['address'] == address
                   for entry in self.geocoded_addresses)

    def get_coordinates(self, address):
        """Return the data stored for ``address``.

        Returns a ``(latitude, longitude, precision)`` tuple taken from the
        first matching entry, or ``None`` if the address is not cached.
        """
        for entry in self.geocoded_addresses:
            if entry['address'] == address:
                return (entry['latitude'],
                        entry['longitude'],
                        entry['precision'])
        return None

    def add_address(self, address, latitude, longitude, precision):
        """Add an address to the cache, evicting the oldest entry if full."""
        # The limit is a class attribute, so it must be read through ``self``;
        # a bare ``cache_max_size`` is not in scope inside a method and
        # raises NameError.
        if len(self.geocoded_addresses) >= self.cache_max_size:
            self.geocoded_addresses.pop()
        # Newest entries go to the front so lookups find them first.
        self.geocoded_addresses.insert(0, {'address': address,
                                           'latitude': latitude,
                                           'longitude': longitude,
                                           'precision': precision})
class GeocodingTask:
url = 'https://maps.googleapis.com/maps/api/geocode/json'