Creado cache de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestion del geocoding al capturer.
This commit is contained in:
parent
9d947f7e2b
commit
3bd8de0e02
3 changed files with 150 additions and 54 deletions
|
|
@ -11,7 +11,57 @@ ads_root = 'https://www.idealista.com/inmueble/'
|
|||
|
||||
#TODO Crear la lista de campos
|
||||
|
||||
ad_fields_parameters = []
|
||||
# One settings dict per ad field, in capture order. The search and validation
# methods are still empty-string placeholders for every field.
ad_fields_parameters = [
    {'name': field_name, 'search_method': '', 'validation_method': ''}
    for field_name in (
        'referencia',
        'precio',
        'tamano_categorico',
        'm2',
        'telefono',
        'texto_tipo',
        'ciudad',
        'distrito',
        'barrio',
        'calle',
        'cubierta',
        'puerta_auto',
        'ascensor',
        'alarma',
        'circuito',
        'personal',
        'texto_libre',
    )
]
|
||||
|
||||
|
||||
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
|
||||
|
|
@ -32,6 +82,7 @@ def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
|
|||
|
||||
db_wrapper.query(query_statement, query_parameters)
|
||||
|
||||
|
||||
class CapturingTask:
|
||||
|
||||
sleep_time_failed_request = 60
|
||||
|
|
@ -102,11 +153,6 @@ class CapturingTask:
|
|||
|
||||
#Extraer datos
|
||||
self.extract_data()
|
||||
#Geocodear
|
||||
self.geocode()
|
||||
|
||||
#TODO Lidiar con el resultado del geocoding
|
||||
#TODO Manejar tema cache
|
||||
|
||||
else:
|
||||
self.request_failures += 1
|
||||
|
|
@ -126,6 +172,7 @@ class CapturingTask:
|
|||
"""
|
||||
Lee el HTML y devuelve los campos que no esten presentes
|
||||
"""
|
||||
#TODO Implementar campos optativos
|
||||
fields_not_present = []
|
||||
for field in self.fields:
|
||||
if not field.exists(html):
|
||||
|
|
@ -154,7 +201,7 @@ class CapturingTask:
|
|||
return self.ad_data
|
||||
|
||||
def geocode(self):
|
||||
|
||||
#TODO Hacer esta funcion bien
|
||||
# Construir direccion con formato adecuado
|
||||
geocode_tries = 0
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,46 @@
|
|||
import requests
|
||||
|
||||
|
||||
class GeocodingCache:
    """In-memory cache of addresses that have already been geocoded.

    Entries live in ``geocoded_addresses`` as dicts with the keys
    ``'address'``, ``'latitude'``, ``'longitude'`` and ``'precision'``.
    New entries are inserted at the front of the list, so when the cache is
    full the entry dropped from the end is the oldest one.
    """

    # Number of entries at which adding a new one evicts the oldest.
    cache_max_size = 1000

    def __init__(self):
        """Create an empty cache."""
        self.geocoded_addresses = []

    def _find(self, address):
        """Return the first cached entry whose address matches, or None."""
        for geocoded_address in self.geocoded_addresses:
            if geocoded_address['address'] == address:
                return geocoded_address
        return None

    def address_in_cache(self, address):
        """
        Check whether the address is already in the cache.

        Returns True if an entry with this exact address exists, else False.
        """
        return self._find(address) is not None

    def get_coordinates(self, address):
        """
        Retrieve the data associated with the address.

        Returns a ``(latitude, longitude, precision)`` tuple, or None when
        the address is not cached.
        """
        entry = self._find(address)
        if entry is None:
            return None
        return entry['latitude'], entry['longitude'], entry['precision']

    def add_address(self, address, latitude, longitude, precision):
        """
        Add the address to the cache, making room for it if necessary.

        When the cache already holds ``cache_max_size`` entries, the oldest
        one (the last in the list) is discarded before inserting.
        """
        # The limit is a class attribute, so it must be read through self:
        # a bare ``cache_max_size`` is not in scope inside a method and
        # raises NameError.
        if len(self.geocoded_addresses) >= self.cache_max_size:
            self.geocoded_addresses.pop()

        self.geocoded_addresses.insert(0, {'address': address,
                                           'latitude': latitude,
                                           'longitude': longitude,
                                           'precision': precision})
|
||||
|
||||
|
||||
class GeocodingTask:
|
||||
|
||||
url = 'https://maps.googleapis.com/maps/api/geocode/json'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue