Creado cache de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestion del geocoding al capturer.
This commit is contained in:
parent
9d947f7e2b
commit
3bd8de0e02
3 changed files with 150 additions and 54 deletions
103
.idea/workspace.xml
generated
103
.idea/workspace.xml
generated
|
|
@ -2,9 +2,9 @@
|
|||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
|
||||
<change afterPath="$PROJECT_DIR$/capturer/geocoder.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/geocoder.py" afterDir="false" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
|
|
@ -16,13 +16,14 @@
|
|||
<session id="1687213926">
|
||||
<usages-collector id="statistics.lifecycle.project">
|
||||
<counts>
|
||||
<entry key="project.closed" value="3" />
|
||||
<entry key="project.closed" value="4" />
|
||||
<entry key="project.open.time.0" value="1" />
|
||||
<entry key="project.open.time.12" value="1" />
|
||||
<entry key="project.open.time.13" value="2" />
|
||||
<entry key="project.open.time.14" value="3" />
|
||||
<entry key="project.open.time.17" value="1" />
|
||||
<entry key="project.open.time.21" value="1" />
|
||||
<entry key="project.opened" value="8" />
|
||||
<entry key="project.opened" value="9" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.open">
|
||||
|
|
@ -38,14 +39,14 @@
|
|||
<usages-collector id="statistics.file.extensions.edit">
|
||||
<counts>
|
||||
<entry key="Python Console" value="1519" />
|
||||
<entry key="py" value="8493" />
|
||||
<entry key="txt" value="692" />
|
||||
<entry key="py" value="9871" />
|
||||
<entry key="txt" value="745" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.edit">
|
||||
<counts>
|
||||
<entry key="PLAIN_TEXT" value="692" />
|
||||
<entry key="Python" value="10012" />
|
||||
<entry key="PLAIN_TEXT" value="745" />
|
||||
<entry key="Python" value="11390" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.vcs.git.usages">
|
||||
|
|
@ -65,13 +66,13 @@
|
|||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="2325">
|
||||
<caret line="155" lean-forward="true" selection-start-line="155" selection-end-line="155" />
|
||||
<state relative-caret-position="180">
|
||||
<caret line="12" lean-forward="true" selection-start-line="12" selection-end-line="12" />
|
||||
<folding>
|
||||
<marker date="1537995271039" expanded="true" signature="1249:1250" ph="..." />
|
||||
<marker date="1537995271039" expanded="true" signature="4109:4257" ph="..." />
|
||||
<marker date="1537995271039" expanded="true" signature="4640:4938" ph="..." />
|
||||
<marker date="1537995271039" expanded="true" signature="5089:5094" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="3640:3641" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6381:6529" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6955:7253" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="7404:7409" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
|
@ -80,12 +81,12 @@
|
|||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/capturer/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="77">
|
||||
<caret line="17" column="14" selection-start-line="17" selection-start-column="14" selection-end-line="17" selection-end-column="14" />
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
|
||||
<folding>
|
||||
<marker date="1537995271051" expanded="true" signature="519:524" ph="..." />
|
||||
<marker date="1537995271051" expanded="true" signature="708:826" ph="..." />
|
||||
<marker date="1537995271051" expanded="true" signature="1349:1354" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="66:1353" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="91:134" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="1854:1859" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
|
@ -98,8 +99,8 @@
|
|||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="75">
|
||||
<caret line="5" selection-start-line="5" selection-end-line="5" selection-end-column="22" />
|
||||
<state relative-caret-position="-3499">
|
||||
<caret line="16" column="27" lean-forward="true" selection-start-line="16" selection-start-column="27" selection-end-line="16" selection-end-column="27" />
|
||||
<folding>
|
||||
<marker date="1537653289735" expanded="true" signature="5088:5540" ph="..." />
|
||||
</folding>
|
||||
|
|
@ -167,8 +168,8 @@
|
|||
<option value="$PROJECT_DIR$/core/task.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/__init__.py" />
|
||||
<option value="$PROJECT_DIR$/explorer/explorer.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/geocoder.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/capturer.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/geocoder.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
|
|
@ -182,7 +183,6 @@
|
|||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="Scope" />
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<expand>
|
||||
|
|
@ -213,6 +213,7 @@
|
|||
<select />
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
|
|
@ -312,7 +313,14 @@
|
|||
<option name="project" value="LOCAL" />
|
||||
<updated>1537729440311</updated>
|
||||
</task>
|
||||
<option name="localTasksCounter" value="7" />
|
||||
<task id="LOCAL-00007" summary="Avanzado en desarrollo de capturing task. Creado clase GeocodingTask">
|
||||
<created>1537995406032</created>
|
||||
<option name="number" value="00007" />
|
||||
<option name="presentableId" value="LOCAL-00007" />
|
||||
<option name="project" value="LOCAL" />
|
||||
<updated>1537995406032</updated>
|
||||
</task>
|
||||
<option name="localTasksCounter" value="8" />
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TodoView" selected-index="1">
|
||||
|
|
@ -328,8 +336,8 @@
|
|||
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.48669797" visible="true" weight="0.14918292" />
|
||||
<window_info id="Structure" order="1" sideWeight="0.513302" side_tool="true" visible="true" weight="0.14918292" />
|
||||
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.48513302" visible="true" weight="0.14918292" />
|
||||
<window_info id="Structure" order="1" sideWeight="0.514867" side_tool="true" visible="true" weight="0.14918292" />
|
||||
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
|
||||
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
|
||||
<window_info anchor="bottom" id="Message" order="0" />
|
||||
|
|
@ -385,7 +393,8 @@
|
|||
<MESSAGE value="Corregidos pequeños errores y type tras primer test del servicio." />
|
||||
<MESSAGE value="Iniciadas clases de capturing_task y scraptargetfield." />
|
||||
<MESSAGE value="Avanzado en desarrollo de capturing task." />
|
||||
<option name="LAST_COMMIT_MESSAGE" value="Avanzado en desarrollo de capturing task." />
|
||||
<MESSAGE value="Avanzado en desarrollo de capturing task. Creado clase GeocodingTask" />
|
||||
<option name="LAST_COMMIT_MESSAGE" value="Avanzado en desarrollo de capturing task. Creado clase GeocodingTask" />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/explorer/test_explorer.py" />
|
||||
|
|
@ -407,16 +416,6 @@
|
|||
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="75">
|
||||
<caret line="5" selection-start-line="5" selection-end-line="5" selection-end-column="22" />
|
||||
<folding>
|
||||
<marker date="1537653289735" expanded="true" signature="5088:5540" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="540">
|
||||
|
|
@ -434,27 +433,37 @@
|
|||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-3499">
|
||||
<caret line="16" column="27" lean-forward="true" selection-start-line="16" selection-start-column="27" selection-end-line="16" selection-end-column="27" />
|
||||
<folding>
|
||||
<marker date="1537653289735" expanded="true" signature="5088:5540" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/capturer/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="77">
|
||||
<caret line="17" column="14" selection-start-line="17" selection-start-column="14" selection-end-line="17" selection-end-column="14" />
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
|
||||
<folding>
|
||||
<marker date="1537995271051" expanded="true" signature="519:524" ph="..." />
|
||||
<marker date="1537995271051" expanded="true" signature="708:826" ph="..." />
|
||||
<marker date="1537995271051" expanded="true" signature="1349:1354" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="66:1353" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="91:134" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="1854:1859" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="2325">
|
||||
<caret line="155" lean-forward="true" selection-start-line="155" selection-end-line="155" />
|
||||
<state relative-caret-position="180">
|
||||
<caret line="12" lean-forward="true" selection-start-line="12" selection-end-line="12" />
|
||||
<folding>
|
||||
<marker date="1537995271039" expanded="true" signature="1249:1250" ph="..." />
|
||||
<marker date="1537995271039" expanded="true" signature="4109:4257" ph="..." />
|
||||
<marker date="1537995271039" expanded="true" signature="4640:4938" ph="..." />
|
||||
<marker date="1537995271039" expanded="true" signature="5089:5094" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="3640:3641" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6381:6529" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6955:7253" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="7404:7409" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
|
|
|||
|
|
@ -11,7 +11,57 @@ ads_root = 'https://www.idealista.com/inmueble/'
|
|||
|
||||
#TODO Crear la lista de campos
|
||||
|
||||
ad_fields_parameters = []
|
||||
# One parameter dict per ad field, in capture order. Every field currently
# carries empty 'search_method' and 'validation_method' strings.
ad_fields_parameters = [
    {'name': field_name, 'search_method': '', 'validation_method': ''}
    for field_name in (
        'referencia',
        'precio',
        'tamano_categorico',
        'm2',
        'telefono',
        'texto_tipo',
        'ciudad',
        'distrito',
        'barrio',
        'calle',
        'cubierta',
        'puerta_auto',
        'ascensor',
        'alarma',
        'circuito',
        'personal',
        'texto_libre',
    )
]
|
||||
|
||||
|
||||
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
|
||||
|
|
@ -32,6 +82,7 @@ def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
|
|||
|
||||
db_wrapper.query(query_statement, query_parameters)
|
||||
|
||||
|
||||
class CapturingTask:
|
||||
|
||||
sleep_time_failed_request = 60
|
||||
|
|
@ -102,11 +153,6 @@ class CapturingTask:
|
|||
|
||||
#Extraer datos
|
||||
self.extract_data()
|
||||
#Geocodear
|
||||
self.geocode()
|
||||
|
||||
#TODO Lidiar con el resultado del geocoding
|
||||
#TODO Manejar tema cache
|
||||
|
||||
else:
|
||||
self.request_failures += 1
|
||||
|
|
@ -126,6 +172,7 @@ class CapturingTask:
|
|||
"""
|
||||
Lee el HTML y devuelve los campos que no esten presentes
|
||||
"""
|
||||
#TODO Implementar campos optativos
|
||||
fields_not_present = []
|
||||
for field in self.fields:
|
||||
if not field.exists(html):
|
||||
|
|
@ -154,7 +201,7 @@ class CapturingTask:
|
|||
return self.ad_data
|
||||
|
||||
def geocode(self):
|
||||
|
||||
#TODO Hacer esta funcion bien
|
||||
# Construir direccion con formato adecuado
|
||||
geocode_tries = 0
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,46 @@
|
|||
import requests
|
||||
|
||||
|
||||
class GeocodingCache:
    """In-memory cache of geocoded addresses, newest entry first.

    Each entry in ``geocoded_addresses`` is a dict with the keys
    'address', 'latitude', 'longitude' and 'precision'. New entries are
    inserted at the front of the list and the oldest ones are dropped from
    the back once ``cache_max_size`` is reached.
    """

    # Maximum number of entries kept in the cache.
    cache_max_size = 1000

    def __init__(self):
        self.geocoded_addresses = []

    def address_in_cache(self, address):
        """Return True if the address is already in the cache."""
        return any(entry['address'] == address
                   for entry in self.geocoded_addresses)

    def get_coordinates(self, address):
        """Return the data stored for the address.

        Returns a ``(latitude, longitude, precision)`` tuple for the first
        (most recently added) matching entry, or None if the address is
        not cached.
        """
        for entry in self.geocoded_addresses:
            if entry['address'] == address:
                return (entry['latitude'],
                        entry['longitude'],
                        entry['precision'])
        return None

    def add_address(self, address, latitude, longitude, precision):
        """Add the address to the cache, making room for it if necessary."""
        # The limit is a class attribute, so it must be read through
        # ``self``: a bare ``cache_max_size`` is not visible from method
        # scope and raises NameError.
        while (self.geocoded_addresses
               and len(self.geocoded_addresses) >= self.cache_max_size):
            # Entries are inserted at index 0, so the last one is the oldest.
            self.geocoded_addresses.pop()

        self.geocoded_addresses.insert(0, {'address': address,
                                           'latitude': latitude,
                                           'longitude': longitude,
                                           'precision': precision})
|
||||
|
||||
|
||||
class GeocodingTask:
|
||||
|
||||
url = 'https://maps.googleapis.com/maps/api/geocode/json'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue