Corregidos pequeños errores y typos tras primer test del servicio.

This commit is contained in:
pablomartincalvo 2018-09-22 23:17:49 +02:00
parent 1f372b85b6
commit 80d65b7a7c
3 changed files with 68 additions and 153 deletions

190
.idea/workspace.xml generated
View file

@ -2,9 +2,8 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment=""> <list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/capturer/__init__.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" />
</list> </list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@ -17,10 +16,12 @@
<session id="1687213926"> <session id="1687213926">
<usages-collector id="statistics.lifecycle.project"> <usages-collector id="statistics.lifecycle.project">
<counts> <counts>
<entry key="project.closed" value="1" />
<entry key="project.open.time.0" value="1" /> <entry key="project.open.time.0" value="1" />
<entry key="project.open.time.12" value="1" />
<entry key="project.open.time.13" value="2" /> <entry key="project.open.time.13" value="2" />
<entry key="project.open.time.14" value="1" /> <entry key="project.open.time.14" value="2" />
<entry key="project.opened" value="4" /> <entry key="project.opened" value="6" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.extensions.open"> <usages-collector id="statistics.file.extensions.open">
@ -36,14 +37,14 @@
<usages-collector id="statistics.file.extensions.edit"> <usages-collector id="statistics.file.extensions.edit">
<counts> <counts>
<entry key="Python Console" value="1519" /> <entry key="Python Console" value="1519" />
<entry key="py" value="2871" /> <entry key="py" value="3070" />
<entry key="txt" value="214" /> <entry key="txt" value="472" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.types.edit"> <usages-collector id="statistics.file.types.edit">
<counts> <counts>
<entry key="PLAIN_TEXT" value="214" /> <entry key="PLAIN_TEXT" value="472" />
<entry key="Python" value="4390" /> <entry key="Python" value="4589" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.vcs.git.usages"> <usages-collector id="statistics.vcs.git.usages">
@ -60,68 +61,23 @@
<splitter split-orientation="horizontal" split-proportion="0.5"> <splitter split-orientation="horizontal" split-proportion="0.5">
<split-first> <split-first>
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true"> <file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py"> <entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="444"> <state relative-caret-position="2117">
<caret line="162" column="8" selection-start-line="162" selection-start-column="8" selection-end-line="162" selection-end-column="8" /> <caret line="176" column="18" lean-forward="true" selection-start-line="176" selection-start-column="18" selection-end-line="176" selection-end-column="18" />
<folding> <folding>
<element signature="e#418#504#0" /> <marker date="1537650937299" expanded="true" signature="5092:5544" ph="..." />
<element signature="e#572#861#0" />
<element signature="e#589#664#0" />
<element signature="e#970#1029#0" />
<element signature="e#1101#1222#0" />
<element signature="e#1287#1412#0" />
<element signature="e#1592#1756#0" />
<element signature="e#1632#1699#0" />
<element signature="e#1632#1657#0" />
<element signature="e#2298#2629#0" />
<element signature="e#2356#2591#0" />
<element signature="e#2377#2463#0" />
<element signature="e#2500#2591#0" />
<element signature="e#3125#3230#0" />
<element signature="e#3290#3736#0" />
<element signature="e#3290#3385#1" />
<element signature="e#3421#3612#0" />
<element signature="e#3795#4502#0" />
<element signature="e#3795#3930#1" />
<element signature="e#3957#4149#0" />
<element signature="e#4551#4649#1" />
<element signature="e#4676#4912#0" />
<element signature="e#5111#5313#0" />
<element signature="e#5111#5250#1" />
<element signature="e#5358#5712#0" />
<element signature="e#5358#5430#1" />
<element signature="e#5392#8966#0" />
<element signature="e#5784#5960#0" />
<element signature="e#6018#6073#0" />
<element signature="e#6878#7377#0" />
<element signature="e#6878#7007#1" />
<element signature="e#7043#7201#0" />
<element signature="e#7238#7306#0" />
<element signature="e#7438#8094#0" />
<element signature="e#7438#7565#1" />
<element signature="e#7744#8094#0" />
<element signature="e#7803#8094#0" />
<element signature="e#7880#8047#0" />
<element signature="e#8146#8320#1" />
<element signature="e#8702#9196#0" />
<element signature="e#8702#8791#1" />
<element signature="e#8818#8942#0" />
<marker date="1537546530445" expanded="true" signature="5242:5702" ph="..." />
</folding> </folding>
</state> </state>
</provider> </provider>
</entry> </entry>
</file> </file>
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py"> <entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="345"> <state relative-caret-position="315">
<caret line="23" lean-forward="true" selection-start-line="23" selection-end-line="23" /> <caret line="21" lean-forward="true" selection-start-line="21" selection-end-line="21" />
<folding>
<marker date="1537545955912" expanded="true" signature="135:323" ph="..." />
</folding>
</state> </state>
</provider> </provider>
</entry> </entry>
@ -129,7 +85,7 @@
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/alerts.py"> <entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="105"> <state relative-caret-position="75">
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" /> <caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
<folding> <folding>
<element signature="e#0#46#0" expanded="true" /> <element signature="e#0#46#0" expanded="true" />
@ -141,7 +97,7 @@
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py"> <entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270"> <state relative-caret-position="540">
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" /> <caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" />
</state> </state>
</provider> </provider>
@ -150,15 +106,14 @@
</leaf> </leaf>
</split-first> </split-first>
<split-second> <split-second>
<leaf> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true"> <file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py"> <entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1106"> <state relative-caret-position="317">
<caret line="274" column="44" selection-start-line="274" selection-start-column="44" selection-end-line="274" selection-end-column="44" /> <caret line="273" column="44" selection-start-line="273" selection-start-column="44" selection-end-line="273" selection-end-column="44" />
<folding> <folding>
<element signature="e#342#5712#0" /> <marker date="1537650937299" expanded="true" signature="5092:5544" ph="..." />
<marker date="1537546530445" expanded="true" signature="5242:5702" ph="..." />
</folding> </folding>
</state> </state>
</provider> </provider>
@ -202,8 +157,8 @@
<option value="$PROJECT_DIR$/core/alerts.py" /> <option value="$PROJECT_DIR$/core/alerts.py" />
<option value="$PROJECT_DIR$/core/task.py" /> <option value="$PROJECT_DIR$/core/task.py" />
<option value="$PROJECT_DIR$/capturer/__init__.py" /> <option value="$PROJECT_DIR$/capturer/__init__.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
<option value="$PROJECT_DIR$/explorer/explorer.py" /> <option value="$PROJECT_DIR$/explorer/explorer.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
</list> </list>
</option> </option>
</component> </component>
@ -319,7 +274,14 @@
<option name="project" value="LOCAL" /> <option name="project" value="LOCAL" />
<updated>1536514972249</updated> <updated>1536514972249</updated>
</task> </task>
<option name="localTasksCounter" value="3" /> <task id="LOCAL-00003" summary="Refactorizado Explorer para que el Explorer, y no el exploring_task, se encargue de postear las tareas de captura.&#10;&#10;Creado una funcion independiente de creacion de capturas para que sea compartida entre todos aquellos servicios que las creen.">
<created>1537546774036</created>
<option name="number" value="00003" />
<option name="presentableId" value="LOCAL-00003" />
<option name="project" value="LOCAL" />
<updated>1537546774036</updated>
</task>
<option name="localTasksCounter" value="4" />
<servers /> <servers />
</component> </component>
<component name="TodoView" selected-index="1"> <component name="TodoView" selected-index="1">
@ -335,17 +297,17 @@
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" /> <frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
<editor active="true" /> <editor active="true" />
<layout> <layout>
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.4937304" visible="true" weight="0.14918292" /> <window_info content_ui="combo" id="Project" order="0" sideWeight="0.4905956" weight="0.14918292" />
<window_info id="Structure" order="1" sideWeight="0.5062696" side_tool="true" visible="true" weight="0.14918292" /> <window_info id="Structure" order="1" sideWeight="0.50940436" side_tool="true" visible="true" weight="0.14918292" />
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" /> <window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" /> <window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
<window_info anchor="bottom" id="Message" order="0" /> <window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" /> <window_info active="true" anchor="bottom" id="Find" order="1" visible="true" weight="0.32983193" />
<window_info anchor="bottom" id="Run" order="2" weight="0.32983193" /> <window_info anchor="bottom" id="Run" order="2" weight="0.32983193" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" /> <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" /> <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" /> <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info active="true" anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" visible="true" weight="0.32983193" /> <window_info anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" weight="0.32914045" />
<window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" /> <window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" />
<window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" /> <window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" />
<window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" /> <window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" />
@ -388,7 +350,8 @@
<MESSAGE value="Correcciones en wrapper_mysql y avance en metodos de explorer. Iniciado modulo de alertas." /> <MESSAGE value="Correcciones en wrapper_mysql y avance en metodos de explorer. Iniciado modulo de alertas." />
<MESSAGE value="Finalizado modulo de alertas. Testeado clase ExploringTask a fondo." /> <MESSAGE value="Finalizado modulo de alertas. Testeado clase ExploringTask a fondo." />
<MESSAGE value="Pequeños detalles en Explorer." /> <MESSAGE value="Pequeños detalles en Explorer." />
<option name="LAST_COMMIT_MESSAGE" value="Pequeños detalles en Explorer." /> <MESSAGE value="Refactorizado Explorer para que el Explorer, y no el exploring_task, se encargue de postear las tareas de captura.&#10;&#10;Creado una funcion independiente de creacion de capturas para que sea compartida entre todos aquellos servicios que las creen." />
<option name="LAST_COMMIT_MESSAGE" value="Refactorizado Explorer para que el Explorer, y no el exploring_task, se encargue de postear las tareas de captura.&#10;&#10;Creado una funcion independiente de creacion de capturas para que sea compartida entre todos aquellos servicios que las creen." />
</component> </component>
<component name="editorHistoryManager"> <component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/explorer/test_explorer.py" /> <entry file="file://$PROJECT_DIR$/explorer/test_explorer.py" />
@ -406,9 +369,13 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/core/task.py" />
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/core/alerts.py"> <entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="105"> <state relative-caret-position="75">
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" /> <caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
<folding> <folding>
<element signature="e#0#46#0" expanded="true" /> <element signature="e#0#46#0" expanded="true" />
@ -416,80 +383,29 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/core/task.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="345">
<caret line="23" lean-forward="true" selection-start-line="23" selection-end-line="23" />
<folding>
<marker date="1537545955912" expanded="true" signature="135:323" ph="..." />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py"> <entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270"> <state relative-caret-position="540">
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" /> <caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py"> <entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="444"> <state relative-caret-position="2117">
<caret line="162" column="8" selection-start-line="162" selection-start-column="8" selection-end-line="162" selection-end-column="8" /> <caret line="176" column="18" lean-forward="true" selection-start-line="176" selection-start-column="18" selection-end-line="176" selection-end-column="18" />
<folding> <folding>
<element signature="e#418#504#0" /> <marker date="1537650937299" expanded="true" signature="5092:5544" ph="..." />
<element signature="e#572#861#0" />
<element signature="e#589#664#0" />
<element signature="e#970#1029#0" />
<element signature="e#1101#1222#0" />
<element signature="e#1287#1412#0" />
<element signature="e#1592#1756#0" />
<element signature="e#1632#1699#0" />
<element signature="e#1632#1657#0" />
<element signature="e#2298#2629#0" />
<element signature="e#2356#2591#0" />
<element signature="e#2377#2463#0" />
<element signature="e#2500#2591#0" />
<element signature="e#3125#3230#0" />
<element signature="e#3290#3736#0" />
<element signature="e#3290#3385#1" />
<element signature="e#3421#3612#0" />
<element signature="e#3795#4502#0" />
<element signature="e#3795#3930#1" />
<element signature="e#3957#4149#0" />
<element signature="e#4551#4649#1" />
<element signature="e#4676#4912#0" />
<element signature="e#5111#5313#0" />
<element signature="e#5111#5250#1" />
<element signature="e#5358#5712#0" />
<element signature="e#5358#5430#1" />
<element signature="e#5392#8966#0" />
<element signature="e#5784#5960#0" />
<element signature="e#6018#6073#0" />
<element signature="e#6878#7377#0" />
<element signature="e#6878#7007#1" />
<element signature="e#7043#7201#0" />
<element signature="e#7238#7306#0" />
<element signature="e#7438#8094#0" />
<element signature="e#7438#7565#1" />
<element signature="e#7744#8094#0" />
<element signature="e#7803#8094#0" />
<element signature="e#7880#8047#0" />
<element signature="e#8146#8320#1" />
<element signature="e#8702#9196#0" />
<element signature="e#8702#8791#1" />
<element signature="e#8818#8942#0" />
<marker date="1537546530445" expanded="true" signature="5242:5702" ph="..." />
</folding> </folding>
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="315">
<caret line="21" lean-forward="true" selection-start-line="21" selection-end-line="21" />
</state>
</provider>
</entry>
</component> </component>
</project> </project>

View file

@ -5,19 +5,20 @@ ads_root = 'https://www.idealista.com/inmueble/'
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None): def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
query_parameters = {'url': ads_root + referencia, query_parameters = {'ad_url': ads_root + referencia,
'uuid': str(uuid.uuid4()), 'uuid': str(uuid.uuid4()),
'status': 'Pending'} 'status': 'Pending'}
if uuid_exploring is None: if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, url) (uuid, write_time, status, url)
VALUES (%(uuid)s, NOW(), %(status)s, url)""" VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else: else:
query_parameters['uuid_exploring'] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, url, fk_uuid_exploring) (uuid, write_time, status, url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, url, %(uuid_exploring))s""" VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
query_parameters['uuid_exploring'] = uuid_exploring
db_wrapper.query(query_statement, query_parameters) db_wrapper.query(query_statement, query_parameters)

View file

@ -19,6 +19,8 @@ class Explorer():
working_hours = {'start': datetime.time(9, 0, 0), working_hours = {'start': datetime.time(9, 0, 0),
'end': datetime.time(18, 0, 0)} 'end': datetime.time(18, 0, 0)}
monthly_capture_target = 1000 monthly_capture_target = 1000
ad_types = {'1': 'alquiler',
'2': 'venta'}
def __init__(self): def __init__(self):
try: try:
@ -43,11 +45,7 @@ class Explorer():
alert_master("SQL DOWN", "El explorer informa de que SQL esta caida. Actividad detenida") alert_master("SQL DOWN", "El explorer informa de que SQL esta caida. Actividad detenida")
self.stop(self) self.stop(self)
if not self.queue_is_up(): current_task = ExploringTask(self.compose_listing_url())
alert_master("REDIS DOWN", "El explorer informa de que REDIS esta caido. Actividad detenida")
self.stop(self)
current_task = ExploringTask(self.compose_listing_url)
current_task.explore() current_task.explore()
if current_task.status == 'Referencias ready': if current_task.status == 'Referencias ready':
@ -55,7 +53,7 @@ class Explorer():
for referencia in referencias: for referencia in referencias:
create_capturing_task(referencia, self.tasksdb) create_capturing_task(referencia, self.tasksdb)
current_task._update_status(self, "Sent to queue") current_task._update_status("Sent to queue")
continue continue
@ -169,12 +167,12 @@ class Explorer():
Genera URLs de manera aleatoria Genera URLs de manera aleatoria
:return: :return:
""" """
raiz = 'https://www.idealista.com/' root = 'https://www.idealista.com/'
tipo = randint(1,2) type = ad_type[str(randint(1,2))]
ciudad = 'barcelona' city = 'barcelona'
numero = randint(1,30) page_number = str(randint(1,30))
url = raiz + tipo + '-garajes/' + ciudad + '-' + ciudad + '/' + \ url = root + type + '-garajes/' + city + '-' + city + '/' + \
'pagina-' + numero + '.htm' 'pagina-' + page_number + '.htm'
return url return url