Refactorizado Explorer para que el Explorer, y no el exploring_task, se encargue de postear las tareas de captura.

Creada una función independiente de creación de capturas para que sea compartida entre todos aquellos servicios que las creen.
This commit is contained in:
pablomartincalvo 2018-09-21 18:19:33 +02:00
parent b77a4752b8
commit 1f372b85b6
4 changed files with 286 additions and 94 deletions

297
.idea/workspace.xml generated
View file

@ -2,6 +2,8 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment=""> <list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/capturer/__init__.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" />
</list> </list>
@ -16,32 +18,37 @@
<usages-collector id="statistics.lifecycle.project"> <usages-collector id="statistics.lifecycle.project">
<counts> <counts>
<entry key="project.open.time.0" value="1" /> <entry key="project.open.time.0" value="1" />
<entry key="project.open.time.13" value="1" /> <entry key="project.open.time.13" value="2" />
<entry key="project.open.time.14" value="1" /> <entry key="project.open.time.14" value="1" />
<entry key="project.opened" value="3" /> <entry key="project.opened" value="4" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.extensions.open"> <usages-collector id="statistics.file.extensions.open">
<counts> <counts>
<entry key="py" value="5" /> <entry key="py" value="10" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.types.open"> <usages-collector id="statistics.file.types.open">
<counts> <counts>
<entry key="Python" value="5" /> <entry key="Python" value="10" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.extensions.edit"> <usages-collector id="statistics.file.extensions.edit">
<counts> <counts>
<entry key="Python Console" value="1519" /> <entry key="Python Console" value="1519" />
<entry key="py" value="1383" /> <entry key="py" value="2871" />
<entry key="txt" value="183" /> <entry key="txt" value="214" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.types.edit"> <usages-collector id="statistics.file.types.edit">
<counts> <counts>
<entry key="PLAIN_TEXT" value="183" /> <entry key="PLAIN_TEXT" value="214" />
<entry key="Python" value="2902" /> <entry key="Python" value="4390" />
</counts>
</usages-collector>
<usages-collector id="statistics.vcs.git.usages">
<counts>
<entry key="git.branch.create.new" value="1" />
</counts> </counts>
</usages-collector> </usages-collector>
</session> </session>
@ -50,47 +57,116 @@
<favorites_list name="Drogon" /> <favorites_list name="Drogon" />
</component> </component>
<component name="FileEditorManager"> <component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> <splitter split-orientation="horizontal" split-proportion="0.5">
<file pinned="false" current-in-tab="true"> <split-first>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py"> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<provider selected="true" editor-type-id="text-editor"> <file pinned="false" current-in-tab="true">
<state relative-caret-position="90"> <entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<caret line="6" column="29" lean-forward="true" selection-start-line="6" selection-start-column="29" selection-end-line="6" selection-end-column="29" /> <provider selected="true" editor-type-id="text-editor">
</state> <state relative-caret-position="444">
</provider> <caret line="162" column="8" selection-start-line="162" selection-start-column="8" selection-end-line="162" selection-end-column="8" />
</entry> <folding>
</file> <element signature="e#418#504#0" />
<file pinned="false" current-in-tab="false"> <element signature="e#572#861#0" />
<entry file="file://$PROJECT_DIR$/core/alerts.py"> <element signature="e#589#664#0" />
<provider selected="true" editor-type-id="text-editor"> <element signature="e#970#1029#0" />
<state relative-caret-position="105"> <element signature="e#1101#1222#0" />
<caret line="7" column="34" lean-forward="true" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" /> <element signature="e#1287#1412#0" />
<folding> <element signature="e#1592#1756#0" />
<element signature="e#0#46#0" expanded="true" /> <element signature="e#1632#1699#0" />
</folding> <element signature="e#1632#1657#0" />
</state> <element signature="e#2298#2629#0" />
</provider> <element signature="e#2356#2591#0" />
</entry> <element signature="e#2377#2463#0" />
</file> <element signature="e#2500#2591#0" />
<file pinned="false" current-in-tab="false"> <element signature="e#3125#3230#0" />
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py"> <element signature="e#3290#3736#0" />
<provider selected="true" editor-type-id="text-editor"> <element signature="e#3290#3385#1" />
<state relative-caret-position="525"> <element signature="e#3421#3612#0" />
<caret line="35" column="37" selection-start-line="35" selection-start-column="37" selection-end-line="35" selection-end-column="37" /> <element signature="e#3795#4502#0" />
</state> <element signature="e#3795#3930#1" />
</provider> <element signature="e#3957#4149#0" />
</entry> <element signature="e#4551#4649#1" />
</file> <element signature="e#4676#4912#0" />
<file pinned="false" current-in-tab="false"> <element signature="e#5111#5313#0" />
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py"> <element signature="e#5111#5250#1" />
<provider selected="true" editor-type-id="text-editor"> <element signature="e#5358#5712#0" />
<state relative-caret-position="391"> <element signature="e#5358#5430#1" />
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" /> <element signature="e#5392#8966#0" />
</state> <element signature="e#5784#5960#0" />
</provider> <element signature="e#6018#6073#0" />
</entry> <element signature="e#6878#7377#0" />
</file> <element signature="e#6878#7007#1" />
</leaf> <element signature="e#7043#7201#0" />
<element signature="e#7238#7306#0" />
<element signature="e#7438#8094#0" />
<element signature="e#7438#7565#1" />
<element signature="e#7744#8094#0" />
<element signature="e#7803#8094#0" />
<element signature="e#7880#8047#0" />
<element signature="e#8146#8320#1" />
<element signature="e#8702#9196#0" />
<element signature="e#8702#8791#1" />
<element signature="e#8818#8942#0" />
<marker date="1537546530445" expanded="true" signature="5242:5702" ph="..." />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="345">
<caret line="23" lean-forward="true" selection-start-line="23" selection-end-line="23" />
<folding>
<marker date="1537545955912" expanded="true" signature="135:323" ph="..." />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="105">
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
<folding>
<element signature="e#0#46#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270">
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" />
</state>
</provider>
</entry>
</file>
</leaf>
</split-first>
<split-second>
<leaf>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1106">
<caret line="274" column="44" selection-start-line="274" selection-start-column="44" selection-end-line="274" selection-end-column="44" />
<folding>
<element signature="e#342#5712#0" />
<marker date="1537546530445" expanded="true" signature="5242:5702" ph="..." />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</split-second>
</splitter>
</component> </component>
<component name="FileTemplateManagerImpl"> <component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES"> <option name="RECENT_TEMPLATES">
@ -111,6 +187,11 @@
</component> </component>
<component name="Git.Settings"> <component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
<option name="RECENT_BRANCH_BY_REPOSITORY">
<map>
<entry key="$PROJECT_DIR$" value="master" />
</map>
</option>
</component> </component>
<component name="IdeDocumentHistory"> <component name="IdeDocumentHistory">
<option name="CHANGED_PATHS"> <option name="CHANGED_PATHS">
@ -119,6 +200,9 @@
<option value="$PROJECT_DIR$/explorer/test_explorer.py" /> <option value="$PROJECT_DIR$/explorer/test_explorer.py" />
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" /> <option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
<option value="$PROJECT_DIR$/core/alerts.py" /> <option value="$PROJECT_DIR$/core/alerts.py" />
<option value="$PROJECT_DIR$/core/task.py" />
<option value="$PROJECT_DIR$/capturer/__init__.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
<option value="$PROJECT_DIR$/explorer/explorer.py" /> <option value="$PROJECT_DIR$/explorer/explorer.py" />
</list> </list>
</option> </option>
@ -133,6 +217,7 @@
<foldersAlwaysOnTop value="true" /> <foldersAlwaysOnTop value="true" />
</navigator> </navigator>
<panes> <panes>
<pane id="Scope" />
<pane id="ProjectPane"> <pane id="ProjectPane">
<subPane> <subPane>
<expand> <expand>
@ -140,6 +225,11 @@
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" /> <item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" /> <item name="Drogon" type="462c0819:PsiDirectoryNode" />
</path> </path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
<item name="capturer" type="462c0819:PsiDirectoryNode" />
</path>
<path> <path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" /> <item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" /> <item name="Drogon" type="462c0819:PsiDirectoryNode" />
@ -158,7 +248,6 @@
<select /> <select />
</subPane> </subPane>
</pane> </pane>
<pane id="Scope" />
</panes> </panes>
</component> </component>
<component name="PropertiesComponent"> <component name="PropertiesComponent">
@ -223,7 +312,14 @@
<option name="project" value="LOCAL" /> <option name="project" value="LOCAL" />
<updated>1536513741650</updated> <updated>1536513741650</updated>
</task> </task>
<option name="localTasksCounter" value="2" /> <task id="LOCAL-00002" summary="Pequeños detalles en Explorer.">
<created>1536514972249</created>
<option name="number" value="00002" />
<option name="presentableId" value="LOCAL-00002" />
<option name="project" value="LOCAL" />
<updated>1536514972249</updated>
</task>
<option name="localTasksCounter" value="3" />
<servers /> <servers />
</component> </component>
<component name="TodoView" selected-index="1"> <component name="TodoView" selected-index="1">
@ -239,17 +335,18 @@
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" /> <frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
<editor active="true" /> <editor active="true" />
<layout> <layout>
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.4984326" visible="true" weight="0.14918292" /> <window_info content_ui="combo" id="Project" order="0" sideWeight="0.4937304" visible="true" weight="0.14918292" />
<window_info id="Structure" order="1" sideWeight="0.5015674" side_tool="true" visible="true" weight="0.14918292" /> <window_info id="Structure" order="1" sideWeight="0.5062696" side_tool="true" visible="true" weight="0.14918292" />
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" /> <window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
<window_info anchor="bottom" id="Message" order="0" /> <window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" /> <window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.32983193" /> <window_info anchor="bottom" id="Run" order="2" weight="0.32983193" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" /> <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" /> <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" /> <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" weight="0.32983193" /> <window_info active="true" anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" visible="true" weight="0.32983193" />
<window_info active="true" anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" visible="true" weight="0.32983193" /> <window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" />
<window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" /> <window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" />
<window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" /> <window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" />
<window_info anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" weight="0.32983193" /> <window_info anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" weight="0.32983193" />
@ -290,14 +387,15 @@
<component name="VcsManagerConfiguration"> <component name="VcsManagerConfiguration">
<MESSAGE value="Correcciones en wrapper_mysql y avance en metodos de explorer. Iniciado modulo de alertas." /> <MESSAGE value="Correcciones en wrapper_mysql y avance en metodos de explorer. Iniciado modulo de alertas." />
<MESSAGE value="Finalizado modulo de alertas. Testeado clase ExploringTask a fondo." /> <MESSAGE value="Finalizado modulo de alertas. Testeado clase ExploringTask a fondo." />
<option name="LAST_COMMIT_MESSAGE" value="Finalizado modulo de alertas. Testeado clase ExploringTask a fondo." /> <MESSAGE value="Pequeños detalles en Explorer." />
<option name="LAST_COMMIT_MESSAGE" value="Pequeños detalles en Explorer." />
</component> </component>
<component name="editorHistoryManager"> <component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/explorer/test_explorer.py" /> <entry file="file://$PROJECT_DIR$/explorer/test_explorer.py" />
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py"> <entry file="file:///usr/lib/python3/dist-packages/IPython/core/interactiveshell.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="391"> <state relative-caret-position="195">
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" /> <caret line="142" column="4" selection-start-line="142" selection-start-column="4" selection-end-line="142" selection-end-column="4" />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -308,27 +406,88 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file:///usr/lib/python3/dist-packages/IPython/core/interactiveshell.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="195">
<caret line="142" column="4" selection-start-line="142" selection-start-column="4" selection-end-line="142" selection-end-column="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/alerts.py"> <entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="105"> <state relative-caret-position="105">
<caret line="7" column="34" lean-forward="true" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" /> <caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
<folding> <folding>
<element signature="e#0#46#0" expanded="true" /> <element signature="e#0#46#0" expanded="true" />
</folding> </folding>
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/core/task.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="345">
<caret line="23" lean-forward="true" selection-start-line="23" selection-end-line="23" />
<folding>
<marker date="1537545955912" expanded="true" signature="135:323" ph="..." />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270">
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py"> <entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="90"> <state relative-caret-position="444">
<caret line="6" column="29" lean-forward="true" selection-start-line="6" selection-start-column="29" selection-end-line="6" selection-end-column="29" /> <caret line="162" column="8" selection-start-line="162" selection-start-column="8" selection-end-line="162" selection-end-column="8" />
<folding>
<element signature="e#418#504#0" />
<element signature="e#572#861#0" />
<element signature="e#589#664#0" />
<element signature="e#970#1029#0" />
<element signature="e#1101#1222#0" />
<element signature="e#1287#1412#0" />
<element signature="e#1592#1756#0" />
<element signature="e#1632#1699#0" />
<element signature="e#1632#1657#0" />
<element signature="e#2298#2629#0" />
<element signature="e#2356#2591#0" />
<element signature="e#2377#2463#0" />
<element signature="e#2500#2591#0" />
<element signature="e#3125#3230#0" />
<element signature="e#3290#3736#0" />
<element signature="e#3290#3385#1" />
<element signature="e#3421#3612#0" />
<element signature="e#3795#4502#0" />
<element signature="e#3795#3930#1" />
<element signature="e#3957#4149#0" />
<element signature="e#4551#4649#1" />
<element signature="e#4676#4912#0" />
<element signature="e#5111#5313#0" />
<element signature="e#5111#5250#1" />
<element signature="e#5358#5712#0" />
<element signature="e#5358#5430#1" />
<element signature="e#5392#8966#0" />
<element signature="e#5784#5960#0" />
<element signature="e#6018#6073#0" />
<element signature="e#6878#7377#0" />
<element signature="e#6878#7007#1" />
<element signature="e#7043#7201#0" />
<element signature="e#7238#7306#0" />
<element signature="e#7438#8094#0" />
<element signature="e#7438#7565#1" />
<element signature="e#7744#8094#0" />
<element signature="e#7803#8094#0" />
<element signature="e#7880#8047#0" />
<element signature="e#8146#8320#1" />
<element signature="e#8702#9196#0" />
<element signature="e#8702#8791#1" />
<element signature="e#8818#8942#0" />
<marker date="1537546530445" expanded="true" signature="5242:5702" ph="..." />
</folding>
</state> </state>
</provider> </provider>
</entry> </entry>

0
capturer/__init__.py Normal file
View file

23
capturer/capturer.py Normal file
View file

@ -0,0 +1,23 @@
import uuid
# Base URL every ad reference is appended to in order to build the ad's page URL.
ads_root = 'https://www.idealista.com/inmueble/'


def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
    """
    Register a new capturing task for one ad in ``capturing_tasks_logs``.

    A fresh UUID is generated for the task, its status is set to 'Pending'
    and its URL is ``ads_root`` followed by ``referencia``.

    :param referencia: ad reference, concatenated to ``ads_root`` (must be a str)
    :param db_wrapper: object exposing ``query(statement, parameters)``; the
        statement uses pyformat placeholders (``%(name)s``)
    :param uuid_exploring: optional UUID of the exploring task that found the
        ad; when given it is stored in ``fk_uuid_exploring``
    :return: None
    """
    query_parameters = {'url': ads_root + referencia,
                        'uuid': str(uuid.uuid4()),
                        'status': 'Pending'}

    if uuid_exploring is None:
        # The url must be bound as a parameter; a bare `url` token would
        # reference the column itself instead of the composed ad URL.
        query_statement = """INSERT INTO capturing_tasks_logs
                             (uuid, write_time, status, url)
                             VALUES (%(uuid)s, NOW(), %(status)s, %(url)s)"""
    else:
        query_statement = """INSERT INTO capturing_tasks_logs
                             (uuid, write_time, status, url, fk_uuid_exploring)
                             VALUES (%(uuid)s, NOW(), %(status)s, %(url)s, %(uuid_exploring)s)"""
        query_parameters['uuid_exploring'] = uuid_exploring

    db_wrapper.query(query_statement, query_parameters)

View file

@ -10,6 +10,7 @@ from random import randint
from core.mysql_wrapper import get_anunciosdb, get_tasksdb from core.mysql_wrapper import get_anunciosdb, get_tasksdb
from core.scrapping_utils import UrlAttack from core.scrapping_utils import UrlAttack
from core.alerts import alert_master from core.alerts import alert_master
from capturer.capturer import create_capturing_task
class Explorer(): class Explorer():
@ -49,6 +50,13 @@ class Explorer():
current_task = ExploringTask(self.compose_listing_url) current_task = ExploringTask(self.compose_listing_url)
current_task.explore() current_task.explore()
if current_task.status == 'Referencias ready':
referencias = current_task.get_referencias()
for referencia in referencias:
create_capturing_task(referencia, self.tasksdb)
current_task._update_status(self, "Sent to queue")
continue continue
self.stop() self.stop()
@ -59,10 +67,12 @@ class Explorer():
pass pass
def there_is_work(self): def there_is_work(self):
#TODO Añadir que no se trabaja si se ha lanzado tarea en los ultimos 10 minutos
""" """
Funcion que agrupa las condiciones que se deben cumplir para poder trabajar Funcion que agrupa las condiciones que se deben cumplir para poder trabajar
""" """
if self.check_if_recent_task():
return False
if not self.in_working_hours(): if not self.in_working_hours():
return False return False
@ -86,19 +96,6 @@ class Explorer():
return False return False
def queue_is_up(self):
#TODO Comprobar que Redis esta vivo
while self.queue_retries <= self.max_queue_retries:
try:
#codigo que testea si redis esta vivo
self.queue_retries = 0
return True
except:
sleep(Explorer.sleep_time_no_service)
self.queue_retries = self.queue_retries + 1
return False
def in_working_hours(self): def in_working_hours(self):
return Explorer.working_hours['start'] <= datetime.datetime.now().time() <= Explorer.working_hours['end'] return Explorer.working_hours['start'] <= datetime.datetime.now().time() <= Explorer.working_hours['end']
@ -154,6 +151,19 @@ class Explorer():
""" """
return (self.get_max_referencias_for_today() / 30) * 6 return (self.get_max_referencias_for_today() / 30) * 6
def check_if_recent_task(self):
    """
    Check whether an exploring task was attacked within the last 10 minutes.

    The query selects the matching rows themselves instead of ``count(uuid)``:
    an aggregate without GROUP BY always produces exactly one result row, so
    the row count of that result would be non-zero even when no task matched.

    :return: the ``row_count`` reported for the query result; callers use it
        as a truth value (non-zero means a recent task exists)
    """
    # NOTE(review): assumes `row_count` on the object returned by
    # self.tasksdb.query() is the number of rows the SELECT matched --
    # confirm against core.mysql_wrapper.
    query_statement = """ SELECT uuid
                          FROM exploring_tasks_logs
                          WHERE status = 'Attacked'
                          AND write_time >= now() - INTERVAL 10 MINUTE
                      """
    cursor_result = self.tasksdb.query(query_statement)

    return cursor_result.row_count
def compose_listing_url(self): def compose_listing_url(self):
""" """
Genera URLs de manera aleatoria Genera URLs de manera aleatoria
@ -169,7 +179,7 @@ class Explorer():
return url return url
class ExploringTask(): class ExploringTask:
def __init__(self, url): def __init__(self, url):
self.anunciosdb = get_anunciosdb() self.anunciosdb = get_anunciosdb()
@ -192,8 +202,6 @@ class ExploringTask():
self._extract_referencias(attack.get_text()) self._extract_referencias(attack.get_text())
if self.referencias: if self.referencias:
self._update_status('Referencias ready') self._update_status('Referencias ready')
self._post_tasks_to_queue()
self._update_status('Sent to Queue')
elif self.there_are_referencias: elif self.there_are_referencias:
self._update_status('Failure - No new referencias in HTML') self._update_status('Failure - No new referencias in HTML')
else: else:
@ -247,7 +255,6 @@ class ExploringTask():
if self._is_new_listing(ad["data-adid"]): if self._is_new_listing(ad["data-adid"]):
self.referencias.append(ad["data-adid"]) self.referencias.append(ad["data-adid"])
def _is_new_listing(self, referencia): def _is_new_listing(self, referencia):
""" """
Comprueba si el listing ya existe en la base de datos de anuncios Comprueba si el listing ya existe en la base de datos de anuncios
@ -264,11 +271,14 @@ class ExploringTask():
else: else:
return True return True
def _post_tasks_to_queue(self): def get_referencias(self):
#TODO Mandar las referencias a redis """
pass Devuelve las referencias, si las hay
"""
if self.referencias:
return self.referencias
else:
return None