Cambios notables. Creadas interfaces para la tabla de capturing task
y la tabla de capturas. Traslado todo lo relacionado a Geocoding a un servicio independiente del capturer. Replanteo totalmente el parseo del html, creando un objeto nuevo.
This commit is contained in:
parent
3bd8de0e02
commit
240a61649c
7 changed files with 474 additions and 262 deletions
278
.idea/workspace.xml
generated
278
.idea/workspace.xml
generated
|
|
@ -2,9 +2,13 @@
|
|||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
|
||||
<change afterPath="$PROJECT_DIR$/mysql/capturas_interface.py" afterDir="false" />
|
||||
<change afterPath="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/geocoder.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/geocoder/geocoder.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/core/mysql_wrapper.py" beforeDir="false" afterPath="$PROJECT_DIR$/core/mysql_wrapper.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
|
|
@ -16,37 +20,41 @@
|
|||
<session id="1687213926">
|
||||
<usages-collector id="statistics.lifecycle.project">
|
||||
<counts>
|
||||
<entry key="project.closed" value="4" />
|
||||
<entry key="project.closed" value="5" />
|
||||
<entry key="project.open.time.0" value="1" />
|
||||
<entry key="project.open.time.12" value="1" />
|
||||
<entry key="project.open.time.13" value="2" />
|
||||
<entry key="project.open.time.14" value="3" />
|
||||
<entry key="project.open.time.17" value="1" />
|
||||
<entry key="project.open.time.18" value="1" />
|
||||
<entry key="project.open.time.21" value="1" />
|
||||
<entry key="project.opened" value="9" />
|
||||
<entry key="project.opened" value="10" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.open">
|
||||
<counts>
|
||||
<entry key="py" value="15" />
|
||||
<entry key="py" value="20" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.open">
|
||||
<counts>
|
||||
<entry key="Python" value="15" />
|
||||
<entry key="Python" value="18" />
|
||||
<entry key="Scratch" value="2" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.edit">
|
||||
<counts>
|
||||
<entry key="Python Console" value="1519" />
|
||||
<entry key="py" value="9871" />
|
||||
<entry key="txt" value="745" />
|
||||
<entry key="py" value="14320" />
|
||||
<entry key="scratch_1" value="489" />
|
||||
<entry key="txt" value="880" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.edit">
|
||||
<counts>
|
||||
<entry key="PLAIN_TEXT" value="745" />
|
||||
<entry key="Python" value="11390" />
|
||||
<entry key="PLAIN_TEXT" value="880" />
|
||||
<entry key="Python" value="16173" />
|
||||
<entry key="Scratch" value="155" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.vcs.git.usages">
|
||||
|
|
@ -66,53 +74,99 @@
|
|||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="12" lean-forward="true" selection-start-line="12" selection-end-line="12" />
|
||||
<state relative-caret-position="76">
|
||||
<caret line="82" column="16" selection-start-line="82" selection-start-column="16" selection-end-line="82" selection-end-column="16" />
|
||||
<folding>
|
||||
<marker date="1538514781483" expanded="true" signature="3640:3641" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6381:6529" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6955:7253" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="7404:7409" ph="..." />
|
||||
<element signature="e#3455#6755#0" />
|
||||
<marker date="1538845705076" expanded="true" signature="395:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="417:427" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1122:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1179:1180" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2547" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2601:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2679:6045" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="4773:5825" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5853" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5926" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5951:5960" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="6036:6039" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/capturer/geocoder.py">
|
||||
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
|
||||
<state relative-caret-position="466">
|
||||
<caret line="261" column="15" lean-forward="true" selection-start-line="261" selection-start-column="15" selection-end-line="261" selection-end-column="15" />
|
||||
<folding>
|
||||
<marker date="1538514781491" expanded="true" signature="66:1353" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="91:134" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="1854:1859" ph="..." />
|
||||
<element signature="e#95512#95521#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturas_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="405">
|
||||
<caret line="27" lean-forward="true" selection-start-line="27" selection-end-line="27" />
|
||||
<folding>
|
||||
<marker date="1538837294625" expanded="true" signature="74:75" ph="..." />
|
||||
<marker date="1538837294625" expanded="true" signature="74:76" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturing_tasks_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="60">
|
||||
<caret line="4" selection-start-line="4" selection-end-line="6" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1538834627813" expanded="true" signature="74:80" ph="..." />
|
||||
<marker date="1538834627813" expanded="true" signature="1110:1701" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-298">
|
||||
<caret line="41" selection-start-line="41" selection-end-line="41" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</split-first>
|
||||
<split-second>
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-3499">
|
||||
<caret line="16" column="27" lean-forward="true" selection-start-line="16" selection-start-column="27" selection-end-line="16" selection-end-column="27" />
|
||||
<state relative-caret-position="165">
|
||||
<caret line="11" selection-start-line="11" selection-end-line="11" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1537653289735" expanded="true" signature="5088:5540" ph="..." />
|
||||
<marker date="1538826138348" expanded="true" signature="5106:5558" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="540">
|
||||
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" />
|
||||
<state relative-caret-position="435">
|
||||
<caret line="45" column="23" selection-start-line="45" selection-start-column="23" selection-end-line="45" selection-end-column="23" />
|
||||
<folding>
|
||||
<element signature="e#24#46#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
|
@ -148,6 +202,7 @@
|
|||
<find>datetime</find>
|
||||
<find>task</find>
|
||||
<find>exploring_tasks</find>
|
||||
<find>ge</find>
|
||||
</findStrings>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
|
|
@ -163,13 +218,17 @@
|
|||
<list>
|
||||
<option value="$PROJECT_DIR$/core/scrapping_utils.py" />
|
||||
<option value="$PROJECT_DIR$/explorer/test_explorer.py" />
|
||||
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
|
||||
<option value="$PROJECT_DIR$/core/alerts.py" />
|
||||
<option value="$PROJECT_DIR$/core/task.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/__init__.py" />
|
||||
<option value="$PROJECT_DIR$/explorer/explorer.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/capturer.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/geocoder.py" />
|
||||
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
|
||||
<option value="$PROJECT_DIR$/explorer/explorer.py" />
|
||||
<option value="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" />
|
||||
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
|
||||
<option value="$PROJECT_DIR$/mysql/capturas_interface.py" />
|
||||
<option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/capturer.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
|
|
@ -183,6 +242,7 @@
|
|||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="Scope" />
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<expand>
|
||||
|
|
@ -198,28 +258,37 @@
|
|||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="core" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="explorer" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="mysql" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="Scratches and Consoles" type="1a2a3e82:ScratchProjectViewPane$MyProjectNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="Scratches and Consoles" type="1a2a3e82:ScratchProjectViewPane$MyProjectNode" />
|
||||
<item name="Scratches" type="d62648e6:ScratchProjectViewPane$MyRootNode" />
|
||||
</path>
|
||||
</expand>
|
||||
<select />
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="TODO_SCOPE" value="All Places" />
|
||||
<property name="com.intellij.ide.scratch.LRUPopupBuilder$1/New Scratch File" value="Python" />
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="MoveFile.RECENT_KEYS">
|
||||
<recent name="$PROJECT_DIR$/geocoder" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunDashboard">
|
||||
<option name="ruleStates">
|
||||
<list>
|
||||
|
|
@ -232,7 +301,7 @@
|
|||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<component name="RunManager" selected="Python.scratch_1">
|
||||
<configuration name="alerts" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="Drogon" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
|
|
@ -254,8 +323,34 @@
|
|||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="scratch_1" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="Drogon" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$APPLICATION_CONFIG_DIR$/scratches" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="true" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<list>
|
||||
<item itemvalue="Python.alerts" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
</list>
|
||||
<recent_temporary>
|
||||
<list>
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.alerts" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
|
|
@ -320,7 +415,14 @@
|
|||
<option name="project" value="LOCAL" />
|
||||
<updated>1537995406032</updated>
|
||||
</task>
|
||||
<option name="localTasksCounter" value="8" />
|
||||
<task id="LOCAL-00008" summary="Creado cache de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestion del geocoding al capturer.">
|
||||
<created>1538514864934</created>
|
||||
<option name="number" value="00008" />
|
||||
<option name="presentableId" value="LOCAL-00008" />
|
||||
<option name="project" value="LOCAL" />
|
||||
<updated>1538514864935</updated>
|
||||
</task>
|
||||
<option name="localTasksCounter" value="9" />
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TodoView" selected-index="1">
|
||||
|
|
@ -336,21 +438,21 @@
|
|||
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.48513302" visible="true" weight="0.14918292" />
|
||||
<window_info id="Structure" order="1" sideWeight="0.514867" side_tool="true" visible="true" weight="0.14918292" />
|
||||
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.484326" visible="true" weight="0.14918292" />
|
||||
<window_info id="Structure" order="1" sideWeight="0.515674" side_tool="true" visible="true" weight="0.14918292" />
|
||||
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
|
||||
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
|
||||
<window_info anchor="bottom" id="Message" order="0" />
|
||||
<window_info anchor="bottom" id="Find" order="1" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Run" order="2" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
|
||||
<window_info anchor="bottom" id="Debug" order="3" weight="0.39915967" />
|
||||
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
||||
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
||||
<window_info active="true" anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" visible="true" weight="0.32878152" />
|
||||
<window_info anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" weight="0.32878152" />
|
||||
<window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" />
|
||||
<window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" weight="0.32983193" />
|
||||
<window_info active="true" anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" visible="true" weight="0.32983193" />
|
||||
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
|
||||
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
||||
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
||||
|
|
@ -394,7 +496,8 @@
|
|||
<MESSAGE value="Iniciadas clases de capturing_task y scraptargetfield." />
|
||||
<MESSAGE value="Avanzado en desarrollo de capturing task." />
|
||||
<MESSAGE value="Avanzado en desarrollo de capturing task. Creado clase GeocodingTask" />
|
||||
<option name="LAST_COMMIT_MESSAGE" value="Avanzado en desarrollo de capturing task. Creado clase GeocodingTask" />
|
||||
<MESSAGE value="Creado cache de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestion del geocoding al capturer." />
|
||||
<option name="LAST_COMMIT_MESSAGE" value="Creado cache de Geocoding. Avanzado en Geocoding Task. Decido mover parte de la gestion del geocoding al capturer." />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/explorer/test_explorer.py" />
|
||||
|
|
@ -416,10 +519,10 @@
|
|||
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
|
||||
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="540">
|
||||
<caret line="36" column="22" selection-start-line="36" selection-start-column="22" selection-end-line="36" selection-end-column="22" />
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
|
@ -433,37 +536,84 @@
|
|||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-298">
|
||||
<caret line="41" selection-start-line="41" selection-end-line="41" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-3499">
|
||||
<caret line="16" column="27" lean-forward="true" selection-start-line="16" selection-start-column="27" selection-end-line="16" selection-end-column="27" />
|
||||
<state relative-caret-position="165">
|
||||
<caret line="11" selection-start-line="11" selection-end-line="11" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1537653289735" expanded="true" signature="5088:5540" ph="..." />
|
||||
<marker date="1538826138348" expanded="true" signature="5106:5558" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/capturer/geocoder.py">
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturing_tasks_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
|
||||
<state relative-caret-position="60">
|
||||
<caret line="4" selection-start-line="4" selection-end-line="6" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1538514781491" expanded="true" signature="66:1353" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="91:134" ph="..." />
|
||||
<marker date="1538514781491" expanded="true" signature="1854:1859" ph="..." />
|
||||
<marker date="1538834627813" expanded="true" signature="74:80" ph="..." />
|
||||
<marker date="1538834627813" expanded="true" signature="1110:1701" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturas_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="405">
|
||||
<caret line="27" lean-forward="true" selection-start-line="27" selection-end-line="27" />
|
||||
<folding>
|
||||
<marker date="1538837294625" expanded="true" signature="74:75" ph="..." />
|
||||
<marker date="1538837294625" expanded="true" signature="74:76" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="435">
|
||||
<caret line="45" column="23" selection-start-line="45" selection-start-column="23" selection-end-line="45" selection-end-column="23" />
|
||||
<folding>
|
||||
<element signature="e#24#46#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="466">
|
||||
<caret line="261" column="15" lean-forward="true" selection-start-line="261" selection-start-column="15" selection-end-line="261" selection-end-column="15" />
|
||||
<folding>
|
||||
<element signature="e#95512#95521#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="12" lean-forward="true" selection-start-line="12" selection-end-line="12" />
|
||||
<state relative-caret-position="76">
|
||||
<caret line="82" column="16" selection-start-line="82" selection-start-column="16" selection-end-line="82" selection-end-column="16" />
|
||||
<folding>
|
||||
<marker date="1538514781483" expanded="true" signature="3640:3641" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6381:6529" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="6955:7253" ph="..." />
|
||||
<marker date="1538514781483" expanded="true" signature="7404:7409" ph="..." />
|
||||
<element signature="e#3455#6755#0" />
|
||||
<marker date="1538845705076" expanded="true" signature="395:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="417:427" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1122:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1179:1180" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2547" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2601:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2679:6045" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="4773:5825" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5853" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5926" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5951:5960" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="6036:6039" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
|
|
|||
|
|
@ -2,85 +2,45 @@ import sys
|
|||
sys.path.append('..')
|
||||
import uuid
|
||||
from time import sleep
|
||||
from core.mysql_wrapper import get_anunciosdb, get_tasksdb
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from mysql.capturing_tasks_interface import capturing_interface
|
||||
from mysql.capturas_interface import capturas_interface
|
||||
from core.scrapping_utils import UrlAttack
|
||||
from core.alerts import alert_master
|
||||
from capturer.geocoder import GeocodingTask
|
||||
|
||||
ads_root = 'https://www.idealista.com/inmueble/'
|
||||
|
||||
#TODO Crear la lista de campos
|
||||
|
||||
# One parameter dict per ad field, in scraping order. The search and
# validation methods are still empty placeholders for every field.
ad_fields_parameters = [
    {'name': field_name,
     'search_method': '',
     'validation_method': ''}
    for field_name in (
        'referencia',
        'precio',
        'tamano_categorico',
        'm2',
        'telefono',
        'texto_tipo',
        'ciudad',
        'distrito',
        'barrio',
        'calle',
        'cubierta',
        'puerta_auto',
        'ascensor',
        'alarma',
        'circuito',
        'personal',
        'texto_libre',
    )
]
|
||||
|
||||
|
||||
def create_capturing_task(referencia, db_wrapper, uuid_exploring=None):
    """Insert a new 'Pending' capturing task row for one ad.

    The row gets a fresh UUID4, the current database time (``NOW()``) and the
    ad URL built as ``ads_root + referencia``.

    Args:
        referencia: Ad reference appended to ``ads_root`` to form the URL.
        db_wrapper: Object exposing ``query(statement, parameters)``.
        uuid_exploring: Optional UUID of the exploring task that found the
            ad; when given it is stored in ``fk_uuid_exploring``.
    """
    query_parameters = {'ad_url': ads_root + referencia,
                        'uuid': str(uuid.uuid4()),
                        'status': 'Pending'}

    if uuid_exploring is None:
        query_statement = """INSERT INTO capturing_tasks_logs
                             (uuid, write_time, status, url)
                             VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
    else:
        query_parameters['uuid_exploring'] = uuid_exploring
        query_statement = """INSERT INTO capturing_tasks_logs
                             (uuid, write_time, status, url, fk_uuid_exploring)
                             VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""

    db_wrapper.query(query_statement, query_parameters)


class Capturer:
    """Endless polling loop that runs pending capturing tasks."""

    # Seconds to sleep when there is nothing to do (passed to ``sleep``).
    sleep_time_no_work = 60
    # Lower bound compared against ``seconds_since_last_try()``.
    minimum_seconds_between_tries = 120

    def start(self):
        """Poll for pending tasks forever, capturing and storing each ad.

        Each iteration sleeps ``sleep_time_no_work`` seconds and retries when
        there is no pending task or the last try was too recent. Otherwise it
        runs a ``CapturingTask`` and, only if it ends in status
        ``'Data ready'``, stores its data through ``capturas_interface``.
        This method never returns.
        """
        while True:
            # Decide whether there is any work to do.
            if capturing_interface.get_pending_task() is None:
                sleep(Capturer.sleep_time_no_work)
                continue

            # Class attributes are not visible as bare names inside a method,
            # so the threshold must be qualified with the class.
            if (capturing_interface.seconds_since_last_try()
                    < Capturer.minimum_seconds_between_tries):
                sleep(Capturer.sleep_time_no_work)
                continue

            # NOTE(review): the pending task is fetched a second time here,
            # as in the original; confirm get_pending_task() is safe to call
            # twice before collapsing both calls into one.
            task_parameters = capturing_interface.get_pending_task()

            task = CapturingTask(task_parameters)
            task.capture()

            # Was ``if tasks.status = 'Data ready'``: an assignment where a
            # comparison was intended, on a misspelled variable name.
            if task.status != 'Data ready':
                continue

            capturas_interface.insert_captura(task.get_ad_data())
|
||||
|
||||
|
||||
class CapturingTask:
|
||||
|
|
@ -90,34 +50,16 @@ class CapturingTask:
|
|||
def __init__(self, parameters):
|
||||
self.uuid = parameters['uuid']
|
||||
self.ad_url = parameters['ad_url']
|
||||
self.uuid_exploring = parameters['uuid_exploring']
|
||||
self.uuid_exploring = parameters['fk_uuid_exploring']
|
||||
self.status = parameters['status']
|
||||
self.request_failures = 1
|
||||
self.geocode_status = "Pending"
|
||||
|
||||
self.tasksdb = get_tasksdb()
|
||||
|
||||
self._update_status('Loading')
|
||||
|
||||
def _update_status(self, new_status):
|
||||
self.status = new_status
|
||||
self._log_in_tasksdb()
|
||||
|
||||
def _log_in_tasksdb(self):
|
||||
"""
|
||||
Graba en la base de datos de tareas un registro con el UUID de la tarea,
|
||||
un timestamp y el status
|
||||
"""
|
||||
|
||||
query_statement = """INSERT INTO capturing_tasks_logs
|
||||
(uuid, write_time, status, ad_url, fk_uuid_exploring)
|
||||
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(fk_uuid_exploring)s)"""
|
||||
|
||||
query_parameters = {'uuid': self.uuid,
|
||||
'status': self.status,
|
||||
'ad_url': self.ad_url,
|
||||
'fk_uuid_exploring': self.uuid_exploring}
|
||||
|
||||
self.tasksdb.query(query_statement, query_parameters)
|
||||
capturing_interface.update_capturing_task(self.uuid, self.uuid_exploring,
|
||||
self.status, self.ad_url)
|
||||
|
||||
def capture(self):
|
||||
"""
|
||||
|
|
@ -135,25 +77,12 @@ class CapturingTask:
|
|||
if attack.success():
|
||||
self.html = attack.get_text()
|
||||
|
||||
with self._fields_not_present() as missing_fields:
|
||||
if missing_fields:
|
||||
alert_master('ERROR CAPTURER',
|
||||
'Los siguientes campos no estaban presentes {}. '
|
||||
'URL = {}'.format(missing_fields, self.ad_url))
|
||||
self._update_status('Dead ad')
|
||||
return
|
||||
|
||||
with self._fields_not_valid() as unvalid_fields:
|
||||
if unvalid_fields:
|
||||
alert_master('ERROR CAPTURER',
|
||||
'Los siguientes campos no tenian valores presentes {}'
|
||||
'URL = {}'.format(unvalid_fields, self.ad_url))
|
||||
self._update_status('Dead ad')
|
||||
return
|
||||
|
||||
#Extraer datos
|
||||
self.extract_data()
|
||||
|
||||
|
||||
self._update_status('Data ready')
|
||||
|
||||
else:
|
||||
self.request_failures += 1
|
||||
self._update_status('Fail {}'.format(self.request_failures))
|
||||
|
|
@ -162,97 +91,120 @@ class CapturingTask:
|
|||
|
||||
self._update_status('Surrender')
|
||||
|
||||
|
||||
def _read_fields(self):
|
||||
self.fields = []
|
||||
for field_parameters in ad_fields_parameters:
|
||||
self.fields.append(ScrapTargetField(field_parameters))
|
||||
|
||||
def _fields_not_present(self, html=None):
    """
    Read the HTML and return the names of the fields that are not present.

    :param html: HTML text to inspect; when omitted, ``self.html`` is used.
    :return: list with the ``name`` of every field in ``self.fields`` whose
        ``exists(html)`` is falsy.
    """
    # The default is resolved here and not in the signature: ``self`` does not
    # exist when the ``def`` line is evaluated, so ``html=self.html`` raised
    # NameError as soon as the class body was executed.
    if html is None:
        html = self.html

    # TODO Implement optional fields
    return [field.name for field in self.fields if not field.exists(html)]
|
||||
|
||||
def _fields_not_valid(self, html=None):
    """
    Read the HTML and return the names of the fields without a valid value.

    :param html: HTML text to inspect; when omitted, ``self.html`` is used.
    :return: list with the ``name`` of every field in ``self.fields`` whose
        ``validate_value(html)`` is falsy.
    """
    # The default is resolved here and not in the signature: ``self`` does not
    # exist when the ``def`` line is evaluated, so ``html=self.html`` raised
    # NameError as soon as the class body was executed.
    if html is None:
        html = self.html

    return [field.name
            for field in self.fields
            if not field.validate_value(html)]
|
||||
|
||||
def extract_data(self):
|
||||
self.ad_data = {}
|
||||
|
||||
for field in self.fields:
|
||||
self.ad_data[field.name] = field.get_value(self.html)
|
||||
#TODO Crear un objeto parser y ver que todo esta bien
|
||||
|
||||
def get_ad_data(self):
|
||||
return self.ad_data
|
||||
|
||||
def geocode(self):
    """
    Geocode the ad address, giving up after a bounded number of attempts.

    On success sets ``self.geocode_status`` to 'Success' and stores the
    results in ``self.geocode_results``; in every other case sets
    ``self.geocode_status`` to 'Surrender'.
    """
    # TODO Finish this function properly
    # TODO Build the address with the right format
    # NOTE(review): ``formated_address`` is not defined anywhere in this
    # method; it has to be built here before the method can run.
    max_tries = 3

    geo_task = GeocodingTask(formated_address)

    # Every attempt consumes one try. Before, only an explicit 'Retry' answer
    # advanced the counter, so a non-200 response (or an unexpected status)
    # kept the loop spinning forever.
    for _ in range(max_tries):
        geo_task.geocode()

        if geo_task.get_request_status() != 200:
            continue

        google_status = geo_task.success_surrender_retry()

        if google_status == 'Success':
            self.geocode_status = 'Success'
            self.geocode_results = geo_task.get_results()
            return
        if google_status == 'Surrender':
            break
        # 'Retry' (or anything else): fall through to the next attempt.

    self.geocode_status = 'Surrender'
|
||||
|
||||
|
||||
class ScrapTargetField:
|
||||
class AdHtmlParser:
    """
    Extracts the fields of an ad from its HTML.

    ``ad_fields`` maps each field name to a dict with three keys:
    ``found`` (set to True by ``parse`` when the field was located),
    ``optional`` (whether the field may be absent) and ``value``.
    """

    # Field names in the order they are stored in ``ad_fields``.
    FIELD_NAMES = ('referencia', 'precio', 'tamano_categorico', 'm2',
                   'tipo_anuncio', 'calle', 'barrio', 'distrito', 'ciudad',
                   'cubierta', 'puerta_auto', 'ascensor', 'alarma',
                   'circuito', 'personal', 'telefono')

    # Fields that are allowed to be missing from an ad.
    OPTIONAL_FIELDS = frozenset(('m2', 'telefono'))

    def __init__(self, html_string):
        """Store the HTML and create an empty record for every field."""
        self.html = html_string

        self.ad_fields = {name: {'found': False,
                                 'optional': name in self.OPTIONAL_FIELDS,
                                 'value': None}
                          for name in self.FIELD_NAMES}

    def _mark_found(self, name, value):
        """Record ``value`` for field ``name`` and flag it as found."""
        self.ad_fields[name]['value'] = value
        self.ad_fields[name]['found'] = True

    def parse(self):
        """Fill ``ad_fields`` with the values that can be located in the HTML."""
        soup = BeautifulSoup(self.html, 'html5lib')

        # find_all returns a (possibly empty) list, never None, so test for
        # emptiness before indexing.
        canonical_links = soup.find_all('link', {'rel': 'canonical'})
        if canonical_links:
            references = re.findall(r'[0-9]{5,20}', str(canonical_links[0]))
            if references:
                self._mark_found('referencia', references[0])

        price_tags = soup.find_all('strong', {'class': 'price'})
        if price_tags:
            self._mark_found('precio',
                             ''.join(re.findall(r'[0-9]', str(price_tags[0]))))

        features = soup.find('div', {'class': 'info-features'})
        if features is not None:
            outer_span = features.find('span')
            inner_span = outer_span.find('span') if outer_span is not None else None
            if inner_span is not None:
                self._mark_found('tamano_categorico', inner_span.text)

        # TODO Continue with the remaining parsing methods

    def validate(self):
        """Validate the parsed values (not implemented yet; returns None)."""
        # TODO Implement validation for the fields that need it

    def fields_missing(self):
        """
        Return the names of the mandatory fields that have not been found.

        Optional fields are never reported. The list is empty when every
        mandatory field is flagged as found.
        """
        return [name
                for name, field in self.ad_fields.items()
                if not field['optional'] and not field['found']]
|
||||
|
||||
|
||||
|
||||
def __init__(self, target_parameters):
|
||||
self.name = target_parameters['name']
|
||||
self.search_method = target_parameters['search_method']
|
||||
self.validation_method = target_parameters['validation_method']
|
||||
|
||||
def exists(self, html):
|
||||
"""
|
||||
Busca el dato en un HTML
|
||||
"""
|
||||
if self.search_method(html) is None:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
def validate_value(self, dato):
|
||||
"""
|
||||
Comprueba el valor y valida con la norma respectiva que sea lo esperado
|
||||
"""
|
||||
return self.validation_method(dato)
|
||||
|
||||
def get_value(self, html):
|
||||
"""
|
||||
Busca en un HTML el dato
|
||||
"""
|
||||
return self.search_method(html)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import mysql.connector
|
||||
from core.alerts import alert_master
|
||||
|
||||
anuncios_db_parameters = {'host': '185.166.215.170',
|
||||
'database': 'anuncios',
|
||||
|
|
@ -37,13 +38,21 @@ class DatabaseWrapper():
|
|||
self.connect()
|
||||
self.disconnect()
|
||||
|
||||
def query(self, query_statement, query_parameters=None, dictionary=False):
    """
    Execute one statement on a fresh connection and return its cursor.

    :param query_statement: SQL text, with placeholders if parameters are used.
    :param query_parameters: values for the placeholders, or None.
    :param dictionary: forwarded to ``connection.cursor(dictionary=...)``.
    :return: the cursor the statement was executed on, or None when the
        execution failed (in which case ``alert_master`` has been called).
    :raises Exception: when the connection could not be established.
    """
    self.connect()
    if not self.connection.is_connected():
        raise Exception("Could not connect to the database.")

    try:
        execution_cursor = self.connection.cursor(dictionary=dictionary)
        execution_cursor.execute(query_statement, query_parameters)
        return execution_cursor
    except Exception:
        # The template uses {} slots: with the previous %s slots str.format
        # left the text untouched and the alert never showed the query.
        alert_master("SQL ERROR",
                     ("Se ha producido un error ejecutando la\n"
                      "siguiente query: {}.\n"
                      "Con los siguientes parametros: {}\n"
                      ).format(query_statement, query_parameters))
        return None
    finally:
        # Close the connection on the failure path too, not only on success.
        self.disconnect()
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from random import randint
|
|||
from core.mysql_wrapper import get_anunciosdb, get_tasksdb
|
||||
from core.scrapping_utils import UrlAttack
|
||||
from core.alerts import alert_master
|
||||
from capturer.capturer import create_capturing_task
|
||||
from mysql.capturing_tasks_interface import capturing_interface
|
||||
|
||||
class Explorer():
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ class Explorer():
|
|||
if current_task.status == 'Referencias ready':
|
||||
referencias = current_task.get_referencias()
|
||||
for referencia in referencias:
|
||||
create_capturing_task(referencia, self.tasksdb)
|
||||
capturing_interface.create_capturing_task(referencia)
|
||||
|
||||
current_task._update_status("Sent to queue")
|
||||
|
||||
|
|
|
|||
27
mysql/capturas_interface.py
Normal file
27
mysql/capturas_interface.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
from core.mysql_wrapper import get_anunciosdb
|
||||
|
||||
|
||||
class CapturasInterface():
    """Write access to the ``capturas`` table through the ads database wrapper."""

    def __init__(self):
        self.anunciosdb = get_anunciosdb()

    def insert_captura(self, ad_data):
        """
        Insert one row in ``capturas``.

        :param ad_data: dict mapping column names to the values to store.
        """
        # NOTE(review): column names are interpolated into the statement, so
        # the keys of ad_data must come from code, never from scraped input.
        columns = ', '.join(ad_data.keys())
        # One '%s' per value. Joining the *string* '%s' * n iterated over its
        # characters and produced '%, s, %, s'.
        placeholders_string = ', '.join(['%s'] * len(ad_data))

        # {} slots for str.format; the %s slots are left for the driver.
        query_statement = """ INSERT INTO capturas
                              ({})
                              VALUES({})""".format(columns, placeholders_string)

        # A real sequence, in the same order as the column list above.
        query_parameters = tuple(ad_data.values())

        self.anunciosdb.query(query_statement, query_parameters)
|
||||
|
||||
|
||||
capturas_interface = CapturasInterface()
|
||||
|
||||
|
||||
|
||||
74
mysql/capturing_tasks_interface.py
Normal file
74
mysql/capturing_tasks_interface.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
from core.mysql_wrapper import get_tasksdb
|
||||
|
||||
class CapturingTasksInterface:
    """Access to the ``capturing_tasks_logs`` table through the tasks database wrapper."""

    def __init__(self):
        self.tasksdb = get_tasksdb()

    def _insert_log(self, task_uuid, uuid_exploring, status, ad_url):
        """Insert one log row; the exploring uuid column is written only when given."""
        # NOTE(review): the column is named ``url`` here, while the logging
        # code this interface replaces wrote to ``ad_url`` -- confirm against
        # the table schema.
        query_parameters = {'ad_url': ad_url,
                            'uuid': task_uuid,
                            'status': status}

        if uuid_exploring is None:
            query_statement = """INSERT INTO capturing_tasks_logs
                                 (uuid, write_time, status, url)
                                 VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
        else:
            query_parameters['uuid_exploring'] = uuid_exploring
            query_statement = """INSERT INTO capturing_tasks_logs
                                 (uuid, write_time, status, url, fk_uuid_exploring)
                                 VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""

        self.tasksdb.query(query_statement, query_parameters)

    def create_capturing_task(self, referencia, uuid_exploring=None):
        """
        Log a new task in 'Pending' status for the ad with the given reference.

        :param referencia: ad reference, appended to the ads root URL.
        :param uuid_exploring: uuid of the exploring task that found the ad,
            or None.
        """
        # Imported locally: the module never imported uuid, so the original
        # ``uuid.uuid4()`` call raised NameError.
        from uuid import uuid4

        ads_root = 'https://www.idealista.com/inmueble/'

        self._insert_log(str(uuid4()), uuid_exploring, 'Pending',
                         ads_root + referencia)

    def get_pending_task(self):
        """
        Return the log row selected by the pending-task query, or None.

        The query joins the logs with ``capturing_last``, keeps the tasks whose
        last status is 'Pending' and takes the row with the oldest write time.
        The row is fetched from a dictionary cursor.
        """
        query_statement = """SELECT logs.*
                             FROM capturing_tasks_logs as logs
                             INNER JOIN capturing_last as last
                             ON logs.uuid = last.uuid
                             WHERE last.status = 'Pending'
                             ORDER BY logs.write_time ASC
                             LIMIT 1
                             """
        cursor = self.tasksdb.query(query_statement, dictionary=True)

        if cursor.rowcount:
            return cursor.fetchone()
        return None

    def update_capturing_task(self, uuid, uuid_exploring, status, ad_url):
        """
        Log a new status for an existing task.

        :param uuid: uuid of the capturing task.
        :param uuid_exploring: uuid of the related exploring task, or None.
        :param status: status to record.
        :param ad_url: URL of the ad.
        """
        self._insert_log(uuid, uuid_exploring, status, ad_url)

    def seconds_since_last_try(self):
        """
        Return the value of the first column of the query below, or None
        when no 'Pending' row exists.

        The query computes the seconds between NOW() and ``write_time`` for
        the 'Pending' log row with the earliest ``write_time``.
        """
        # TIMEDIFF is the MySQL function name (TIME_DIFF does not exist), and
        # the table is capturing_tasks_logs as in the other queries.
        query_statement = """SELECT TIME_TO_SEC(TIMEDIFF(NOW(), write_time))
                             FROM capturing_tasks_logs
                             WHERE status = 'Pending'
                             ORDER BY write_time
                             LIMIT 1
                             """

        cursor = self.tasksdb.query(query_statement)

        row = cursor.fetchone()
        return row[0] if row else None
|
||||
|
||||
|
||||
capturing_interface = CapturingTasksInterface()
|
||||
Loading…
Add table
Add a link
Reference in a new issue