Testeos en desarrollo del sistema de capturas. Pequeños retoques.

This commit is contained in:
pablomartincalvo 2018-10-13 02:08:58 +02:00
parent e97bbba274
commit 6a0baf4de6
7 changed files with 257 additions and 177 deletions

346
.idea/workspace.xml generated
View file

@ -2,13 +2,13 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/mysql/capturas_interface.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/tests/capturer_tests.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/capturer/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/geocoder/geocoder.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/core/mysql_wrapper.py" beforeDir="false" afterPath="$PROJECT_DIR$/core/mysql_wrapper.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mysql/Dockerfile" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/Dockerfile" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mysql/capturas_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturas_interface.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturing_tasks_interface.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
@ -33,32 +33,35 @@
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="py" value="20" />
<entry key="py" value="39" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="Python" value="18" />
<entry key="Scratch" value="2" />
<entry key="Python" value="36" />
<entry key="Scratch" value="3" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="Python Console" value="1519" />
<entry key="py" value="14320" />
<entry key="Python Console" value="1555" />
<entry key="capturer" value="843" />
<entry key="dummy" value="14" />
<entry key="py" value="14798" />
<entry key="scratch_1" value="489" />
<entry key="txt" value="880" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="PLAIN_TEXT" value="880" />
<entry key="Python" value="16173" />
<entry key="Scratch" value="155" />
<entry key="PLAIN_TEXT" value="894" />
<entry key="Python" value="17515" />
<entry key="Scratch" value="170" />
</counts>
</usages-collector>
<usages-collector id="statistics.vcs.git.usages">
<counts>
<entry key="git.branch.checkout.local" value="1" />
<entry key="git.branch.create.new" value="1" />
</counts>
</usages-collector>
@ -71,65 +74,37 @@
<splitter split-orientation="horizontal" split-proportion="0.5">
<split-first>
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="76">
<caret line="82" column="16" selection-start-line="82" selection-start-column="16" selection-end-line="82" selection-end-column="16" />
<folding>
<element signature="e#3455#6755#0" />
<marker date="1538845705076" expanded="true" signature="395:1123" ph="..." />
<marker date="1538845705076" expanded="true" signature="417:427" ph="..." />
<marker date="1538845705076" expanded="true" signature="1122:1123" ph="..." />
<marker date="1538845705076" expanded="true" signature="1179:1180" ph="..." />
<marker date="1538845705076" expanded="true" signature="2538:2547" ph="..." />
<marker date="1538845705076" expanded="true" signature="2538:2606" ph="..." />
<marker date="1538845705076" expanded="true" signature="2601:2606" ph="..." />
<marker date="1538845705076" expanded="true" signature="2679:6045" ph="..." />
<marker date="1538845705076" expanded="true" signature="4773:5825" ph="..." />
<marker date="1538845705076" expanded="true" signature="5844:5853" ph="..." />
<marker date="1538845705076" expanded="true" signature="5844:5926" ph="..." />
<marker date="1538845705076" expanded="true" signature="5951:5960" ph="..." />
<marker date="1538845705076" expanded="true" signature="6036:6039" ph="..." />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
</file>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="466">
<caret line="261" column="15" lean-forward="true" selection-start-line="261" selection-start-column="15" selection-end-line="261" selection-end-column="15" />
<state relative-caret-position="270">
<caret line="18" lean-forward="true" selection-start-line="18" selection-end-line="18" />
<folding>
<element signature="e#95512#95521#0" expanded="true" />
<marker date="1539387124485" expanded="true" signature="129:134" ph="..." />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/mysql/capturas_interface.py">
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="405">
<caret line="27" lean-forward="true" selection-start-line="27" selection-end-line="27" />
<folding>
<marker date="1538837294625" expanded="true" signature="74:75" ph="..." />
<marker date="1538837294625" expanded="true" signature="74:76" ph="..." />
</folding>
<caret line="27" selection-start-line="27" selection-end-line="27" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/mysql/capturing_tasks_interface.py">
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="60">
<caret line="4" selection-start-line="4" selection-end-line="6" selection-end-column="36" />
<folding>
<marker date="1538834627813" expanded="true" signature="74:80" ph="..." />
<marker date="1538834627813" expanded="true" signature="1110:1701" ph="..." />
</folding>
<state relative-caret-position="360">
<caret line="24" lean-forward="true" selection-start-line="24" selection-end-line="24" />
</state>
</provider>
</entry>
@ -137,7 +112,7 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-298">
<state relative-caret-position="615">
<caret line="41" selection-start-line="41" selection-end-line="41" />
</state>
</provider>
@ -148,22 +123,28 @@
<split-second>
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="165">
<caret line="11" selection-start-line="11" selection-end-line="11" selection-end-column="36" />
<folding>
<marker date="1538826138348" expanded="true" signature="5106:5558" ph="..." />
</folding>
<state relative-caret-position="225">
<caret line="15" column="2" selection-start-line="15" selection-start-column="2" selection-end-line="15" selection-end-column="2" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret selection-end-line="2" selection-end-column="21" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="435">
<caret line="45" column="23" selection-start-line="45" selection-start-column="23" selection-end-line="45" selection-end-column="23" />
<caret line="56" column="55" lean-forward="true" selection-start-line="56" selection-start-column="55" selection-end-line="56" selection-end-column="55" />
<folding>
<element signature="e#24#46#0" expanded="true" />
</folding>
@ -171,11 +152,11 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="75">
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
<state relative-caret-position="390">
<caret line="26" column="17" lean-forward="true" selection-start-line="26" selection-start-column="17" selection-end-line="26" selection-end-column="17" />
<folding>
<element signature="e#0#46#0" expanded="true" />
</folding>
@ -183,6 +164,15 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="173">
<caret line="166" selection-start-line="166" selection-end-line="166" />
</state>
</provider>
</entry>
</file>
</leaf>
</split-second>
</splitter>
@ -203,13 +193,16 @@
<find>task</find>
<find>exploring_tasks</find>
<find>ge</find>
<find>findall</find>
<find>re.</find>
</findStrings>
</component>
<component name="Git.Settings">
<option name="UPDATE_TYPE" value="REBASE" />
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
<option name="RECENT_BRANCH_BY_REPOSITORY">
<map>
<entry key="$PROJECT_DIR$" value="master" />
<entry key="$PROJECT_DIR$" value="testing" />
</map>
</option>
</component>
@ -225,10 +218,13 @@
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
<option value="$PROJECT_DIR$/explorer/explorer.py" />
<option value="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" />
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
<option value="$PROJECT_DIR$/mysql/capturas_interface.py" />
<option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
<option value="$PROJECT_DIR$/tests/capturing_tests.py" />
<option value="$PROJECT_DIR$/tests/capturer_tests.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
<option value="$PROJECT_DIR$/db_layer/capturing_tasks_interface.py" />
</list>
</option>
</component>
@ -242,7 +238,6 @@
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
@ -258,7 +253,12 @@
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
<item name="mysql" type="462c0819:PsiDirectoryNode" />
<item name="core" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
<item name="tests" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
@ -277,6 +277,7 @@
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
@ -301,7 +302,7 @@
</list>
</option>
</component>
<component name="RunManager" selected="Python.scratch_1">
<component name="RunManager" selected="Python.capturer_tests">
<configuration name="alerts" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="Drogon" />
<option name="INTERPRETER_OPTIONS" value="" />
@ -323,10 +324,52 @@
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="capturer" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="Drogon" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/capturer" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/capturer/capturer.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="true" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="capturer_tests" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="Drogon" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/capturer_tests.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="scratch_1" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="Drogon" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$APPLICATION_CONFIG_DIR$/scratches" />
<option name="IS_MODULE_SDK" value="false" />
@ -344,14 +387,16 @@
<list>
<item itemvalue="Python.alerts" />
<item itemvalue="Python.scratch_1" />
<item itemvalue="Python.capturer" />
<item itemvalue="Python.capturer_tests" />
</list>
<recent_temporary>
<list>
<item itemvalue="Python.scratch_1" />
<item itemvalue="Python.scratch_1" />
<item itemvalue="Python.scratch_1" />
<item itemvalue="Python.scratch_1" />
<item itemvalue="Python.alerts" />
<item itemvalue="Python.capturer_tests" />
<item itemvalue="Python.capturer" />
<item itemvalue="Python.capturer" />
<item itemvalue="Python.capturer" />
<item itemvalue="Python.capturer" />
</list>
</recent_temporary>
</component>
@ -438,21 +483,21 @@
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
<editor active="true" />
<layout>
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.484326" visible="true" weight="0.14918292" />
<window_info id="Structure" order="1" sideWeight="0.515674" side_tool="true" visible="true" weight="0.14918292" />
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.48251748" visible="true" weight="0.14918292" />
<window_info id="Structure" order="1" sideWeight="0.5174825" side_tool="true" visible="true" weight="0.14918292" />
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" weight="0.32983193" />
<window_info anchor="bottom" id="Run" order="2" weight="0.32983193" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.39915967" />
<window_info active="true" anchor="bottom" id="Debug" order="3" visible="true" weight="0.39915967" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" weight="0.32878152" />
<window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" />
<window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" />
<window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" />
<window_info active="true" anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" visible="true" weight="0.32983193" />
<window_info anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" weight="0.32983193" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
@ -508,13 +553,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="525">
<caret line="35" column="37" selection-start-line="35" selection-start-column="37" selection-end-line="35" selection-end-column="37" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/task.py" />
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
<provider selected="true" editor-type-id="text-editor" />
@ -522,98 +560,116 @@
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="615">
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="75">
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
<folding>
<element signature="e#0#46#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-298">
<caret line="41" selection-start-line="41" selection-end-line="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="165">
<caret line="11" selection-start-line="11" selection-end-line="11" selection-end-column="36" />
<folding>
<marker date="1538826138348" expanded="true" signature="5106:5558" ph="..." />
</folding>
<state>
<caret selection-end-line="2" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mysql/capturing_tasks_interface.py">
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="60">
<caret line="4" selection-start-line="4" selection-end-line="6" selection-end-column="36" />
<folding>
<marker date="1538834627813" expanded="true" signature="74:80" ph="..." />
<marker date="1538834627813" expanded="true" signature="1110:1701" ph="..." />
</folding>
<state relative-caret-position="225">
<caret line="15" column="2" selection-start-line="15" selection-start-column="2" selection-end-line="15" selection-end-column="2" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/mysql/capturas_interface.py">
<entry file="file://$APPLICATION_HOME_DIR$/helpers/pydev/pydevd.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="405">
<caret line="27" lean-forward="true" selection-start-line="27" selection-end-line="27" />
<folding>
<marker date="1538837294625" expanded="true" signature="74:75" ph="..." />
<marker date="1538837294625" expanded="true" signature="74:76" ph="..." />
</folding>
<state relative-caret-position="336">
<caret line="1657" selection-start-line="1657" selection-end-line="1657" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="435">
<caret line="45" column="23" selection-start-line="45" selection-start-column="23" selection-end-line="45" selection-end-column="23" />
<caret line="56" column="55" lean-forward="true" selection-start-line="56" selection-start-column="55" selection-end-line="56" selection-end-column="55" />
<folding>
<element signature="e#24#46#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<entry file="file:///usr/lib/python3.6/socket.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="466">
<caret line="261" column="15" lean-forward="true" selection-start-line="261" selection-start-column="15" selection-end-line="261" selection-end-column="15" />
<folding>
<element signature="e#95512#95521#0" expanded="true" />
</folding>
<state relative-caret-position="421">
<caret line="595" column="41" selection-start-line="595" selection-start-column="41" selection-end-line="595" selection-end-column="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="405">
<caret line="27" selection-start-line="27" selection-end-line="27" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="360">
<caret line="24" lean-forward="true" selection-start-line="24" selection-end-line="24" />
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3.6/_collections_abc.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="91">
<caret line="658" selection-start-line="658" selection-end-line="658" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="76">
<caret line="82" column="16" selection-start-line="82" selection-start-column="16" selection-end-line="82" selection-end-column="16" />
<state relative-caret-position="173">
<caret line="166" selection-start-line="166" selection-end-line="166" />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/.local/lib/python3.6/site-packages/bs4/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361">
<caret line="281" selection-start-line="281" selection-end-line="281" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="390">
<caret line="26" column="17" lean-forward="true" selection-start-line="26" selection-start-column="17" selection-end-line="26" selection-end-column="17" />
<folding>
<element signature="e#3455#6755#0" />
<marker date="1538845705076" expanded="true" signature="395:1123" ph="..." />
<marker date="1538845705076" expanded="true" signature="417:427" ph="..." />
<marker date="1538845705076" expanded="true" signature="1122:1123" ph="..." />
<marker date="1538845705076" expanded="true" signature="1179:1180" ph="..." />
<marker date="1538845705076" expanded="true" signature="2538:2547" ph="..." />
<marker date="1538845705076" expanded="true" signature="2538:2606" ph="..." />
<marker date="1538845705076" expanded="true" signature="2601:2606" ph="..." />
<marker date="1538845705076" expanded="true" signature="2679:6045" ph="..." />
<marker date="1538845705076" expanded="true" signature="4773:5825" ph="..." />
<marker date="1538845705076" expanded="true" signature="5844:5853" ph="..." />
<marker date="1538845705076" expanded="true" signature="5844:5926" ph="..." />
<marker date="1538845705076" expanded="true" signature="5951:5960" ph="..." />
<marker date="1538845705076" expanded="true" signature="6036:6039" ph="..." />
<element signature="e#0#46#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3/dist-packages/html5lib/_inputstream.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="181">
<caret line="244" selection-start-line="244" selection-end-line="244" />
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3.6/ssl.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="173">
<caret line="633" selection-start-line="633" selection-end-line="633" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270">
<caret line="18" lean-forward="true" selection-start-line="18" selection-end-line="18" />
<folding>
<marker date="1539387124485" expanded="true" signature="129:134" ph="..." />
</folding>
</state>
</provider>

View file

@ -1,11 +1,10 @@
import sys
sys.path.append('..')
from time import sleep
from bs4 import BeautifulSoup
import re
from mysql.capturing_tasks_interface import capturing_interface
from mysql.capturas_interface import capturas_interface
from db_layer.capturing_tasks_interface import capturing_interface
from db_layer.capturas_interface import capturas_interface
from core.scrapping_utils import UrlAttack
@ -60,7 +59,7 @@ class CapturingTask:
attack = UrlAttack(self.ad_url)
attack.attack()
if attack.success():
if attack.success:
self.html = attack.get_text()
self._extract_data()
@ -79,8 +78,6 @@ class CapturingTask:
self.parser.parse()
def _check_data(self):
self.parser.validate()
if not self.parser.all_fields_are_valid():
self._update_status('Invalid value fields')
return
@ -169,9 +166,9 @@ class AdHtmlParser:
soup = BeautifulSoup(self.html, 'html5lib')
if soup.findall('link', {'rel': 'canonical'}) is not None:
if soup.find_all('link', {'rel': 'canonical'}) is not None:
self.ad_fields['referencia']['value'] = re.findall(r'[0-9]{5,20}',
str(soup.findall('link', {'rel': 'canonical'})[0]))[0]
str(soup.find_all('link', {'rel': 'canonical'})[0]))[0]
self.ad_fields['referencia']['found'] = True
if soup.find_all('strong', {'class': 'price'}) is not None:
@ -180,19 +177,20 @@ class AdHtmlParser:
self.ad_fields['precio']['found'] = True
if soup.find('div', {'class': 'info-features'}) is not None:
self.ad_fields['tamano_categorico']['value'] = soup.find('div',
{'class': 'info-features'}).find('span').find(
'span').text
self.ad_fields['tamano_categorico']['found'] = True
try:
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find('span').find('span').text
self.ad_fields['tamano_categorico']['found'] = True
except:
pass
posible_m2 = [tag.text for tag in soup.find('div', {'class': 'info-features'}).findAll('span')]
posible_m2 = [tag.text for tag in soup.find('div', {'class': 'info-features'}).find_all('span')]
if [posible for posible in posible_m2 if '' in posible]:
self.ad_fields['m2']['value'] = \
[''.join(re.findall(r'[0-9]', posible)) for posible in posible_m2 if '' in posible][0]
self.ad_fields['m2']['found'] = True
if soup.find('title') is not None:
if 'venta' in soup.find('title'):
if 'venta' in soup.find('title').text:
self.ad_fields['tipo_anuncio']['value'] = 1
else:
self.ad_fields['tipo_anuncio']['value'] = 2
@ -200,14 +198,14 @@ class AdHtmlParser:
if len(soup.find('div', {'id': 'headerMap'}).find_all('li')) > 3:
self.ad_fields['calle']['value'] = ''
self.ad_fields['ciudad']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-2].text
self.ad_fields['ciudad']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-2].text.strip()
self.ad_fields['ciudad']['found'] = True
self.ad_fields['distrito']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-3].text
self.ad_fields['distrito']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-3].text.strip()
self.ad_fields['distrito']['found'] = True
self.ad_fields['barrio']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-4].text
self.ad_fields['barrio']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-4].text.strip()
self.ad_fields['barrio']['found'] = True
if len(soup.find('div', {'id': 'headerMap'}).find_all('li')) > 4:
self.ad_fields['calle']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[0].text
self.ad_fields['calle']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[0].text.strip()
self.ad_fields['calle']['found'] = True
features_lists = soup.find_all('div', {'class': 'details-property_features'})
@ -231,13 +229,13 @@ class AdHtmlParser:
.text.replace(' ', '')
self.ad_fields['telefono']['found'] = True
def validate(self):
def _validate(self):
self.invalid_fields = []
if re.match(r"[0-9]{4,20}", self.ad_fields['referencia']['value']):
if not re.match(r"[0-9]{4,20}", self.ad_fields['referencia']['value']):
self.invalid_fields.append('referencia')
if re.match(r"[0-9]{1,20}", self.ad_fields['precio']['value']):
if not re.match(r"[0-9]{1,20}", self.ad_fields['precio']['value']):
self.invalid_fields.append('precio')
possible_values_tamano = ['2 coches o más',
@ -255,18 +253,19 @@ class AdHtmlParser:
if not 'Distrito' in self.ad_fields['distrito']['value']:
self.invalid_fields.append('distrito')
if re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value']):
if not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value']):
self.invalid_fields.append('telefono')
def all_fields_are_valid(self):
    """Re-run validation and report whether every field passed.

    Calls ``self._validate()`` first and then inspects
    ``self.invalid_fields``.

    Returns:
        bool: True when ``self.invalid_fields`` is empty, False otherwise.
    """
    self._validate()
    # An empty collection is falsy, so this is True only when nothing was flagged.
    return not self.invalid_fields
def fields_missing(self):
for ad_field in self.ad_fields:
if not ad_field['optional'] and not ad_field['found']:
for key, contents in self.ad_fields.items():
if not contents['optional'] and not contents['found']:
return True
return False

View file

@ -46,12 +46,16 @@ class DatabaseWrapper():
execution_cursor.execute(query_statement, query_parameters)
self.disconnect()
return execution_cursor
except:
except Exception as e:
alert_master("SQL ERROR", """Se ha producido un error ejecutando la
siguiente query: %s.
Con los siguientes parametros: %s
siguiente query: {}.
Con los siguientes parametros: {}
{}
""".format(query_statement,
query_parameters))
query_parameters,
e)
)
else:
raise Exception("Could not connect to the database.")

View file

@ -15,12 +15,12 @@ class CapturingTasksInterface:
if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, url)
(uuid, write_time, status, ad_url)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else:
query_parameters['uuid_exploring'] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, url, fk_uuid_exploring)
(uuid, write_time, status, ad_url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
self.tasksdb.query(query_statement, query_parameters)
@ -48,12 +48,12 @@ class CapturingTasksInterface:
if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, url)
(uuid, write_time, status, ad_url)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else:
query_parameters['uuid_exploring'] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, url, fk_uuid_exploring)
(uuid, write_time, status, ad_url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
self.tasksdb.query(query_statement, query_parameters)

21
tests/capturer_tests.py Normal file
View file

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('..')
from capturer.capturer import CapturingTask
def test_CapturingTask():
    """Smoke-test one capture of a hard-coded ad.

    Builds a ``CapturingTask`` from a fixed parameter dict, runs
    ``capture()`` and prints whatever ``get_ad_data()`` returns. Nothing is
    asserted; the printed output is meant to be inspected manually.
    """
    # NOTE(review): capture() presumably fetches ad_url over the network and
    # may touch the database - confirm before running this unattended.
    parameters = {'uuid': 'f31af60a-1feb-4770-bee9-0085c678f08f',
                  'ad_url': 'https://www.idealista.com/inmueble/81593047',
                  'fk_uuid_exploring': 'e5f9c8d5-b6e9-47fc-b4de-6d78f40feccc',
                  'status': 'Pending'}

    task = CapturingTask(parameters)
    task.capture()
    print(task.get_ad_data())


# Run the smoke test only when this file is executed as a script, so that
# importing the module does not trigger a capture as a side effect.
if __name__ == '__main__':
    test_CapturingTask()