Testeos en desarrollo del sistema de capturas. Pequeños retoques.
This commit is contained in:
parent
e97bbba274
commit
6a0baf4de6
7 changed files with 257 additions and 177 deletions
346
.idea/workspace.xml
generated
346
.idea/workspace.xml
generated
|
|
@ -2,13 +2,13 @@
|
|||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
|
||||
<change afterPath="$PROJECT_DIR$/mysql/capturas_interface.py" afterDir="false" />
|
||||
<change afterPath="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" afterDir="false" />
|
||||
<change afterPath="$PROJECT_DIR$/tests/capturer_tests.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/capturer/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/geocoder/geocoder.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/core/mysql_wrapper.py" beforeDir="false" afterPath="$PROJECT_DIR$/core/mysql_wrapper.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/explorer/explorer.py" beforeDir="false" afterPath="$PROJECT_DIR$/explorer/explorer.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/mysql/Dockerfile" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/Dockerfile" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/mysql/capturas_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturas_interface.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturing_tasks_interface.py" afterDir="false" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
|
|
@ -33,32 +33,35 @@
|
|||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.open">
|
||||
<counts>
|
||||
<entry key="py" value="20" />
|
||||
<entry key="py" value="39" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.open">
|
||||
<counts>
|
||||
<entry key="Python" value="18" />
|
||||
<entry key="Scratch" value="2" />
|
||||
<entry key="Python" value="36" />
|
||||
<entry key="Scratch" value="3" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.extensions.edit">
|
||||
<counts>
|
||||
<entry key="Python Console" value="1519" />
|
||||
<entry key="py" value="14320" />
|
||||
<entry key="Python Console" value="1555" />
|
||||
<entry key="capturer" value="843" />
|
||||
<entry key="dummy" value="14" />
|
||||
<entry key="py" value="14798" />
|
||||
<entry key="scratch_1" value="489" />
|
||||
<entry key="txt" value="880" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.file.types.edit">
|
||||
<counts>
|
||||
<entry key="PLAIN_TEXT" value="880" />
|
||||
<entry key="Python" value="16173" />
|
||||
<entry key="Scratch" value="155" />
|
||||
<entry key="PLAIN_TEXT" value="894" />
|
||||
<entry key="Python" value="17515" />
|
||||
<entry key="Scratch" value="170" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
<usages-collector id="statistics.vcs.git.usages">
|
||||
<counts>
|
||||
<entry key="git.branch.checkout.local" value="1" />
|
||||
<entry key="git.branch.create.new" value="1" />
|
||||
</counts>
|
||||
</usages-collector>
|
||||
|
|
@ -71,65 +74,37 @@
|
|||
<splitter split-orientation="horizontal" split-proportion="0.5">
|
||||
<split-first>
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="76">
|
||||
<caret line="82" column="16" selection-start-line="82" selection-start-column="16" selection-end-line="82" selection-end-column="16" />
|
||||
<folding>
|
||||
<element signature="e#3455#6755#0" />
|
||||
<marker date="1538845705076" expanded="true" signature="395:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="417:427" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1122:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1179:1180" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2547" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2601:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2679:6045" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="4773:5825" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5853" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5926" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5951:5960" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="6036:6039" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="466">
|
||||
<caret line="261" column="15" lean-forward="true" selection-start-line="261" selection-start-column="15" selection-end-line="261" selection-end-column="15" />
|
||||
<state relative-caret-position="270">
|
||||
<caret line="18" lean-forward="true" selection-start-line="18" selection-end-line="18" />
|
||||
<folding>
|
||||
<element signature="e#95512#95521#0" expanded="true" />
|
||||
<marker date="1539387124485" expanded="true" signature="129:134" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturas_interface.py">
|
||||
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="405">
|
||||
<caret line="27" lean-forward="true" selection-start-line="27" selection-end-line="27" />
|
||||
<folding>
|
||||
<marker date="1538837294625" expanded="true" signature="74:75" ph="..." />
|
||||
<marker date="1538837294625" expanded="true" signature="74:76" ph="..." />
|
||||
</folding>
|
||||
<caret line="27" selection-start-line="27" selection-end-line="27" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturing_tasks_interface.py">
|
||||
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="60">
|
||||
<caret line="4" selection-start-line="4" selection-end-line="6" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1538834627813" expanded="true" signature="74:80" ph="..." />
|
||||
<marker date="1538834627813" expanded="true" signature="1110:1701" ph="..." />
|
||||
</folding>
|
||||
<state relative-caret-position="360">
|
||||
<caret line="24" lean-forward="true" selection-start-line="24" selection-end-line="24" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
|
|
@ -137,7 +112,7 @@
|
|||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-298">
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" selection-start-line="41" selection-end-line="41" />
|
||||
</state>
|
||||
</provider>
|
||||
|
|
@ -148,22 +123,28 @@
|
|||
<split-second>
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="165">
|
||||
<caret line="11" selection-start-line="11" selection-end-line="11" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1538826138348" expanded="true" signature="5106:5558" ph="..." />
|
||||
</folding>
|
||||
<state relative-caret-position="225">
|
||||
<caret line="15" column="2" selection-start-line="15" selection-start-column="2" selection-end-line="15" selection-end-column="2" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state>
|
||||
<caret selection-end-line="2" selection-end-column="21" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="435">
|
||||
<caret line="45" column="23" selection-start-line="45" selection-start-column="23" selection-end-line="45" selection-end-column="23" />
|
||||
<caret line="56" column="55" lean-forward="true" selection-start-line="56" selection-start-column="55" selection-end-line="56" selection-end-column="55" />
|
||||
<folding>
|
||||
<element signature="e#24#46#0" expanded="true" />
|
||||
</folding>
|
||||
|
|
@ -171,11 +152,11 @@
|
|||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<file pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/core/alerts.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="75">
|
||||
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
|
||||
<state relative-caret-position="390">
|
||||
<caret line="26" column="17" lean-forward="true" selection-start-line="26" selection-start-column="17" selection-end-line="26" selection-end-column="17" />
|
||||
<folding>
|
||||
<element signature="e#0#46#0" expanded="true" />
|
||||
</folding>
|
||||
|
|
@ -183,6 +164,15 @@
|
|||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="173">
|
||||
<caret line="166" selection-start-line="166" selection-end-line="166" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</split-second>
|
||||
</splitter>
|
||||
|
|
@ -203,13 +193,16 @@
|
|||
<find>task</find>
|
||||
<find>exploring_tasks</find>
|
||||
<find>ge</find>
|
||||
<find>findall</find>
|
||||
<find>re.</find>
|
||||
</findStrings>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="UPDATE_TYPE" value="REBASE" />
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
<option name="RECENT_BRANCH_BY_REPOSITORY">
|
||||
<map>
|
||||
<entry key="$PROJECT_DIR$" value="master" />
|
||||
<entry key="$PROJECT_DIR$" value="testing" />
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
|
|
@ -225,10 +218,13 @@
|
|||
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
|
||||
<option value="$PROJECT_DIR$/explorer/explorer.py" />
|
||||
<option value="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" />
|
||||
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
|
||||
<option value="$PROJECT_DIR$/mysql/capturas_interface.py" />
|
||||
<option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
|
||||
<option value="$PROJECT_DIR$/tests/capturing_tests.py" />
|
||||
<option value="$PROJECT_DIR$/tests/capturer_tests.py" />
|
||||
<option value="$PROJECT_DIR$/capturer/capturer.py" />
|
||||
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
|
||||
<option value="$PROJECT_DIR$/db_layer/capturing_tasks_interface.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
|
|
@ -242,7 +238,6 @@
|
|||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="Scope" />
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<expand>
|
||||
|
|
@ -258,7 +253,12 @@
|
|||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="mysql" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="core" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="tests" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
|
||||
|
|
@ -277,6 +277,7 @@
|
|||
<select />
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
|
|
@ -301,7 +302,7 @@
|
|||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.scratch_1">
|
||||
<component name="RunManager" selected="Python.capturer_tests">
|
||||
<configuration name="alerts" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="Drogon" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
|
|
@ -323,10 +324,52 @@
|
|||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="capturer" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="Drogon" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/capturer" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/capturer/capturer.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="true" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="capturer_tests" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="Drogon" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/capturer_tests.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="scratch_1" type="PythonConfigurationType" factoryName="Python" temporary="true">
|
||||
<module name="Drogon" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$APPLICATION_CONFIG_DIR$/scratches" />
|
||||
<option name="IS_MODULE_SDK" value="false" />
|
||||
|
|
@ -344,14 +387,16 @@
|
|||
<list>
|
||||
<item itemvalue="Python.alerts" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.capturer" />
|
||||
<item itemvalue="Python.capturer_tests" />
|
||||
</list>
|
||||
<recent_temporary>
|
||||
<list>
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.scratch_1" />
|
||||
<item itemvalue="Python.alerts" />
|
||||
<item itemvalue="Python.capturer_tests" />
|
||||
<item itemvalue="Python.capturer" />
|
||||
<item itemvalue="Python.capturer" />
|
||||
<item itemvalue="Python.capturer" />
|
||||
<item itemvalue="Python.capturer" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
|
|
@ -438,21 +483,21 @@
|
|||
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.484326" visible="true" weight="0.14918292" />
|
||||
<window_info id="Structure" order="1" sideWeight="0.515674" side_tool="true" visible="true" weight="0.14918292" />
|
||||
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.48251748" visible="true" weight="0.14918292" />
|
||||
<window_info id="Structure" order="1" sideWeight="0.5174825" side_tool="true" visible="true" weight="0.14918292" />
|
||||
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
|
||||
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
|
||||
<window_info anchor="bottom" id="Message" order="0" />
|
||||
<window_info anchor="bottom" id="Find" order="1" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Run" order="2" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Debug" order="3" weight="0.39915967" />
|
||||
<window_info active="true" anchor="bottom" id="Debug" order="3" visible="true" weight="0.39915967" />
|
||||
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
|
||||
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
|
||||
<window_info anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" weight="0.32878152" />
|
||||
<window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" />
|
||||
<window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" />
|
||||
<window_info active="true" anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" visible="true" weight="0.32983193" />
|
||||
<window_info anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" weight="0.32983193" />
|
||||
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
|
||||
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
||||
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
|
||||
|
|
@ -508,13 +553,6 @@
|
|||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="525">
|
||||
<caret line="35" column="37" selection-start-line="35" selection-start-column="37" selection-end-line="35" selection-end-column="37" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/task.py" />
|
||||
<entry file="file://$PROJECT_DIR$/capturer/__init__.py">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
|
|
@ -522,98 +560,116 @@
|
|||
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="615">
|
||||
<caret line="41" lean-forward="true" selection-start-line="41" selection-end-line="41" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/alerts.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="75">
|
||||
<caret line="7" column="34" selection-start-line="7" selection-start-column="34" selection-end-line="7" selection-end-column="34" />
|
||||
<folding>
|
||||
<element signature="e#0#46#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-298">
|
||||
<caret line="41" selection-start-line="41" selection-end-line="41" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="165">
|
||||
<caret line="11" selection-start-line="11" selection-end-line="11" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1538826138348" expanded="true" signature="5106:5558" ph="..." />
|
||||
</folding>
|
||||
<state>
|
||||
<caret selection-end-line="2" selection-end-column="21" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturing_tasks_interface.py">
|
||||
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
|
||||
<provider selected="true" editor-type-id="text-editor" />
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="60">
|
||||
<caret line="4" selection-start-line="4" selection-end-line="6" selection-end-column="36" />
|
||||
<folding>
|
||||
<marker date="1538834627813" expanded="true" signature="74:80" ph="..." />
|
||||
<marker date="1538834627813" expanded="true" signature="1110:1701" ph="..." />
|
||||
</folding>
|
||||
<state relative-caret-position="225">
|
||||
<caret line="15" column="2" selection-start-line="15" selection-start-column="2" selection-end-line="15" selection-end-column="2" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/mysql/capturas_interface.py">
|
||||
<entry file="file://$APPLICATION_HOME_DIR$/helpers/pydev/pydevd.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="405">
|
||||
<caret line="27" lean-forward="true" selection-start-line="27" selection-end-line="27" />
|
||||
<folding>
|
||||
<marker date="1538837294625" expanded="true" signature="74:75" ph="..." />
|
||||
<marker date="1538837294625" expanded="true" signature="74:76" ph="..." />
|
||||
</folding>
|
||||
<state relative-caret-position="336">
|
||||
<caret line="1657" selection-start-line="1657" selection-end-line="1657" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="435">
|
||||
<caret line="45" column="23" selection-start-line="45" selection-start-column="23" selection-end-line="45" selection-end-column="23" />
|
||||
<caret line="56" column="55" lean-forward="true" selection-start-line="56" selection-start-column="55" selection-end-line="56" selection-end-column="55" />
|
||||
<folding>
|
||||
<element signature="e#24#46#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
|
||||
<entry file="file:///usr/lib/python3.6/socket.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="466">
|
||||
<caret line="261" column="15" lean-forward="true" selection-start-line="261" selection-start-column="15" selection-end-line="261" selection-end-column="15" />
|
||||
<folding>
|
||||
<element signature="e#95512#95521#0" expanded="true" />
|
||||
</folding>
|
||||
<state relative-caret-position="421">
|
||||
<caret line="595" column="41" selection-start-line="595" selection-start-column="41" selection-end-line="595" selection-end-column="41" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="405">
|
||||
<caret line="27" selection-start-line="27" selection-end-line="27" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="360">
|
||||
<caret line="24" lean-forward="true" selection-start-line="24" selection-end-line="24" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file:///usr/lib/python3.6/_collections_abc.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="91">
|
||||
<caret line="658" selection-start-line="658" selection-end-line="658" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="76">
|
||||
<caret line="82" column="16" selection-start-line="82" selection-start-column="16" selection-end-line="82" selection-end-column="16" />
|
||||
<state relative-caret-position="173">
|
||||
<caret line="166" selection-start-line="166" selection-end-line="166" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$USER_HOME$/.local/lib/python3.6/site-packages/bs4/__init__.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="361">
|
||||
<caret line="281" selection-start-line="281" selection-end-line="281" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/core/alerts.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="390">
|
||||
<caret line="26" column="17" lean-forward="true" selection-start-line="26" selection-start-column="17" selection-end-line="26" selection-end-column="17" />
|
||||
<folding>
|
||||
<element signature="e#3455#6755#0" />
|
||||
<marker date="1538845705076" expanded="true" signature="395:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="417:427" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1122:1123" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="1179:1180" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2547" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2538:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2601:2606" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="2679:6045" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="4773:5825" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5853" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5844:5926" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="5951:5960" ph="..." />
|
||||
<marker date="1538845705076" expanded="true" signature="6036:6039" ph="..." />
|
||||
<element signature="e#0#46#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file:///usr/lib/python3/dist-packages/html5lib/_inputstream.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="181">
|
||||
<caret line="244" selection-start-line="244" selection-end-line="244" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file:///usr/lib/python3.6/ssl.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="173">
|
||||
<caret line="633" selection-start-line="633" selection-end-line="633" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="270">
|
||||
<caret line="18" lean-forward="true" selection-start-line="18" selection-end-line="18" />
|
||||
<folding>
|
||||
<marker date="1539387124485" expanded="true" signature="129:134" ph="..." />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
import sys
|
||||
|
||||
sys.path.append('..')
|
||||
from time import sleep
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
from mysql.capturing_tasks_interface import capturing_interface
|
||||
from mysql.capturas_interface import capturas_interface
|
||||
from db_layer.capturing_tasks_interface import capturing_interface
|
||||
from db_layer.capturas_interface import capturas_interface
|
||||
from core.scrapping_utils import UrlAttack
|
||||
|
||||
|
||||
|
|
@ -60,7 +59,7 @@ class CapturingTask:
|
|||
attack = UrlAttack(self.ad_url)
|
||||
attack.attack()
|
||||
|
||||
if attack.success():
|
||||
if attack.success:
|
||||
self.html = attack.get_text()
|
||||
|
||||
self._extract_data()
|
||||
|
|
@ -79,8 +78,6 @@ class CapturingTask:
|
|||
self.parser.parse()
|
||||
|
||||
def _check_data(self):
|
||||
self.parser.validate()
|
||||
|
||||
if not self.parser.all_fields_are_valid():
|
||||
self._update_status('Invalid value fields')
|
||||
return
|
||||
|
|
@ -169,9 +166,9 @@ class AdHtmlParser:
|
|||
|
||||
soup = BeautifulSoup(self.html, 'html5lib')
|
||||
|
||||
if soup.findall('link', {'rel': 'canonical'}) is not None:
|
||||
if soup.find_all('link', {'rel': 'canonical'}) is not None:
|
||||
self.ad_fields['referencia']['value'] = re.findall(r'[0-9]{5,20}',
|
||||
str(soup.findall('link', {'rel': 'canonical'})[0]))[0]
|
||||
str(soup.find_all('link', {'rel': 'canonical'})[0]))[0]
|
||||
self.ad_fields['referencia']['found'] = True
|
||||
|
||||
if soup.find_all('strong', {'class': 'price'}) is not None:
|
||||
|
|
@ -180,19 +177,20 @@ class AdHtmlParser:
|
|||
self.ad_fields['precio']['found'] = True
|
||||
|
||||
if soup.find('div', {'class': 'info-features'}) is not None:
|
||||
self.ad_fields['tamano_categorico']['value'] = soup.find('div',
|
||||
{'class': 'info-features'}).find('span').find(
|
||||
'span').text
|
||||
self.ad_fields['tamano_categorico']['found'] = True
|
||||
try:
|
||||
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find('span').find('span').text
|
||||
self.ad_fields['tamano_categorico']['found'] = True
|
||||
except:
|
||||
pass
|
||||
|
||||
posible_m2 = [tag.text for tag in soup.find('div', {'class': 'info-features'}).findAll('span')]
|
||||
posible_m2 = [tag.text for tag in soup.find('div', {'class': 'info-features'}).find_all('span')]
|
||||
if [posible for posible in posible_m2 if 'm²' in posible]:
|
||||
self.ad_fields['m2']['value'] = \
|
||||
[''.join(re.findall(r'[0-9]', posible)) for posible in posible_m2 if 'm²' in posible][0]
|
||||
self.ad_fields['m2']['found'] = True
|
||||
|
||||
if soup.find('title') is not None:
|
||||
if 'venta' in soup.find('title'):
|
||||
if 'venta' in soup.find('title').text:
|
||||
self.ad_fields['tipo_anuncio']['value'] = 1
|
||||
else:
|
||||
self.ad_fields['tipo_anuncio']['value'] = 2
|
||||
|
|
@ -200,14 +198,14 @@ class AdHtmlParser:
|
|||
|
||||
if len(soup.find('div', {'id': 'headerMap'}).find_all('li')) > 3:
|
||||
self.ad_fields['calle']['value'] = ''
|
||||
self.ad_fields['ciudad']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-2].text
|
||||
self.ad_fields['ciudad']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-2].text.strip()
|
||||
self.ad_fields['ciudad']['found'] = True
|
||||
self.ad_fields['distrito']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-3].text
|
||||
self.ad_fields['distrito']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-3].text.strip()
|
||||
self.ad_fields['distrito']['found'] = True
|
||||
self.ad_fields['barrio']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-4].text
|
||||
self.ad_fields['barrio']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[-4].text.strip()
|
||||
self.ad_fields['barrio']['found'] = True
|
||||
if len(soup.find('div', {'id': 'headerMap'}).find_all('li')) > 4:
|
||||
self.ad_fields['calle']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[0].text
|
||||
self.ad_fields['calle']['value'] = soup.find('div', {'id': 'headerMap'}).find_all('li')[0].text.strip()
|
||||
self.ad_fields['calle']['found'] = True
|
||||
|
||||
features_lists = soup.find_all('div', {'class': 'details-property_features'})
|
||||
|
|
@ -231,13 +229,13 @@ class AdHtmlParser:
|
|||
.text.replace(' ', '')
|
||||
self.ad_fields['telefono']['found'] = True
|
||||
|
||||
def validate(self):
|
||||
def _validate(self):
|
||||
self.invalid_fields = []
|
||||
|
||||
if re.match(r"[0-9]{4,20}", self.ad_fields['referencia']['value']):
|
||||
if not re.match(r"[0-9]{4,20}", self.ad_fields['referencia']['value']):
|
||||
self.invalid_fields.append('referencia')
|
||||
|
||||
if re.match(r"[0-9]{1,20}", self.ad_fields['precio']['value']):
|
||||
if not re.match(r"[0-9]{1,20}", self.ad_fields['precio']['value']):
|
||||
self.invalid_fields.append('precio')
|
||||
|
||||
possible_values_tamano = ['2 coches o más',
|
||||
|
|
@ -255,18 +253,19 @@ class AdHtmlParser:
|
|||
if not 'Distrito' in self.ad_fields['distrito']['value']:
|
||||
self.invalid_fields.append('distrito')
|
||||
|
||||
if re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value']):
|
||||
if not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value']):
|
||||
self.invalid_fields.append('telefono')
|
||||
|
||||
def all_fields_are_valid(self):
    """Re-run validation and report whether every checked field passed.

    Calls ``self._validate()`` first, then inspects ``self.invalid_fields``.

    Returns:
        bool: ``True`` when ``self.invalid_fields`` is empty after
        validation, ``False`` otherwise.
    """
    self._validate()
    # An empty collection of invalid field names means everything passed.
    return not self.invalid_fields
|
||||
|
||||
def fields_missing(self):
    """Tell whether any mandatory ad field was not found.

    ``self.ad_fields`` maps each field name to a dict that carries at least
    the ``'optional'`` and ``'found'`` flags. Iterating the mapping directly
    yields only the key strings, and subscripting those with ``'optional'``
    raises ``TypeError``, so the per-field dicts are read through
    ``.values()``.

    Returns:
        bool: ``True`` if at least one field is neither optional nor found,
        ``False`` otherwise (including when there are no fields at all).
    """
    return any(
        not contents['optional'] and not contents['found']
        for contents in self.ad_fields.values()
    )
|
||||
|
||||
|
|
|
|||
|
|
@ -46,12 +46,16 @@ class DatabaseWrapper():
|
|||
execution_cursor.execute(query_statement, query_parameters)
|
||||
self.disconnect()
|
||||
return execution_cursor
|
||||
except:
|
||||
except Exception as e:
|
||||
alert_master("SQL ERROR", """Se ha producido un error ejecutando la
|
||||
siguiente query: %s.
|
||||
Con los siguientes parametros: %s
|
||||
siguiente query: {}.
|
||||
Con los siguientes parametros: {}
|
||||
|
||||
{}
|
||||
""".format(query_statement,
|
||||
query_parameters))
|
||||
query_parameters,
|
||||
e)
|
||||
)
|
||||
|
||||
else:
|
||||
raise Exception("Could not connect to the database.")
|
||||
|
|
|
|||
|
|
@ -15,12 +15,12 @@ class CapturingTasksInterface:
|
|||
|
||||
if uuid_exploring is None:
|
||||
query_statement = """INSERT INTO capturing_tasks_logs
|
||||
(uuid, write_time, status, url)
|
||||
(uuid, write_time, status, ad_url)
|
||||
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
|
||||
else:
|
||||
query_parameters['uuid_exploring'] = uuid_exploring
|
||||
query_statement = """INSERT INTO capturing_tasks_logs
|
||||
(uuid, write_time, status, url, fk_uuid_exploring)
|
||||
(uuid, write_time, status, ad_url, fk_uuid_exploring)
|
||||
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
|
||||
|
||||
self.tasksdb.query(query_statement, query_parameters)
|
||||
|
|
@ -48,12 +48,12 @@ class CapturingTasksInterface:
|
|||
|
||||
if uuid_exploring is None:
|
||||
query_statement = """INSERT INTO capturing_tasks_logs
|
||||
(uuid, write_time, status, url)
|
||||
(uuid, write_time, status, ad_url)
|
||||
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
|
||||
else:
|
||||
query_parameters['uuid_exploring'] = uuid_exploring
|
||||
query_statement = """INSERT INTO capturing_tasks_logs
|
||||
(uuid, write_time, status, url, fk_uuid_exploring)
|
||||
(uuid, write_time, status, ad_url, fk_uuid_exploring)
|
||||
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
|
||||
|
||||
self.tasksdb.query(query_statement, query_parameters)
|
||||
21
tests/capturer_tests.py
Normal file
21
tests/capturer_tests.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
sys.path.append('..')
|
||||
from capturer.capturer import CapturingTask
|
||||
|
||||
|
||||
|
||||
def test_CapturingTask():
    """Manual smoke test: build one capturing task, run it, print its data.

    Builds a ``CapturingTask`` from a fixed parameter dict, calls
    ``capture()`` on it, and prints whatever ``get_ad_data()`` returns.
    Nothing is asserted; the output is meant to be inspected by hand.
    """
    task_parameters = dict(
        uuid='f31af60a-1feb-4770-bee9-0085c678f08f',
        ad_url='https://www.idealista.com/inmueble/81593047',
        fk_uuid_exploring='e5f9c8d5-b6e9-47fc-b4de-6d78f40feccc',
        status='Pending',
    )

    capturing_task = CapturingTask(task_parameters)
    capturing_task.capture()

    ad_data = capturing_task.get_ad_data()
    print(ad_data)
|
||||
|
||||
|
||||
|
||||
test_CapturingTask()
|
||||
Loading…
Add table
Add a link
Reference in a new issue