Adaptado capturer y base de datos para soportar datos de m2 con decimales.
This commit is contained in:
pablomartincalvo 2018-11-05 20:49:54 +01:00
parent dd3362aa3c
commit 71456d3c92
5 changed files with 145 additions and 84 deletions

213
.idea/workspace.xml generated
View file

@ -4,6 +4,9 @@
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/core/config.py" beforeDir="false" afterPath="$PROJECT_DIR$/core/config.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/db_layer/db_init_scripts/1_20181028dump.sql" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/db_init_scripts/1_20181028dump.sql" afterDir="false" />
<change beforePath="$PROJECT_DIR$/tests/capturer_tests.py" beforeDir="false" afterPath="$PROJECT_DIR$/tests/capturer_tests.py" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
@ -31,19 +34,19 @@
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="Dockerfile" value="10" />
<entry key="py" value="113" />
<entry key="sh" value="6" />
<entry key="sql" value="5" />
<entry key="Dockerfile" value="11" />
<entry key="py" value="122" />
<entry key="sh" value="7" />
<entry key="sql" value="6" />
<entry key="txt" value="6" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="Bash" value="4" />
<entry key="DBN-SQL" value="2" />
<entry key="PLAIN_TEXT" value="18" />
<entry key="Python" value="107" />
<entry key="Bash" value="5" />
<entry key="DBN-SQL" value="3" />
<entry key="PLAIN_TEXT" value="19" />
<entry key="Python" value="116" />
<entry key="SQL" value="3" />
<entry key="Scratch" value="6" />
</counts>
@ -54,25 +57,25 @@
<entry key="Python Console" value="1651" />
<entry key="capturer" value="862" />
<entry key="dummy" value="14" />
<entry key="py" value="19552" />
<entry key="py" value="19636" />
<entry key="scratch_1" value="489" />
<entry key="sh" value="1633" />
<entry key="sql" value="53" />
<entry key="sql" value="63" />
<entry key="txt" value="1315" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="Bash" value="454" />
<entry key="DBN-SQL" value="53" />
<entry key="DBN-SQL" value="63" />
<entry key="PLAIN_TEXT" value="2617" />
<entry key="Python" value="22282" />
<entry key="Python" value="22366" />
<entry key="Scratch" value="272" />
</counts>
</usages-collector>
<usages-collector id="statistics.vcs.git.usages">
<counts>
<entry key="git.branch.checkout.local" value="4" />
<entry key="git.branch.checkout.local" value="5" />
<entry key="git.branch.create.new" value="1" />
</counts>
</usages-collector>
@ -83,15 +86,6 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/deployer.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="420">
<caret line="28" column="60" selection-start-line="28" selection-start-column="60" selection-end-line="28" selection-end-column="60" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/db_layer/Dockerfile">
<provider selected="true" editor-type-id="text-editor">
@ -104,8 +98,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/config.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="390">
<caret line="27" selection-start-line="27" selection-end-line="27" />
<state relative-caret-position="165">
<caret line="11" column="30" selection-start-line="11" selection-start-column="30" selection-end-line="11" selection-end-column="30" />
<folding>
<element signature="e#0#22#0" expanded="true" />
</folding>
@ -113,6 +107,15 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="300">
<caret line="20" selection-start-line="20" selection-end-line="20" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
@ -122,11 +125,29 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="285">
<caret line="79" column="51" selection-start-line="79" selection-start-column="51" selection-end-line="79" selection-end-column="51" />
<state relative-caret-position="233">
<caret line="194" column="16" lean-forward="true" selection-start-line="194" selection-start-column="16" selection-end-line="194" selection-end-column="16" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="120">
<caret line="8" selection-start-line="8" selection-end-line="17" selection-end-column="57" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/db_layer/db_init_scripts/1_20181028dump.sql">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="276">
<caret line="180" column="45" selection-start-line="180" selection-start-column="45" selection-end-line="180" selection-end-column="45" />
</state>
</provider>
</entry>
@ -134,8 +155,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="135">
<caret line="9" selection-start-line="9" selection-end-line="9" />
<state relative-caret-position="285">
<caret line="47" selection-start-line="47" selection-end-line="47" />
</state>
</provider>
</entry>
@ -169,6 +190,7 @@
<find>ge</find>
<find>findall</find>
<find>re.</find>
<find>capturas</find>
</findStrings>
</component>
<component name="Git.Rebase.Settings">
@ -179,7 +201,7 @@
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
<option name="RECENT_BRANCH_BY_REPOSITORY">
<map>
<entry key="$PROJECT_DIR$" value="dev" />
<entry key="$PROJECT_DIR$" value="testing" />
</map>
</option>
</component>
@ -193,7 +215,6 @@
<option value="$PROJECT_DIR$/mysql/capturing_tasks_interface.py" />
<option value="$PROJECT_DIR$/mysql/capturas_interface.py" />
<option value="$PROJECT_DIR$/tests/capturing_tests.py" />
<option value="$PROJECT_DIR$/tests/capturer_tests.py" />
<option value="$PROJECT_DIR$/db_layer/capturing_tasks_interface.py" />
<option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
<option value="$PROJECT_DIR$/tests/refresher_tests.py" />
@ -212,7 +233,9 @@
<option value="$PROJECT_DIR$/deployer.sh" />
<option value="$PROJECT_DIR$/core/alerts.py" />
<option value="$PROJECT_DIR$/core/mysql_wrapper.py" />
<option value="$PROJECT_DIR$/db_layer/db_init_scripts/1_20181028dump.sql" />
<option value="$PROJECT_DIR$/core/config.py" />
<option value="$PROJECT_DIR$/tests/capturer_tests.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
</list>
</option>
@ -234,6 +257,22 @@
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
<item name="capturer" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
<item name="db_layer" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
<item name="db_layer" type="462c0819:PsiDirectoryNode" />
<item name="db_init_scripts" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="Drogon" type="b2602c69:ProjectViewProjectNode" />
<item name="Drogon" type="462c0819:PsiDirectoryNode" />
@ -303,7 +342,7 @@
</list>
</option>
</component>
<component name="RunManager" selected="Python.explorer">
<component name="RunManager" selected="Python.capturer_tests">
<configuration name="alerts" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="Drogon" />
<option name="INTERPRETER_OPTIONS" value="" />
@ -418,11 +457,11 @@
</list>
<recent_temporary>
<list>
<item itemvalue="Python.explorer" />
<item itemvalue="Python.capturer_tests" />
<item itemvalue="Python.alerts" />
<item itemvalue="Python.explorer" />
<item itemvalue="Python.mysql_wrapper" />
<item itemvalue="Python.geocoder" />
<item itemvalue="Python.capturer_tests" />
</list>
</recent_temporary>
</component>
@ -584,7 +623,14 @@
<option name="project" value="LOCAL" />
<updated>1541182912652</updated>
</task>
<option name="localTasksCounter" value="22" />
<task id="LOCAL-00022" summary="Typo en capturer">
<created>1541444944894</created>
<option name="number" value="00022" />
<option name="presentableId" value="LOCAL-00022" />
<option name="project" value="LOCAL" />
<updated>1541444944895</updated>
</task>
<option name="localTasksCounter" value="23" />
<servers />
</component>
<component name="TodoView" selected-index="1">
@ -600,7 +646,7 @@
<frame x="0" y="-2" width="1920" height="1082" extended-state="6" />
<editor active="true" />
<layout>
<window_info active="true" content_ui="combo" id="Project" order="0" sideWeight="0.48076922" visible="true" weight="0.14918292" />
<window_info content_ui="combo" id="Project" order="0" sideWeight="0.48076922" visible="true" weight="0.14918292" />
<window_info id="Structure" order="1" sideWeight="0.5192308" side_tool="true" weight="0.14918292" />
<window_info id="Favorites" order="2" sideWeight="0.5015674" side_tool="true" weight="0.14918292" />
<window_info active="true" id="Repositories" order="3" sideWeight="0.49529782" visible="true" weight="0.32999474" />
@ -608,11 +654,11 @@
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" weight="0.32983193" />
<window_info anchor="bottom" id="Run" order="2" weight="0.32983193" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.39915967" />
<window_info active="true" anchor="bottom" id="Debug" order="3" visible="true" weight="0.39915967" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" sideWeight="0.49973643" weight="0.32878152" />
<window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" visible="true" weight="0.269958" />
<window_info anchor="bottom" id="Version Control" order="7" sideWeight="0.49973643" weight="0.269958" />
<window_info anchor="bottom" id="Terminal" order="8" weight="0.32983193" />
<window_info anchor="bottom" id="Event Log" order="9" sideWeight="0.5007907" side_tool="true" weight="0.32983193" />
<window_info anchor="bottom" id="Python Console" order="10" sideWeight="0.49920928" weight="0.32983193" />
@ -689,7 +735,8 @@
<MESSAGE value="Retoques menores en geocoder y capturer por problemas de tipos." />
<MESSAGE value="Testeando error en geocoder." />
<MESSAGE value="Mas correciones menores para testing." />
<option name="LAST_COMMIT_MESSAGE" value="Mas correciones menores para testing." />
<MESSAGE value="Typo en capturer" />
<option name="LAST_COMMIT_MESSAGE" value="Typo en capturer" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
@ -887,13 +934,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" selection-start-line="17" selection-end-line="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="613">
@ -901,13 +941,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret line="45" column="29" selection-start-line="45" selection-start-column="29" selection-end-line="45" selection-end-column="29" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/refresher/refresher.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="405">
@ -922,20 +955,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="15">
<caret line="1" selection-start-line="1" selection-end-line="1" />
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="203">
<caret line="48" selection-start-line="48" selection-end-line="48" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/alerts.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="195">
@ -943,6 +962,16 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="480">
<caret line="32" column="30" selection-start-line="32" selection-start-column="30" selection-end-line="32" selection-end-column="30" />
<folding>
<element signature="e#0#11#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/deployer.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="420">
@ -957,27 +986,55 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1950">
<caret line="130" column="11" selection-start-line="130" selection-start-column="11" selection-end-line="130" selection-end-column="11" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/db_init_scripts/1_20181028dump.sql">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="276">
<caret line="180" column="45" selection-start-line="180" selection-start-column="45" selection-end-line="180" selection-end-column="45" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/config.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="390">
<caret line="27" selection-start-line="27" selection-end-line="27" />
<state relative-caret-position="165">
<caret line="11" column="30" selection-start-line="11" selection-start-column="30" selection-end-line="11" selection-end-column="30" />
<folding>
<element signature="e#0#22#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="135">
<caret line="9" selection-start-line="9" selection-end-line="9" />
<state relative-caret-position="428">
<caret line="40" column="19" selection-start-line="40" selection-start-column="19" selection-end-line="40" selection-end-column="19" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1950">
<caret line="130" column="11" selection-start-line="130" selection-start-column="11" selection-end-line="130" selection-end-column="11" />
<state relative-caret-position="300">
<caret line="20" selection-start-line="20" selection-end-line="20" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/mysql_wrapper.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="285">
<caret line="47" selection-start-line="47" selection-end-line="47" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="233">
<caret line="194" column="16" lean-forward="true" selection-start-line="194" selection-start-column="16" selection-end-line="194" selection-end-column="16" />
</state>
</provider>
</entry>
@ -988,10 +1045,10 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="285">
<caret line="79" column="51" selection-start-line="79" selection-start-column="51" selection-end-line="79" selection-end-column="51" />
<state relative-caret-position="120">
<caret line="8" selection-start-line="8" selection-end-line="17" selection-end-column="57" />
</state>
</provider>
</entry>

View file

@ -202,7 +202,7 @@ class AdHtmlParser:
posible_m2 = [tag.text for tag in soup.find('div', {'class': 'info-features'}).find_all('span')]
if [posible for posible in posible_m2 if '' in posible]:
self.ad_fields['m2']['value'] = \
[''.join(re.findall(r'[0-9]', posible)) for posible in posible_m2 if '' in posible][0]
[''.join(re.findall(r'[0-9]+,*[0-9]*', posible)) for posible in posible_m2 if '' in posible][0].replace(',', '.')
self.ad_fields['m2']['found'] = True
if soup.find('title') is not None:

View file

@ -1,7 +1,7 @@
from os import environ
import datetime
databases = {'dev':{'host': 'dev_drogon_db',
databases = {'dev':{'host': '185.166.215.170 ',
'user': 'drogon',
'password': 'noesfacilvivirsindrogon'},
'tst':{'host': '46.183.114.164',
@ -14,7 +14,10 @@ databases = {'dev':{'host': 'dev_drogon_db',
'user': 'drogon',
'password': 'noesfacilvivirsindrogon'}}
current_db = environ['DROGON_ENV']
try:
current_db = environ['DROGON_ENV']
except KeyError:
current_db = 'dev'
try:
current_db_parameters = databases[current_db]

View file

@ -174,7 +174,7 @@ CREATE TABLE `capturas` (
`fecha_captura` datetime NOT NULL,
`referencia` int(11) NOT NULL,
`precio` int(11) NOT NULL,
`m2` int(11) DEFAULT NULL,
`m2` decimal(8,2) DEFAULT NULL,
`tamano_categorico` varchar(255) CHARACTER SET utf8 DEFAULT NULL,
`tipo_anuncio` int(11) NOT NULL,
`calle` varchar(1000) COLLATE utf8_unicode_ci DEFAULT NULL,

View file

@ -2,12 +2,12 @@
import sys
sys.path.append('..')
from capturer.capturer import CapturingTask, Capturer
from db_layer.capturas_interface import capturas_interface
def test_CapturingTask():
parameters = {'uuid': 'testie test',
'ad_url': 'https://www.idealista.com/inmueble/81514847',
'ad_url': 'https://www.idealista.com/inmueble/39627481',
'fk_uuid_exploring': None,
'status': 'Pending'}
@ -15,6 +15,7 @@ def test_CapturingTask():
task.capture()
print(task.get_ad_data())
capturas_interface.insert_captura(task.get_ad_data())
def test_Capturer():