Corregido criterio para identificar anuncios muertos en el capturer.

This commit is contained in:
pablomartincalvo 2018-10-26 20:34:43 +02:00
parent a3a2165f43
commit a2dcec95f4
3 changed files with 105 additions and 94 deletions

192
.idea/workspace.xml generated
View file

@ -2,8 +2,8 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/deployer.sh" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/db_layer/capturas_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturas_interface.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/geocoder/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/geocoder/geocoder.py" afterDir="false" />
</list>
@ -32,16 +32,17 @@
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="Dockerfile" value="1" />
<entry key="py" value="75" />
<entry key="sh" value="1" />
<entry key="Dockerfile" value="2" />
<entry key="py" value="87" />
<entry key="sh" value="2" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="PLAIN_TEXT" value="2" />
<entry key="Python" value="70" />
<entry key="Scratch" value="5" />
<entry key="Bash" value="1" />
<entry key="PLAIN_TEXT" value="3" />
<entry key="Python" value="81" />
<entry key="Scratch" value="6" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
@ -50,7 +51,7 @@
<entry key="Python Console" value="1621" />
<entry key="capturer" value="862" />
<entry key="dummy" value="14" />
<entry key="py" value="18825" />
<entry key="py" value="18863" />
<entry key="scratch_1" value="489" />
<entry key="sh" value="237" />
<entry key="txt" value="1207" />
@ -59,7 +60,7 @@
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="PLAIN_TEXT" value="1464" />
<entry key="Python" value="21525" />
<entry key="Python" value="21563" />
<entry key="Scratch" value="272" />
</counts>
</usages-collector>
@ -78,11 +79,11 @@
<splitter split-orientation="horizontal" split-proportion="0.5">
<split-first>
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1125">
<caret line="75" column="36" selection-start-line="75" selection-start-column="36" selection-end-line="75" selection-end-column="36" />
<state relative-caret-position="1305">
<caret line="87" lean-forward="true" selection-start-line="87" selection-end-line="87" />
</state>
</provider>
</entry>
@ -118,7 +119,7 @@
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="310">
<caret line="80" column="15" lean-forward="true" selection-start-line="80" selection-start-column="15" selection-end-line="80" selection-end-column="39" />
<caret line="80" column="39" lean-forward="true" selection-start-line="80" selection-start-column="39" selection-end-line="80" selection-end-column="39" />
</state>
</provider>
</entry>
@ -126,17 +127,17 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556">
<state relative-caret-position="3045">
<caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-7">
<caret line="32" column="2" lean-forward="true" selection-start-line="32" selection-start-column="2" selection-end-line="32" selection-end-column="2" />
<state relative-caret-position="-180">
<caret line="15" column="21" selection-start-line="15" selection-start-column="21" selection-end-line="15" selection-end-column="21" />
</state>
</provider>
</entry>
@ -144,8 +145,8 @@
<file pinned="false" current-in-tab="false">
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361">
<caret line="49" column="18" lean-forward="true" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" />
<state relative-caret-position="720">
<caret line="49" column="18" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" />
</state>
</provider>
</entry>
@ -157,7 +158,7 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556">
<state relative-caret-position="4245">
<caret line="283" selection-start-line="283" selection-end-line="283" />
</state>
</provider>
@ -175,7 +176,7 @@
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-300">
<state relative-caret-position="15">
<caret line="1" selection-start-line="1" selection-end-line="1" />
</state>
</provider>
@ -268,12 +269,12 @@
<option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
<option value="$PROJECT_DIR$/tests/refresher_tests.py" />
<option value="$PROJECT_DIR$/refresher/refresher.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
<option value="$PROJECT_DIR$/db_layer/Dockerfile" />
<option value="$PROJECT_DIR$/core/scrapping_utils.py" />
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
<option value="$PROJECT_DIR$/db_layer/capturas_interface.py" />
<option value="$PROJECT_DIR$/deployer.sh" />
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
</list>
</option>
</component>
@ -611,7 +612,14 @@
<option name="project" value="LOCAL" />
<updated>1540320577212</updated>
</task>
<option name="localTasksCounter" value="21" />
<task id="LOCAL-00021" summary="Creando scripts de deployment del sistema.">
<created>1540578488234</created>
<option name="number" value="00021" />
<option name="presentableId" value="LOCAL-00021" />
<option name="project" value="LOCAL" />
<updated>1540578488235</updated>
</task>
<option name="localTasksCounter" value="22" />
<servers />
</component>
<component name="TodoView" selected-index="1">
@ -713,7 +721,8 @@
<MESSAGE value="Finalizado y testeado localmente refresher, listo para testear en&#10;entorno." />
<MESSAGE value="Retoques menores en geocoder y capturer por problemas de tipos." />
<MESSAGE value="Testeando error en geocoder." />
<option name="LAST_COMMIT_MESSAGE" value="Testeando error en geocoder." />
<MESSAGE value="Creando scripts de deployment del sistema." />
<option name="LAST_COMMIT_MESSAGE" value="Creando scripts de deployment del sistema." />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
@ -723,11 +732,6 @@
<line>38</line>
<option name="timeStamp" value="7" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/geocoder/geocoder.py</url>
<line>27</line>
<option name="timeStamp" value="10" />
</line-breakpoint>
</breakpoints>
<breakpoints-dialog>
<breakpoints-dialog />
@ -894,10 +898,69 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<entry file="file://$PROJECT_DIR$/db_layer/db_init_scripts/users.sql">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/tests/geocoder_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1125">
<caret line="75" column="36" selection-start-line="75" selection-start-column="36" selection-end-line="75" selection-end-column="36" />
<state>
<caret column="23" selection-start-column="23" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/Dockerfile">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="60">
<caret line="4" selection-start-line="4" selection-end-line="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/deployer.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" column="23" selection-start-line="17" selection-start-column="23" selection-end-line="17" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="615">
<caret line="41" column="23" selection-start-line="41" selection-start-column="23" selection-end-line="41" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="3045">
<caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" />
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="720">
<caret line="49" column="18" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="4245">
<caret line="283" selection-start-line="283" selection-end-line="283" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" selection-start-line="17" selection-end-line="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="15">
<caret line="1" selection-start-line="1" selection-end-line="1" />
</state>
</provider>
</entry>
@ -921,34 +984,6 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" selection-start-line="17" selection-end-line="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556">
<caret line="283" selection-start-line="283" selection-end-line="283" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/Dockerfile">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="60">
<caret line="4" selection-start-line="4" selection-end-line="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-300">
<caret line="1" selection-start-line="1" selection-end-line="1" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/refresher/refresher.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="405">
@ -956,45 +991,24 @@
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361">
<caret line="49" column="18" lean-forward="true" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" />
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556">
<caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="615">
<caret line="41" column="23" selection-start-line="41" selection-start-column="23" selection-end-line="41" selection-end-column="23" />
<state relative-caret-position="-180">
<caret line="15" column="21" selection-start-line="15" selection-start-column="21" selection-end-line="15" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="310">
<caret line="80" column="15" lean-forward="true" selection-start-line="80" selection-start-column="15" selection-end-line="80" selection-end-column="39" />
<caret line="80" column="39" lean-forward="true" selection-start-line="80" selection-start-column="39" selection-end-line="80" selection-end-column="39" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/deployer.sh">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" column="23" selection-start-line="17" selection-start-column="23" selection-end-line="17" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-7">
<caret line="32" column="2" lean-forward="true" selection-start-line="32" selection-start-column="2" selection-end-line="32" selection-end-column="2" />
<state relative-caret-position="1305">
<caret line="87" lean-forward="true" selection-start-line="87" selection-end-line="87" />
</state>
</provider>
</entry>

View file

@ -77,7 +77,7 @@ class CapturingTask:
self._check_data()
return
elif attack.get_status_code() == 301:
elif refresher.dead_ad_checker(self.html):
self._update_status('Dead ad')
return
else:

View file

@ -13,13 +13,10 @@ class Geocoder:
def start(self):
while True:
sleep(1)
print("Exists: {}".format(capturas_interface.not_geocoded_captura_exists()))
sleep(120)
if capturas_interface.not_geocoded_captura_exists():
ad_data = capturas_interface.get_not_geocoded_captura()
print(ad_data)
else:
continue