Corregido criterio para identificar anuncios muertos en el capturer.

This commit is contained in:
pablomartincalvo 2018-10-26 20:34:43 +02:00
parent a3a2165f43
commit a2dcec95f4
3 changed files with 105 additions and 94 deletions

192
.idea/workspace.xml generated
View file

@ -2,8 +2,8 @@
<project version="4"> <project version="4">
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment=""> <list default="true" id="6e2fbba0-85ff-42d6-8e70-e4cdef1000c8" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/deployer.sh" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/capturer/capturer.py" beforeDir="false" afterPath="$PROJECT_DIR$/capturer/capturer.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/db_layer/capturas_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturas_interface.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/db_layer/capturas_interface.py" beforeDir="false" afterPath="$PROJECT_DIR$/db_layer/capturas_interface.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/geocoder/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/geocoder/geocoder.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/geocoder/geocoder.py" beforeDir="false" afterPath="$PROJECT_DIR$/geocoder/geocoder.py" afterDir="false" />
</list> </list>
@ -32,16 +32,17 @@
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.extensions.open"> <usages-collector id="statistics.file.extensions.open">
<counts> <counts>
<entry key="Dockerfile" value="1" /> <entry key="Dockerfile" value="2" />
<entry key="py" value="75" /> <entry key="py" value="87" />
<entry key="sh" value="1" /> <entry key="sh" value="2" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.types.open"> <usages-collector id="statistics.file.types.open">
<counts> <counts>
<entry key="PLAIN_TEXT" value="2" /> <entry key="Bash" value="1" />
<entry key="Python" value="70" /> <entry key="PLAIN_TEXT" value="3" />
<entry key="Scratch" value="5" /> <entry key="Python" value="81" />
<entry key="Scratch" value="6" />
</counts> </counts>
</usages-collector> </usages-collector>
<usages-collector id="statistics.file.extensions.edit"> <usages-collector id="statistics.file.extensions.edit">
@ -50,7 +51,7 @@
<entry key="Python Console" value="1621" /> <entry key="Python Console" value="1621" />
<entry key="capturer" value="862" /> <entry key="capturer" value="862" />
<entry key="dummy" value="14" /> <entry key="dummy" value="14" />
<entry key="py" value="18825" /> <entry key="py" value="18863" />
<entry key="scratch_1" value="489" /> <entry key="scratch_1" value="489" />
<entry key="sh" value="237" /> <entry key="sh" value="237" />
<entry key="txt" value="1207" /> <entry key="txt" value="1207" />
@ -59,7 +60,7 @@
<usages-collector id="statistics.file.types.edit"> <usages-collector id="statistics.file.types.edit">
<counts> <counts>
<entry key="PLAIN_TEXT" value="1464" /> <entry key="PLAIN_TEXT" value="1464" />
<entry key="Python" value="21525" /> <entry key="Python" value="21563" />
<entry key="Scratch" value="272" /> <entry key="Scratch" value="272" />
</counts> </counts>
</usages-collector> </usages-collector>
@ -78,11 +79,11 @@
<splitter split-orientation="horizontal" split-proportion="0.5"> <splitter split-orientation="horizontal" split-proportion="0.5">
<split-first> <split-first>
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/capturer/capturer.py"> <entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1125"> <state relative-caret-position="1305">
<caret line="75" column="36" selection-start-line="75" selection-start-column="36" selection-end-line="75" selection-end-column="36" /> <caret line="87" lean-forward="true" selection-start-line="87" selection-end-line="87" />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -118,7 +119,7 @@
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py"> <entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="310"> <state relative-caret-position="310">
<caret line="80" column="15" lean-forward="true" selection-start-line="80" selection-start-column="15" selection-end-line="80" selection-end-column="39" /> <caret line="80" column="39" lean-forward="true" selection-start-line="80" selection-start-column="39" selection-end-line="80" selection-end-column="39" />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -126,17 +127,17 @@
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="false">
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py"> <entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556"> <state relative-caret-position="3045">
<caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" /> <caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" />
</state> </state>
</provider> </provider>
</entry> </entry>
</file> </file>
<file pinned="false" current-in-tab="true"> <file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py"> <entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-7"> <state relative-caret-position="-180">
<caret line="32" column="2" lean-forward="true" selection-start-line="32" selection-start-column="2" selection-end-line="32" selection-end-column="2" /> <caret line="15" column="21" selection-start-line="15" selection-start-column="21" selection-end-line="15" selection-end-column="21" />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -144,8 +145,8 @@
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="false">
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py"> <entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361"> <state relative-caret-position="720">
<caret line="49" column="18" lean-forward="true" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" /> <caret line="49" column="18" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -157,7 +158,7 @@
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/explorer/explorer.py"> <entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556"> <state relative-caret-position="4245">
<caret line="283" selection-start-line="283" selection-end-line="283" /> <caret line="283" selection-start-line="283" selection-end-line="283" />
</state> </state>
</provider> </provider>
@ -175,7 +176,7 @@
<file pinned="false" current-in-tab="false"> <file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py"> <entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-300"> <state relative-caret-position="15">
<caret line="1" selection-start-line="1" selection-end-line="1" /> <caret line="1" selection-start-line="1" selection-end-line="1" />
</state> </state>
</provider> </provider>
@ -268,12 +269,12 @@
<option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" /> <option value="$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py" />
<option value="$PROJECT_DIR$/tests/refresher_tests.py" /> <option value="$PROJECT_DIR$/tests/refresher_tests.py" />
<option value="$PROJECT_DIR$/refresher/refresher.py" /> <option value="$PROJECT_DIR$/refresher/refresher.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
<option value="$PROJECT_DIR$/db_layer/Dockerfile" /> <option value="$PROJECT_DIR$/db_layer/Dockerfile" />
<option value="$PROJECT_DIR$/core/scrapping_utils.py" /> <option value="$PROJECT_DIR$/core/scrapping_utils.py" />
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
<option value="$PROJECT_DIR$/db_layer/capturas_interface.py" /> <option value="$PROJECT_DIR$/db_layer/capturas_interface.py" />
<option value="$PROJECT_DIR$/deployer.sh" /> <option value="$PROJECT_DIR$/deployer.sh" />
<option value="$PROJECT_DIR$/geocoder/geocoder.py" />
<option value="$PROJECT_DIR$/capturer/capturer.py" />
</list> </list>
</option> </option>
</component> </component>
@ -611,7 +612,14 @@
<option name="project" value="LOCAL" /> <option name="project" value="LOCAL" />
<updated>1540320577212</updated> <updated>1540320577212</updated>
</task> </task>
<option name="localTasksCounter" value="21" /> <task id="LOCAL-00021" summary="Creando scripts de deployment del sistema.">
<created>1540578488234</created>
<option name="number" value="00021" />
<option name="presentableId" value="LOCAL-00021" />
<option name="project" value="LOCAL" />
<updated>1540578488235</updated>
</task>
<option name="localTasksCounter" value="22" />
<servers /> <servers />
</component> </component>
<component name="TodoView" selected-index="1"> <component name="TodoView" selected-index="1">
@ -713,7 +721,8 @@
<MESSAGE value="Finalizado y testeado localmente refresher, listo para testear en&#10;entorno." /> <MESSAGE value="Finalizado y testeado localmente refresher, listo para testear en&#10;entorno." />
<MESSAGE value="Retoques menores en geocoder y capturer por problemas de tipos." /> <MESSAGE value="Retoques menores en geocoder y capturer por problemas de tipos." />
<MESSAGE value="Testeando error en geocoder." /> <MESSAGE value="Testeando error en geocoder." />
<option name="LAST_COMMIT_MESSAGE" value="Testeando error en geocoder." /> <MESSAGE value="Creando scripts de deployment del sistema." />
<option name="LAST_COMMIT_MESSAGE" value="Creando scripts de deployment del sistema." />
</component> </component>
<component name="XDebuggerManager"> <component name="XDebuggerManager">
<breakpoint-manager> <breakpoint-manager>
@ -723,11 +732,6 @@
<line>38</line> <line>38</line>
<option name="timeStamp" value="7" /> <option name="timeStamp" value="7" />
</line-breakpoint> </line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/geocoder/geocoder.py</url>
<line>27</line>
<option name="timeStamp" value="10" />
</line-breakpoint>
</breakpoints> </breakpoints>
<breakpoints-dialog> <breakpoints-dialog>
<breakpoints-dialog /> <breakpoints-dialog />
@ -894,10 +898,69 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/capturer/capturer.py"> <entry file="file://$PROJECT_DIR$/db_layer/db_init_scripts/users.sql">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/tests/geocoder_tests.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1125"> <state>
<caret line="75" column="36" selection-start-line="75" selection-start-column="36" selection-end-line="75" selection-end-column="36" /> <caret column="23" selection-start-column="23" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/Dockerfile">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="60">
<caret line="4" selection-start-line="4" selection-end-line="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/deployer.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" column="23" selection-start-line="17" selection-start-column="23" selection-end-line="17" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="615">
<caret line="41" column="23" selection-start-line="41" selection-start-column="23" selection-end-line="41" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="3045">
<caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" />
</state>
</provider>
</entry>
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="720">
<caret line="49" column="18" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="4245">
<caret line="283" selection-start-line="283" selection-end-line="283" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" selection-start-line="17" selection-end-line="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="15">
<caret line="1" selection-start-line="1" selection-end-line="1" />
</state> </state>
</provider> </provider>
</entry> </entry>
@ -921,34 +984,6 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/tests/capturer_tests.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="17" selection-start-line="17" selection-end-line="17" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/explorer/explorer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556">
<caret line="283" selection-start-line="283" selection-end-line="283" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/Dockerfile">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="60">
<caret line="4" selection-start-line="4" selection-end-line="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturing_tasks_interface.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-300">
<caret line="1" selection-start-line="1" selection-end-line="1" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/refresher/refresher.py"> <entry file="file://$PROJECT_DIR$/refresher/refresher.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="405"> <state relative-caret-position="405">
@ -956,45 +991,24 @@
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file:///usr/lib/python3/dist-packages/apport_python_hook.py"> <entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="361"> <state relative-caret-position="-180">
<caret line="49" column="18" lean-forward="true" selection-start-line="49" selection-start-column="18" selection-end-line="49" selection-end-column="18" /> <caret line="15" column="21" selection-start-line="15" selection-start-column="21" selection-end-line="15" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://$APPLICATION_CONFIG_DIR$/scratches/scratch_1.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="556">
<caret line="203" column="35" selection-start-line="203" selection-start-column="35" selection-end-line="203" selection-end-column="35" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/core/scrapping_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="615">
<caret line="41" column="23" selection-start-line="41" selection-start-column="23" selection-end-line="41" selection-end-column="23" />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py"> <entry file="file://$PROJECT_DIR$/db_layer/capturas_interface.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="310"> <state relative-caret-position="310">
<caret line="80" column="15" lean-forward="true" selection-start-line="80" selection-start-column="15" selection-end-line="80" selection-end-column="39" /> <caret line="80" column="39" lean-forward="true" selection-start-line="80" selection-start-column="39" selection-end-line="80" selection-end-column="39" />
</state> </state>
</provider> </provider>
</entry> </entry>
<entry file="file://$PROJECT_DIR$/deployer.sh"> <entry file="file://$PROJECT_DIR$/capturer/capturer.py">
<provider selected="true" editor-type-id="text-editor"> <provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255"> <state relative-caret-position="1305">
<caret line="17" column="23" selection-start-line="17" selection-start-column="23" selection-end-line="17" selection-end-column="23" /> <caret line="87" lean-forward="true" selection-start-line="87" selection-end-line="87" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/geocoder/geocoder.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-7">
<caret line="32" column="2" lean-forward="true" selection-start-line="32" selection-start-column="2" selection-end-line="32" selection-end-column="2" />
</state> </state>
</provider> </provider>
</entry> </entry>

View file

@ -77,7 +77,7 @@ class CapturingTask:
self._check_data() self._check_data()
return return
elif attack.get_status_code() == 301: elif refresher.dead_ad_checker(self.html):
self._update_status('Dead ad') self._update_status('Dead ad')
return return
else: else:

View file

@ -13,13 +13,10 @@ class Geocoder:
def start(self): def start(self):
while True: while True:
sleep(1) sleep(120)
print("Exists: {}".format(capturas_interface.not_geocoded_captura_exists()))
if capturas_interface.not_geocoded_captura_exists(): if capturas_interface.not_geocoded_captura_exists():
ad_data = capturas_interface.get_not_geocoded_captura() ad_data = capturas_interface.get_not_geocoded_captura()
print(ad_data)
else: else:
continue continue