From 02973f0d48862e39c432998eb5ad5530763befa9 Mon Sep 17 00:00:00 2001 From: pablomartincalvo Date: Wed, 15 Aug 2018 22:31:40 +0200 Subject: [PATCH 1/6] 'Avanzado Dockerfile de mysql' --- mysql/.Dockerfile.swp | Bin 0 -> 1024 bytes mysql/Dockerfile | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 mysql/.Dockerfile.swp diff --git a/mysql/.Dockerfile.swp b/mysql/.Dockerfile.swp new file mode 100644 index 0000000000000000000000000000000000000000..838206a05ccd04b741555c42b3d60e8e31d2a030 GIT binary patch literal 1024 zcmYc?$V<%2S1{5u)iY*50+L(|3 Date: Mon, 5 Nov 2018 23:40:54 +0100 Subject: [PATCH 2/6] Corregido queries de anuncios viejos. --- db_layer/capturas_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db_layer/capturas_interface.py b/db_layer/capturas_interface.py index f69df35..4c1161c 100644 --- a/db_layer/capturas_interface.py +++ b/db_layer/capturas_interface.py @@ -28,7 +28,7 @@ class CapturasInterface(): FROM tasks.capturing_last as cl WHERE cl.status = 'Dead ad') as da ON da.ad_url LIKE CONCAT('%', uc.referencia, '%') - WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day) + WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day) AND da.ad_url is null """ @@ -47,7 +47,7 @@ class CapturasInterface(): FROM tasks.capturing_last as cl WHERE cl.status = 'Dead ad') as da ON da.ad_url LIKE CONCAT('%', uc.referencia, '%') - WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day) + WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day) AND da.ad_url is null ORDER BY RAND() LIMIT 1 From df07497125f4649309a90e3a171d6df494812b59 Mon Sep 17 00:00:00 2001 From: pablomartincalvo Date: Fri, 16 Nov 2018 18:20:50 +0100 Subject: [PATCH 3/6] Correciones para deteccion de anuncios dados de baja. Mejoras en script de deployment. --- .idea/workspace.xml | 461 ++++++++++++++++++--------------- capturer/capturer.py | 2 +- core/config.py | 2 +- core/scrapping_utils.py | 6 +- deployer.sh | 5 +- tests/refresher_tests.py | 18 +- tests/scrapping_utils_tests.py | 15 ++ 7 files changed, 285 insertions(+), 224 deletions(-) create mode 100644 tests/scrapping_utils_tests.py diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 8c8c303..0bc04d8 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,11 +2,13 @@ + - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -250,6 +291,7 @@ + @@ -265,18 +307,7 @@ - - - - - - - - - - - - + @@ -310,7 +341,6 @@ Comprar
También puedes buscar por característica, teléfono...

¿Eres el anunciante?

entra en tu cuenta de idealista Si quieres más información sobre el estado de tu anuncio,

+ """ html_normal = """ diff --git a/tests/scrapping_utils_tests.py b/tests/scrapping_utils_tests.py new file mode 100644 index 0000000..ad3aaab --- /dev/null +++ b/tests/scrapping_utils_tests.py @@ -0,0 +1,15 @@ +from core.scrapping_utils import * + + + +def UrlAttack_test(url): + + attack = UrlAttack(url) + + attack.attack() + + print(attack.get_status_code()) + print(attack.get_response()) + + +UrlAttack_test('https://www.idealista.com/inmueble/40402493/') \ No newline at end of file From 3bbe7475e2e71aae309111da639b6e8a3561a513 Mon Sep 17 00:00:00 2001 From: pablomartincalvo Date: Fri, 16 Nov 2018 18:45:42 +0100 Subject: [PATCH 4/6] Correciones para deteccion de anuncios dados de baja. --- capturer/capturer.py | 2 +- tests/scrapping_utils_tests.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/capturer/capturer.py b/capturer/capturer.py index a5ce531..4e92798 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -77,7 +77,7 @@ class CapturingTask: self._check_data() return - elif Refresher.dead_ad_checker(self.html): + elif Refresher.dead_ad_checker(attack.get_text()): self._update_status('Dead ad') return else: diff --git a/tests/scrapping_utils_tests.py b/tests/scrapping_utils_tests.py index ad3aaab..576b3af 100644 --- a/tests/scrapping_utils_tests.py +++ b/tests/scrapping_utils_tests.py @@ -9,7 +9,9 @@ def UrlAttack_test(url): attack.attack() print(attack.get_status_code()) + print(attack.success) print(attack.get_response()) + print(attack.get_text()) -UrlAttack_test('https://www.idealista.com/inmueble/40402493/') \ No newline at end of file +UrlAttack_test('https://www.idealista.com/inmueble/82810718/') \ No newline at end of file From 02dfa06b3630bbc0a7907134a8dbf96cbc760235 Mon Sep 17 00:00:00 2001 From: pablomartincalvo Date: Sat, 17 Nov 2018 12:58:16 +0100 Subject: [PATCH 5/6] =?UTF-8?q?A=C3=B1adidos=20requirements.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/workspace.xml | 383 ++++++++++++++------------------------------ core/config.py | 7 +- deployer.sh | 26 +-- requirements.txt | 3 + 4 files changed, 137 insertions(+), 282 deletions(-) create mode 100644 requirements.txt diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 0bc04d8..66f1a9a 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,13 +2,10 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + @@ -299,44 +183,6 @@
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@@ -325,9 +305,9 @@ + - @@ -506,7 +486,14 @@ - - - @@ -612,7 +611,8 @@ - @@ -633,9 +633,6 @@