diff --git a/capturer/capturer.py b/capturer/capturer.py index 5e75bd0..769eaed 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -77,12 +77,16 @@ class CapturingTask: self._check_data() return - elif Refresher.dead_ad_checker(self.html): - self._update_status('Dead ad') - return else: - self.request_failures += 1 + try: + if Refresher.dead_ad_checker(attack.get_text()): + self._update_status('Dead ad') + return + except AttributeError: + pass + self._update_status('Fail {}'.format(self.request_failures)) + self.request_failures += 1 sleep(CapturingTask.sleep_time_failed_request) continue diff --git a/core/config.py b/core/config.py index 90dd571..23bcf80 100644 --- a/core/config.py +++ b/core/config.py @@ -7,17 +7,14 @@ databases = {'dev':{'host': '185.166.215.170 ', 'tst':{'host': '46.183.114.164', 'user': 'drogon', 'password': 'noesfacilvivirsindrogon'}, - 'prd':{'host': 'prd_drogon_db', - 'user': 'drogon', - 'password': 'noesfacilvivirsindrogon'}, - 'default':{'host': 'tst_drogon_db', + 'pro':{'host': '185.166.215.250', 'user': 'drogon', 'password': 'noesfacilvivirsindrogon'}} try: current_db = environ['DROGON_ENV'] except KeyError: - current_db = 'dev' + current_db = 'pro' try: current_db_parameters = databases[current_db] diff --git a/core/scrapping_utils.py b/core/scrapping_utils.py index 376a641..1b76c1e 100644 --- a/core/scrapping_utils.py +++ b/core/scrapping_utils.py @@ -28,12 +28,10 @@ class UrlAttack(): self.success = False def get_response(self): - if self.success: - return self.response + return self.response def get_text(self): - if self.success: - return self.response.text + return self.response.text def get_status_code(self): try: diff --git a/db_layer/.Dockerfile.swp b/db_layer/.Dockerfile.swp new file mode 100644 index 0000000..838206a Binary files /dev/null and b/db_layer/.Dockerfile.swp differ diff --git a/db_layer/capturas_interface.py b/db_layer/capturas_interface.py index f69df35..4c1161c 100644 --- a/db_layer/capturas_interface.py +++ b/db_layer/capturas_interface.py @@ -28,7 +28,7 @@ class CapturasInterface(): FROM tasks.capturing_last as cl WHERE cl.status = 'Dead ad') as da ON da.ad_url LIKE CONCAT('%', uc.referencia, '%') - WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day) + WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day) AND da.ad_url is null """ @@ -47,7 +47,7 @@ class CapturasInterface(): FROM tasks.capturing_last as cl WHERE cl.status = 'Dead ad') as da ON da.ad_url LIKE CONCAT('%', uc.referencia, '%') - WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day) + WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day) AND da.ad_url is null ORDER BY RAND() LIMIT 1 diff --git a/deployer.sh b/deployer.sh index 4630ae3..34faa7f 100644 --- a/deployer.sh +++ b/deployer.sh @@ -3,13 +3,14 @@ set -x ### Variables de entorno ### -echo "export DROGON_ENV=tst" | tee -a /etc/profile.d/drogon_variables.sh -echo "export DB_SERVICE_NAME=tst_drogon_db" | tee -a /etc/profile.d/drogon_variables.sh -echo "export NETWORK_NAME=tst_drogon_network" | tee -a /etc/profile.d/drogon_variables.sh -DROGON_ENV=tst -DB_SERVICE_NAME=tst_drogon_db -NETWORK_NAME=tst_drogon_network - +echo "export DROGON_ENV=pro" | tee -a /etc/profile.d/drogon_variables.sh +echo "export DB_SERVICE_NAME=pro_drogon_db" | tee -a /etc/profile.d/drogon_variables.sh +echo "export NETWORK_NAME=pro_drogon_network" | tee -a /etc/profile.d/drogon_variables.sh +echo "export GIT_BRANCH=master" | tee -a /etc/profile.d/drogon_variables.sh +DROGON_ENV=pro +DB_SERVICE_NAME=pro_drogon_db +NETWORK_NAME=pro_drogon_network +GIT_BRANCH=master ############################################################### ################# INSTALACION DE HERRAMIENTAS ################# @@ -43,7 +44,7 @@ fi # Hacer clone desde remote git clone git@gitlab.com:pablomartincalvo/Drogon.git /opt/Drogon cd /opt/Drogon -git checkout testing +git checkout ${GIT_BRANCH} ################# DB @@ -74,30 +75,30 @@ sleep 10s #Instalar dependencias cd .. -pip3 install requirements.txt +pip3 install -r requirements.txt #Explorer - cd ./explorer/ + cd explorer python3 explorer.py >> explorer_log.log 2>&1 & # Respiro sleep 10s #Refresher - cd ./refresher/ + cd ../refresher/ python3 refresher.py >> refresher_log.log 2>&1 & # Respiro sleep 10s #Capturer - cd ./capturer/ + cd ../capturer/ python3 capturer.py >> capturer_log.log 2>&1 & # Respiro sleep 10s #Geocoder - cd ./geocoder/ + cd ../geocoder/ python3 geocoder.py >> geocoder_log.log 2>&1 & diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..42a487e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +bs4 +mysql-connector-python +requests==2.18.4 +html5lib \ No newline at end of file diff --git a/tests/refresher_tests.py b/tests/refresher_tests.py index 1ccdd96..4b1e296 100644 --- a/tests/refresher_tests.py +++ b/tests/refresher_tests.py @@ -12,6 +12,7 @@ html_baja = """ + @@ -19,28 +20,27 @@ html_baja = """ - - Viviendas venta. Viviendas alquiler. Pisos. Chalets — idealista

¿Eres el anunciante?

entra en tu cuenta de idealista Si quieres más información sobre el estado de tu anuncio,

+ """ html_normal = """ diff --git a/tests/scrapping_utils_tests.py b/tests/scrapping_utils_tests.py new file mode 100644 index 0000000..576b3af --- /dev/null +++ b/tests/scrapping_utils_tests.py @@ -0,0 +1,17 @@ +from core.scrapping_utils import * + + + +def UrlAttack_test(url): + + attack = UrlAttack(url) + + attack.attack() + + print(attack.get_status_code()) + print(attack.success) + print(attack.get_response()) + print(attack.get_text()) + + +UrlAttack_test('https://www.idealista.com/inmueble/82810718/') \ No newline at end of file