This commit is contained in:
pablomartincalvo 2018-12-01 12:01:11 +01:00
commit 99d5d36bf4
9 changed files with 58 additions and 37 deletions

View file

@ -77,12 +77,16 @@ class CapturingTask:
self._check_data()
return
elif Refresher.dead_ad_checker(self.html):
self._update_status('Dead ad')
return
else:
self.request_failures += 1
try:
if Refresher.dead_ad_checker(attack.get_text()):
self._update_status('Dead ad')
return
except AttributeError:
pass
self._update_status('Fail {}'.format(self.request_failures))
self.request_failures += 1
sleep(CapturingTask.sleep_time_failed_request)
continue

View file

@ -7,17 +7,14 @@ databases = {'dev':{'host': '185.166.215.170 ',
'tst':{'host': '46.183.114.164',
'user': 'drogon',
'password': 'noesfacilvivirsindrogon'},
'prd':{'host': 'prd_drogon_db',
'user': 'drogon',
'password': 'noesfacilvivirsindrogon'},
'default':{'host': 'tst_drogon_db',
'pro':{'host': '185.166.215.250',
'user': 'drogon',
'password': 'noesfacilvivirsindrogon'}}
try:
current_db = environ['DROGON_ENV']
except KeyError:
current_db = 'dev'
current_db = 'pro'
try:
current_db_parameters = databases[current_db]

View file

@ -28,12 +28,10 @@ class UrlAttack():
self.success = False
def get_response(self):
if self.success:
return self.response
return self.response
def get_text(self):
if self.success:
return self.response.text
return self.response.text
def get_status_code(self):
try:

BIN
db_layer/.Dockerfile.swp Normal file

Binary file not shown.

View file

@ -28,7 +28,7 @@ class CapturasInterface():
FROM tasks.capturing_last as cl
WHERE cl.status = 'Dead ad') as da
ON da.ad_url LIKE CONCAT('%', uc.referencia, '%')
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day)
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day)
AND da.ad_url is null
"""
@ -47,7 +47,7 @@ class CapturasInterface():
FROM tasks.capturing_last as cl
WHERE cl.status = 'Dead ad') as da
ON da.ad_url LIKE CONCAT('%', uc.referencia, '%')
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day)
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day)
AND da.ad_url is null
ORDER BY RAND()
LIMIT 1

View file

@ -3,13 +3,14 @@
set -x
### Variables de entorno ###
echo "export DROGON_ENV=tst" | tee -a /etc/profile.d/drogon_variables.sh
echo "export DB_SERVICE_NAME=tst_drogon_db" | tee -a /etc/profile.d/drogon_variables.sh
echo "export NETWORK_NAME=tst_drogon_network" | tee -a /etc/profile.d/drogon_variables.sh
DROGON_ENV=tst
DB_SERVICE_NAME=tst_drogon_db
NETWORK_NAME=tst_drogon_network
echo "export DROGON_ENV=pro" | tee -a /etc/profile.d/drogon_variables.sh
echo "export DB_SERVICE_NAME=pro_drogon_db" | tee -a /etc/profile.d/drogon_variables.sh
echo "export NETWORK_NAME=pro_drogon_network" | tee -a /etc/profile.d/drogon_variables.sh
echo "export GIT_BRANCH=master" | tee -a /etc/profile.d/drogon_variables.sh
DROGON_ENV=pro
DB_SERVICE_NAME=pro_drogon_db
NETWORK_NAME=pro_drogon_network
GIT_BRANCH=master
###############################################################
################# INSTALACION DE HERRAMIENTAS #################
@ -43,7 +44,7 @@ fi
# Hacer clone desde remote
git clone git@gitlab.com:pablomartincalvo/Drogon.git /opt/Drogon
cd /opt/Drogon
git checkout testing
git checkout ${GIT_BRANCH}
################# DB
@ -74,30 +75,30 @@ sleep 10s
#Instalar dependencias
cd ..
pip3 install requirements.txt
pip3 install -r requirements.txt
#Explorer
cd ./explorer/
cd explorer
python3 explorer.py >> explorer_log.log 2>&1 &
# Respiro
sleep 10s
#Refresher
cd ./refresher/
cd ../refresher/
python3 refresher.py >> refresher_log.log 2>&1 &
# Respiro
sleep 10s
#Capturer
cd ./capturer/
cd ../capturer/
python3 capturer.py >> capturer_log.log 2>&1 &
# Respiro
sleep 10s
#Geocoder
cd ./geocoder/
cd ../geocoder/
python3 geocoder.py >> geocoder_log.log 2>&1 &

4
requirements.txt Normal file
View file

@ -0,0 +1,4 @@
bs4
mysql-connector-python
requests==2.18.4
html5lib

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,17 @@
from core.scrapping_utils import *
def UrlAttack_test(url):
    """Run a UrlAttack against *url* and print what it reports.

    Prints, one value per line and in this order: the status code, the
    ``success`` flag, the response object and the response text.

    NOTE(review): ``attack()`` presumably performs the actual HTTP
    request -- confirm against ``core.scrapping_utils``.
    """
    probe = UrlAttack(url)
    probe.attack()
    # Each reader is invoked only right before its value is printed, so the
    # earlier lines are still emitted if a later reader raises.
    readers = (
        probe.get_status_code,
        lambda: probe.success,
        probe.get_response,
        probe.get_text,
    )
    for read in readers:
        print(read())
# Manual smoke check: runs as soon as this module is executed or imported,
# against a single hard-coded listing URL.
# NOTE(review): presumably triggers a live HTTP request -- confirm before
# importing this module from anywhere else.
UrlAttack_test('https://www.idealista.com/inmueble/82810718/')