lol
This commit is contained in:
commit
99d5d36bf4
9 changed files with 58 additions and 37 deletions
|
|
@ -77,12 +77,16 @@ class CapturingTask:
|
||||||
self._check_data()
|
self._check_data()
|
||||||
return
|
return
|
||||||
|
|
||||||
elif Refresher.dead_ad_checker(self.html):
|
|
||||||
self._update_status('Dead ad')
|
|
||||||
return
|
|
||||||
else:
|
else:
|
||||||
self.request_failures += 1
|
try:
|
||||||
|
if Refresher.dead_ad_checker(attack.get_text()):
|
||||||
|
self._update_status('Dead ad')
|
||||||
|
return
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
self._update_status('Fail {}'.format(self.request_failures))
|
self._update_status('Fail {}'.format(self.request_failures))
|
||||||
|
self.request_failures += 1
|
||||||
sleep(CapturingTask.sleep_time_failed_request)
|
sleep(CapturingTask.sleep_time_failed_request)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,17 +7,14 @@ databases = {'dev':{'host': '185.166.215.170 ',
|
||||||
'tst':{'host': '46.183.114.164',
|
'tst':{'host': '46.183.114.164',
|
||||||
'user': 'drogon',
|
'user': 'drogon',
|
||||||
'password': 'noesfacilvivirsindrogon'},
|
'password': 'noesfacilvivirsindrogon'},
|
||||||
'prd':{'host': 'prd_drogon_db',
|
'pro':{'host': '185.166.215.250',
|
||||||
'user': 'drogon',
|
|
||||||
'password': 'noesfacilvivirsindrogon'},
|
|
||||||
'default':{'host': 'tst_drogon_db',
|
|
||||||
'user': 'drogon',
|
'user': 'drogon',
|
||||||
'password': 'noesfacilvivirsindrogon'}}
|
'password': 'noesfacilvivirsindrogon'}}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_db = environ['DROGON_ENV']
|
current_db = environ['DROGON_ENV']
|
||||||
except KeyError:
|
except KeyError:
|
||||||
current_db = 'dev'
|
current_db = 'pro'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_db_parameters = databases[current_db]
|
current_db_parameters = databases[current_db]
|
||||||
|
|
|
||||||
|
|
@ -28,12 +28,10 @@ class UrlAttack():
|
||||||
self.success = False
|
self.success = False
|
||||||
|
|
||||||
def get_response(self):
|
def get_response(self):
|
||||||
if self.success:
|
return self.response
|
||||||
return self.response
|
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
if self.success:
|
return self.response.text
|
||||||
return self.response.text
|
|
||||||
|
|
||||||
def get_status_code(self):
|
def get_status_code(self):
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
BIN
db_layer/.Dockerfile.swp
Normal file
BIN
db_layer/.Dockerfile.swp
Normal file
Binary file not shown.
|
|
@ -28,7 +28,7 @@ class CapturasInterface():
|
||||||
FROM tasks.capturing_last as cl
|
FROM tasks.capturing_last as cl
|
||||||
WHERE cl.status = 'Dead ad') as da
|
WHERE cl.status = 'Dead ad') as da
|
||||||
ON da.ad_url LIKE CONCAT('%', uc.referencia, '%')
|
ON da.ad_url LIKE CONCAT('%', uc.referencia, '%')
|
||||||
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day)
|
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day)
|
||||||
AND da.ad_url is null
|
AND da.ad_url is null
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -47,7 +47,7 @@ class CapturasInterface():
|
||||||
FROM tasks.capturing_last as cl
|
FROM tasks.capturing_last as cl
|
||||||
WHERE cl.status = 'Dead ad') as da
|
WHERE cl.status = 'Dead ad') as da
|
||||||
ON da.ad_url LIKE CONCAT('%', uc.referencia, '%')
|
ON da.ad_url LIKE CONCAT('%', uc.referencia, '%')
|
||||||
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 10 day) AND (NOW() - INTERVAL 60 day)
|
WHERE uc.fecha_captura BETWEEN (NOW() - INTERVAL 90 day) AND (NOW() - INTERVAL 10 day)
|
||||||
AND da.ad_url is null
|
AND da.ad_url is null
|
||||||
ORDER BY RAND()
|
ORDER BY RAND()
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
|
|
|
||||||
27
deployer.sh
27
deployer.sh
|
|
@ -3,13 +3,14 @@
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
### Variables de entorno ###
|
### Variables de entorno ###
|
||||||
echo "export DROGON_ENV=tst" | tee -a /etc/profile.d/drogon_variables.sh
|
echo "export DROGON_ENV=pro" | tee -a /etc/profile.d/drogon_variables.sh
|
||||||
echo "export DB_SERVICE_NAME=tst_drogon_db" | tee -a /etc/profile.d/drogon_variables.sh
|
echo "export DB_SERVICE_NAME=pro_drogon_db" | tee -a /etc/profile.d/drogon_variables.sh
|
||||||
echo "export NETWORK_NAME=tst_drogon_network" | tee -a /etc/profile.d/drogon_variables.sh
|
echo "export NETWORK_NAME=pro_drogon_network" | tee -a /etc/profile.d/drogon_variables.sh
|
||||||
DROGON_ENV=tst
|
echo "export GIT_BRANCH=master" | tee -a /etc/profile.d/drogon_variables.sh
|
||||||
DB_SERVICE_NAME=tst_drogon_db
|
DROGON_ENV=pro
|
||||||
NETWORK_NAME=tst_drogon_network
|
DB_SERVICE_NAME=pro_drogon_db
|
||||||
|
NETWORK_NAME=pro_drogon_network
|
||||||
|
GIT_BRANCH=master
|
||||||
|
|
||||||
###############################################################
|
###############################################################
|
||||||
################# INSTALACION DE HERRAMIENTAS #################
|
################# INSTALACION DE HERRAMIENTAS #################
|
||||||
|
|
@ -43,7 +44,7 @@ fi
|
||||||
# Hacer clone desde remote
|
# Hacer clone desde remote
|
||||||
git clone git@gitlab.com:pablomartincalvo/Drogon.git /opt/Drogon
|
git clone git@gitlab.com:pablomartincalvo/Drogon.git /opt/Drogon
|
||||||
cd /opt/Drogon
|
cd /opt/Drogon
|
||||||
git checkout testing
|
git checkout ${GIT_BRANCH}
|
||||||
|
|
||||||
|
|
||||||
################# DB
|
################# DB
|
||||||
|
|
@ -74,30 +75,30 @@ sleep 10s
|
||||||
|
|
||||||
#Instalar dependencias
|
#Instalar dependencias
|
||||||
cd ..
|
cd ..
|
||||||
pip3 install requirements.txt
|
pip3 install -r requirements.txt
|
||||||
|
|
||||||
#Explorer
|
#Explorer
|
||||||
cd ./explorer/
|
cd explorer
|
||||||
python3 explorer.py >> explorer_log.log 2>&1 &
|
python3 explorer.py >> explorer_log.log 2>&1 &
|
||||||
|
|
||||||
# Respiro
|
# Respiro
|
||||||
sleep 10s
|
sleep 10s
|
||||||
|
|
||||||
#Refresher
|
#Refresher
|
||||||
cd ./refresher/
|
cd ../refresher/
|
||||||
python3 refresher.py >> refresher_log.log 2>&1 &
|
python3 refresher.py >> refresher_log.log 2>&1 &
|
||||||
|
|
||||||
# Respiro
|
# Respiro
|
||||||
sleep 10s
|
sleep 10s
|
||||||
|
|
||||||
#Capturer
|
#Capturer
|
||||||
cd ./capturer/
|
cd ../capturer/
|
||||||
python3 capturer.py >> capturer_log.log 2>&1 &
|
python3 capturer.py >> capturer_log.log 2>&1 &
|
||||||
|
|
||||||
# Respiro
|
# Respiro
|
||||||
sleep 10s
|
sleep 10s
|
||||||
|
|
||||||
#Geocoder
|
#Geocoder
|
||||||
cd ./geocoder/
|
cd ../geocoder/
|
||||||
python3 geocoder.py >> geocoder_log.log 2>&1 &
|
python3 geocoder.py >> geocoder_log.log 2>&1 &
|
||||||
|
|
||||||
|
|
|
||||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
bs4
|
||||||
|
mysql-connector-python
|
||||||
|
requests==2.18.4
|
||||||
|
html5lib
|
||||||
File diff suppressed because one or more lines are too long
17
tests/scrapping_utils_tests.py
Normal file
17
tests/scrapping_utils_tests.py
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
from core.scrapping_utils import *
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def UrlAttack_test(url):
|
||||||
|
|
||||||
|
attack = UrlAttack(url)
|
||||||
|
|
||||||
|
attack.attack()
|
||||||
|
|
||||||
|
print(attack.get_status_code())
|
||||||
|
print(attack.success)
|
||||||
|
print(attack.get_response())
|
||||||
|
print(attack.get_text())
|
||||||
|
|
||||||
|
|
||||||
|
UrlAttack_test('https://www.idealista.com/inmueble/82810718/')
|
||||||
Loading…
Add table
Add a link
Reference in a new issue