Merge branch 'dev'

This commit is contained in:
pablomartincalvo 2018-12-04 21:02:56 +01:00
commit 5b245c0aed
4 changed files with 26 additions and 7 deletions

View file

@ -13,18 +13,23 @@ from refresher.refresher import Refresher
class Capturer:
sleep_time_no_work = 60
sleep_time_no_work = 15
def __init__(self):
self.last_try_datetime = datetime.datetime.now()
def start(self):
while True:
if (capturing_interface.get_pending_task() is None
or capturing_interface.seconds_since_last_try() < minimum_seconds_between_tries
or self.seconds_since_last_try() < minimum_seconds_between_tries
or not self.in_working_hours()):
sleep(Capturer.sleep_time_no_work)
continue
task = CapturingTask(capturing_interface.get_pending_task())
self.last_try_datetime = datetime.datetime.now()
task.capture()
if task.status == 'Data ready':
@ -38,9 +43,13 @@ class Capturer:
def in_working_hours(self):
    """Tell whether the current time falls inside the working window.

    The window is read from the module-level ``working_hours`` mapping;
    both its ``'start'`` and ``'end'`` bounds are inclusive.
    """
    current_time = datetime.datetime.now().time()
    return working_hours['start'] <= current_time <= working_hours['end']
def seconds_since_last_try(self):
    """Return the seconds elapsed since ``last_try_datetime``, as a float."""
    elapsed = datetime.datetime.now() - self.last_try_datetime
    return elapsed.total_seconds()
class CapturingTask:
sleep_time_failed_request = 60
sleep_time_failed_request = 180
def __init__(self, parameters):
self.uuid = parameters['uuid']
@ -63,7 +72,7 @@ class CapturingTask:
"""
self._update_status('WIP')
while self.request_failures < 3:
while self.request_failures < 4:
attack = UrlAttack(self.ad_url)
attack.attack()
@ -88,6 +97,9 @@ class CapturingTask:
continue
self._update_status('Surrender')
print(datetime.datetime.now())
print(self.html)
print(attack.get_response())
def _extract_data(self):
self.parser = AdHtmlParser(self.html)
@ -177,6 +189,7 @@ class AdHtmlParser:
'found': False,
'optional': True,
'value': None}}
# TODO: add fields for visit data
def parse(self):
@ -246,6 +259,9 @@ class AdHtmlParser:
.text.replace(' ', '')
self.ad_fields['telefono']['found'] = True
# TODO: capture visit data
def _validate(self):
self.invalid_fields = []
@ -274,6 +290,9 @@ class AdHtmlParser:
and not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value'])):
self.invalid_fields.append('telefono')
def all_fields_are_valid(self):
self._validate()
if self.invalid_fields:

View file

@ -25,6 +25,6 @@ working_hours = {'start': datetime.time(9, 0, 0),
'end': datetime.time(21, 0, 0)}
monthly_new_ads_target = 1200
google_api_key = 'AIzaSyCnKj0WnsxVZcaoxeAYkuRw3cKRNGiISYA'
minimum_seconds_between_tries = 45
geocoder_delay = 30
minimum_seconds_between_tries = 60
geocoder_delay = 10
refresher_delay = 10

View file

@ -1,7 +1,7 @@
from core.mysql_wrapper import get_anunciosdb
class CapturasInterface():
class CapturasInterface:
def __init__(self):