Merge branch 'dev'
This commit is contained in:
commit
5b245c0aed
4 changed files with 26 additions and 7 deletions
|
|
@ -13,18 +13,23 @@ from refresher.refresher import Refresher
|
|||
|
||||
|
||||
class Capturer:
|
||||
sleep_time_no_work = 60
|
||||
sleep_time_no_work = 15
|
||||
|
||||
def __init__(self):
|
||||
self.last_try_datetime = datetime.datetime.now()
|
||||
|
||||
def start(self):
|
||||
|
||||
while True:
|
||||
|
||||
if (capturing_interface.get_pending_task() is None
|
||||
or capturing_interface.seconds_since_last_try() < minimum_seconds_between_tries
|
||||
or self.seconds_since_last_try() < minimum_seconds_between_tries
|
||||
or not self.in_working_hours()):
|
||||
sleep(Capturer.sleep_time_no_work)
|
||||
continue
|
||||
|
||||
task = CapturingTask(capturing_interface.get_pending_task())
|
||||
self.last_try_datetime = datetime.datetime.now()
|
||||
task.capture()
|
||||
|
||||
if task.status == 'Data ready':
|
||||
|
|
@ -38,9 +43,13 @@ class Capturer:
|
|||
def in_working_hours(self):
|
||||
return working_hours['start'] <= datetime.datetime.now().time() <= working_hours['end']
|
||||
|
||||
def seconds_since_last_try(self):
|
||||
return (datetime.datetime.now() - self.last_try_datetime).total_seconds()
|
||||
|
||||
|
||||
|
||||
class CapturingTask:
|
||||
sleep_time_failed_request = 60
|
||||
sleep_time_failed_request = 180
|
||||
|
||||
def __init__(self, parameters):
|
||||
self.uuid = parameters['uuid']
|
||||
|
|
@ -63,7 +72,7 @@ class CapturingTask:
|
|||
"""
|
||||
self._update_status('WIP')
|
||||
|
||||
while self.request_failures < 3:
|
||||
while self.request_failures < 4:
|
||||
attack = UrlAttack(self.ad_url)
|
||||
attack.attack()
|
||||
|
||||
|
|
@ -88,6 +97,9 @@ class CapturingTask:
|
|||
continue
|
||||
|
||||
self._update_status('Surrender')
|
||||
print(datetime.datetime.now())
|
||||
print(self.html)
|
||||
print(attack.get_response())
|
||||
|
||||
def _extract_data(self):
|
||||
self.parser = AdHtmlParser(self.html)
|
||||
|
|
@ -177,6 +189,7 @@ class AdHtmlParser:
|
|||
'found': False,
|
||||
'optional': True,
|
||||
'value': None}}
|
||||
#TODO añadir campos de visitas
|
||||
|
||||
def parse(self):
|
||||
|
||||
|
|
@ -246,6 +259,9 @@ class AdHtmlParser:
|
|||
.text.replace(' ', '')
|
||||
self.ad_fields['telefono']['found'] = True
|
||||
|
||||
# TODO capturar datos de visitas
|
||||
|
||||
|
||||
def _validate(self):
|
||||
self.invalid_fields = []
|
||||
|
||||
|
|
@ -274,6 +290,9 @@ class AdHtmlParser:
|
|||
and not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value'])):
|
||||
self.invalid_fields.append('telefono')
|
||||
|
||||
|
||||
|
||||
|
||||
def all_fields_are_valid(self):
|
||||
self._validate()
|
||||
if self.invalid_fields:
|
||||
|
|
|
|||
|
|
@ -25,6 +25,6 @@ working_hours = {'start': datetime.time(9, 0, 0),
|
|||
'end': datetime.time(21, 0, 0)}
|
||||
monthly_new_ads_target = 1200
|
||||
google_api_key = 'AIzaSyCnKj0WnsxVZcaoxeAYkuRw3cKRNGiISYA'
|
||||
minimum_seconds_between_tries = 45
|
||||
geocoder_delay = 30
|
||||
minimum_seconds_between_tries = 60
|
||||
geocoder_delay = 10
|
||||
refresher_delay = 10
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
from core.mysql_wrapper import get_anunciosdb
|
||||
|
||||
|
||||
class CapturasInterface():
|
||||
class CapturasInterface:
|
||||
|
||||
def __init__(self):
|
||||
|
||||
|
|
|
|||
0
db_layer/db_init_scripts/3_alter_capturas.sql
Normal file
0
db_layer/db_init_scripts/3_alter_capturas.sql
Normal file
Loading…
Add table
Add a link
Reference in a new issue