diff --git a/capturer/capturer.py b/capturer/capturer.py index e37f032..8ef69b3 100644 --- a/capturer/capturer.py +++ b/capturer/capturer.py @@ -13,18 +13,23 @@ from refresher.refresher import Refresher class Capturer: - sleep_time_no_work = 60 + sleep_time_no_work = 15 + + def __init__(self): + self.last_try_datetime = datetime.datetime.now() + def start(self): while True: if (capturing_interface.get_pending_task() is None - or capturing_interface.seconds_since_last_try() < minimum_seconds_between_tries + or self.seconds_since_last_try() < minimum_seconds_between_tries or not self.in_working_hours()): sleep(Capturer.sleep_time_no_work) continue task = CapturingTask(capturing_interface.get_pending_task()) + self.last_try_datetime = datetime.datetime.now() task.capture() if task.status == 'Data ready': @@ -38,9 +43,13 @@ class Capturer: def in_working_hours(self): return working_hours['start'] <= datetime.datetime.now().time() <= working_hours['end'] + def seconds_since_last_try(self): + return (datetime.datetime.now() - self.last_try_datetime).total_seconds() + + class CapturingTask: - sleep_time_failed_request = 60 + sleep_time_failed_request = 180 def __init__(self, parameters): self.uuid = parameters['uuid'] @@ -63,7 +72,7 @@ class CapturingTask: """ self._update_status('WIP') - while self.request_failures < 3: + while self.request_failures < 4: attack = UrlAttack(self.ad_url) attack.attack() @@ -88,6 +97,9 @@ class CapturingTask: continue self._update_status('Surrender') + print(datetime.datetime.now()) + print(self.html) + print(attack.get_response()) def _extract_data(self): self.parser = AdHtmlParser(self.html) @@ -177,6 +189,7 @@ class AdHtmlParser: 'found': False, 'optional': True, 'value': None}} + #TODO aƱadir campos de visitas def parse(self): @@ -246,6 +259,9 @@ class AdHtmlParser: .text.replace(' ', '') self.ad_fields['telefono']['found'] = True + # TODO capturar datos de visitas + + def _validate(self): self.invalid_fields = [] @@ -274,6 +290,9 @@ class AdHtmlParser: and not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value'])): self.invalid_fields.append('telefono') + + + def all_fields_are_valid(self): self._validate() if self.invalid_fields: diff --git a/core/config.py b/core/config.py index 594c282..07d3f6c 100644 --- a/core/config.py +++ b/core/config.py @@ -25,6 +25,6 @@ working_hours = {'start': datetime.time(9, 0, 0), 'end': datetime.time(21, 0, 0)} monthly_new_ads_target = 1200 google_api_key = 'AIzaSyCnKj0WnsxVZcaoxeAYkuRw3cKRNGiISYA' -minimum_seconds_between_tries = 45 -geocoder_delay = 30 +minimum_seconds_between_tries = 60 +geocoder_delay = 10 refresher_delay = 10 \ No newline at end of file diff --git a/db_layer/capturas_interface.py b/db_layer/capturas_interface.py index f06c2d5..8892c45 100644 --- a/db_layer/capturas_interface.py +++ b/db_layer/capturas_interface.py @@ -1,7 +1,7 @@ from core.mysql_wrapper import get_anunciosdb -class CapturasInterface(): +class CapturasInterface: def __init__(self): diff --git a/db_layer/db_init_scripts/3_alter_capturas.sql b/db_layer/db_init_scripts/3_alter_capturas.sql new file mode 100644 index 0000000..e69de29