Merge branch 'dev'

This commit is contained in:
pablomartincalvo 2018-12-01 16:36:10 +01:00
commit 5e80f3e35c
5 changed files with 12 additions and 12 deletions

View file

@@ -8,21 +8,18 @@ import datetime
from db_layer.capturing_tasks_interface import capturing_interface from db_layer.capturing_tasks_interface import capturing_interface
from db_layer.capturas_interface import capturas_interface from db_layer.capturas_interface import capturas_interface
from core.scrapping_utils import UrlAttack from core.scrapping_utils import UrlAttack
from core.config import working_hours, minimum_seconds_between_tries
from refresher.refresher import Refresher from refresher.refresher import Refresher
class Capturer: class Capturer:
sleep_time_no_work = 60 sleep_time_no_work = 60
minimum_seconds_between_tries = 120
working_hours = {'start': datetime.time(9, 0, 0),
'end': datetime.time(21, 0, 0)}
def start(self): def start(self):
while True: while True:
if (capturing_interface.get_pending_task() is None if (capturing_interface.get_pending_task() is None
or capturing_interface.seconds_since_last_try() < Capturer.minimum_seconds_between_tries or capturing_interface.seconds_since_last_try() < minimum_seconds_between_tries
or not self.in_working_hours()): or not self.in_working_hours()):
sleep(Capturer.sleep_time_no_work) sleep(Capturer.sleep_time_no_work)
continue continue
@@ -39,7 +36,7 @@ class Capturer:
task._update_status('Captura inserted') task._update_status('Captura inserted')
def in_working_hours(self): def in_working_hours(self):
return Capturer.working_hours['start'] <= datetime.datetime.now().time() <= Capturer.working_hours['end'] return working_hours['start'] <= datetime.datetime.now().time() <= working_hours['end']
class CapturingTask: class CapturingTask:

View file

@@ -23,5 +23,8 @@ except KeyError:
working_hours = {'start': datetime.time(9, 0, 0), working_hours = {'start': datetime.time(9, 0, 0),
'end': datetime.time(21, 0, 0)} 'end': datetime.time(21, 0, 0)}
monthly_new_ads_target = 1000 monthly_new_ads_target = 1200
google_api_key = 'AIzaSyCnKj0WnsxVZcaoxeAYkuRw3cKRNGiISYA' google_api_key = 'AIzaSyCnKj0WnsxVZcaoxeAYkuRw3cKRNGiISYA'
minimum_seconds_between_tries = 45
geocoder_delay = 30
refresher_delay = 10

View file

@@ -226,7 +226,7 @@ class ExploringTask:
Lanza una advertencia si no es así. Lanza una advertencia si no es así.
""" """
soup = BeautifulSoup(html, 'html5lib') soup = BeautifulSoup(html, 'html5lib')
ads = soup.find_all(class_ = "item") ads = soup.find_all(class_="item")
pattern = "^[0-9]{3,20}$" pattern = "^[0-9]{3,20}$"
for ad in ads: for ad in ads:

View file

@@ -3,7 +3,7 @@ sys.path.append('..')
import requests import requests
from time import sleep from time import sleep
from db_layer.capturas_interface import capturas_interface from db_layer.capturas_interface import capturas_interface
from core.config import google_api_key from core.config import google_api_key, geocoder_delay
class Geocoder: class Geocoder:
@@ -13,7 +13,7 @@ class Geocoder:
def start(self): def start(self):
while True: while True:
sleep(120) sleep(geocoder_delay)
if capturas_interface.not_geocoded_captura_exists(): if capturas_interface.not_geocoded_captura_exists():
ad_data = capturas_interface.get_not_geocoded_captura() ad_data = capturas_interface.get_not_geocoded_captura()

View file

@@ -3,14 +3,14 @@ sys.path.append('..')
from time import sleep from time import sleep
from db_layer.capturas_interface import capturas_interface from db_layer.capturas_interface import capturas_interface
from db_layer.capturing_tasks_interface import capturing_interface from db_layer.capturing_tasks_interface import capturing_interface
from core.config import refresher_delay
class Refresher: class Refresher:
def start(self): def start(self):
while True: while True:
sleep(60) sleep(refresher_delay)
if capturas_interface.old_ads_exist(): if capturas_interface.old_ads_exist():
old_ad = capturas_interface.get_old_ad() old_ad = capturas_interface.get_old_ad()