# -*- coding: utf-8 -*-
import sys
sys.path.append('..')

import uuid
import datetime
import random
from time import sleep

from core.mysql_wrapper import get_anunciosdb
from core.scrapping_utils import UrlAttack


class Explorer:

    sleep_time_no_work = 60
    sleep_time_no_service = 600

    def __init__(self):
        try:
            self.anunciosdb = get_anunciosdb()
        except Exception:
            print("Could not connect to DB")
        self.max_db_retries = 3
        self.db_retries = 0
        self.max_queue_retries = 3
        self.queue_retries = 0

    def start(self):
        # Start the service
        while True:
            if not self.there_is_work():
                sleep(self.sleep_time_no_work)
                continue
            if not self.database_is_up():
                break
            if not self.queue_is_up():
                break
            current_task = ExploringTask(self.compose_listing_url())
        self.stop()

    def stop(self):
        # Stop the service
        pass

    def there_is_work(self):
        # Check whether there is work to do
        pass

    def database_is_up(self):
        while self.db_retries <= self.max_db_retries:
            try:
                self.anunciosdb.ping()
                self.db_retries = 0
                return True
            except Exception:
                sleep(self.sleep_time_no_service)
                self.db_retries += 1
        return False

    def queue_is_up(self):
        # WIP
        while self.queue_retries <= self.max_queue_retries:
            try:
                # Code that checks whether Redis is alive
                # (see the hedged sketch at the bottom of this file)
                self.queue_retries = 0
                return True
            except Exception:
                sleep(self.sleep_time_no_service)
                self.queue_retries += 1
        return False

    def compose_listing_url(self):
        # Decide which URL needs composing and build it
        raiz = 'https://www.idealista.com/'
        # Random logic (assumption: listings split into sale/rental sections)
        tipo = random.choice(['venta', 'alquiler'])
        ciudad = 'barcelona'
        # Random logic (the page range below is an illustrative assumption)
        numero = str(random.randint(1, 60))
        url = raiz + tipo + '-garajes/' + ciudad + '-' + ciudad + '/' + 'pagina-' + numero + '.htm'
        return url


class ExploringTask:

    def __init__(self, url):
        self.creation_date = datetime.datetime.now()
        self.target_url = url
        self.id = str(uuid.uuid4())
        self.status = 'Pending'
        self.listings_ok = False  # set by get_listings()
        self.log()

    def explore(self):
        self.attack_date = datetime.datetime.now()
        attack = UrlAttack(self.target_url)
        attack.attack()
        if attack.success:
            self.get_listings()
        if self.listings_ok:
            self.status = 'Success'
        else:
            self.status = 'Failure'
        self.log()

    def get_listings(self):
        # TODO
        # Function that returns the cleaned-up listings
        # (expected to set self.listings_ok)
        pass

    def log(self):
        # TODO
        # Function that records status and related data in a MongoDB
        # or something along those lines
        pass

    def extract_listings(self):
        # TODO
        # Takes the HTML of a listing page and grabs the references.
        # If none are found, it counts as a failure.
        pass
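
# ---------------------------------------------------------------------------
# Hedged sketch: one way queue_is_up() could test that Redis is alive. This
# is not part of the original module; the redis-py client and the host/port
# values are assumptions. redis.Redis.ping() raises redis.ConnectionError
# when the server is unreachable, which fits the existing retry loop.
#
#   import redis
#
#   def queue_is_up(self):
#       while self.queue_retries <= self.max_queue_retries:
#           try:
#               redis.Redis(host='localhost', port=6379).ping()
#               self.queue_retries = 0
#               return True
#           except redis.ConnectionError:
#               sleep(self.sleep_time_no_service)
#               self.queue_retries += 1
#       return False
# ---------------------------------------------------------------------------

# Usage sketch (an assumed entry point, not in the original module): create
# an Explorer and let start() loop, sleeping while there_is_work() is falsy
# and breaking out if the database or the queue stays down past its retry
# budget.
if __name__ == '__main__':
    explorer = Explorer()
    explorer.start()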