Arreglos menores en capturer y refresher.
This commit is contained in:
parent
e379708c04
commit
c3c16e7015
5 changed files with 224 additions and 152 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import sys
|
||||
|
||||
sys.path.append('..')
|
||||
from time import sleep
|
||||
from bs4 import BeautifulSoup
|
||||
|
|
@ -9,11 +10,13 @@ from db_layer.capturas_interface import capturas_interface
|
|||
from core.scrapping_utils import UrlAttack
|
||||
from refresher.refresher import Refresher
|
||||
|
||||
|
||||
class Capturer:
|
||||
sleep_time_no_work = 60
|
||||
minimum_seconds_between_tries = 120
|
||||
working_hours = {'start': datetime.time(9, 0, 0),
|
||||
'end': datetime.time(21, 0, 0)}
|
||||
|
||||
def start(self):
|
||||
|
||||
while True:
|
||||
|
|
@ -42,7 +45,6 @@ class Capturer:
|
|||
return Capturer.working_hours['start'] <= datetime.datetime.now().time() <= Capturer.working_hours['end']
|
||||
|
||||
|
||||
|
||||
class CapturingTask:
|
||||
sleep_time_failed_request = 60
|
||||
|
||||
|
|
@ -90,14 +92,14 @@ class CapturingTask:
|
|||
self.parser.parse()
|
||||
|
||||
def _check_data(self):
|
||||
if not self.parser.all_fields_are_valid():
|
||||
self._update_status('Invalid value fields')
|
||||
return
|
||||
|
||||
if self.parser.fields_missing():
|
||||
self._update_status('Fields missing')
|
||||
return
|
||||
|
||||
if not self.parser.all_fields_are_valid():
|
||||
self._update_status('Invalid value fields')
|
||||
return
|
||||
|
||||
self._update_status('Data ready')
|
||||
|
||||
def get_ad_data(self):
|
||||
|
|
@ -190,7 +192,8 @@ class AdHtmlParser:
|
|||
|
||||
if soup.find('div', {'class': 'info-features'}) is not None:
|
||||
try:
|
||||
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find('span').find('span').text
|
||||
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find(
|
||||
'span').find('span').text
|
||||
self.ad_fields['tamano_categorico']['found'] = True
|
||||
except:
|
||||
pass
|
||||
|
|
@ -198,7 +201,7 @@ class AdHtmlParser:
|
|||
posible_m2 = [tag.text for tag in soup.find('div', {'class': 'info-features'}).find_all('span')]
|
||||
if [posible for posible in posible_m2 if 'm²' in posible]:
|
||||
self.ad_fields['m2']['value'] = \
|
||||
[''.join(re.findall(r'[0-9]', posible)) for posible in posible_m2 if 'm²' in posible][0]
|
||||
[''.join(re.findall(r'[0-9]', posible)) for posible in posible_m2 if 'm²' in posible][0]
|
||||
self.ad_fields['m2']['found'] = True
|
||||
|
||||
if soup.find('title') is not None:
|
||||
|
|
@ -265,7 +268,8 @@ class AdHtmlParser:
|
|||
if not 'Distrito' in self.ad_fields['distrito']['value']:
|
||||
self.invalid_fields.append('distrito')
|
||||
|
||||
if not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value']):
|
||||
if (self.ad_fields['telefono']['found']
|
||||
and not re.match(r"[0-9]{1,20}", self.ad_fields['telefono']['value'])):
|
||||
self.invalid_fields.append('telefono')
|
||||
|
||||
def all_fields_are_valid(self):
|
||||
|
|
@ -292,4 +296,4 @@ class AdHtmlParser:
|
|||
|
||||
if __name__ == '__main__':
|
||||
capturer = Capturer()
|
||||
capturer.start()
|
||||
capturer.start()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue