Cambios en validación del parser

This commit is contained in:
pablomartincalvo 2018-12-29 11:37:43 +01:00
parent 368f8a00bb
commit 98165ce8f0
2 changed files with 10 additions and 6 deletions

View file

@ -207,9 +207,10 @@ class AdHtmlParser:
if soup.find('div', {'class': 'info-features'}) is not None:
try:
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find(
'span').find('span').text
self.ad_fields['tamano_categorico']['found'] = True
if '' not in soup.find('div', {'class': 'info-features'}).find('span').find('span').text:
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find(
'span').find('span').text
self.ad_fields['tamano_categorico']['found'] = True
except:
pass
@ -286,7 +287,7 @@ class AdHtmlParser:
self.invalid_fields.append('distrito')
if (self.ad_fields['telefono']['found']
and not re.match(r".\+?.[0-9]{1,20}", self.ad_fields['telefono']['value'])):
and not re.match(r"\s*\+?[0-9\s]*", self.ad_fields['telefono']['value'])):
self.invalid_fields.append('telefono')
#TODO añadir + a caracteres validos

View file

@ -24,7 +24,8 @@ def test_Capturer():
def test_AdHtmlParser():
html = """<!DOCTYPE html>
html = """
<!DOCTYPE html>
@ -214,7 +215,9 @@ var configTwoSteps = {
);
</script> </body> </html>
"""
"""
parser = AdHtmlParser(html)