Cambios en validación del parser
This commit is contained in:
parent
368f8a00bb
commit
98165ce8f0
2 changed files with 10 additions and 6 deletions
|
|
@ -207,9 +207,10 @@ class AdHtmlParser:
|
|||
|
||||
if soup.find('div', {'class': 'info-features'}) is not None:
|
||||
try:
|
||||
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find(
|
||||
'span').find('span').text
|
||||
self.ad_fields['tamano_categorico']['found'] = True
|
||||
if 'm²' not in soup.find('div', {'class': 'info-features'}).find('span').find('span').text:
|
||||
self.ad_fields['tamano_categorico']['value'] = soup.find('div', {'class': 'info-features'}).find(
|
||||
'span').find('span').text
|
||||
self.ad_fields['tamano_categorico']['found'] = True
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
@ -286,7 +287,7 @@ class AdHtmlParser:
|
|||
self.invalid_fields.append('distrito')
|
||||
|
||||
if (self.ad_fields['telefono']['found']
|
||||
and not re.match(r".\+?.[0-9]{1,20}", self.ad_fields['telefono']['value'])):
|
||||
and not re.match(r"\s*\+?[0-9\s]*", self.ad_fields['telefono']['value'])):
|
||||
self.invalid_fields.append('telefono')
|
||||
#TODO añadir + a caracteres validos
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,8 @@ def test_Capturer():
|
|||
|
||||
def test_AdHtmlParser():
|
||||
|
||||
html = """<!DOCTYPE html>
|
||||
html = """
|
||||
<!DOCTYPE html>
|
||||
|
||||
|
||||
|
||||
|
|
@ -214,7 +215,9 @@ var configTwoSteps = {
|
|||
);
|
||||
</script> </body> </html>
|
||||
|
||||
"""
|
||||
|
||||
|
||||
"""
|
||||
|
||||
parser = AdHtmlParser(html)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue