Improvements in listing page URL generation.

This commit is contained in:
pablo 2020-11-15 12:54:17 +01:00
parent a61fac72f7
commit e939d67467

View file

@@ -7,7 +7,7 @@ import datetime
from time import sleep from time import sleep
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
from random import randint from random import randint, choice
import mysql.connector import mysql.connector
from core.mysql_wrapper import get_anunciosdb, get_tasksdb from core.mysql_wrapper import get_anunciosdb, get_tasksdb
from core.config import monthly_new_ads_target, working_hours from core.config import monthly_new_ads_target, working_hours
@@ -210,7 +210,13 @@ class Explorer:
root = "https://www.idealista.com/" root = "https://www.idealista.com/"
type = Explorer.ad_types[str(randint(1, 2))] type = Explorer.ad_types[str(randint(1, 2))]
city = "barcelona" city = "barcelona"
page_number = str(randint(1, 30)) page_number = str(randint(1, 45))
order_string = choice(
[
"?ordenado-por=fecha-publicacion-asc",
"?ordenado-por=fecha-publicacion-desc",
]
)
url = ( url = (
root root
+ type + type
@@ -222,6 +228,7 @@ class Explorer:
+ "pagina-" + "pagina-"
+ page_number + page_number
+ ".htm" + ".htm"
+ order_string
) )
return url return url