129 lines
5.3 KiB
Python
129 lines
5.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
import requests
|
|
import random
|
|
from typing import Union
|
|
|
|
|
|
def headers_random_generator() -> dict:
    """
    Generates a random set of headers for requests.

    Every call builds and returns a brand new dict, so callers may mutate the
    result freely. All headers are fixed except "user-agent", which is drawn
    uniformly at random from a hard-coded list of browser user agents.

    :return: a dict with the selected headers.
    """
    potential_user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
        "Mozilla/5.0 (X11; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    ]

    # "user-agent" goes last so the key order of the returned dict stays the
    # same as it has always been.
    return {
        "Upgrade-Insecure-Requests": "1",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "Host": "www.idealista.com",
        "DNT": "1",
        "TE": "Trailers",
        "user-agent": random.choice(potential_user_agents),
    }
|
|
|
|
|
|
class UrlAttack:
    """
    Stores the flow of attempting an HTTP GET request to a certain URL.

    Request headers and an HTTP session are assigned at the class level on
    runtime initialization and shared across instances. They are refreshed
    (an "identity change") after every unsuccessful attack and, otherwise, at
    random with the probability given by ``identity_change_probability``.
    """

    # Headers sent with every request until the next identity change.
    headers = headers_random_generator()

    # Session shared by all instances until the next identity change.
    session = requests.Session()

    # Passed as ``timeout`` to ``session.get`` (seconds, per requests' docs).
    timeout = 20

    # Chance, in percent, of changing identity after an attack that succeeded
    # (it is compared against ``random.randrange(0, 100)``).
    identity_change_probability = 2

    def __init__(self, url: str) -> None:
        """
        Initialize with required data.

        :param url: URL that will be requested.
        """
        self.url = url
        # None until the first attack; afterwards True or False.
        self.success = None
        self.has_been_attacked = False
        # Response of the latest attack, or None if there is none.
        self.response = None

    def attack(self) -> None:
        """
        Execute the request and record the response and whether it was OK.

        ``success`` ends up True only when a response was received and its
        ``ok`` flag is set; it is False for a non-OK response and for any
        exception raised while requesting. The identity is changed whenever
        the attack fails and, with a predefined probability, when it succeeds.

        :return: None
        """
        self.has_been_attacked = True
        # Start from a clean slate so that attacking the same instance again
        # cannot report the success or the response of a previous attempt.
        self.success = False
        self.response = None

        try:
            self.response = UrlAttack.session.get(
                self.url, headers=self.headers, timeout=self.timeout
            )
            self.success = bool(self.response.ok)
        except Exception:
            # Best effort on purpose: any failure while requesting simply
            # counts as an unsuccessful attack.
            self.success = False

        if (
            not self.success
            or random.randrange(0, 100) < UrlAttack.identity_change_probability
        ):
            self._change_identity()

    def _change_identity(self) -> None:
        """
        Changes headers and initializes a new session, dropping old cookies and
        acquiring new ones. Effectively results in a change of identity to the
        target server, from the same IP.

        Both attributes are replaced on the class, so the change is shared by
        every instance.

        :return: None
        """
        UrlAttack.headers = headers_random_generator()
        UrlAttack.session = requests.Session()

    def get_text(self) -> str:
        """
        Return the text of the request response.

        :return: the text of the response.
        :raises AttributeError: if there is no response (``response`` is None).
        """
        return self.response.text

    def get_status_code(self) -> Union[int, None]:
        """
        Returns the status code of the response, if there is one.

        :return: the status code, or None if there is no response.
        """
        try:
            return self.response.status_code
        except AttributeError:
            # ``response`` is None when no response has been stored.
            return None
|