2018-08-12 23:14:47 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
import requests
|
2020-04-26 15:05:40 +02:00
|
|
|
import random
|
2020-11-02 12:02:56 +01:00
|
|
|
import pickle
|
2018-08-12 23:14:47 +02:00
|
|
|
|
2020-04-26 15:06:04 +02:00
|
|
|
|
2020-04-26 15:05:40 +02:00
|
|
|
def headers_random_generator():
    """Build browser-like HTTP request headers with a random User-Agent.

    Every call builds and returns a brand-new dict, so callers may mutate
    the result without affecting later calls. All headers are fixed except
    ``user-agent``, which is drawn uniformly at random from a hard-coded
    pool of desktop browser strings.

    Returns:
        dict: Header names mapped to header values. ``user-agent`` is
        always the last key.
    """
    # Static part of the headers.
    # NOTE: "Host" is hard-coded, so these headers only suit requests sent
    # to www.idealista.com.
    # NOTE(review): "Accept-Encoding" advertises "br"; confirm the HTTP
    # client in use can actually decode brotli responses.
    base_headers = {
        "Upgrade-Insecure-Requests": "1",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "Host": "www.idealista.com",
        "DNT": "1",
        "TE": "Trailers",
    }

    # Pool of user agents to pick from.
    potential_user_agents = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0",
        "Mozilla/5.0 (X11; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    )

    # Add the randomly chosen agent last so the key order stays stable.
    base_headers["user-agent"] = random.choice(potential_user_agents)
    return base_headers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class UrlAttack:
    """Fetch a single URL through a shared, browser-like HTTP session.

    The request headers and the ``requests.Session`` are class attributes,
    so every instance uses the same "identity" until ``change_identity``
    replaces them.

    Instance attributes:
        url: The URL this object fetches.
        success: ``None`` before ``attack`` runs; afterwards ``True`` if a
            response was received and its ``ok`` flag was set, otherwise
            ``False``.
        has_been_attacked: ``True`` once ``attack`` has been called.
        response: The response of the last successful request, or ``None``
            if there is none.
    """

    # Headers shared by all instances; replaced by change_identity().
    headers = headers_random_generator()

    # Session shared by all instances; replaced by change_identity().
    session = requests.Session()

    # Value passed as ``timeout`` to every request.
    timeout = 20

    # Chance, in percent, that attack() swaps the shared identity once it
    # has finished.
    identity_change_percent = 2

    def __init__(self, url):
        """Store the target URL; no request is made until ``attack``.

        Args:
            url: The URL to fetch.
        """
        self.url = url
        self.success = None
        self.has_been_attacked = False
        # Defined up front so the accessors never hit a missing attribute.
        self.response = None

    def attack(self):
        """Request ``self.url`` and record the outcome in ``self.success``.

        Any exception raised while performing the request is swallowed and
        recorded as a failure. After the request, the shared identity is
        rotated with a probability of ``identity_change_percent`` percent.
        """
        self.has_been_attacked = True
        # Drop any previous response so a failed retry cannot expose stale
        # data next to success == False.
        self.response = None

        try:
            self.response = UrlAttack.session.get(
                self.url, headers=self.headers, timeout=self.timeout
            )
        except Exception:
            # Deliberately broad: any failure to fetch counts as "no success".
            self.success = False
        else:
            # Always a definite True/False once a response was received.
            self.success = bool(self.response.ok)
            self._dump_request()

        if random.randrange(0, 100) < self.identity_change_percent:
            self.change_identity()

    def _dump_request(self):
        """Best-effort debug dump of the last sent request to ``request.pickle``."""
        try:
            with open("request.pickle", "wb") as output_file:
                pickle.dump(self.response.request, output_file)
        except (OSError, pickle.PickleError, TypeError, AttributeError):
            # The dump is only a debugging aid; failing to write it must not
            # change the recorded outcome of the request.
            pass

    def change_identity(self):
        """Replace the class-wide headers and session with fresh ones.

        This affects every ``UrlAttack`` instance, not just this one.
        """
        UrlAttack.headers = headers_random_generator()
        UrlAttack.session = requests.Session()

    def get_response(self):
        """Return the last response object, or ``None`` if there is none."""
        return self.response

    def get_text(self):
        """Return the body text of the last response.

        Raises:
            AttributeError: If no response is available.
        """
        return self.response.text

    def get_status_code(self):
        """Return the status code of the last response, or ``None`` if there is none."""
        if self.response is None:
            return None
        return self.response.status_code
|