Typing, docstrings, formatting for scrapping_utils.py
This commit is contained in:
parent
a79fc533ee
commit
e9ee23f852
1 changed files with 45 additions and 14 deletions
|
|
@ -1,10 +1,14 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import requests
|
import requests
|
||||||
import random
|
import random
|
||||||
import pickle
|
from typing import Union
|
||||||
|
|
||||||
|
|
||||||
def headers_random_generator():
|
def headers_random_generator() -> dict:
|
||||||
|
"""
|
||||||
|
Generates a random set of headers for requests.
|
||||||
|
:return: a dict with the selected headers.
|
||||||
|
"""
|
||||||
|
|
||||||
base_headers = {
|
base_headers = {
|
||||||
"Upgrade-Insecure-Requests": "1",
|
"Upgrade-Insecure-Requests": "1",
|
||||||
|
|
@ -49,6 +53,12 @@ def headers_random_generator():
|
||||||
|
|
||||||
|
|
||||||
class UrlAttack:
|
class UrlAttack:
|
||||||
|
"""
|
||||||
|
Stores the flow of attempting an HTTP GET request to a certain URL.
|
||||||
|
Request headers and an HTTP session are assigned at the class level on
|
||||||
|
runtime initialization and shared across instances. Refreshing of these
|
||||||
|
attributes takes place with the probability specified below.
|
||||||
|
"""
|
||||||
|
|
||||||
headers = headers_random_generator()
|
headers = headers_random_generator()
|
||||||
|
|
||||||
|
|
@ -56,39 +66,60 @@ class UrlAttack:
|
||||||
|
|
||||||
timeout = 20
|
timeout = 20
|
||||||
|
|
||||||
def __init__(self, url):
|
identity_change_probability = 2
|
||||||
|
|
||||||
|
def __init__(self, url: str) -> None:
|
||||||
|
"""
|
||||||
|
Initialize with required data.
|
||||||
|
:param url: URL that will be requested.
|
||||||
|
"""
|
||||||
self.url = url
|
self.url = url
|
||||||
self.success = None
|
self.success = None
|
||||||
self.has_been_attacked = False
|
self.has_been_attacked = False
|
||||||
|
self.response = None
|
||||||
|
|
||||||
def attack(self):
|
def attack(self) -> None:
|
||||||
|
"""
|
||||||
|
Execute the request and record the response status. Randomly changes
|
||||||
|
identity with a predefined probability.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
self.has_been_attacked = True
|
self.has_been_attacked = True
|
||||||
try:
|
try:
|
||||||
self.response = UrlAttack.session.get(
|
self.response = UrlAttack.session.get(
|
||||||
self.url, headers=self.headers, timeout=self.timeout
|
self.url, headers=self.headers, timeout=self.timeout
|
||||||
)
|
)
|
||||||
|
|
||||||
with open(f"request.pickle", "wb") as output_file:
|
|
||||||
pickle.dump(self.response.request, output_file)
|
|
||||||
if self.response.ok:
|
if self.response.ok:
|
||||||
self.success = True
|
self.success = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.success = False
|
self.success = False
|
||||||
|
|
||||||
if random.randrange(0, 100) < 2:
|
if random.randrange(0, 100) < UrlAttack.identity_change_probability:
|
||||||
self.change_identity()
|
self._change_identity()
|
||||||
|
|
||||||
def change_identity(self):
|
def _change_identity(self) -> None:
|
||||||
|
"""
|
||||||
|
Changes headers and initializes a new session, dropping old cookies and
|
||||||
|
acquiring new ones. Effectively results in a change of identity to the
|
||||||
|
target server, from the same IP.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
UrlAttack.headers = headers_random_generator()
|
UrlAttack.headers = headers_random_generator()
|
||||||
UrlAttack.session = requests.Session()
|
UrlAttack.session = requests.Session()
|
||||||
|
|
||||||
def get_response(self):
|
def get_text(self) -> str:
|
||||||
return self.response
|
"""
|
||||||
|
Return the text of the request response.
|
||||||
def get_text(self):
|
:return: the text of the response
|
||||||
|
"""
|
||||||
return self.response.text
|
return self.response.text
|
||||||
|
|
||||||
def get_status_code(self):
|
def get_status_code(self) -> Union[int, None]:
|
||||||
|
"""
|
||||||
|
Returns the status code of the response, if there is one.
|
||||||
|
:return: the status code, if there is one.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
return self.response.status_code
|
return self.response.status_code
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue