Typing, docstrings, formatting for scrapping_utils.py
This commit is contained in:
parent
a79fc533ee
commit
e9ee23f852
1 changed files with 45 additions and 14 deletions
|
|
@ -1,10 +1,14 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import requests
|
||||
import random
|
||||
import pickle
|
||||
from typing import Union
|
||||
|
||||
|
||||
def headers_random_generator():
|
||||
def headers_random_generator() -> dict:
|
||||
"""
|
||||
Generates a random set of headers for requests.
|
||||
:return: a dict with the selected headers.
|
||||
"""
|
||||
|
||||
base_headers = {
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
|
|
@ -49,6 +53,12 @@ def headers_random_generator():
|
|||
|
||||
|
||||
class UrlAttack:
|
||||
"""
|
||||
Stores the flow of attempting an HTTP GET request to a certain URL.
|
||||
Request headers and an HTTP session are assigned at the class level on
|
||||
runtime initialization and shared across instances. Refreshing of these
|
||||
attributes takes place with the probability specified below.
|
||||
"""
|
||||
|
||||
headers = headers_random_generator()
|
||||
|
||||
|
|
@ -56,39 +66,60 @@ class UrlAttack:
|
|||
|
||||
timeout = 20
|
||||
|
||||
def __init__(self, url):
|
||||
identity_change_probability = 2
|
||||
|
||||
def __init__(self, url: str) -> None:
    """
    Set up the bookkeeping for a request that has not been attempted yet.

    :param url: URL that will be requested.
    """
    # Target of the future GET request.
    self.url = url
    # No attempt has been made so far, hence no outcome and no response.
    self.has_been_attacked = False
    self.success = self.response = None
|
||||
|
||||
def attack(self):
|
||||
def attack(self) -> None:
    """
    Execute the request and record the outcome. Randomly changes
    identity with a predefined probability.

    After the call ``has_been_attacked`` is True. ``success`` is True only
    when a response was obtained and it reports an OK status; it is False
    when the request raised or the status was not OK. ``response`` is
    updated only when the request returned one.

    :return: None
    """
    self.has_been_attacked = True
    try:
        self.response = UrlAttack.session.get(
            self.url, headers=self.headers, timeout=self.timeout
        )
    except Exception:
        # Deliberately broad: any failure to obtain a response is recorded
        # as an unsuccessful attempt instead of propagating to the caller.
        self.success = False
    else:
        # Always overwrite the flag so that a previous successful attempt
        # cannot leak into a later attempt that got a non-OK status.
        self.success = bool(self.response.ok)

        # Best-effort debugging dump of the request that was actually sent.
        # It is kept apart from the request itself so that a failure to
        # write the file cannot flip the recorded outcome.
        try:
            with open("request.pickle", "wb") as output_file:
                pickle.dump(self.response.request, output_file)
        except Exception:
            pass

    # identity_change_probability is compared against a draw from 0..99,
    # so it reads as a percentage.
    if random.randrange(0, 100) < UrlAttack.identity_change_probability:
        self._change_identity()
|
||||
|
||||
def change_identity(self):
|
||||
def _change_identity(self) -> None:
    """
    Replace the class-level headers and HTTP session with fresh ones.

    Starting a new session means the cookies held by the previous one are
    no longer sent. Effectively this results in a change of identity
    towards the target server, from the same IP.

    :return: None
    """
    fresh_headers = headers_random_generator()
    fresh_session = requests.Session()
    # Both live on the class, so every instance picks up the new identity.
    UrlAttack.headers = fresh_headers
    UrlAttack.session = fresh_session
|
||||
|
||||
def get_response(self):
|
||||
return self.response
|
||||
|
||||
def get_text(self):
|
||||
def get_text(self) -> str:
    """
    Give back the body of the stored response as text.

    :return: the text of the response
    """
    stored_response = self.response
    return stored_response.text
|
||||
|
||||
def get_status_code(self):
|
||||
def get_status_code(self) -> Union[int, None]:
|
||||
"""
|
||||
Returns the status code of the response, if there is one.
|
||||
:return: the status code, if there is one.
|
||||
"""
|
||||
try:
|
||||
return self.response.status_code
|
||||
except AttributeError:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue