diff --git a/core/scrapping_utils.py b/core/scrapping_utils.py index 69382c2..67a47cf 100644 --- a/core/scrapping_utils.py +++ b/core/scrapping_utils.py @@ -1,10 +1,14 @@ # -*- coding: utf-8 -*- import requests import random -import pickle +from typing import Union -def headers_random_generator(): +def headers_random_generator() -> dict: + """ + Generates a random set of headers for requests. + :return: a dict with the selected headers. + """ base_headers = { "Upgrade-Insecure-Requests": "1", @@ -49,6 +53,12 @@ def headers_random_generator(): class UrlAttack: + """ + Stores the flow of attempting an HTTP GET request to a certain URL. + Request headers and an HTTP session are assigned at the class level on + runtime initialization and shared across instances. Refreshing of these + attributes takes place with the probability specified below. + """ headers = headers_random_generator() @@ -56,39 +66,60 @@ class UrlAttack: timeout = 20 - def __init__(self, url): + identity_change_probability = 2 + + def __init__(self, url: str) -> None: + """ + Initialize with required data. + :param url: URL that will be requested. + """ self.url = url self.success = None self.has_been_attacked = False + self.response = None - def attack(self): + def attack(self) -> None: + """ + Execute the request and record the response status. Randomly changes + identity with a predefined probability. 
+ :return: None + """ self.has_been_attacked = True try: self.response = UrlAttack.session.get( self.url, headers=self.headers, timeout=self.timeout ) - with open(f"request.pickle", "wb") as output_file: - pickle.dump(self.response.request, output_file) if self.response.ok: self.success = True except Exception as e: self.success = False - if random.randrange(0, 100) < 2: - self.change_identity() + if random.randrange(0, 100) < UrlAttack.identity_change_probability: + self._change_identity() - def change_identity(self): + def _change_identity(self) -> None: + """ + Changes headers and initializes a new session, dropping old cookies and + acquiring new ones. Effectively results in a change of identity to the + target server, from the same IP. + :return: None + """ UrlAttack.headers = headers_random_generator() UrlAttack.session = requests.Session() - def get_response(self): - return self.response - - def get_text(self): + def get_text(self) -> str: + """ + Return the text of the request response. + :return: the text of the response + """ return self.response.text - def get_status_code(self): + def get_status_code(self) -> Union[int, None]: + """ + Returns the status code of the response, if there is one. + :return: the status code, if there is one. + """ try: return self.response.status_code except AttributeError: