diff --git a/core/alerts.py b/core/alerts.py index 3d269a5..f5a19ba 100644 --- a/core/alerts.py +++ b/core/alerts.py @@ -2,26 +2,27 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText import smtplib -my_address = 'drogonalerts@gmail.com' -master_address = 'pablomartincalvo@gmail.com' +my_address = "drogonalerts@gmail.com" +master_address = "pablomartincalvo@gmail.com" + def alert_master(header, message): msg = MIMEMultipart() password = "noesfacilvivirsindrogon" - msg['From'] = my_address - msg['To'] = master_address - msg['Subject'] = header + msg["From"] = my_address + msg["To"] = master_address + msg["Subject"] = header - msg.attach(MIMEText(message, 'plain')) + msg.attach(MIMEText(message, "plain")) - server = smtplib.SMTP('smtp.gmail.com: 465') + server = smtplib.SMTP("smtp.gmail.com: 465") server.starttls() - server.login(msg['From'], password) + server.login(msg["From"], password) - server.sendmail(msg['From'], msg['To'], msg.as_string()) + server.sendmail(msg["From"], msg["To"], msg.as_string()) server.quit() diff --git a/core/mysql_wrapper.py b/core/mysql_wrapper.py index df405bb..b38811d 100644 --- a/core/mysql_wrapper.py +++ b/core/mysql_wrapper.py @@ -1,31 +1,34 @@ # -*- coding: utf-8 -*- import sys -sys.path.append('..') + +sys.path.append("..") import mysql.connector from core.alerts import alert_master from core.config import current_db_parameters -anuncios_db_parameters = {'database': 'anuncios', **current_db_parameters} -tasks_db_parameters = {'database': 'tasks', **current_db_parameters} +anuncios_db_parameters = {"database": "anuncios", **current_db_parameters} +tasks_db_parameters = {"database": "tasks", **current_db_parameters} -class DatabaseWrapper(): - + +class DatabaseWrapper: def __init__(self, connection_parameters): - self.host = connection_parameters['host'] - self.database = connection_parameters['database'] - self.user = connection_parameters['user'] - self.password = connection_parameters['password'] + self.host = connection_parameters["host"] + self.database = connection_parameters["database"] + self.user = connection_parameters["user"] + self.password = connection_parameters["password"] self.connection = None - + self.ping() - + def connect(self): - self.connection = mysql.connector.connect(host = self.host, - database = self.database, - user = self.user, - password = self.password, - autocommit = False) - + self.connection = mysql.connector.connect( + host=self.host, + database=self.database, + user=self.user, + password=self.password, + autocommit=False, + ) + def disconnect(self): if self.connection.is_connected(): self.connection.disconnect() @@ -33,41 +36,41 @@ class DatabaseWrapper(): def ping(self): self.connect() self.disconnect() - + def query(self, query_statement, query_parameters=None, dictionary=False): self.connect() if self.connection.is_connected(): try: - execution_cursor = self.connection.cursor(dictionary=dictionary, - buffered=True) + execution_cursor = self.connection.cursor( + dictionary=dictionary, buffered=True + ) execution_cursor.execute(query_statement, query_parameters) self.connection.commit() self.disconnect() return execution_cursor except Exception as e: - alert_master("SQL ERROR", """Se ha producido un error ejecutando la + alert_master( + "SQL ERROR", + """Se ha producido un error ejecutando la siguiente query: {}. Con los siguientes parametros: {} {} - """.format(query_statement, - query_parameters, - e)) + """.format( + query_statement, query_parameters, e + ), + ) else: raise Exception("Could not connect to the database.") - - - def query_dict(self, query_statement, query_parameters = None): - return self.query(query_statement, query_parameters, dictionary = True) - + + def query_dict(self, query_statement, query_parameters=None): + return self.query(query_statement, query_parameters, dictionary=True) + def get_anunciosdb(): - return DatabaseWrapper(anuncios_db_parameters) + return DatabaseWrapper(anuncios_db_parameters) def get_tasksdb(): return DatabaseWrapper(tasks_db_parameters) - - - \ No newline at end of file diff --git a/db_layer/capturas_interface.py b/db_layer/capturas_interface.py index db11749..ac821f2 100644 --- a/db_layer/capturas_interface.py +++ b/db_layer/capturas_interface.py @@ -2,19 +2,20 @@ from core.mysql_wrapper import get_anunciosdb class CapturasInterface: - def __init__(self): self.anunciosdb = get_anunciosdb() def insert_captura(self, ad_data): - columns = ', '.join(ad_data.keys()) - placeholders_string = ', '.join(['%s'] * len(ad_data)) + columns = ", ".join(ad_data.keys()) + placeholders_string = ", ".join(["%s"] * len(ad_data)) query_statement = """ INSERT INTO capturas ( fecha_captura, {} ) - VALUES( NOW(), {} )""".format(columns, placeholders_string) + VALUES( NOW(), {} )""".format( + columns, placeholders_string + ) query_parameters = tuple([v for v in ad_data.values()]) @@ -71,7 +72,6 @@ class CapturasInterface: return result > 0 - def get_not_geocoded_captura(self): query_statement = """ SELECT * @@ -82,17 +82,21 @@ class CapturasInterface: cursor_result = self.anunciosdb.query(query_statement, dictionary=True) return cursor_result.fetchone() - def update_geo_data(self, referencia, fecha_captura, latitude, longitude, precision): + def update_geo_data( + self, referencia, fecha_captura, latitude, longitude, precision + ): query_statement = """ UPDATE anuncios.capturas SET latitud = %(latitud)s, longitud = %(longitud)s, `precision` = %(precision)s WHERE referencia = %(referencia)s AND fecha_captura = %(fecha_captura)s """ - query_parameters = {'referencia': referencia, - 'fecha_captura': fecha_captura, - 'latitud': latitude, - 'longitud': longitude, - 'precision': precision} + query_parameters = { + "referencia": referencia, + "fecha_captura": fecha_captura, + "latitud": latitude, + "longitud": longitude, + "precision": precision, + } self.anunciosdb.query(query_statement, query_parameters) @@ -114,13 +118,15 @@ class CapturasInterface: AND (`t1`.`fecha_captura` BETWEEN %(start_date)s AND %(end_date)s) ) """ - query_parameters = {'start_date': start_date.strftime('%Y-%m-%d 00:00:00'), - 'end_date': end_date.strftime('%Y-%m-%d 00:00:00')} + query_parameters = { + "start_date": start_date.strftime("%Y-%m-%d 00:00:00"), + "end_date": end_date.strftime("%Y-%m-%d 00:00:00"), + } - cursor_result = self.anunciosdb.query(query_statement, query_parameters, dictionary=True) + cursor_result = self.anunciosdb.query( + query_statement, query_parameters, dictionary=True + ) return cursor_result.fetchall() capturas_interface = CapturasInterface() - - diff --git a/db_layer/capturing_tasks_interface.py b/db_layer/capturing_tasks_interface.py index 27e917f..036aaba 100644 --- a/db_layer/capturing_tasks_interface.py +++ b/db_layer/capturing_tasks_interface.py @@ -1,25 +1,27 @@ import uuid from core.mysql_wrapper import get_tasksdb -class CapturingTasksInterface: +class CapturingTasksInterface: def __init__(self): self.tasksdb = get_tasksdb() def create_capturing_task(self, referencia, uuid_exploring=None): - ads_root = 'https://www.idealista.com/inmueble/' + ads_root = "https://www.idealista.com/inmueble/" - query_parameters = {'ad_url': ads_root + referencia, - 'uuid': str(uuid.uuid4()), - 'status': 'Pending'} + query_parameters = { + "ad_url": ads_root + referencia, + "uuid": str(uuid.uuid4()), + "status": "Pending", + } if uuid_exploring is None: query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, ad_url) VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)""" else: - query_parameters['uuid_exploring'] = uuid_exploring + query_parameters["uuid_exploring"] = uuid_exploring query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, ad_url, fk_uuid_exploring) VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)""" @@ -43,16 +45,14 @@ class CapturingTasksInterface: return None def update_capturing_task(self, uuid, uuid_exploring, status, ad_url): - query_parameters = {'ad_url': ad_url, - 'uuid': uuid, - 'status': status} + query_parameters = {"ad_url": ad_url, "uuid": uuid, "status": status} if uuid_exploring is None: query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, ad_url) VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)""" else: - query_parameters['uuid_exploring'] = uuid_exploring + query_parameters["uuid_exploring"] = uuid_exploring query_statement = """INSERT INTO capturing_tasks_logs (uuid, write_time, status, ad_url, fk_uuid_exploring) VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)""" @@ -74,4 +74,5 @@ class CapturingTasksInterface: except: return 999 + capturing_interface = CapturingTasksInterface() diff --git a/db_layer/indices_interface.py b/db_layer/indices_interface.py index c4b1069..ea256a2 100644 --- a/db_layer/indices_interface.py +++ b/db_layer/indices_interface.py @@ -2,7 +2,6 @@ from core.mysql_wrapper import get_anunciosdb class IndicesInterface: - def __init__(self): self.anunciosdb = get_anunciosdb() diff --git a/refresher/refresher.py b/refresher/refresher.py index eb76ce8..b9cc272 100644 --- a/refresher/refresher.py +++ b/refresher/refresher.py @@ -1,5 +1,6 @@ import sys -sys.path.append('..') + +sys.path.append("..") from time import sleep from db_layer.capturas_interface import capturas_interface from db_layer.capturing_tasks_interface import capturing_interface @@ -7,7 +8,6 @@ from core.config import refresher_delay class Refresher: - def start(self): while True: @@ -15,7 +15,7 @@ class Refresher: old_ad = capturas_interface.get_old_ad() if old_ad: - capturing_interface.create_capturing_task(str(old_ad['referencia'])) + capturing_interface.create_capturing_task(str(old_ad["referencia"])) @staticmethod def dead_ad_checker(html): @@ -25,7 +25,7 @@ class Refresher: :return: True si esta dado de baja, False si no. """ try: - if ':-|' in html or 'El anunciante lo dio de baja' in html: + if ":-|" in html or "El anunciante lo dio de baja" in html: return True else: return False @@ -33,17 +33,6 @@ class Refresher: return False -if __name__ == '__main__': +if __name__ == "__main__": refresher = Refresher() refresher.start() - - - - - - - - - - - diff --git a/tests/capturer_tests.py b/tests/capturer_tests.py index b558b59..772b1f5 100644 --- a/tests/capturer_tests.py +++ b/tests/capturer_tests.py @@ -1,15 +1,18 @@ # -*- coding: utf-8 -*- import sys -sys.path.append('..') + +sys.path.append("..") from capturer.capturer import CapturingTask, Capturer, AdHtmlParser from db_layer.capturas_interface import capturas_interface def test_CapturingTask(): - parameters = {'uuid': 'testie test', - 'ad_url': 'https://www.idealista.com/inmueble/28252032', - 'fk_uuid_exploring': None, - 'status': 'Pending'} + parameters = { + "uuid": "testie test", + "ad_url": "https://www.idealista.com/inmueble/28252032", + "fk_uuid_exploring": None, + "status": "Pending", + } task = CapturingTask(parameters) @@ -22,6 +25,7 @@ def test_Capturer(): capturer = Capturer() capturer.start() + def test_AdHtmlParser(): html = """ @@ -225,8 +229,8 @@ var configTwoSteps = { parser._validate() -#test_AdHtmlParser() +# test_AdHtmlParser() test_CapturingTask() -#test_Capturer() \ No newline at end of file +# test_Capturer() diff --git a/tests/geocoder_tests.py b/tests/geocoder_tests.py index ee61a9c..9e6b3ee 100644 --- a/tests/geocoder_tests.py +++ b/tests/geocoder_tests.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- import sys -sys.path.append('..') -from geocoder.geocoder import Geocoder, GeocodingTask, GeocodingCache +sys.path.append("..") +from geocoder.geocoder import Geocoder, GeocodingTask, GeocodingCache def test_GeocodingTask(): - good_address = 'Avinguda de la Republica Argentina 245, Barcelona' - bad_address = 'ASdasda, 123asd' + good_address = "Avinguda de la Republica Argentina 245, Barcelona" + bad_address = "ASdasda, 123asd" good_task = GeocodingTask(good_address) good_task.geocode() @@ -20,25 +20,30 @@ def test_GeocodingTask(): print(bad_address.is_successfull()) print(bad_address.get_results()) + def test_GeocodingCache(): cache = GeocodingCache() - test_record = {'address':'Calle Don Pepito', - 'latitude': 12.1, - 'longitude': 1.12, - 'precision': 'absoluta'} + test_record = { + "address": "Calle Don Pepito", + "latitude": 12.1, + "longitude": 1.12, + "precision": "absoluta", + } - print(cache.address_in_cache(test_record['address'])) - cache.add_address(test_record['address'], - test_record['latitude'], - test_record['longitude'], - test_record['precision']) + print(cache.address_in_cache(test_record["address"])) + cache.add_address( + test_record["address"], + test_record["latitude"], + test_record["longitude"], + test_record["precision"], + ) - print(cache.address_in_cache(test_record['address'])) - print(cache.get_coordinates(test_record['address'])) + print(cache.address_in_cache(test_record["address"])) + print(cache.get_coordinates(test_record["address"])) -#test_GeocodingTask() +# test_GeocodingTask() -test_GeocodingCache() \ No newline at end of file +test_GeocodingCache() diff --git a/tests/index_batch_tests.py b/tests/index_batch_tests.py index f7cbbf2..08d5593 100644 --- a/tests/index_batch_tests.py +++ b/tests/index_batch_tests.py @@ -3,104 +3,125 @@ from analysis.index_batch import IndexMM import pandas as pd sample_market = [ - {'tamano_categorico': 'coche pequeño', - 'tipo_anuncio': 1, - 'precio': 15000, - 'calle': 'B1', - 'telefono': 123, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche pequeño', - 'tipo_anuncio': 1, - 'precio': 20000, - 'calle': 'B2', - 'telefono': 321, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche grande', - 'tipo_anuncio': 1, - 'precio': 20000, - 'calle': 'B2', - 'telefono': 321, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche grande', - 'tipo_anuncio': 1, - 'precio': 25000, - 'calle': 'B2', - 'telefono': 123, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche y moto', - 'tipo_anuncio': 1, - 'precio': 22000, - 'calle': 'B1', - 'telefono': 456, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche y moto', - 'tipo_anuncio': 1, - 'precio': 26000, - 'calle': 'B3', - 'telefono': 789, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': None, - 'tipo_anuncio': 1, - 'precio': 15000, - 'calle': 'abc', - 'telefono': 456, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'moto', - 'tipo_anuncio': 1, - 'precio': 3000, - 'calle': 'B4', - 'telefono': 123, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': '2 coches o más', - 'tipo_anuncio': 1, - 'precio': 60000, - 'calle': 'B4', - 'telefono': 123, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche pequeño', - 'tipo_anuncio': 1, - 'precio': 20000, - 'calle': 'B2', - 'telefono': 321, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'coche pequeño', - 'tipo_anuncio': 2, - 'precio': 50, - 'calle': 'B4', - 'telefono': 123, - 'latitud': 2.1, - 'longitud': 1.2}, - {'tamano_categorico': 'moto', - 'tipo_anuncio': 1, - 'precio': 300000, - 'calle': 'B4', - 'telefono': 123, - 'latitud': 2.1, - 'longitud': 1.2} - ] -date_range = {'start': '2018-01-01 00:00:00', - 'end': '2018-02-01 00:00:00'} + { + "tamano_categorico": "coche pequeño", + "tipo_anuncio": 1, + "precio": 15000, + "calle": "B1", + "telefono": 123, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche pequeño", + "tipo_anuncio": 1, + "precio": 20000, + "calle": "B2", + "telefono": 321, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche grande", + "tipo_anuncio": 1, + "precio": 20000, + "calle": "B2", + "telefono": 321, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche grande", + "tipo_anuncio": 1, + "precio": 25000, + "calle": "B2", + "telefono": 123, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche y moto", + "tipo_anuncio": 1, + "precio": 22000, + "calle": "B1", + "telefono": 456, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche y moto", + "tipo_anuncio": 1, + "precio": 26000, + "calle": "B3", + "telefono": 789, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": None, + "tipo_anuncio": 1, + "precio": 15000, + "calle": "abc", + "telefono": 456, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "moto", + "tipo_anuncio": 1, + "precio": 3000, + "calle": "B4", + "telefono": 123, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "2 coches o más", + "tipo_anuncio": 1, + "precio": 60000, + "calle": "B4", + "telefono": 123, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche pequeño", + "tipo_anuncio": 1, + "precio": 20000, + "calle": "B2", + "telefono": 321, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "coche pequeño", + "tipo_anuncio": 2, + "precio": 50, + "calle": "B4", + "telefono": 123, + "latitud": 2.1, + "longitud": 1.2, + }, + { + "tamano_categorico": "moto", + "tipo_anuncio": 1, + "precio": 300000, + "calle": "B4", + "telefono": 123, + "latitud": 2.1, + "longitud": 1.2, + }, +] +date_range = {"start": "2018-01-01 00:00:00", "end": "2018-02-01 00:00:00"} market = Market() -market.load_market(sample_market, - date_range=date_range) +market.load_market(sample_market, date_range=date_range) market.market.fillna(value=pd.np.nan, inplace=True) print(market.market.to_string()) -market.clean_market('index') +market.clean_market("index") print(market.market.to_string()) index = IndexMM() index.calculate(market) index.get_data() - diff --git a/tests/refresher_tests.py b/tests/refresher_tests.py index 4b1e296..bf7862c 100644 --- a/tests/refresher_tests.py +++ b/tests/refresher_tests.py @@ -4,7 +4,6 @@ print(capturas_interface.old_ads_exist()) print(capturas_interface.get_old_ad()) - html_baja = """ @@ -256,4 +255,4 @@ var configTwoSteps = { """ print(Refresher.dead_ad_checker(html_baja)) -print(Refresher.dead_ad_checker(html_normal)) \ No newline at end of file +print(Refresher.dead_ad_checker(html_normal)) diff --git a/tests/scrapping_utils_tests.py b/tests/scrapping_utils_tests.py index 576b3af..27615c7 100644 --- a/tests/scrapping_utils_tests.py +++ b/tests/scrapping_utils_tests.py @@ -1,7 +1,6 @@ from core.scrapping_utils import * - def UrlAttack_test(url): attack = UrlAttack(url) @@ -14,4 +13,4 @@ def UrlAttack_test(url): print(attack.get_text()) -UrlAttack_test('https://www.idealista.com/inmueble/82810718/') \ No newline at end of file +UrlAttack_test("https://www.idealista.com/inmueble/82810718/")