Formatting.

This commit is contained in:
pablo 2020-11-03 07:29:17 +01:00
parent cd9c3b6e39
commit a79fc533ee
11 changed files with 231 additions and 204 deletions

View file

@ -2,26 +2,27 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
import smtplib import smtplib
my_address = 'drogonalerts@gmail.com' my_address = "drogonalerts@gmail.com"
master_address = 'pablomartincalvo@gmail.com' master_address = "pablomartincalvo@gmail.com"
def alert_master(header, message): def alert_master(header, message):
msg = MIMEMultipart() msg = MIMEMultipart()
password = "noesfacilvivirsindrogon" password = "noesfacilvivirsindrogon"
msg['From'] = my_address msg["From"] = my_address
msg['To'] = master_address msg["To"] = master_address
msg['Subject'] = header msg["Subject"] = header
msg.attach(MIMEText(message, 'plain')) msg.attach(MIMEText(message, "plain"))
server = smtplib.SMTP('smtp.gmail.com: 465') server = smtplib.SMTP("smtp.gmail.com: 465")
server.starttls() server.starttls()
server.login(msg['From'], password) server.login(msg["From"], password)
server.sendmail(msg['From'], msg['To'], msg.as_string()) server.sendmail(msg["From"], msg["To"], msg.as_string())
server.quit() server.quit()

View file

@ -1,31 +1,34 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys import sys
sys.path.append('..')
sys.path.append("..")
import mysql.connector import mysql.connector
from core.alerts import alert_master from core.alerts import alert_master
from core.config import current_db_parameters from core.config import current_db_parameters
anuncios_db_parameters = {'database': 'anuncios', **current_db_parameters} anuncios_db_parameters = {"database": "anuncios", **current_db_parameters}
tasks_db_parameters = {'database': 'tasks', **current_db_parameters} tasks_db_parameters = {"database": "tasks", **current_db_parameters}
class DatabaseWrapper():
class DatabaseWrapper:
def __init__(self, connection_parameters): def __init__(self, connection_parameters):
self.host = connection_parameters['host'] self.host = connection_parameters["host"]
self.database = connection_parameters['database'] self.database = connection_parameters["database"]
self.user = connection_parameters['user'] self.user = connection_parameters["user"]
self.password = connection_parameters['password'] self.password = connection_parameters["password"]
self.connection = None self.connection = None
self.ping() self.ping()
def connect(self): def connect(self):
self.connection = mysql.connector.connect(host = self.host, self.connection = mysql.connector.connect(
database = self.database, host=self.host,
user = self.user, database=self.database,
password = self.password, user=self.user,
autocommit = False) password=self.password,
autocommit=False,
)
def disconnect(self): def disconnect(self):
if self.connection.is_connected(): if self.connection.is_connected():
self.connection.disconnect() self.connection.disconnect()
@ -33,41 +36,41 @@ class DatabaseWrapper():
def ping(self): def ping(self):
self.connect() self.connect()
self.disconnect() self.disconnect()
def query(self, query_statement, query_parameters=None, dictionary=False): def query(self, query_statement, query_parameters=None, dictionary=False):
self.connect() self.connect()
if self.connection.is_connected(): if self.connection.is_connected():
try: try:
execution_cursor = self.connection.cursor(dictionary=dictionary, execution_cursor = self.connection.cursor(
buffered=True) dictionary=dictionary, buffered=True
)
execution_cursor.execute(query_statement, query_parameters) execution_cursor.execute(query_statement, query_parameters)
self.connection.commit() self.connection.commit()
self.disconnect() self.disconnect()
return execution_cursor return execution_cursor
except Exception as e: except Exception as e:
alert_master("SQL ERROR", """Se ha producido un error ejecutando la alert_master(
"SQL ERROR",
"""Se ha producido un error ejecutando la
siguiente query: {}. siguiente query: {}.
Con los siguientes parametros: {} Con los siguientes parametros: {}
{} {}
""".format(query_statement, """.format(
query_parameters, query_statement, query_parameters, e
e)) ),
)
else: else:
raise Exception("Could not connect to the database.") raise Exception("Could not connect to the database.")
def query_dict(self, query_statement, query_parameters=None):
def query_dict(self, query_statement, query_parameters = None): return self.query(query_statement, query_parameters, dictionary=True)
return self.query(query_statement, query_parameters, dictionary = True)
def get_anunciosdb(): def get_anunciosdb():
return DatabaseWrapper(anuncios_db_parameters) return DatabaseWrapper(anuncios_db_parameters)
def get_tasksdb(): def get_tasksdb():
return DatabaseWrapper(tasks_db_parameters) return DatabaseWrapper(tasks_db_parameters)

View file

@ -2,19 +2,20 @@ from core.mysql_wrapper import get_anunciosdb
class CapturasInterface: class CapturasInterface:
def __init__(self): def __init__(self):
self.anunciosdb = get_anunciosdb() self.anunciosdb = get_anunciosdb()
def insert_captura(self, ad_data): def insert_captura(self, ad_data):
columns = ', '.join(ad_data.keys()) columns = ", ".join(ad_data.keys())
placeholders_string = ', '.join(['%s'] * len(ad_data)) placeholders_string = ", ".join(["%s"] * len(ad_data))
query_statement = """ INSERT INTO capturas query_statement = """ INSERT INTO capturas
( fecha_captura, {} ) ( fecha_captura, {} )
VALUES( NOW(), {} )""".format(columns, placeholders_string) VALUES( NOW(), {} )""".format(
columns, placeholders_string
)
query_parameters = tuple([v for v in ad_data.values()]) query_parameters = tuple([v for v in ad_data.values()])
@ -71,7 +72,6 @@ class CapturasInterface:
return result > 0 return result > 0
def get_not_geocoded_captura(self): def get_not_geocoded_captura(self):
query_statement = """ query_statement = """
SELECT * SELECT *
@ -82,17 +82,21 @@ class CapturasInterface:
cursor_result = self.anunciosdb.query(query_statement, dictionary=True) cursor_result = self.anunciosdb.query(query_statement, dictionary=True)
return cursor_result.fetchone() return cursor_result.fetchone()
def update_geo_data(self, referencia, fecha_captura, latitude, longitude, precision): def update_geo_data(
self, referencia, fecha_captura, latitude, longitude, precision
):
query_statement = """ query_statement = """
UPDATE anuncios.capturas UPDATE anuncios.capturas
SET latitud = %(latitud)s, longitud = %(longitud)s, `precision` = %(precision)s SET latitud = %(latitud)s, longitud = %(longitud)s, `precision` = %(precision)s
WHERE referencia = %(referencia)s AND fecha_captura = %(fecha_captura)s WHERE referencia = %(referencia)s AND fecha_captura = %(fecha_captura)s
""" """
query_parameters = {'referencia': referencia, query_parameters = {
'fecha_captura': fecha_captura, "referencia": referencia,
'latitud': latitude, "fecha_captura": fecha_captura,
'longitud': longitude, "latitud": latitude,
'precision': precision} "longitud": longitude,
"precision": precision,
}
self.anunciosdb.query(query_statement, query_parameters) self.anunciosdb.query(query_statement, query_parameters)
@ -114,13 +118,15 @@ class CapturasInterface:
AND (`t1`.`fecha_captura` BETWEEN %(start_date)s AND %(end_date)s) AND (`t1`.`fecha_captura` BETWEEN %(start_date)s AND %(end_date)s)
) )
""" """
query_parameters = {'start_date': start_date.strftime('%Y-%m-%d 00:00:00'), query_parameters = {
'end_date': end_date.strftime('%Y-%m-%d 00:00:00')} "start_date": start_date.strftime("%Y-%m-%d 00:00:00"),
"end_date": end_date.strftime("%Y-%m-%d 00:00:00"),
}
cursor_result = self.anunciosdb.query(query_statement, query_parameters, dictionary=True) cursor_result = self.anunciosdb.query(
query_statement, query_parameters, dictionary=True
)
return cursor_result.fetchall() return cursor_result.fetchall()
capturas_interface = CapturasInterface() capturas_interface = CapturasInterface()

View file

@ -1,25 +1,27 @@
import uuid import uuid
from core.mysql_wrapper import get_tasksdb from core.mysql_wrapper import get_tasksdb
class CapturingTasksInterface:
class CapturingTasksInterface:
def __init__(self): def __init__(self):
self.tasksdb = get_tasksdb() self.tasksdb = get_tasksdb()
def create_capturing_task(self, referencia, uuid_exploring=None): def create_capturing_task(self, referencia, uuid_exploring=None):
ads_root = 'https://www.idealista.com/inmueble/' ads_root = "https://www.idealista.com/inmueble/"
query_parameters = {'ad_url': ads_root + referencia, query_parameters = {
'uuid': str(uuid.uuid4()), "ad_url": ads_root + referencia,
'status': 'Pending'} "uuid": str(uuid.uuid4()),
"status": "Pending",
}
if uuid_exploring is None: if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url) (uuid, write_time, status, ad_url)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)""" VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else: else:
query_parameters['uuid_exploring'] = uuid_exploring query_parameters["uuid_exploring"] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url, fk_uuid_exploring) (uuid, write_time, status, ad_url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)""" VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
@ -43,16 +45,14 @@ class CapturingTasksInterface:
return None return None
def update_capturing_task(self, uuid, uuid_exploring, status, ad_url): def update_capturing_task(self, uuid, uuid_exploring, status, ad_url):
query_parameters = {'ad_url': ad_url, query_parameters = {"ad_url": ad_url, "uuid": uuid, "status": status}
'uuid': uuid,
'status': status}
if uuid_exploring is None: if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url) (uuid, write_time, status, ad_url)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)""" VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else: else:
query_parameters['uuid_exploring'] = uuid_exploring query_parameters["uuid_exploring"] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url, fk_uuid_exploring) (uuid, write_time, status, ad_url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)""" VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
@ -74,4 +74,5 @@ class CapturingTasksInterface:
except: except:
return 999 return 999
capturing_interface = CapturingTasksInterface() capturing_interface = CapturingTasksInterface()

View file

@ -2,7 +2,6 @@ from core.mysql_wrapper import get_anunciosdb
class IndicesInterface: class IndicesInterface:
def __init__(self): def __init__(self):
self.anunciosdb = get_anunciosdb() self.anunciosdb = get_anunciosdb()

View file

@ -1,5 +1,6 @@
import sys import sys
sys.path.append('..')
sys.path.append("..")
from time import sleep from time import sleep
from db_layer.capturas_interface import capturas_interface from db_layer.capturas_interface import capturas_interface
from db_layer.capturing_tasks_interface import capturing_interface from db_layer.capturing_tasks_interface import capturing_interface
@ -7,7 +8,6 @@ from core.config import refresher_delay
class Refresher: class Refresher:
def start(self): def start(self):
while True: while True:
@ -15,7 +15,7 @@ class Refresher:
old_ad = capturas_interface.get_old_ad() old_ad = capturas_interface.get_old_ad()
if old_ad: if old_ad:
capturing_interface.create_capturing_task(str(old_ad['referencia'])) capturing_interface.create_capturing_task(str(old_ad["referencia"]))
@staticmethod @staticmethod
def dead_ad_checker(html): def dead_ad_checker(html):
@ -25,7 +25,7 @@ class Refresher:
:return: True si esta dado de baja, False si no. :return: True si esta dado de baja, False si no.
""" """
try: try:
if ':-|' in html or 'El anunciante lo dio de baja' in html: if ":-|" in html or "El anunciante lo dio de baja" in html:
return True return True
else: else:
return False return False
@ -33,17 +33,6 @@ class Refresher:
return False return False
if __name__ == '__main__': if __name__ == "__main__":
refresher = Refresher() refresher = Refresher()
refresher.start() refresher.start()

View file

@ -1,15 +1,18 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys import sys
sys.path.append('..')
sys.path.append("..")
from capturer.capturer import CapturingTask, Capturer, AdHtmlParser from capturer.capturer import CapturingTask, Capturer, AdHtmlParser
from db_layer.capturas_interface import capturas_interface from db_layer.capturas_interface import capturas_interface
def test_CapturingTask(): def test_CapturingTask():
parameters = {'uuid': 'testie test', parameters = {
'ad_url': 'https://www.idealista.com/inmueble/28252032', "uuid": "testie test",
'fk_uuid_exploring': None, "ad_url": "https://www.idealista.com/inmueble/28252032",
'status': 'Pending'} "fk_uuid_exploring": None,
"status": "Pending",
}
task = CapturingTask(parameters) task = CapturingTask(parameters)
@ -22,6 +25,7 @@ def test_Capturer():
capturer = Capturer() capturer = Capturer()
capturer.start() capturer.start()
def test_AdHtmlParser(): def test_AdHtmlParser():
html = """ html = """
@ -225,8 +229,8 @@ var configTwoSteps = {
parser._validate() parser._validate()
#test_AdHtmlParser() # test_AdHtmlParser()
test_CapturingTask() test_CapturingTask()
#test_Capturer() # test_Capturer()

View file

@ -1,14 +1,14 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys import sys
sys.path.append('..')
from geocoder.geocoder import Geocoder, GeocodingTask, GeocodingCache
sys.path.append("..")
from geocoder.geocoder import Geocoder, GeocodingTask, GeocodingCache
def test_GeocodingTask(): def test_GeocodingTask():
good_address = 'Avinguda de la Republica Argentina 245, Barcelona' good_address = "Avinguda de la Republica Argentina 245, Barcelona"
bad_address = 'ASdasda, 123asd' bad_address = "ASdasda, 123asd"
good_task = GeocodingTask(good_address) good_task = GeocodingTask(good_address)
good_task.geocode() good_task.geocode()
@ -20,25 +20,30 @@ def test_GeocodingTask():
print(bad_address.is_successfull()) print(bad_address.is_successfull())
print(bad_address.get_results()) print(bad_address.get_results())
def test_GeocodingCache(): def test_GeocodingCache():
cache = GeocodingCache() cache = GeocodingCache()
test_record = {'address':'Calle Don Pepito', test_record = {
'latitude': 12.1, "address": "Calle Don Pepito",
'longitude': 1.12, "latitude": 12.1,
'precision': 'absoluta'} "longitude": 1.12,
"precision": "absoluta",
}
print(cache.address_in_cache(test_record['address'])) print(cache.address_in_cache(test_record["address"]))
cache.add_address(test_record['address'], cache.add_address(
test_record['latitude'], test_record["address"],
test_record['longitude'], test_record["latitude"],
test_record['precision']) test_record["longitude"],
test_record["precision"],
)
print(cache.address_in_cache(test_record['address'])) print(cache.address_in_cache(test_record["address"]))
print(cache.get_coordinates(test_record['address'])) print(cache.get_coordinates(test_record["address"]))
#test_GeocodingTask() # test_GeocodingTask()
test_GeocodingCache() test_GeocodingCache()

View file

@ -3,104 +3,125 @@ from analysis.index_batch import IndexMM
import pandas as pd import pandas as pd
sample_market = [ sample_market = [
{'tamano_categorico': 'coche pequeño', {
'tipo_anuncio': 1, "tamano_categorico": "coche pequeño",
'precio': 15000, "tipo_anuncio": 1,
'calle': 'B1', "precio": 15000,
'telefono': 123, "calle": "B1",
'latitud': 2.1, "telefono": 123,
'longitud': 1.2}, "latitud": 2.1,
{'tamano_categorico': 'coche pequeño', "longitud": 1.2,
'tipo_anuncio': 1, },
'precio': 20000, {
'calle': 'B2', "tamano_categorico": "coche pequeño",
'telefono': 321, "tipo_anuncio": 1,
'latitud': 2.1, "precio": 20000,
'longitud': 1.2}, "calle": "B2",
{'tamano_categorico': 'coche grande', "telefono": 321,
'tipo_anuncio': 1, "latitud": 2.1,
'precio': 20000, "longitud": 1.2,
'calle': 'B2', },
'telefono': 321, {
'latitud': 2.1, "tamano_categorico": "coche grande",
'longitud': 1.2}, "tipo_anuncio": 1,
{'tamano_categorico': 'coche grande', "precio": 20000,
'tipo_anuncio': 1, "calle": "B2",
'precio': 25000, "telefono": 321,
'calle': 'B2', "latitud": 2.1,
'telefono': 123, "longitud": 1.2,
'latitud': 2.1, },
'longitud': 1.2}, {
{'tamano_categorico': 'coche y moto', "tamano_categorico": "coche grande",
'tipo_anuncio': 1, "tipo_anuncio": 1,
'precio': 22000, "precio": 25000,
'calle': 'B1', "calle": "B2",
'telefono': 456, "telefono": 123,
'latitud': 2.1, "latitud": 2.1,
'longitud': 1.2}, "longitud": 1.2,
{'tamano_categorico': 'coche y moto', },
'tipo_anuncio': 1, {
'precio': 26000, "tamano_categorico": "coche y moto",
'calle': 'B3', "tipo_anuncio": 1,
'telefono': 789, "precio": 22000,
'latitud': 2.1, "calle": "B1",
'longitud': 1.2}, "telefono": 456,
{'tamano_categorico': None, "latitud": 2.1,
'tipo_anuncio': 1, "longitud": 1.2,
'precio': 15000, },
'calle': 'abc', {
'telefono': 456, "tamano_categorico": "coche y moto",
'latitud': 2.1, "tipo_anuncio": 1,
'longitud': 1.2}, "precio": 26000,
{'tamano_categorico': 'moto', "calle": "B3",
'tipo_anuncio': 1, "telefono": 789,
'precio': 3000, "latitud": 2.1,
'calle': 'B4', "longitud": 1.2,
'telefono': 123, },
'latitud': 2.1, {
'longitud': 1.2}, "tamano_categorico": None,
{'tamano_categorico': '2 coches o más', "tipo_anuncio": 1,
'tipo_anuncio': 1, "precio": 15000,
'precio': 60000, "calle": "abc",
'calle': 'B4', "telefono": 456,
'telefono': 123, "latitud": 2.1,
'latitud': 2.1, "longitud": 1.2,
'longitud': 1.2}, },
{'tamano_categorico': 'coche pequeño', {
'tipo_anuncio': 1, "tamano_categorico": "moto",
'precio': 20000, "tipo_anuncio": 1,
'calle': 'B2', "precio": 3000,
'telefono': 321, "calle": "B4",
'latitud': 2.1, "telefono": 123,
'longitud': 1.2}, "latitud": 2.1,
{'tamano_categorico': 'coche pequeño', "longitud": 1.2,
'tipo_anuncio': 2, },
'precio': 50, {
'calle': 'B4', "tamano_categorico": "2 coches o más",
'telefono': 123, "tipo_anuncio": 1,
'latitud': 2.1, "precio": 60000,
'longitud': 1.2}, "calle": "B4",
{'tamano_categorico': 'moto', "telefono": 123,
'tipo_anuncio': 1, "latitud": 2.1,
'precio': 300000, "longitud": 1.2,
'calle': 'B4', },
'telefono': 123, {
'latitud': 2.1, "tamano_categorico": "coche pequeño",
'longitud': 1.2} "tipo_anuncio": 1,
] "precio": 20000,
date_range = {'start': '2018-01-01 00:00:00', "calle": "B2",
'end': '2018-02-01 00:00:00'} "telefono": 321,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche pequeño",
"tipo_anuncio": 2,
"precio": 50,
"calle": "B4",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "moto",
"tipo_anuncio": 1,
"precio": 300000,
"calle": "B4",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
]
date_range = {"start": "2018-01-01 00:00:00", "end": "2018-02-01 00:00:00"}
market = Market() market = Market()
market.load_market(sample_market, market.load_market(sample_market, date_range=date_range)
date_range=date_range)
market.market.fillna(value=pd.np.nan, inplace=True) market.market.fillna(value=pd.np.nan, inplace=True)
print(market.market.to_string()) print(market.market.to_string())
market.clean_market('index') market.clean_market("index")
print(market.market.to_string()) print(market.market.to_string())
index = IndexMM() index = IndexMM()
index.calculate(market) index.calculate(market)
index.get_data() index.get_data()

View file

@ -4,7 +4,6 @@ print(capturas_interface.old_ads_exist())
print(capturas_interface.get_old_ad()) print(capturas_interface.get_old_ad())
html_baja = """ html_baja = """
<!DOCTYPE html> <!DOCTYPE html>
@ -256,4 +255,4 @@ var configTwoSteps = {
""" """
print(Refresher.dead_ad_checker(html_baja)) print(Refresher.dead_ad_checker(html_baja))
print(Refresher.dead_ad_checker(html_normal)) print(Refresher.dead_ad_checker(html_normal))

View file

@ -1,7 +1,6 @@
from core.scrapping_utils import * from core.scrapping_utils import *
def UrlAttack_test(url): def UrlAttack_test(url):
attack = UrlAttack(url) attack = UrlAttack(url)
@ -14,4 +13,4 @@ def UrlAttack_test(url):
print(attack.get_text()) print(attack.get_text())
UrlAttack_test('https://www.idealista.com/inmueble/82810718/') UrlAttack_test("https://www.idealista.com/inmueble/82810718/")