Formatting.

This commit is contained in:
pablo 2020-11-03 07:29:17 +01:00
parent cd9c3b6e39
commit a79fc533ee
11 changed files with 231 additions and 204 deletions

View file

@ -2,26 +2,27 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import smtplib
my_address = 'drogonalerts@gmail.com'
master_address = 'pablomartincalvo@gmail.com'
my_address = "drogonalerts@gmail.com"
master_address = "pablomartincalvo@gmail.com"
def alert_master(header, message):
    """Email an alert to the maintainer through Gmail.

    Builds a plain-text MIME message from ``my_address`` to
    ``master_address`` and sends it over a TLS-wrapped SMTP session.

    Args:
        header: Text used as the email subject.
        message: Plain-text body of the email.

    Raises:
        Whatever ``smtplib`` raises when connecting, logging in or sending
        fails; nothing is swallowed here.
    """
    # NOTE(review): the credential is hard-coded in source; consider loading
    # it from configuration instead.
    password = "noesfacilvivirsindrogon"

    msg = MIMEMultipart()
    msg["From"] = my_address
    msg["To"] = master_address
    msg["Subject"] = header
    msg.attach(MIMEText(message, "plain"))

    # Port 465 expects TLS from the first byte, so it needs SMTP_SSL; opening
    # a plain SMTP session there and calling starttls() (the port-587 flow)
    # never gets a valid greeting. The context manager also guarantees the
    # session is closed when login or delivery fails.
    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
        server.login(msg["From"], password)
        server.sendmail(msg["From"], msg["To"], msg.as_string())

View file

@ -1,31 +1,34 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('..')
sys.path.append("..")
import mysql.connector
from core.alerts import alert_master
from core.config import current_db_parameters
anuncios_db_parameters = {'database': 'anuncios', **current_db_parameters}
tasks_db_parameters = {'database': 'tasks', **current_db_parameters}
anuncios_db_parameters = {"database": "anuncios", **current_db_parameters}
tasks_db_parameters = {"database": "tasks", **current_db_parameters}
class DatabaseWrapper:
    """Small MySQL helper that opens one connection per operation.

    The connection settings are copied from a parameter dict at construction
    time. Every query connects, executes, commits and disconnects again, so
    no connection is held open between calls.
    """

    def __init__(self, connection_parameters):
        """Store the connection settings and verify they work.

        Args:
            connection_parameters: Mapping with the keys ``host``,
                ``database``, ``user`` and ``password``.

        Raises:
            KeyError: If one of the expected keys is missing.
        """
        self.host = connection_parameters["host"]
        self.database = connection_parameters["database"]
        self.user = connection_parameters["user"]
        self.password = connection_parameters["password"]
        self.connection = None

        # Fail early: open and close a connection once at construction.
        self.ping()

    def connect(self):
        """Open a new connection (autocommit off) and keep it on the instance."""
        self.connection = mysql.connector.connect(
            host=self.host,
            database=self.database,
            user=self.user,
            password=self.password,
            autocommit=False,
        )

    def disconnect(self):
        """Close the current connection if there is one and it is still open."""
        # Guard against being called before any successful connect().
        if self.connection is not None and self.connection.is_connected():
            self.connection.disconnect()

    def ping(self):
        """Open and immediately close a connection."""
        self.connect()
        self.disconnect()

    def query(self, query_statement, query_parameters=None, dictionary=False):
        """Run one statement in its own connection and commit it.

        Args:
            query_statement: SQL text, optionally with placeholders.
            query_parameters: Values for the placeholders, or ``None``.
            dictionary: Passed through to ``connection.cursor``.

        Returns:
            The buffered cursor used for execution, or ``None`` when the
            statement failed (an alert email is sent in that case).

        Raises:
            Exception: If the connection is not open after ``connect()``.
        """
        self.connect()
        if not self.connection.is_connected():
            raise Exception("Could not connect to the database.")

        try:
            execution_cursor = self.connection.cursor(
                dictionary=dictionary, buffered=True
            )
            execution_cursor.execute(query_statement, query_parameters)
            self.connection.commit()
            return execution_cursor
        except Exception as e:
            alert_master(
                "SQL ERROR",
                (
                    "Se ha producido un error ejecutando la\n"
                    "siguiente query: {}.\n"
                    "Con los siguientes parametros: {}\n"
                    "{}\n"
                ).format(query_statement, query_parameters, e),
            )
            # Failure is reported by email; callers receive None.
            return None
        finally:
            # Previously the connection was only closed on success, leaking
            # it whenever the statement raised.
            self.disconnect()

    def query_dict(self, query_statement, query_parameters=None):
        """Run ``query`` with ``dictionary=True``."""
        return self.query(query_statement, query_parameters, dictionary=True)
def get_anunciosdb():
    """Return a new ``DatabaseWrapper`` built from ``anuncios_db_parameters``."""
    return DatabaseWrapper(anuncios_db_parameters)
def get_tasksdb():
    """Return a new ``DatabaseWrapper`` built from ``tasks_db_parameters``."""
    return DatabaseWrapper(tasks_db_parameters)

View file

@ -2,19 +2,20 @@ from core.mysql_wrapper import get_anunciosdb
class CapturasInterface:
def __init__(self):
self.anunciosdb = get_anunciosdb()
def insert_captura(self, ad_data):
columns = ', '.join(ad_data.keys())
placeholders_string = ', '.join(['%s'] * len(ad_data))
columns = ", ".join(ad_data.keys())
placeholders_string = ", ".join(["%s"] * len(ad_data))
query_statement = """ INSERT INTO capturas
( fecha_captura, {} )
VALUES( NOW(), {} )""".format(columns, placeholders_string)
VALUES( NOW(), {} )""".format(
columns, placeholders_string
)
query_parameters = tuple([v for v in ad_data.values()])
@ -71,7 +72,6 @@ class CapturasInterface:
return result > 0
def get_not_geocoded_captura(self):
query_statement = """
SELECT *
@ -82,17 +82,21 @@ class CapturasInterface:
cursor_result = self.anunciosdb.query(query_statement, dictionary=True)
return cursor_result.fetchone()
def update_geo_data(
    self, referencia, fecha_captura, latitude, longitude, precision
):
    """Write geocoding results onto one row of ``anuncios.capturas``.

    The row is selected by ``referencia`` and ``fecha_captura``; its
    ``latitud``, ``longitud`` and ``precision`` columns are overwritten.

    Args:
        referencia: Value matched against the ``referencia`` column.
        fecha_captura: Value matched against the ``fecha_captura`` column.
        latitude: New value for ``latitud``.
        longitude: New value for ``longitud``.
        precision: New value for ``precision``.
    """
    # `precision` is back-quoted in the statement; named placeholders are
    # filled in by the database layer, not by string formatting.
    query_statement = """
        UPDATE anuncios.capturas
        SET latitud = %(latitud)s, longitud = %(longitud)s, `precision` = %(precision)s
        WHERE referencia = %(referencia)s AND fecha_captura = %(fecha_captura)s
        """
    query_parameters = {
        "referencia": referencia,
        "fecha_captura": fecha_captura,
        "latitud": latitude,
        "longitud": longitude,
        "precision": precision,
    }

    self.anunciosdb.query(query_statement, query_parameters)
@ -114,13 +118,15 @@ class CapturasInterface:
AND (`t1`.`fecha_captura` BETWEEN %(start_date)s AND %(end_date)s)
)
"""
query_parameters = {'start_date': start_date.strftime('%Y-%m-%d 00:00:00'),
'end_date': end_date.strftime('%Y-%m-%d 00:00:00')}
query_parameters = {
"start_date": start_date.strftime("%Y-%m-%d 00:00:00"),
"end_date": end_date.strftime("%Y-%m-%d 00:00:00"),
}
cursor_result = self.anunciosdb.query(query_statement, query_parameters, dictionary=True)
cursor_result = self.anunciosdb.query(
query_statement, query_parameters, dictionary=True
)
return cursor_result.fetchall()
capturas_interface = CapturasInterface()

View file

@ -1,25 +1,27 @@
import uuid
from core.mysql_wrapper import get_tasksdb
class CapturingTasksInterface:
class CapturingTasksInterface:
def __init__(self):
self.tasksdb = get_tasksdb()
def create_capturing_task(self, referencia, uuid_exploring=None):
ads_root = 'https://www.idealista.com/inmueble/'
ads_root = "https://www.idealista.com/inmueble/"
query_parameters = {'ad_url': ads_root + referencia,
'uuid': str(uuid.uuid4()),
'status': 'Pending'}
query_parameters = {
"ad_url": ads_root + referencia,
"uuid": str(uuid.uuid4()),
"status": "Pending",
}
if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else:
query_parameters['uuid_exploring'] = uuid_exploring
query_parameters["uuid_exploring"] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
@ -43,16 +45,14 @@ class CapturingTasksInterface:
return None
def update_capturing_task(self, uuid, uuid_exploring, status, ad_url):
query_parameters = {'ad_url': ad_url,
'uuid': uuid,
'status': status}
query_parameters = {"ad_url": ad_url, "uuid": uuid, "status": status}
if uuid_exploring is None:
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s)"""
else:
query_parameters['uuid_exploring'] = uuid_exploring
query_parameters["uuid_exploring"] = uuid_exploring
query_statement = """INSERT INTO capturing_tasks_logs
(uuid, write_time, status, ad_url, fk_uuid_exploring)
VALUES (%(uuid)s, NOW(), %(status)s, %(ad_url)s, %(uuid_exploring)s)"""
@ -74,4 +74,5 @@ class CapturingTasksInterface:
except:
return 999
capturing_interface = CapturingTasksInterface()

View file

@ -2,7 +2,6 @@ from core.mysql_wrapper import get_anunciosdb
class IndicesInterface:
def __init__(self):
self.anunciosdb = get_anunciosdb()

View file

@ -1,5 +1,6 @@
import sys
sys.path.append('..')
sys.path.append("..")
from time import sleep
from db_layer.capturas_interface import capturas_interface
from db_layer.capturing_tasks_interface import capturing_interface
@ -7,7 +8,6 @@ from core.config import refresher_delay
class Refresher:
def start(self):
while True:
@ -15,7 +15,7 @@ class Refresher:
old_ad = capturas_interface.get_old_ad()
if old_ad:
capturing_interface.create_capturing_task(str(old_ad['referencia']))
capturing_interface.create_capturing_task(str(old_ad["referencia"]))
@staticmethod
def dead_ad_checker(html):
@ -25,7 +25,7 @@ class Refresher:
:return: True si esta dado de baja, False si no.
"""
try:
if ':-|' in html or 'El anunciante lo dio de baja' in html:
if ":-|" in html or "El anunciante lo dio de baja" in html:
return True
else:
return False
@ -33,17 +33,6 @@ class Refresher:
return False
if __name__ == '__main__':
if __name__ == "__main__":
refresher = Refresher()
refresher.start()

View file

@ -1,15 +1,18 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('..')
sys.path.append("..")
from capturer.capturer import CapturingTask, Capturer, AdHtmlParser
from db_layer.capturas_interface import capturas_interface
def test_CapturingTask():
parameters = {'uuid': 'testie test',
'ad_url': 'https://www.idealista.com/inmueble/28252032',
'fk_uuid_exploring': None,
'status': 'Pending'}
parameters = {
"uuid": "testie test",
"ad_url": "https://www.idealista.com/inmueble/28252032",
"fk_uuid_exploring": None,
"status": "Pending",
}
task = CapturingTask(parameters)
@ -22,6 +25,7 @@ def test_Capturer():
capturer = Capturer()
capturer.start()
def test_AdHtmlParser():
html = """
@ -225,8 +229,8 @@ var configTwoSteps = {
parser._validate()
#test_AdHtmlParser()
# test_AdHtmlParser()
test_CapturingTask()
#test_Capturer()
# test_Capturer()

View file

@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('..')
from geocoder.geocoder import Geocoder, GeocodingTask, GeocodingCache
sys.path.append("..")
from geocoder.geocoder import Geocoder, GeocodingTask, GeocodingCache
def test_GeocodingTask():
good_address = 'Avinguda de la Republica Argentina 245, Barcelona'
bad_address = 'ASdasda, 123asd'
good_address = "Avinguda de la Republica Argentina 245, Barcelona"
bad_address = "ASdasda, 123asd"
good_task = GeocodingTask(good_address)
good_task.geocode()
@ -20,25 +20,30 @@ def test_GeocodingTask():
print(bad_address.is_successfull())
print(bad_address.get_results())
def test_GeocodingCache():
    """Manually exercise ``GeocodingCache`` and print what it returns.

    Prints whether a sample address is cached, adds it, then prints the
    membership check again followed by the stored coordinates. Nothing is
    asserted; the output is meant to be inspected by hand.
    """
    cache = GeocodingCache()

    test_record = {
        "address": "Calle Don Pepito",
        "latitude": 12.1,
        "longitude": 1.12,
        "precision": "absoluta",
    }

    # Lookup before the record has been added by this run.
    print(cache.address_in_cache(test_record["address"]))

    cache.add_address(
        test_record["address"],
        test_record["latitude"],
        test_record["longitude"],
        test_record["precision"],
    )

    # Lookup and retrieval after the insert.
    print(cache.address_in_cache(test_record["address"]))
    print(cache.get_coordinates(test_record["address"]))
#test_GeocodingTask()
# test_GeocodingTask()
test_GeocodingCache()
test_GeocodingCache()

View file

@ -3,104 +3,125 @@ from analysis.index_batch import IndexMM
import pandas as pd
sample_market = [
{'tamano_categorico': 'coche pequeño',
'tipo_anuncio': 1,
'precio': 15000,
'calle': 'B1',
'telefono': 123,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche pequeño',
'tipo_anuncio': 1,
'precio': 20000,
'calle': 'B2',
'telefono': 321,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche grande',
'tipo_anuncio': 1,
'precio': 20000,
'calle': 'B2',
'telefono': 321,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche grande',
'tipo_anuncio': 1,
'precio': 25000,
'calle': 'B2',
'telefono': 123,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche y moto',
'tipo_anuncio': 1,
'precio': 22000,
'calle': 'B1',
'telefono': 456,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche y moto',
'tipo_anuncio': 1,
'precio': 26000,
'calle': 'B3',
'telefono': 789,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': None,
'tipo_anuncio': 1,
'precio': 15000,
'calle': 'abc',
'telefono': 456,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'moto',
'tipo_anuncio': 1,
'precio': 3000,
'calle': 'B4',
'telefono': 123,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': '2 coches o más',
'tipo_anuncio': 1,
'precio': 60000,
'calle': 'B4',
'telefono': 123,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche pequeño',
'tipo_anuncio': 1,
'precio': 20000,
'calle': 'B2',
'telefono': 321,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'coche pequeño',
'tipo_anuncio': 2,
'precio': 50,
'calle': 'B4',
'telefono': 123,
'latitud': 2.1,
'longitud': 1.2},
{'tamano_categorico': 'moto',
'tipo_anuncio': 1,
'precio': 300000,
'calle': 'B4',
'telefono': 123,
'latitud': 2.1,
'longitud': 1.2}
]
date_range = {'start': '2018-01-01 00:00:00',
'end': '2018-02-01 00:00:00'}
{
"tamano_categorico": "coche pequeño",
"tipo_anuncio": 1,
"precio": 15000,
"calle": "B1",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche pequeño",
"tipo_anuncio": 1,
"precio": 20000,
"calle": "B2",
"telefono": 321,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche grande",
"tipo_anuncio": 1,
"precio": 20000,
"calle": "B2",
"telefono": 321,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche grande",
"tipo_anuncio": 1,
"precio": 25000,
"calle": "B2",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche y moto",
"tipo_anuncio": 1,
"precio": 22000,
"calle": "B1",
"telefono": 456,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche y moto",
"tipo_anuncio": 1,
"precio": 26000,
"calle": "B3",
"telefono": 789,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": None,
"tipo_anuncio": 1,
"precio": 15000,
"calle": "abc",
"telefono": 456,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "moto",
"tipo_anuncio": 1,
"precio": 3000,
"calle": "B4",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "2 coches o más",
"tipo_anuncio": 1,
"precio": 60000,
"calle": "B4",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche pequeño",
"tipo_anuncio": 1,
"precio": 20000,
"calle": "B2",
"telefono": 321,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "coche pequeño",
"tipo_anuncio": 2,
"precio": 50,
"calle": "B4",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
{
"tamano_categorico": "moto",
"tipo_anuncio": 1,
"precio": 300000,
"calle": "B4",
"telefono": 123,
"latitud": 2.1,
"longitud": 1.2,
},
]
date_range = {"start": "2018-01-01 00:00:00", "end": "2018-02-01 00:00:00"}
# Manual smoke run: load the sample market, clean it and compute the index.
market = Market()
market.load_market(sample_market, date_range=date_range)

# ``pd.np`` was deprecated in pandas 1.0 and later removed; float("nan") is
# the same NaN value without going through that alias.
market.market.fillna(value=float("nan"), inplace=True)
print(market.market.to_string())

market.clean_market("index")
print(market.market.to_string())

index = IndexMM()
index.calculate(market)
index.get_data()

View file

@ -4,7 +4,6 @@ print(capturas_interface.old_ads_exist())
print(capturas_interface.get_old_ad())
html_baja = """
<!DOCTYPE html>
@ -256,4 +255,4 @@ var configTwoSteps = {
"""
print(Refresher.dead_ad_checker(html_baja))
print(Refresher.dead_ad_checker(html_normal))
print(Refresher.dead_ad_checker(html_normal))

View file

@ -1,7 +1,6 @@
from core.scrapping_utils import *
def UrlAttack_test(url):
attack = UrlAttack(url)
@ -14,4 +13,4 @@ def UrlAttack_test(url):
print(attack.get_text())
UrlAttack_test('https://www.idealista.com/inmueble/82810718/')
UrlAttack_test("https://www.idealista.com/inmueble/82810718/")