class IndexMM:
    """Count-weighted mean price index over the car-sized listing categories.

    For every category in ``CATEGORIES`` the mean ``precio`` is weighted by
    the number of rows in that category; the index is the sum of those
    weighted means divided by the total number of rows.
    """

    # Categories entering the index, in the order their terms are summed.
    CATEGORIES = ('coche grande', 'coche y moto', 'coche pequeño')

    def __init__(self):
        self.name = 'indexmm'
        self.market = None
        self.date = None
        self.data = None
        self.value = None

    def calculate(self, market):
        """Compute the index for ``market`` and store it in ``self.value``.

        Args:
            market: object exposing ``end_date`` and ``get_market_data()``;
                the latter must return a DataFrame with the columns
                ``tamano_categorico`` and ``precio``.

        Side effects:
            Sets ``self.market``, ``self.date`` (the market's end date),
            ``self.data`` and ``self.value``. ``self.value`` is NaN when none
            of the categories has any row.
        """
        self.market = market
        self.date = self.market.end_date
        self.data = self.market.get_market_data()

        weighted_sum = 0.0
        total_count = 0
        for category in self.CATEGORIES:
            in_category = self.data['tamano_categorico'] == category
            prices = self.data.loc[in_category, 'precio']
            count = prices.shape[0]
            if count == 0:
                # The mean of an empty selection is NaN and 0 * NaN is NaN,
                # which would poison the whole index; an empty category
                # simply contributes nothing.
                continue
            weighted_sum += count * prices.mean()
            total_count += count

        self.value = weighted_sum / total_count if total_count else float('nan')

    def get_data(self):
        """Return the index as a dict with the keys ``name``, ``date``, ``value``.

        ``value`` is converted to a built-in ``float``; it is ``None`` when
        ``calculate`` has not been called yet.
        """
        value = None if self.value is None else float(self.value)
        return {'name': self.name,
                'date': self.date,
                'value': value}


if __name__ == '__main__':
    # Batch run: compute the index for every available date range and store it.
    for date_range in available_date_ranges:
        market = Market()
        market.load_market(capturas_interface.get_market_snapshot(date_range['start'], date_range['end']),
                           date_range=date_range)
        market.clean_market('index')

        index = IndexMM()
        index.calculate(market)

        indices_interface.write_index(index.get_data())
-- Computed index values: one row per (value date, index name).
-- Rows are written by IndicesInterface.write_index with REPLACE INTO, using
-- the index's `date`, `name` and `value` fields.
CREATE TABLE `valores_indices` (
  -- Date of the value (filled from the index's `date`).
  `fecha_valor` datetime NOT NULL,
  -- Name of the index (filled from the index's `name`).
  `nombre_indice` varchar(255) NOT NULL,
  -- Numeric value of the index (filled from the index's `value`).
  `valor_indice` double NOT NULL,
  -- Composite key: at most one stored value per index name and date.
  PRIMARY KEY (`fecha_valor`,`nombre_indice`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 ;
class IndicesInterface:
    """Writes index values to the `valores_indices` table."""

    # Statement used by write_index; the named placeholders are filled from
    # the `date`, `name` and `value` keys of the dict passed to it.
    _WRITE_INDEX_STATEMENT = """ REPLACE INTO valores_indices
                            (fecha_valor, nombre_indice, valor_indice)
                            VALUES
                            (%(date)s, %(name)s, %(value)s)
                        """

    def __init__(self):
        # Handle obtained from core.mysql_wrapper; every query goes through it.
        self.anunciosdb = get_anunciosdb()

    def write_index(self, index_data):
        """Store one index value.

        Args:
            index_data: mapping with the keys `date`, `name` and `value`,
                passed unchanged as the query parameters.
        """
        self.anunciosdb.query(self._WRITE_INDEX_STATEMENT, index_data)


# Module-level instance shared by importers of this module.
indices_interface = IndicesInterface()
# Fixture rows: regular listings for the three indexed categories, plus rows
# that clean_market('index') filters out (missing category, excluded
# categories, an exact duplicate and a listing with tipo_anuncio != 1).
sample_market = [
    {'tamano_categorico': 'coche pequeño', 'tipo_anuncio': 1, 'precio': 15000,
     'calle': 'B1', 'telefono': 123, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche pequeño', 'tipo_anuncio': 1, 'precio': 20000,
     'calle': 'B2', 'telefono': 321, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche grande', 'tipo_anuncio': 1, 'precio': 20000,
     'calle': 'B2', 'telefono': 321, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche grande', 'tipo_anuncio': 1, 'precio': 25000,
     'calle': 'B2', 'telefono': 123, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche y moto', 'tipo_anuncio': 1, 'precio': 22000,
     'calle': 'B1', 'telefono': 456, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche y moto', 'tipo_anuncio': 1, 'precio': 26000,
     'calle': 'B3', 'telefono': 789, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': None, 'tipo_anuncio': 1, 'precio': 15000,
     'calle': 'abc', 'telefono': 456, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'moto', 'tipo_anuncio': 1, 'precio': 3000,
     'calle': 'B4', 'telefono': 123, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': '2 coches o más', 'tipo_anuncio': 1, 'precio': 60000,
     'calle': 'B4', 'telefono': 123, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche pequeño', 'tipo_anuncio': 1, 'precio': 20000,
     'calle': 'B2', 'telefono': 321, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'coche pequeño', 'tipo_anuncio': 2, 'precio': 50,
     'calle': 'B4', 'telefono': 123, 'latitud': 2.1, 'longitud': 1.2},
    {'tamano_categorico': 'moto', 'tipo_anuncio': 1, 'precio': 300000,
     'calle': 'B4', 'telefono': 123, 'latitud': 2.1, 'longitud': 1.2},
]
date_range = {'start': '2018-01-01 00:00:00',
              'end': '2018-02-01 00:00:00'}


market = Market()
market.load_market(sample_market,
                   date_range=date_range)
# `pd.np` was removed from pandas; a plain float NaN is an equivalent fill value.
market.market.fillna(value=float('nan'), inplace=True)
print(market.market.to_string())
market.clean_market('index')
print(market.market.to_string())

# Invariants guaranteed by the 'index' cleaning filters.
assert not market.market['tamano_categorico'].isin(['2 coches o más', 'moto']).any()
assert not market.market['tamano_categorico'].isna().any()
assert (market.market['tipo_anuncio'] == 1).all()

index = IndexMM()
index.calculate(market)
index_data = index.get_data()
print(index_data)

assert index_data['name'] == 'indexmm'
assert index_data['date'] == date_range['end']