76 lines
4.2 KiB
Python
76 lines
4.2 KiB
Python
from datetime import datetime, timedelta
|
|
import pandas as pd
|
|
|
|
# Twelve rolling three-month windows. Window k starts on the first day of the
# k-th month counting from October 2017 (k = 0) and ends on the first day of
# the month three months later, so the last window is 2018-09-01 -> 2018-12-01.
# Months are handled as a zero-based running index (October 2017 == 9) so the
# year rollover falls out of integer division / modulo by 12.
available_date_ranges = [
    {
        'start': datetime(2017 + (9 + k) // 12, (9 + k) % 12 + 1, 1),
        'end': datetime(2017 + (12 + k) // 12, (12 + k) % 12 + 1, 1),
    }
    for k in range(12)
]
|
|
|
|
|
|
class Market:
    """Hold a table of market listings for one date range and clean it.

    The listings are kept in ``self.market`` as a ``pandas.DataFrame`` built
    from raw query results. Cleaning relies on these columns being present:
    ``tamano_categorico``, ``precio``, ``latitud``, ``longitud``, ``telefono``,
    ``tipo_anuncio`` and (for the ``'valoracion'`` mode only) ``precision``.

    Attributes are ``None`` until :meth:`load_market` has been called.
    """

    # Cleaning modes understood by clean_market(); any other value is a no-op.
    _CLEANING_METHODS = ('index', 'valoracion')

    # Size categories that are always discarded during cleaning.
    _EXCLUDED_SIZES = ['2 coches o más', 'moto']

    # Rows sharing all of these values are considered the same listing.
    _DEDUP_COLUMNS = ['tamano_categorico', 'precio', 'latitud', 'longitud', 'telefono']

    # (tipo_anuncio, tamano_categorico, min_precio, max_precio).
    # A row whose (tipo_anuncio, tamano_categorico) matches an entry is an
    # outlier when its precio is strictly below min or strictly above max.
    # NOTE(review): judging by the magnitudes, tipo_anuncio 1 looks like sale
    # prices and 2 like rental prices -- confirm against the data source.
    _PRICE_LIMITS = (
        (1, 'coche grande', 1000, 150000),
        (1, 'coche pequeño', 1000, 150000),
        (1, 'coche y moto', 1000, 200000),
        (1, 'moto', 1000, 40000),
        (2, 'coche grande', 10, 300),
        (2, 'coche pequeño', 10, 300),
        (2, 'coche y moto', 10, 3000),
        (2, 'moto', 10, 150),
    )

    def __init__(self):
        self.start_date = None
        self.end_date = None
        self.market = None

    def load_market(self, market_query_results, date_range):
        """Store the query results as a DataFrame together with their date range.

        Args:
            market_query_results: Anything ``pandas.DataFrame`` accepts as
                data (e.g. a list of row dicts). May be empty.
            date_range: Mapping with ``'start'`` and ``'end'`` keys.
        """
        self.market = pd.DataFrame(market_query_results)
        self.start_date = date_range['start']
        self.end_date = date_range['end']

    def clean_market(self, method):
        """Filter ``self.market`` in place according to ``method``.

        Both supported modes apply the same pipeline:

        1. drop rows without ``tamano_categorico``;
        2. drop the excluded size categories;
        3. (``'valoracion'`` only) keep rows whose ``precision`` is
           ``'ROOFTOP'``;
        4. drop duplicate listings, keeping the last occurrence;
        5. keep rows with ``tipo_anuncio == 1``;
        6. remove price outliers via :meth:`delete_outliers`.

        Args:
            method: ``'index'`` or ``'valoracion'``. Any other value leaves
                the market untouched.
        """
        if method not in self._CLEANING_METHODS:
            return

        # An empty query result yields a DataFrame without any columns; the
        # column-based filters below would raise KeyError on it, and there is
        # nothing to clean anyway.
        if self.market.empty:
            return

        frame = self.market.dropna(subset=['tamano_categorico'])
        frame = frame[~frame['tamano_categorico'].isin(self._EXCLUDED_SIZES)]
        if method == 'valoracion':
            frame = frame[frame['precision'].isin(['ROOFTOP'])]
        frame = frame.drop_duplicates(subset=self._DEDUP_COLUMNS, keep='last')
        frame = frame[frame['tipo_anuncio'] == 1]

        self.market = frame
        self.delete_outliers()

    def delete_outliers(self):
        """Remove rows whose ``precio`` is outside the limits for their category.

        Limits are looked up per ``(tipo_anuncio, tamano_categorico)`` pair in
        ``_PRICE_LIMITS``; bounds are inclusive. Rows whose pair has no entry,
        and rows whose ``precio`` is missing, are kept.
        """
        frame = self.market
        if frame.empty:
            # Nothing to filter (and possibly no columns to index into).
            return

        ad_type = frame['tipo_anuncio']
        size = frame['tamano_categorico']
        price = frame['precio']

        # Accumulate one boolean mask and filter once instead of re-slicing
        # the frame for every entry.
        outlier = pd.Series(False, index=frame.index)
        for limit_type, limit_size, min_price, max_price in self._PRICE_LIMITS:
            in_group = (ad_type == limit_type) & (size == limit_size)
            out_of_range = (price < min_price) | (price > max_price)
            outlier = outlier | (in_group & out_of_range)

        self.market = frame.loc[~outlier]

    def get_market_data(self):
        """Return the current market DataFrame (``None`` before loading)."""
        return self.market
|
|
|
|
|