"""Load and clean market listing data held in a pandas DataFrame."""
from datetime import datetime, timedelta

import pandas as pd

# Rolling three-month windows; each one starts on the first day of a month,
# from October 2017 through September 2018.
available_date_ranges = [
    {'start': datetime(2017, 10, 1), 'end': datetime(2018, 1, 1)},
    {'start': datetime(2017, 11, 1), 'end': datetime(2018, 2, 1)},
    {'start': datetime(2017, 12, 1), 'end': datetime(2018, 3, 1)},
    {'start': datetime(2018, 1, 1), 'end': datetime(2018, 4, 1)},
    {'start': datetime(2018, 2, 1), 'end': datetime(2018, 5, 1)},
    {'start': datetime(2018, 3, 1), 'end': datetime(2018, 6, 1)},
    {'start': datetime(2018, 4, 1), 'end': datetime(2018, 7, 1)},
    {'start': datetime(2018, 5, 1), 'end': datetime(2018, 8, 1)},
    {'start': datetime(2018, 6, 1), 'end': datetime(2018, 9, 1)},
    {'start': datetime(2018, 7, 1), 'end': datetime(2018, 10, 1)},
    {'start': datetime(2018, 8, 1), 'end': datetime(2018, 11, 1)},
    {'start': datetime(2018, 9, 1), 'end': datetime(2018, 12, 1)},
]


class Market:
    """Hold one date window of market listings and clean them in place.

    Attributes:
        start_date: ``date_range['start']`` of the last ``load_market`` call,
            or ``None`` before any load.
        end_date: ``date_range['end']`` of the last ``load_market`` call,
            or ``None`` before any load.
        market: the listings as a ``pandas.DataFrame``, or ``None`` before
            any load.
    """

    # Cleaning modes understood by ``clean_market``.
    _CLEANING_METHODS = ('index', 'valoracion')

    # Size categories that are always removed by ``clean_market``.
    _EXCLUDED_SIZES = ['2 coches o más', 'moto']

    # Rows that agree on all of these columns count as the same listing.
    _DEDUP_COLUMNS = ['tamano_categorico', 'precio', 'latitud', 'longitud',
                      'telefono']

    # Accepted price range (inclusive) per (tipo_anuncio, tamano_categorico).
    # NOTE(review): the price magnitudes suggest tipo_anuncio 1 is sale and
    # 2 is rental -- confirm against the data source.
    _OUTLIER_RULES = (
        {'tipo_anuncio': 1, 'tamano_categorico': 'coche grande',
         'min_precio': 1000, 'max_precio': 150000},
        {'tipo_anuncio': 1, 'tamano_categorico': 'coche pequeño',
         'min_precio': 1000, 'max_precio': 150000},
        {'tipo_anuncio': 1, 'tamano_categorico': 'coche y moto',
         'min_precio': 1000, 'max_precio': 200000},
        {'tipo_anuncio': 1, 'tamano_categorico': 'moto',
         'min_precio': 1000, 'max_precio': 40000},
        {'tipo_anuncio': 2, 'tamano_categorico': 'coche grande',
         'min_precio': 10, 'max_precio': 300},
        {'tipo_anuncio': 2, 'tamano_categorico': 'coche pequeño',
         'min_precio': 10, 'max_precio': 300},
        {'tipo_anuncio': 2, 'tamano_categorico': 'coche y moto',
         'min_precio': 10, 'max_precio': 3000},
        {'tipo_anuncio': 2, 'tamano_categorico': 'moto',
         'min_precio': 10, 'max_precio': 150},
    )

    def __init__(self):
        self.start_date = None
        self.end_date = None
        self.market = None

    def load_market(self, market_query_results, date_range):
        """Store the query results as a DataFrame together with their window.

        Args:
            market_query_results: anything ``pandas.DataFrame`` accepts as
                its data argument (presumably a list of row dicts -- verify
                against the caller).
            date_range: mapping with ``'start'`` and ``'end'`` keys, e.g. an
                entry of ``available_date_ranges``.

        Raises:
            KeyError: if ``date_range`` lacks ``'start'`` or ``'end'``.
        """
        self.market = pd.DataFrame(market_query_results)
        self.start_date = date_range['start']
        self.end_date = date_range['end']

    def clean_market(self, method: str) -> None:
        """Filter ``self.market`` according to the given cleaning mode.

        Both modes drop rows without a size category, remove the excluded
        size categories, de-duplicate listings (keeping the last occurrence),
        keep only ``tipo_anuncio == 1`` and finally remove price outliers.
        The ``'valoracion'`` mode additionally keeps only rows whose
        ``precision`` is ``'ROOFTOP'``; that filter runs *before*
        de-duplication, so it influences which duplicate survives.

        Any other ``method`` value leaves the data untouched. An empty
        frame (for example a query that returned no rows) is also left
        untouched instead of failing on the missing columns.

        Args:
            method: ``'index'`` or ``'valoracion'``.
        """
        if method not in self._CLEANING_METHODS:
            return
        if self.market.empty:
            # A frame built from an empty result set has no columns at all,
            # so every column-based step below would raise KeyError.
            return

        listings = self.market.dropna(subset=['tamano_categorico'])
        listings = listings[
            ~listings['tamano_categorico'].isin(self._EXCLUDED_SIZES)
        ]
        if method == 'valoracion':
            listings = listings[listings['precision'].isin(['ROOFTOP'])]
        listings = listings.drop_duplicates(subset=self._DEDUP_COLUMNS,
                                            keep='last')
        listings = listings[listings['tipo_anuncio'] == 1]

        self.market = listings
        self.delete_outliers()

    def delete_outliers(self) -> None:
        """Drop rows whose price lies outside the range for their segment.

        A row is removed when its ``(tipo_anuncio, tamano_categorico)`` pair
        matches a rule and ``precio`` is strictly below ``min_precio`` or
        strictly above ``max_precio``. Rows matching no rule are kept, as
        are rows with a missing price (comparisons against NaN are False).
        An empty frame is left untouched.
        """
        if self.market.empty:
            return

        listings = self.market
        for rule in self._OUTLIER_RULES:
            in_segment = (
                (listings['tipo_anuncio'] == rule['tipo_anuncio'])
                & (listings['tamano_categorico'] == rule['tamano_categorico'])
            )
            out_of_range = (
                (listings['precio'] < rule['min_precio'])
                | (listings['precio'] > rule['max_precio'])
            )
            listings = listings.loc[~(in_segment & out_of_range)]
        self.market = listings

    def get_market_data(self):
        """Return the current listings frame (``None`` before any load)."""
        return self.market