Testeado el batch de índices en dev.

This commit is contained in:
pablomartincalvo 2018-12-23 18:30:11 +01:00
parent d71b69a611
commit c234679a10
7 changed files with 170 additions and 23 deletions

View file

@ -18,27 +18,29 @@ available_date_ranges = [{'start': datetime(2017, 10, 1), 'end': datetime(2018,
class Market:
def __init__(self):
self.start_date = datetime.today() - timedelta(days=90)
self.end_date = datetime.today()
self.start_date = None
self.end_date = None
self.market = None
def load_market(self, market_query_results):
def load_market(self, market_query_results, date_range):
self.market = pd.DataFrame(market_query_results)
self.start_date = date_range['start']
self.end_date = date_range['end']
def clean_market(self, method):
    """Filter ``self.market`` in place according to the cleaning ``method``.

    Both supported methods run the same pipeline, in this order:

    1. drop rows whose ``tamano_categorico`` is missing;
    2. drop rows whose ``tamano_categorico`` is ``'2 coches o más'`` or
       ``'moto'``;
    3. (``'valoracion'`` only) keep rows whose ``precision`` is
       ``'ROOFTOP'``;
    4. drop duplicates over size, price, coordinates and phone, keeping
       the last occurrence;
    5. keep rows whose ``tipo_anuncio`` equals ``1``;
    6. call ``self.delete_outliers()``.

    Args:
        method: ``'index'`` or ``'valoracion'``. Any other value leaves
            the market untouched.
    """
    if method not in ('index', 'valoracion'):
        # Unknown methods are a silent no-op.
        return

    # dropna/drop_duplicates return new frames, so each result must be
    # kept; calling them without assignment has no effect.
    market = self.market.dropna(subset=['tamano_categorico'])
    market = market[~market['tamano_categorico'].isin(['2 coches o más', 'moto'])]

    if method == 'valoracion':
        # Applied before de-duplication so that keep='last' picks among
        # the rows that survive this filter.
        market = market[market['precision'].isin(['ROOFTOP'])]

    market = market.drop_duplicates(
        subset=['tamano_categorico', 'precio', 'latitud', 'longitud', 'telefono'],
        keep='last',
    )
    self.market = market[market['tipo_anuncio'] == 1]

    # delete_outliers() is defined elsewhere in the class; it runs on the
    # already-filtered self.market.
    self.delete_outliers()