diff --git a/.idea/Drogon.iml b/.idea/Drogon.iml
new file mode 100644
index 0000000..6711606
--- /dev/null
+++ b/.idea/Drogon.iml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..65531ca
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..8631d16
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000..bcfc20d
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,141 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1534095188685
+
+
+ 1534095188685
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..818de87
Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ
diff --git a/core/__pycache__/mysql_wrapper.cpython-36.pyc b/core/__pycache__/mysql_wrapper.cpython-36.pyc
index 05caa75..4a6c181 100644
Binary files a/core/__pycache__/mysql_wrapper.cpython-36.pyc and b/core/__pycache__/mysql_wrapper.cpython-36.pyc differ
diff --git a/core/__pycache__/scrapping_utils.cpython-36.pyc b/core/__pycache__/scrapping_utils.cpython-36.pyc
new file mode 100644
index 0000000..5b43d29
Binary files /dev/null and b/core/__pycache__/scrapping_utils.cpython-36.pyc differ
diff --git a/core/scrapping_utils.py b/core/scrapping_utils.py
new file mode 100644
index 0000000..f8f37cc
--- /dev/null
+++ b/core/scrapping_utils.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+import requests
+
+
+class UrlAttack():
+    """Wrap one HTTP GET against a url and record whether it succeeded."""
+
+    # Browser-like headers to reduce trivial bot blocking.
+    headers = {'Upgrade-Insecure-Requests': "1",
+               'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+               'Accept-Encoding': "gzip, deflate, br",
+               'Accept-Language': "en-US,en;q=0.9",
+               'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36'}
+
+    # Seconds before requests.get gives up.
+    timeout = 20
+
+    def __init__(self, url):
+        self.url = url
+        self.success = None           # None until attack() has run
+        self.has_been_attacked = False
+
+    def attack(self):
+        """Perform the GET; set self.success and keep the response."""
+        self.has_been_attacked = True
+        try:
+            # BUG FIX: 'headers' and 'timeout' were unqualified names
+            # (NameError at runtime); they are class attributes.
+            self.response = requests.get(self.url, headers=self.headers,
+                                         timeout=self.timeout)
+            if self.response.ok:
+                self.success = True
+        except Exception:
+            self.success = False
+
+    def get_response(self):
+        # Returns None unless the attack succeeded.
+        if self.success:
+            return self.response
+
+    def get_text(self):
+        # BUG FIX: requests' Response.text is a property, not a method.
+        if self.success:
+            return self.response.text
\ No newline at end of file
diff --git a/explorer/__init__.py b/explorer/__init__.py
new file mode 100644
index 0000000..633f866
--- /dev/null
+++ b/explorer/__init__.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+
diff --git a/explorer/__pycache__/__init__.cpython-36.pyc b/explorer/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..5aa2c8c
Binary files /dev/null and b/explorer/__pycache__/__init__.cpython-36.pyc differ
diff --git a/explorer/__pycache__/mock.cpython-36.pyc b/explorer/__pycache__/mock.cpython-36.pyc
new file mode 100644
index 0000000..c2f7e7d
Binary files /dev/null and b/explorer/__pycache__/mock.cpython-36.pyc differ
diff --git a/explorer/explorer.py b/explorer/explorer.py
index 750aee4..510c39a 100644
--- a/explorer/explorer.py
+++ b/explorer/explorer.py
@@ -1,9 +1,12 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('..')
+import uuid
+import datetime
from time import sleep
from core.mysql_wrapper import get_anunciosdb
+from core.scrapping_utils import UrlAttack
class Explorer():
@@ -39,7 +42,7 @@ class Explorer():
if not self.queue_is_up():
break
-        self.compose_listing_url
+        current_task = ExploringTask(self.compose_listing_url())
@@ -80,19 +83,52 @@ class Explorer():
def compose_listing_url(self):
#Decide que url hay que componer y la compone
-
- raiz, orden = componer_raiz(p_orden, tipo_anuncios, ciudad)
- cantidad = calcular_cantidad_listado(anuncios_por_capturar)
-
- lista_urls = []
-
- for num in range(primera_pagina, cantidad + primera_pagina):
- lista_urls.append(raiz + 'pagina-' + str(num) + '.htm' + orde$
+        raiz = 'https://www.idealista.com/'
+        tipo = 'venta'  # TODO: pick randomly; bare '= #comment' was a SyntaxError
+        ciudad = 'barcelona'
+        numero = str(1)  # TODO: pick randomly; must be str for concatenation
+        url = (raiz + tipo + '-garajes/' + ciudad + '-' + ciudad + '/' +
+               'pagina-' + numero + '.htm')  # parens: dangling '+' broke the line
+
+        return url
- return lista_urls
+class ExploringTask():
- #funcion que compone los strings de url de idealista
- #tiene en cuenta cuantos anuncios quedan por capturar
- #para decidir la cantidad de URLs a generar
-
\ No newline at end of file
+    def __init__(self, url):
+        self.creation_date = datetime.datetime.now()  # fix: 'Datetime' is undefined
+        self.target_url = url
+        self.id = str(uuid.uuid4())
+        self.status = 'Pending'
+
+        self.log()
+
+    def explore(self):
+        self.attack_date = datetime.datetime.now()  # fix: module is 'datetime'
+        attack = UrlAttack(self.target_url)  # fix: attribute is target_url, not url
+
+        attack.attack()
+
+        if attack.success:
+            self.get_listings()
+            if self.listings_ok:
+                self.status = 'Success'
+        else:
+            self.status = 'Failure'
+
+        self.log()  # fix: bare 'self.log' never invoked the method
+
+    def get_listings(self):
+        # TODO: return the cleaned-up listings
+        self.listings_ok = False  # stub so explore() finds the flag it reads
+
+    def log(self):
+        # TODO: persist status etc. to a mongodb or similar
+        pass  # stub: a comment-only body is a SyntaxError
+
+    def extract_listings(self):
+        # TODO: take a listing page's html and pull the references;
+        # if none are found, the task counts as a fail
+        pass
+
+