data-jupyter-notebooks/data_driven_risk_assessment/experiments/pablo_eda.ipynb

5601 lines
455 KiB
Text
Raw Normal View History

2025-07-03 08:05:41 +00:00
{
"cells": [
{
"cell_type": "markdown",
"id": "84dcd475",
"metadata": {},
"source": [
"# DDRA - Pablo's EDA\n",
"\n",
"Open-ended exploration to understand what might drive claims.\n",
"\n",
"A handful of ideas I want to give a first shot to:\n",
"- Time features\n",
" - Length of stay\n",
" - Check In and Check out dates\n",
" - Lead time between creation and check-in\n",
" - checkin as week/month/year cycle, checkout as week/month/year cycle\n",
" - Duration between starting GJ and booking checkin\n",
"- Same country, same town features\n",
"- Tokenize listing names and correlate them\n",
" - And specifically get bedrooms with regex\n",
"- Number of active listings of host\n",
"- Number of bookings created by host in last 12 months (and monthly/per listing average?)\n",
"- Number of bookings cancelled on the host in last 12 months (and monthly/per listing average?)\n",
"- Number of claims created by host in last 12 months (and monthly/per listing average?)\n",
"- Number of claims with positive settlement by host in last 12 months (and monthly/per listing average?)\n",
"- Total invoiced to host in last 12 months (and monthly/per listing average?)\n",
"- Guest age\n",
"- Paid for waiver\n",
"- Paid for CIH\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "12368ce1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"🔌 Testing connection using credentials at: /home/pablo/.superhog-dwh/credentials.yml\n",
"✅ Connection successful.\n"
]
}
],
"source": [
"import os\n",
"import sys\n",
"\n",
"# dwh_utils is imported from the shared utils folder; the relative path is\n",
"# resolved against the current working directory, so adjust it if needed.\n",
"sys.path.append(os.path.abspath(\"../../utils\"))\n",
"\n",
"from dwh_utils import (\n",
"    create_postgres_engine,\n",
"    query_to_dataframe,\n",
"    read_credentials,\n",
"    test_connection,\n",
")\n",
"\n",
"# Read the credentials and build the engine that the queries below receive.\n",
"creds = read_credentials()\n",
"dwh_pg_engine = create_postgres_engine(creds)\n",
"\n",
"# Check the connection before running any real query.\n",
"test_connection()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "385c350b",
"metadata": {},
"outputs": [],
"source": [
"# Other imports\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.decomposition import TruncatedSVD\n",
"import string"
]
},
{
"cell_type": "markdown",
"id": "78cbf43d",
"metadata": {},
"source": [
"# Getting data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba87ff74",
"metadata": {},
"outputs": [],
"source": [
"# Scope shared by every query in this cell. It is defined once so the bookings\n",
"# and the listings are always pulled for exactly the same population; it expects\n",
"# int_booking_summary to be aliased as `ibs`.\n",
"BOOKING_SCOPE_FILTER = \"\"\"\n",
"    -- 1. Bookings from New Dash users with Id Deal\n",
"    ibs.is_user_in_new_dash = True and\n",
"    ibs.is_missing_id_deal = False and\n",
"    -- 2. Protected Bookings with a Protection or a Deposit Management service\n",
"    (ibs.has_protection_service_business_type or\n",
"     ibs.has_deposit_management_service_business_type) and\n",
"    -- 3. Bookings with flagging categorisation (this excludes Cancelled/Incomplete/Rejected bookings)\n",
"    ibs.is_booking_flagged_as_risk is not null and\n",
"    -- 4. Booking is completed\n",
"    ibs.is_booking_past_completion_date = True\n",
"\"\"\"\n",
"\n",
"# Get all bookings and their claims: one row per booking with the target flag,\n",
"# the raw dates used for the time features, and the accommodation id used to\n",
"# look up listing details.\n",
"df_bookings_and_claims = query_to_dataframe(\n",
"    engine=dwh_pg_engine,\n",
"    query=f\"\"\"\n",
"        select\n",
"            -- Unique ID --\n",
"            ibs.id_booking,\n",
"            -- Target (Boolean) --\n",
"            ibs.has_resolution_incident,\n",
"            -- Various features --\n",
"            ibs.booking_created_date_utc,\n",
"            ibs.booking_check_in_date_utc,\n",
"            ibs.booking_check_out_date_utc,\n",
"            vr.link_used_date_utc as guest_journey_started_date_utc,\n",
"\n",
"            -- Other --\n",
"            ibs.id_accommodation\n",
"\n",
"        from intermediate.int_booking_summary ibs\n",
"        left join intermediate.int_core__verification_requests vr\n",
"            on ibs.id_verification_request = vr.id_verification_request\n",
"        where\n",
"        {BOOKING_SCOPE_FILTER}\n",
"    \"\"\",\n",
")\n",
"\n",
"# Get listing details for the accommodations that appear in the bookings above\n",
"df_listing_details = query_to_dataframe(\n",
"    engine=dwh_pg_engine,\n",
"    query=f\"\"\"\n",
"        select\n",
"            a.id_accommodation,\n",
"            a.friendly_name\n",
"        from intermediate.int_core__accommodation a\n",
"        where a.id_accommodation in (\n",
"            select distinct id_accommodation\n",
"            from intermediate.int_booking_summary ibs\n",
"            where\n",
"            {BOOKING_SCOPE_FILTER}\n",
"        )\n",
"    \"\"\",\n",
")\n",
"\n",
"# TODO: Get last 12 months host KPIs\n",
"\n",
"# TODO: Get guest data\n",
"\n",
"# TODO: Get host data\n",
"\n",
"# TODO: Get guest journey sales\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "9848916e",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "id_booking",
"rawType": "int64",
"type": "integer"
},
{
"name": "has_resolution_incident",
"rawType": "bool",
"type": "boolean"
},
{
"name": "booking_created_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "booking_check_in_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "booking_check_out_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "guest_journey_started_date_utc",
"rawType": "object",
"type": "unknown"
}
],
"ref": "b9c31069-7ef4-4cf3-8954-d283b42f6e64",
"rows": [
[
"0",
"975057",
"False",
"2024-12-04",
"2024-12-05",
"2025-03-31",
null
],
[
"1",
"975059",
"False",
"2024-12-04",
"2024-12-06",
"2024-12-08",
"2024-12-07"
],
[
"2",
"975060",
"False",
"2024-12-04",
"2025-01-26",
"2025-01-29",
"2024-12-07"
],
[
"3",
"975061",
"False",
"2024-12-04",
"2024-12-15",
"2025-03-15",
null
],
[
"4",
"975062",
"False",
"2024-12-04",
"2024-12-11",
"2025-03-10",
"2024-12-10"
],
[
"5",
"975063",
"False",
"2024-12-04",
"2024-12-10",
"2025-03-11",
"2024-12-04"
],
[
"6",
"975065",
"False",
"2024-12-04",
"2024-12-05",
"2024-12-10",
"2024-12-04"
],
[
"7",
"975066",
"False",
"2024-12-04",
"2024-12-20",
"2024-12-23",
"2024-12-04"
],
[
"8",
"975067",
"False",
"2024-12-04",
"2025-01-14",
"2025-01-18",
"2025-01-03"
],
[
"9",
"975068",
"False",
"2024-12-04",
"2025-05-20",
"2025-05-23",
null
],
[
"10",
"975070",
"False",
"2024-12-04",
"2025-01-25",
"2025-01-27",
"2024-12-04"
],
[
"11",
"975071",
"False",
"2024-12-04",
"2025-01-27",
"2025-01-30",
"2024-12-04"
],
[
"12",
"982700",
"False",
"2024-12-11",
"2024-12-13",
"2024-12-14",
"2024-12-11"
],
[
"13",
"984590",
"False",
"2024-12-12",
"2025-02-05",
"2025-02-10",
"2024-12-12"
],
[
"14",
"985483",
"False",
"2024-12-13",
"2024-12-25",
"2024-12-29",
"2024-12-14"
],
[
"15",
"986720",
"False",
"2024-12-14",
"2025-01-23",
"2025-01-25",
"2024-12-14"
],
[
"16",
"987812",
"False",
"2024-12-15",
"2025-02-10",
"2025-02-15",
"2024-12-15"
],
[
"17",
"989579",
"False",
"2024-12-17",
"2024-12-23",
"2024-12-26",
"2024-12-17"
],
[
"18",
"989580",
"False",
"2024-12-17",
"2024-12-20",
"2024-12-22",
"2024-12-17"
],
[
"19",
"989581",
"True",
"2024-12-17",
"2024-12-31",
"2025-01-02",
"2024-12-24"
],
[
"20",
"989582",
"False",
"2024-12-17",
"2024-12-23",
"2024-12-26",
"2024-12-17"
],
[
"21",
"990071",
"False",
"2024-12-17",
"2024-12-31",
"2025-01-02",
"2024-12-19"
],
[
"22",
"990606",
"False",
"2024-12-17",
"2024-12-18",
"2024-12-22",
"2024-12-17"
],
[
"23",
"991162",
"False",
"2024-12-18",
"2024-12-29",
"2024-12-31",
"2024-12-19"
],
[
"24",
"991894",
"False",
"2024-12-18",
"2025-02-28",
"2025-03-03",
"2024-12-18"
],
[
"25",
"993698",
"False",
"2024-12-20",
"2024-12-30",
"2024-12-31",
"2024-12-20"
],
[
"26",
"994300",
"True",
"2024-12-20",
"2025-01-18",
"2025-01-20",
"2024-12-20"
],
[
"27",
"994888",
"False",
"2024-12-21",
"2024-12-23",
"2024-12-24",
"2024-12-21"
],
[
"28",
"994974",
"False",
"2024-12-21",
"2024-12-30",
"2025-01-02",
"2024-12-21"
],
[
"29",
"995617",
"False",
"2024-12-22",
"2024-12-28",
"2024-12-30",
"2024-12-22"
],
[
"30",
"995692",
"True",
"2024-12-22",
"2024-12-30",
"2025-01-02",
"2024-12-22"
],
[
"31",
"996081",
"False",
"2024-12-22",
"2025-01-27",
"2025-02-02",
"2024-12-29"
],
[
"32",
"996092",
"False",
"2024-12-22",
"2025-01-30",
"2025-02-04",
"2024-12-22"
],
[
"33",
"996397",
"False",
"2024-12-22",
"2025-01-11",
"2025-01-15",
"2024-12-22"
],
[
"34",
"997018",
"False",
"2024-12-23",
"2025-02-15",
"2025-02-21",
"2024-12-24"
],
[
"35",
"997710",
"False",
"2024-12-24",
"2025-01-09",
"2025-01-13",
"2024-12-24"
],
[
"36",
"997777",
"False",
"2024-12-24",
"2024-12-23",
"2024-12-26",
"2024-12-24"
],
[
"37",
"998900",
"False",
"2024-12-25",
"2025-01-02",
"2025-01-05",
"2024-12-27"
],
[
"38",
"998926",
"False",
"2024-12-25",
"2024-12-26",
"2024-12-31",
"2024-12-25"
],
[
"39",
"999495",
"False",
"2024-12-25",
"2024-12-27",
"2024-12-28",
"2024-12-25"
],
[
"40",
"999663",
"False",
"2024-12-26",
"2024-12-26",
"2024-12-30",
"2024-12-26"
],
[
"41",
"1000059",
"False",
"2024-12-26",
"2024-12-27",
"2024-12-30",
"2024-12-27"
],
[
"42",
"1000743",
"False",
"2024-12-27",
"2025-03-22",
"2025-03-29",
"2024-12-27"
],
[
"43",
"1000745",
"False",
"2024-12-27",
"2024-12-27",
"2024-12-29",
"2024-12-27"
],
[
"44",
"1000746",
"False",
"2024-12-27",
"2024-12-29",
"2025-01-02",
"2024-12-27"
],
[
"45",
"1000808",
"False",
"2024-12-27",
"2024-12-27",
"2024-12-29",
null
],
[
"46",
"1000809",
"False",
"2024-12-27",
"2025-02-06",
"2025-02-07",
"2024-12-28"
],
[
"47",
"1000883",
"False",
"2024-12-27",
"2025-01-01",
"2025-01-05",
"2024-12-27"
],
[
"48",
"1000951",
"True",
"2024-12-27",
"2025-01-09",
"2025-01-15",
"2024-12-27"
],
[
"49",
"1001807",
"False",
"2024-12-27",
"2024-12-27",
"2024-12-28",
"2024-12-27"
]
],
"shape": {
"columns": 6,
"rows": 20280
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_booking</th>\n",
" <th>has_resolution_incident</th>\n",
" <th>booking_created_date_utc</th>\n",
" <th>booking_check_in_date_utc</th>\n",
" <th>booking_check_out_date_utc</th>\n",
" <th>guest_journey_started_date_utc</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>975057</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-05</td>\n",
" <td>2025-03-31</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>975059</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-06</td>\n",
" <td>2024-12-08</td>\n",
" <td>2024-12-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>975060</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2025-01-26</td>\n",
" <td>2025-01-29</td>\n",
" <td>2024-12-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>975061</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-15</td>\n",
" <td>2025-03-15</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>975062</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-11</td>\n",
" <td>2025-03-10</td>\n",
" <td>2024-12-10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20275</th>\n",
" <td>931096</td>\n",
" <td>False</td>\n",
" <td>2024-10-31</td>\n",
" <td>2024-11-08</td>\n",
" <td>2024-11-13</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20276</th>\n",
" <td>931086</td>\n",
" <td>False</td>\n",
" <td>2024-10-31</td>\n",
" <td>2024-11-15</td>\n",
" <td>2024-11-18</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20277</th>\n",
" <td>931082</td>\n",
" <td>False</td>\n",
" <td>2024-10-31</td>\n",
" <td>2024-12-20</td>\n",
" <td>2024-12-27</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20278</th>\n",
" <td>926634</td>\n",
" <td>False</td>\n",
" <td>2024-10-27</td>\n",
" <td>2025-02-13</td>\n",
" <td>2025-02-16</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20279</th>\n",
" <td>919656</td>\n",
" <td>False</td>\n",
" <td>2024-10-21</td>\n",
" <td>2025-01-16</td>\n",
" <td>2025-01-20</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20280 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" id_booking has_resolution_incident booking_created_date_utc \\\n",
"0 975057 False 2024-12-04 \n",
"1 975059 False 2024-12-04 \n",
"2 975060 False 2024-12-04 \n",
"3 975061 False 2024-12-04 \n",
"4 975062 False 2024-12-04 \n",
"... ... ... ... \n",
"20275 931096 False 2024-10-31 \n",
"20276 931086 False 2024-10-31 \n",
"20277 931082 False 2024-10-31 \n",
"20278 926634 False 2024-10-27 \n",
"20279 919656 False 2024-10-21 \n",
"\n",
" booking_check_in_date_utc booking_check_out_date_utc \\\n",
"0 2024-12-05 2025-03-31 \n",
"1 2024-12-06 2024-12-08 \n",
"2 2025-01-26 2025-01-29 \n",
"3 2024-12-15 2025-03-15 \n",
"4 2024-12-11 2025-03-10 \n",
"... ... ... \n",
"20275 2024-11-08 2024-11-13 \n",
"20276 2024-11-15 2024-11-18 \n",
"20277 2024-12-20 2024-12-27 \n",
"20278 2025-02-13 2025-02-16 \n",
"20279 2025-01-16 2025-01-20 \n",
"\n",
" guest_journey_started_date_utc \n",
"0 None \n",
"1 2024-12-07 \n",
"2 2024-12-07 \n",
"3 None \n",
"4 2024-12-10 \n",
"... ... \n",
"20275 None \n",
"20276 None \n",
"20277 None \n",
"20278 None \n",
"20279 None \n",
"\n",
"[20280 rows x 6 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_bookings_and_claims"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "faf0b7de",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "id_accommodation",
"rawType": "int64",
"type": "integer"
},
{
"name": "friendly_name",
"rawType": "object",
"type": "unknown"
}
],
"ref": "39e34129-92b9-452c-982e-db106a6a71e0",
"rows": [
[
"0",
"10368",
"Maddox St"
],
[
"1",
"11059",
"HIL-1"
],
[
"2",
"14345",
"SUS-2"
],
[
"3",
"277469",
"4000 sqft Lakefront Retreat | Private Hot Tub #NH"
],
[
"4",
"28561",
"LAN-3"
],
[
"5",
"163044",
"Rosa Negra Suite Near Stadium"
],
[
"6",
"164229",
"Stadio Aparta | Rosa Negra Suite"
],
[
"7",
"202702",
"Ocean View - 2nd from Beach @ Surf City, LBI, NJ"
],
[
"8",
"203920",
"Poplar"
],
[
"9",
"261554",
"Steps to Beach | Linens + Towels included | AC"
],
[
"10",
"33910",
"WAR-2"
],
[
"11",
"277505",
"Incredible SeaPoint Oasis W Pool"
],
[
"12",
"277507",
"181 Palisades"
],
[
"13",
"277508",
"585 Jacks -3 Bedroom"
],
[
"14",
"119411",
""
],
[
"15",
"48607",
"200 Palms"
],
[
"16",
"48896",
"Chilworth Paddington"
],
[
"17",
"83345",
"King Bed | Private Parking | Strong WiFi"
],
[
"18",
"84166",
"Cozy Escape | Strong Wi-Fi | Prime Location"
],
[
"19",
"48900",
"Bakers Passage 1"
],
[
"20",
"48902",
"Bakers Passage 2"
],
[
"21",
"105022",
"Flamingo Cove 7"
],
[
"22",
"51902",
"La Camilla, comfortable private villa with pool"
],
[
"23",
"105038",
"Casa Prieta"
],
[
"24",
"105039",
"Casa Ironbark"
],
[
"25",
"105041",
"Villa Catalina #8"
],
[
"26",
"51921",
"Agriturismo Molino Verde, modern and comfortable"
],
[
"27",
"51926",
"Villa Farneta, large luxury villa with fenced pool"
],
[
"28",
"51928",
"Villa La Ginestra, private villa with pool"
],
[
"29",
"51938",
"Villa Badia, spacious villa with private infinity pool"
],
[
"30",
"51941",
"Casa Paciano, near town, apartments with pool"
],
[
"31",
"51942",
"La Pergola, agriturismo with large pool"
],
[
"32",
"52082",
"The Pom Pom House - In the heart of Palm Springs"
],
[
"33",
"52083",
"Silver Lake Views! The Hummingbird - Guest Suite"
],
[
"34",
"105055",
"Sunset Heights 500"
],
[
"35",
"105056",
"Casa Mango"
],
[
"36",
"105059",
"Marina Resort #502"
],
[
"37",
"105062",
"Casa Tranquility"
],
[
"38",
"105073",
"La Antigua 36"
],
[
"39",
"105077",
"La Antigua 28"
],
[
"40",
"53623",
"2 Napolean Richmond"
],
[
"41",
"53625",
"81 Miramar HiddenBay"
],
[
"42",
"53627",
"24 Marlborough S/Bay"
],
[
"43",
"53629",
"326 Churchill Ave SB"
],
[
"44",
"53637",
"241 Bathurst BOSTANE"
],
[
"45",
"53638",
"3/5HomeAve/Modern"
],
[
"46",
"53639",
"2/165Camb Retreat"
],
[
"47",
"53641",
"165 Cambr-Residence"
],
[
"48",
"53643",
"87 Kingston View Dv"
],
[
"49",
"53960",
"3-Bedroom Oasis in Miami "
]
],
"shape": {
"columns": 2,
"rows": 3632
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_accommodation</th>\n",
" <th>friendly_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10368</td>\n",
" <td>Maddox St</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11059</td>\n",
" <td>HIL-1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>14345</td>\n",
" <td>SUS-2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>277469</td>\n",
" <td>4000 sqft Lakefront Retreat | Private Hot Tub #NH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28561</td>\n",
" <td>LAN-3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3627</th>\n",
" <td>197269</td>\n",
" <td>Luxury Glamping | Hot Tub, Firepit &amp; Grill</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3628</th>\n",
" <td>198130</td>\n",
" <td>Brick Haven House: 10min Walk to Shakespeare F...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3629</th>\n",
" <td>205403</td>\n",
" <td>NO FEES! Pool+Hot Tub/Volley&amp;Bocce Ball+Firepit</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3630</th>\n",
" <td>263762</td>\n",
" <td>Brasada Ranch | Hot Tub | Guest Casita | 5 Bed</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3631</th>\n",
" <td>267589</td>\n",
" <td>10% Off July 6-10 • Creek • 3 Dogs • Fenced Yard</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3632 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" id_accommodation friendly_name\n",
"0 10368 Maddox St\n",
"1 11059 HIL-1\n",
"2 14345 SUS-2\n",
"3 277469 4000 sqft Lakefront Retreat | Private Hot Tub #NH\n",
"4 28561 LAN-3\n",
"... ... ...\n",
"3627 197269 Luxury Glamping | Hot Tub, Firepit & Grill\n",
"3628 198130 Brick Haven House: 10min Walk to Shakespeare F...\n",
"3629 205403 NO FEES! Pool+Hot Tub/Volley&Bocce Ball+Firepit\n",
"3630 263762 Brasada Ranch | Hot Tub | Guest Casita | 5 Bed\n",
"3631 267589 10% Off July 6-10 • Creek • 3 Dogs • Fenced Yard\n",
"\n",
"[3632 rows x 2 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_listing_details"
]
},
{
"cell_type": "markdown",
"id": "5acb3488",
"metadata": {},
"source": [
"# Processing"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3fe386f8",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"def add_cyclical_features(df, col, prefix, period):\n",
"    \"\"\"Encode a periodic column as a sine/cosine pair.\n",
"\n",
"    Writes ``{prefix}_cycle_sin`` and ``{prefix}_cos`` to ``df`` in place and\n",
"    returns ``df``.\n",
"\n",
"    Args:\n",
"        df: DataFrame holding ``col``; it is modified in place.\n",
"        col: Name of the column with the position inside the cycle.\n",
"        prefix: Prefix of the two output column names.\n",
"        period: Cycle length, either a scalar or one value per row.\n",
"\n",
"    NOTE(review): the two suffixes are asymmetric (``_cycle_sin`` vs ``_cos``).\n",
"    They are kept unchanged because later cells may select these columns by name.\n",
"    \"\"\"\n",
"    angle = 2 * np.pi * df[col] / period\n",
"    df[f'{prefix}_cycle_sin'] = np.sin(angle)\n",
"    df[f'{prefix}_cos'] = np.cos(angle)\n",
"    return df\n",
"\n",
"\n",
"# Work on a copy and make sure the date columns are datetime objects\n",
"df = df_bookings_and_claims.copy()\n",
"for date_col in (\n",
"    'booking_created_date_utc',\n",
"    'booking_check_in_date_utc',\n",
"    'booking_check_out_date_utc',\n",
"    'guest_journey_started_date_utc',\n",
"):\n",
"    df[date_col] = pd.to_datetime(df[date_col])\n",
"\n",
"checkin = df['booking_check_in_date_utc']\n",
"checkout = df['booking_check_out_date_utc']\n",
"\n",
"# 1. Length of stay (in days)\n",
"df['length_of_stay_days'] = (checkout - checkin).dt.days\n",
"\n",
"# 2. Lead time between creation and check-in (in days)\n",
"df['lead_time_to_checkin_days'] = (checkin - df['booking_created_date_utc']).dt.days\n",
"\n",
"# 3. Cyclical transformations for the week, month and year cycles.\n",
"# Each cycle uses its own calendar position: day-of-year divided by 7 or 30 does\n",
"# not line up with the weekday or the day of the month, so those are read from\n",
"# the date directly. Positions are zero-based and divided by the real cycle length.\n",
"for event, dates in (('checkin', checkin), ('checkout', checkout)):\n",
"    df['_cycle_pos'] = dates.dt.dayofweek  # Monday=0 ... Sunday=6\n",
"    df = add_cyclical_features(df, '_cycle_pos', f'{event}_week_cycle', 7)\n",
"\n",
"    df['_cycle_pos'] = dates.dt.day - 1\n",
"    df = add_cyclical_features(df, '_cycle_pos', f'{event}_month_cycle', dates.dt.days_in_month)\n",
"\n",
"    df['_cycle_pos'] = dates.dt.dayofyear - 1\n",
"    days_in_year = np.where(dates.dt.is_leap_year, 366, 365)\n",
"    df = add_cyclical_features(df, '_cycle_pos', f'{event}_year_cycle', days_in_year)\n",
"\n",
"# Drop the temporary helper column\n",
"df = df.drop(columns='_cycle_pos')\n",
"\n",
"# 4. Time in days between GJ start and check-in (NaN when the guest journey\n",
"# start date is missing)\n",
"df['gj_start_to_checkin_days'] = (checkin - df['guest_journey_started_date_utc']).dt.days\n",
"\n",
"# Final transformed DataFrame\n",
"df_bookings_and_claims = df\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "95814ea4",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "id_booking",
"rawType": "int64",
"type": "integer"
},
{
"name": "has_resolution_incident",
"rawType": "bool",
"type": "boolean"
},
{
"name": "booking_created_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "booking_check_in_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "booking_check_out_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "guest_journey_started_date_utc",
"rawType": "object",
"type": "unknown"
},
{
"name": "id_accommodation",
"rawType": "int64",
"type": "integer"
}
],
"ref": "10c05935-d77f-4555-ab33-945e8d49fbd0",
"rows": [
[
"0",
"975057",
"False",
"2024-12-04",
"2024-12-05",
"2025-03-31",
null,
"196871"
],
[
"1",
"975059",
"False",
"2024-12-04",
"2024-12-06",
"2024-12-08",
"2024-12-07",
"196875"
],
[
"2",
"975060",
"False",
"2024-12-04",
"2025-01-26",
"2025-01-29",
"2024-12-07",
"196876"
],
[
"3",
"975061",
"False",
"2024-12-04",
"2024-12-15",
"2025-03-15",
null,
"196872"
],
[
"4",
"975062",
"False",
"2024-12-04",
"2024-12-11",
"2025-03-10",
"2024-12-10",
"196875"
],
[
"5",
"975063",
"False",
"2024-12-04",
"2024-12-10",
"2025-03-11",
"2024-12-04",
"196877"
],
[
"6",
"975065",
"False",
"2024-12-04",
"2024-12-05",
"2024-12-10",
"2024-12-04",
"196876"
],
[
"7",
"975066",
"False",
"2024-12-04",
"2024-12-20",
"2024-12-23",
"2024-12-04",
"196876"
],
[
"8",
"975067",
"False",
"2024-12-04",
"2025-01-14",
"2025-01-18",
"2025-01-03",
"196876"
],
[
"9",
"975068",
"False",
"2024-12-04",
"2025-05-20",
"2025-05-23",
null,
"196875"
],
[
"10",
"975070",
"False",
"2024-12-04",
"2025-01-25",
"2025-01-27",
"2024-12-04",
"196869"
],
[
"11",
"975071",
"False",
"2024-12-04",
"2025-01-27",
"2025-01-30",
"2024-12-04",
"196869"
],
[
"12",
"982700",
"False",
"2024-12-11",
"2024-12-13",
"2024-12-14",
"2024-12-11",
"196876"
],
[
"13",
"984590",
"False",
"2024-12-12",
"2025-02-05",
"2025-02-10",
"2024-12-12",
"199063"
],
[
"14",
"985483",
"False",
"2024-12-13",
"2024-12-25",
"2024-12-29",
"2024-12-14",
"199360"
],
[
"15",
"986720",
"False",
"2024-12-14",
"2025-01-23",
"2025-01-25",
"2024-12-14",
"199360"
],
[
"16",
"987812",
"False",
"2024-12-15",
"2025-02-10",
"2025-02-15",
"2024-12-15",
"199360"
],
[
"17",
"989579",
"False",
"2024-12-17",
"2024-12-23",
"2024-12-26",
"2024-12-17",
"200860"
],
[
"18",
"989580",
"False",
"2024-12-17",
"2024-12-20",
"2024-12-22",
"2024-12-17",
"200860"
],
[
"19",
"989581",
"True",
"2024-12-17",
"2024-12-31",
"2025-01-02",
"2024-12-24",
"200860"
],
[
"20",
"989582",
"False",
"2024-12-17",
"2024-12-23",
"2024-12-26",
"2024-12-17",
"200858"
],
[
"21",
"990071",
"False",
"2024-12-17",
"2024-12-31",
"2025-01-02",
"2024-12-19",
"199360"
],
[
"22",
"990606",
"False",
"2024-12-17",
"2024-12-18",
"2024-12-22",
"2024-12-17",
"199360"
],
[
"23",
"991162",
"False",
"2024-12-18",
"2024-12-29",
"2024-12-31",
"2024-12-19",
"199360"
],
[
"24",
"991894",
"False",
"2024-12-18",
"2025-02-28",
"2025-03-03",
"2024-12-18",
"196876"
],
[
"25",
"993698",
"False",
"2024-12-20",
"2024-12-30",
"2024-12-31",
"2024-12-20",
"202697"
],
[
"26",
"994300",
"True",
"2024-12-20",
"2025-01-18",
"2025-01-20",
"2024-12-20",
"200729"
],
[
"27",
"994888",
"False",
"2024-12-21",
"2024-12-23",
"2024-12-24",
"2024-12-21",
"196876"
],
[
"28",
"994974",
"False",
"2024-12-21",
"2024-12-30",
"2025-01-02",
"2024-12-21",
"203134"
],
[
"29",
"995617",
"False",
"2024-12-22",
"2024-12-28",
"2024-12-30",
"2024-12-22",
"196876"
],
[
"30",
"995692",
"True",
"2024-12-22",
"2024-12-30",
"2025-01-02",
"2024-12-22",
"201859"
],
[
"31",
"996081",
"False",
"2024-12-22",
"2025-01-27",
"2025-02-02",
"2024-12-29",
"203017"
],
[
"32",
"996092",
"False",
"2024-12-22",
"2025-01-30",
"2025-02-04",
"2024-12-22",
"199360"
],
[
"33",
"996397",
"False",
"2024-12-22",
"2025-01-11",
"2025-01-15",
"2024-12-22",
"199360"
],
[
"34",
"997018",
"False",
"2024-12-23",
"2025-02-15",
"2025-02-21",
"2024-12-24",
"199360"
],
[
"35",
"997710",
"False",
"2024-12-24",
"2025-01-09",
"2025-01-13",
"2024-12-24",
"203017"
],
[
"36",
"997777",
"False",
"2024-12-24",
"2024-12-23",
"2024-12-26",
"2024-12-24",
"196869"
],
[
"37",
"998900",
"False",
"2024-12-25",
"2025-01-02",
"2025-01-05",
"2024-12-27",
"199360"
],
[
"38",
"998926",
"False",
"2024-12-25",
"2024-12-26",
"2024-12-31",
"2024-12-25",
"196869"
],
[
"39",
"999495",
"False",
"2024-12-25",
"2024-12-27",
"2024-12-28",
"2024-12-25",
"199059"
],
[
"40",
"999663",
"False",
"2024-12-26",
"2024-12-26",
"2024-12-30",
"2024-12-26",
"203017"
],
[
"41",
"1000059",
"False",
"2024-12-26",
"2024-12-27",
"2024-12-30",
"2024-12-27",
"199199"
],
[
"42",
"1000743",
"False",
"2024-12-27",
"2025-03-22",
"2025-03-29",
"2024-12-27",
"199360"
],
[
"43",
"1000745",
"False",
"2024-12-27",
"2024-12-27",
"2024-12-29",
"2024-12-27",
"200858"
],
[
"44",
"1000746",
"False",
"2024-12-27",
"2024-12-29",
"2025-01-02",
"2024-12-27",
"200858"
],
[
"45",
"1000808",
"False",
"2024-12-27",
"2024-12-27",
"2024-12-29",
null,
"202455"
],
[
"46",
"1000809",
"False",
"2024-12-27",
"2025-02-06",
"2025-02-07",
"2024-12-28",
"200860"
],
[
"47",
"1000883",
"False",
"2024-12-27",
"2025-01-01",
"2025-01-05",
"2024-12-27",
"202594"
],
[
"48",
"1000951",
"True",
"2024-12-27",
"2025-01-09",
"2025-01-15",
"2024-12-27",
"203382"
],
[
"49",
"1001807",
"False",
"2024-12-27",
"2024-12-27",
"2024-12-28",
"2024-12-27",
"196876"
]
],
"shape": {
"columns": 7,
"rows": 20280
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_booking</th>\n",
" <th>has_resolution_incident</th>\n",
" <th>booking_created_date_utc</th>\n",
" <th>booking_check_in_date_utc</th>\n",
" <th>booking_check_out_date_utc</th>\n",
" <th>guest_journey_started_date_utc</th>\n",
" <th>id_accommodation</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>975057</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-05</td>\n",
" <td>2025-03-31</td>\n",
" <td>None</td>\n",
" <td>196871</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>975059</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-06</td>\n",
" <td>2024-12-08</td>\n",
" <td>2024-12-07</td>\n",
" <td>196875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>975060</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2025-01-26</td>\n",
" <td>2025-01-29</td>\n",
" <td>2024-12-07</td>\n",
" <td>196876</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>975061</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-15</td>\n",
" <td>2025-03-15</td>\n",
" <td>None</td>\n",
" <td>196872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>975062</td>\n",
" <td>False</td>\n",
" <td>2024-12-04</td>\n",
" <td>2024-12-11</td>\n",
" <td>2025-03-10</td>\n",
" <td>2024-12-10</td>\n",
" <td>196875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20275</th>\n",
" <td>931096</td>\n",
" <td>False</td>\n",
" <td>2024-10-31</td>\n",
" <td>2024-11-08</td>\n",
" <td>2024-11-13</td>\n",
" <td>None</td>\n",
" <td>187560</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20276</th>\n",
" <td>931086</td>\n",
" <td>False</td>\n",
" <td>2024-10-31</td>\n",
" <td>2024-11-15</td>\n",
" <td>2024-11-18</td>\n",
" <td>None</td>\n",
" <td>187585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20277</th>\n",
" <td>931082</td>\n",
" <td>False</td>\n",
" <td>2024-10-31</td>\n",
" <td>2024-12-20</td>\n",
" <td>2024-12-27</td>\n",
" <td>None</td>\n",
" <td>187585</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20278</th>\n",
" <td>926634</td>\n",
" <td>False</td>\n",
" <td>2024-10-27</td>\n",
" <td>2025-02-13</td>\n",
" <td>2025-02-16</td>\n",
" <td>None</td>\n",
" <td>185004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20279</th>\n",
" <td>919656</td>\n",
" <td>False</td>\n",
" <td>2024-10-21</td>\n",
" <td>2025-01-16</td>\n",
" <td>2025-01-20</td>\n",
" <td>None</td>\n",
" <td>185004</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20280 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" id_booking has_resolution_incident booking_created_date_utc \\\n",
"0 975057 False 2024-12-04 \n",
"1 975059 False 2024-12-04 \n",
"2 975060 False 2024-12-04 \n",
"3 975061 False 2024-12-04 \n",
"4 975062 False 2024-12-04 \n",
"... ... ... ... \n",
"20275 931096 False 2024-10-31 \n",
"20276 931086 False 2024-10-31 \n",
"20277 931082 False 2024-10-31 \n",
"20278 926634 False 2024-10-27 \n",
"20279 919656 False 2024-10-21 \n",
"\n",
" booking_check_in_date_utc booking_check_out_date_utc \\\n",
"0 2024-12-05 2025-03-31 \n",
"1 2024-12-06 2024-12-08 \n",
"2 2025-01-26 2025-01-29 \n",
"3 2024-12-15 2025-03-15 \n",
"4 2024-12-11 2025-03-10 \n",
"... ... ... \n",
"20275 2024-11-08 2024-11-13 \n",
"20276 2024-11-15 2024-11-18 \n",
"20277 2024-12-20 2024-12-27 \n",
"20278 2025-02-13 2025-02-16 \n",
"20279 2025-01-16 2025-01-20 \n",
"\n",
" guest_journey_started_date_utc id_accommodation \n",
"0 None 196871 \n",
"1 2024-12-07 196875 \n",
"2 2024-12-07 196876 \n",
"3 None 196872 \n",
"4 2024-12-10 196875 \n",
"... ... ... \n",
"20275 None 187560 \n",
"20276 None 187585 \n",
"20277 None 187585 \n",
"20278 None 185004 \n",
"20279 None 185004 \n",
"\n",
"[20280 rows x 7 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_bookings_and_claims "
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "2c9927b0",
"metadata": {},
"outputs": [],
"source": [
"# Tokenizing friendly name\n",
"\n",
"# Translation table that turns every ASCII punctuation character into a space,\n",
"# so words joined by punctuation (e.g. 'Pool+Hot') stay separate tokens.\n",
"_PUNCT_TO_SPACE = str.maketrans(string.punctuation, ' ' * len(string.punctuation))\n",
"\n",
"\n",
"def preprocess(text):\n",
"    \"\"\"Normalise a listing name for tokenisation.\n",
"\n",
"    Lowercases the text, replaces ASCII punctuation with spaces and collapses\n",
"    runs of whitespace. Missing values (None/NaN) become an empty string instead\n",
"    of the literal tokens 'none'/'nan'.\n",
"\n",
"    Args:\n",
"        text: Raw listing name; may be missing.\n",
"\n",
"    Returns:\n",
"        The cleaned name as a string.\n",
"    \"\"\"\n",
"    if pd.isna(text):\n",
"        return ''\n",
"    spaced = str(text).lower().translate(_PUNCT_TO_SPACE)\n",
"    return ' '.join(spaced.split())\n",
"\n",
"\n",
"df_listing_details['clean_name'] = df_listing_details['friendly_name'].apply(preprocess)\n",
"\n",
"# Basic length features\n",
"name_tokens = df_listing_details['clean_name'].str.split()\n",
"df_listing_details['char_count'] = df_listing_details['clean_name'].str.len()\n",
"df_listing_details['word_count'] = name_tokens.str.len()\n",
"df_listing_details['unique_word_count'] = name_tokens.apply(lambda words: len(set(words)))\n",
"\n",
"# Vectorize with TF-IDF (unigrams + bigrams)\n",
"vectorizer = TfidfVectorizer(ngram_range=(1,2), max_features=1000)\n",
"X_tfidf = vectorizer.fit_transform(df_listing_details['clean_name'])\n",
"\n",
"# Dimensionality reduction to get dense features\n",
"svd = TruncatedSVD(n_components=30, random_state=42)\n",
"X_reduced = svd.fit_transform(X_tfidf)\n",
"\n",
"# Append the SVD components as features, one column per component\n",
"for i, component in enumerate(X_reduced.T):\n",
"    df_listing_details[f'tfidf_svd_{i}'] = component\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "0e845d47",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "id_accommodation",
"rawType": "int64",
"type": "integer"
},
{
"name": "friendly_name",
"rawType": "object",
"type": "unknown"
},
{
"name": "clean_name",
"rawType": "object",
"type": "string"
},
{
"name": "char_count",
"rawType": "int64",
"type": "integer"
},
{
"name": "word_count",
"rawType": "int64",
"type": "integer"
},
{
"name": "unique_word_count",
"rawType": "int64",
"type": "integer"
},
{
"name": "tfidf_svd_0",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_1",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_2",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_3",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_4",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_5",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_6",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_7",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_8",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_9",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_10",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_11",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_12",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_13",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_14",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_15",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_16",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_17",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_18",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_19",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_20",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_21",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_22",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_23",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_24",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_25",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_26",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_27",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_28",
"rawType": "float64",
"type": "float"
},
{
"name": "tfidf_svd_29",
"rawType": "float64",
"type": "float"
}
],
"ref": "85040f21-ec13-4afa-b027-48d2b85f98ba",
"rows": [
[
"0",
"10368",
"Maddox St",
"maddox st",
"9",
"2",
"2",
"0.010854894473435336",
"-0.029763070635827105",
"-0.02105066582524067",
"0.009316318557873451",
"0.009155777902508419",
"-0.03180902701624234",
"-0.08355302470987784",
"0.14465441346736907",
"0.85105054764977",
"-0.1769927058773534",
"0.41674290004218606",
"0.04510420937810948",
"0.07922413267043014",
"0.03122394097244434",
"-0.040208410263027704",
"-0.10020706150703707",
"-0.025422733955694426",
"0.02654530230003335",
"0.006658142055919703",
"-0.0020431476477431724",
"0.004606685810547015",
"-0.010669006854363734",
"0.022564171527997886",
"0.00957662866750014",
"-0.004964053470350576",
"0.002377031437672481",
"0.008182074546599223",
"0.020270943516535682",
"0.011362283474597935",
"0.014186012254990409"
],
[
"1",
"11059",
"HIL-1",
"hil1",
"4",
"1",
"1",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"2",
"14345",
"SUS-2",
"sus2",
"4",
"1",
"1",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"3",
"277469",
"4000 sqft Lakefront Retreat | Private Hot Tub #NH",
"4000 sqft lakefront retreat private hot tub nh",
"47",
"8",
"8",
"0.11807863983296853",
"-0.33172859056317217",
"-0.036329383979882085",
"-0.11120995451599419",
"-0.2648410708278244",
"0.28969316492859243",
"0.2643824163243423",
"-0.03821167308158958",
"0.008220657395023699",
"-0.023905334669079584",
"0.054517013328940295",
"-0.053951464284057546",
"-0.05298205109196754",
"0.028565222921518092",
"0.026433923280066468",
"0.021211850384578266",
"0.003354963230503048",
"0.0011528131338475224",
"-0.02296475741607736",
"0.0015172067755024627",
"-0.03614074981677304",
"-0.052405382429189976",
"0.0932273886312051",
"0.019269231562038663",
"-0.0067738990935211334",
"0.07002711460920275",
"0.10255391406194633",
"-0.007377827078676062",
"0.041281195581921525",
"0.015409930540018501"
],
[
"4",
"28561",
"LAN-3",
"lan3",
"4",
"1",
"1",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"5",
"163044",
"Rosa Negra Suite Near Stadium",
"rosa negra suite near stadium",
"29",
"5",
"5",
"0.030194733434346824",
"-0.06934684562032839",
"0.002098376377075514",
"-0.036058333478653594",
"0.0060657001846643075",
"-0.09440813354142205",
"0.08588431022753748",
"0.0037569922758366936",
"-0.010829083544205245",
"0.014250921210852443",
"-0.012539477084267614",
"0.5986555079970162",
"-0.07921362157716288",
"0.033259422630366686",
"-0.0346163931981728",
"0.04336577955080058",
"0.009566497590113906",
"-0.09878557842898583",
"-0.011316834406345897",
"0.023545381374514864",
"0.08739649770927448",
"0.002944990745431596",
"0.015592859518697376",
"0.013206055165992776",
"-0.08061003555872509",
"0.008968109977770049",
"0.0008177635395260081",
"0.01109196144064344",
"0.10081632682104524",
"0.08335306470006422"
],
[
"6",
"164229",
"Stadio Aparta | Rosa Negra Suite",
"stadio aparta rosa negra suite",
"31",
"5",
"5",
"0.018190497737308055",
"-0.04136222325088856",
"0.0005534405779221063",
"-0.02949272229426415",
"0.0039296951084351",
"-0.0810586517517607",
"0.0986742463682681",
"-0.007132923764480539",
"0.008304548638943917",
"0.016675103325545847",
"-0.03304156089861118",
"0.7367598318708279",
"-0.181185393546972",
"0.06708280703204365",
"-0.019738775390829657",
"0.09558765971391948",
"-0.0888313568596728",
"-0.05462237165575978",
"0.06820180083072858",
"-0.024408802835040454",
"0.02076317486178419",
"-0.02351204621531273",
"-0.009939101922331467",
"-0.012282416180423604",
"-0.0277057195250498",
"0.07210284808751195",
"0.05175087178664732",
"0.007131379141495004",
"0.04776650677922776",
"0.051929033718588584"
],
[
"7",
"202702",
"Ocean View - 2nd from Beach @ Surf City, LBI, NJ",
"ocean view 2nd from beach surf city lbi nj",
"44",
"9",
"9",
"0.06122250335246087",
"-0.10915638282371175",
"0.017018199781162906",
"-0.028083000248974605",
"0.021436257405334294",
"-0.03961450953780221",
"-0.05161966113544465",
"0.09567755878851057",
"0.005947423046612736",
"0.02888705628530226",
"-0.054958265005236824",
"0.002392665330894779",
"0.06708497790892519",
"0.010922929185108933",
"0.00718199306547029",
"0.021759516159678156",
"0.08869762502423573",
"0.07250157273560896",
"0.2760176137965987",
"-0.04829559114771023",
"0.03395989171820788",
"0.07716454759286719",
"0.03135744968252249",
"-0.141377815548572",
"-0.14487051568545287",
"0.028111997990757153",
"-0.11789548757588354",
"0.17377972184148355",
"-0.11579330977257829",
"0.1590861022375369"
],
[
"8",
"203920",
"Poplar",
"poplar",
"6",
"1",
"1",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"9",
"261554",
"Steps to Beach | Linens + Towels included | AC",
"steps to beach linens towels included ac",
"43",
"7",
"7",
"0.0510587106595363",
"-0.11256588743295132",
"0.014477213150155684",
"-0.029336140057495106",
"0.009212079486567978",
"-0.05091937863821739",
"-0.08412508664425504",
"0.24650151029205994",
"-0.02437003639310213",
"0.016744770680865478",
"-0.08103267836498586",
"-0.014442456968626465",
"-0.011253948367273873",
"0.019012529589532853",
"-0.01853877899906787",
"-0.04628515724209761",
"0.04299554588419866",
"0.020754033073400034",
"0.15491568300359634",
"0.016363151568852964",
"-0.025203314519445755",
"-0.0308587610067647",
"0.04752432738389304",
"-0.004797441333480823",
"-0.009691669652794992",
"-0.0009630178648001256",
"-0.0026608917007837673",
"0.0020185511557285663",
"-0.0014652623285652119",
"-0.016716885115022524"
],
[
"10",
"33910",
"WAR-2",
"war2",
"4",
"1",
"1",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"11",
"277505",
"Incredible SeaPoint Oasis W Pool",
"incredible seapoint oasis w pool",
"32",
"5",
"5",
"0.04732284965456589",
"-0.09364208954684786",
"-0.0007939176474757048",
"-0.03507492912676837",
"0.024743604486507864",
"-0.00407684293914436",
"-0.038665664681681935",
"-0.03195658866611633",
"-0.015043065336648422",
"0.012338329793657461",
"0.013039792210389279",
"0.011356797967246748",
"0.1319687367739692",
"-0.15936425809849142",
"-0.14410105862131697",
"0.009706568431572497",
"-0.12710822232553223",
"-0.10218723550502785",
"0.02494132513659701",
"0.06651023064140257",
"0.02515024322265294",
"-0.08365328422819543",
"-0.17449846443127823",
"0.041802396836161014",
"-0.04204285073697456",
"0.031410191292514364",
"-0.0724255932286865",
"-0.048935085960517766",
"-0.12209383525355373",
"0.04670839224017179"
],
[
"12",
"277507",
"181 Palisades",
"181 palisades",
"13",
"2",
"2",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"13",
"277508",
"585 Jacks -3 Bedroom",
"585 jacks 3 bedroom",
"19",
"4",
"4",
"0.02686426223542092",
"-0.044881825521560864",
"-0.0011620414905876117",
"-0.011323432910898617",
"0.009488972912966877",
"-0.05807122482050932",
"-0.04057868397855808",
"-0.046860864507439495",
"0.018733301478272298",
"0.00921323013366453",
"-0.026838754872621755",
"-0.033565717090756245",
"-0.05732169403475782",
"0.03265542625287648",
"0.0018296819250799966",
"0.09563454233152986",
"0.029682374863393875",
"-0.009983087004219683",
"-0.03960356795725783",
"-0.0857399963281308",
"-0.03391042470115134",
"-0.05157939043793175",
"0.14116698793259994",
"0.07785596463500082",
"-0.02862398349622704",
"-0.021925415092193968",
"0.07513900184980368",
"0.1141066140075215",
"-0.031357011497913215",
"0.042434635727796634"
],
[
"14",
"119411",
"",
"",
"0",
"0",
"0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"15",
"48607",
"200 Palms",
"200 palms",
"9",
"2",
"2",
"0.004133191108767369",
"-0.004721144533623115",
"-0.0006434735588647677",
"-0.0037438363492050797",
"0.00840458706404377",
"-0.0007072711722346394",
"0.007998910366230249",
"-0.0035056239535995043",
"0.0008821854607854714",
"0.0013029842004018118",
"7.421784910126484e-05",
"-0.003266318977957291",
"0.0049787035273324695",
"0.006086538559075281",
"-0.0027458326974535486",
"0.005749179664921147",
"-0.006054294621503454",
"-0.005712259275077988",
"-0.0008356503813473287",
"0.005208184237174355",
"-0.00616967766858136",
"-0.007366569325136684",
"-0.006573909368458014",
"0.005752387959978305",
"0.00013161608838015607",
"0.00042937974781380857",
"0.0033638600730636742",
"0.0044692945352669084",
"0.0058142352033818505",
"-0.001471791838026868"
],
[
"16",
"48896",
"Chilworth Paddington",
"chilworth paddington",
"20",
"2",
"2",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"17",
"83345",
"King Bed | Private Parking | Strong WiFi",
"king bed private parking strong wifi",
"38",
"6",
"6",
"0.0689821290032845",
"-0.1401859351717836",
"0.028451660272199968",
"-0.03146074336813536",
"0.022052414271210442",
"-0.08574973230896313",
"-0.05202208551506233",
"-0.07945658633352652",
"-0.007988712984951006",
"-0.006118743680588071",
"0.005448897540375833",
"0.06349529435319165",
"0.06946542591277992",
"-0.1980941723313689",
"0.5027299779747814",
"-0.22883971561753946",
"-0.07219070736358499",
"-0.17178146150781115",
"0.010412538080097877",
"0.0994384609951191",
"-0.14578803833933923",
"0.020109717979585353",
"0.06786980093252255",
"-0.020633674695437528",
"0.09622752732293914",
"0.01886803536277238",
"0.07411256035666208",
"0.05658616565807412",
"-0.046367545782473846",
"-0.06155456646335058"
],
[
"18",
"84166",
"Cozy Escape | Strong Wi-Fi | Prime Location",
"cozy escape strong wifi prime location",
"40",
"6",
"6",
"0.05344788521631131",
"-0.10604518559297722",
"-0.004519592431420178",
"-0.04937600907825799",
"0.014169334404937693",
"-0.1024916926795735",
"0.007929167807976293",
"-0.051103515311812515",
"0.0426978796367734",
"0.022483373461679387",
"-0.055470824199681",
"0.0842925627976641",
"-0.052710292083039956",
"-0.0105437277226033",
"-0.06041438988427729",
"-0.07353967665014001",
"0.06649294694385002",
"0.08348155972701894",
"-0.03429877069072082",
"-0.025956263963022683",
"-0.0755185310864939",
"0.06033860679560669",
"-0.05800307498795776",
"0.11400571487020646",
"0.1925789929476167",
"-0.016503863730443803",
"-0.1292915596375555",
"-0.0187306762818722",
"-0.2160793956587082",
"-0.17883387542424664"
],
[
"19",
"48900",
"Bakers Passage 1",
"bakers passage 1",
"16",
"3",
"3",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"20",
"48902",
"Bakers Passage 2",
"bakers passage 2",
"16",
"3",
"3",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"21",
"105022",
"Flamingo Cove 7",
"flamingo cove 7",
"15",
"3",
"3",
"0.011325714365810084",
"-0.016490588216398976",
"-0.010999022824079965",
"0.001119639607226194",
"0.002873394264802745",
"-0.007047309289939846",
"-0.00046495398182395584",
"0.010315873318051264",
"-0.008792784964387812",
"-0.007152323609183366",
"0.0075507977966610676",
"0.003991951754138081",
"-0.0031359274967525737",
"-0.012003028510610279",
"0.006316208208434227",
"0.003113525780318979",
"0.010188785286912265",
"-0.0003473498857190098",
"-0.014203614888752216",
"-0.028554158304061668",
"-0.0511956378071436",
"0.003282671763752041",
"-0.00504382068968379",
"0.019074798359506775",
"-0.019137714457663917",
"0.021136108991450946",
"0.016924802234514372",
"0.019330740943810053",
"0.021471430265002995",
"-0.03275434424474198"
],
[
"22",
"51902",
"La Camilla, comfortable private villa with pool",
"la camilla comfortable private villa with pool",
"46",
"7",
"7",
"0.0778500819028686",
"-0.21242230705167767",
"-0.009502228985849804",
"-0.0735694040849097",
"0.35418751053014835",
"0.11789096987704727",
"-0.006244410599986284",
"-0.09851127531373483",
"-0.002767113249381913",
"0.01886580089708268",
"0.013900322099586266",
"-0.0015715098355375714",
"0.11417802686594836",
"-0.18266344722818637",
"-0.11779038067268596",
"0.08616491678955979",
"-0.2207631120738757",
"-0.052634577232889286",
"-0.04004742676142648",
"-0.02681886669984597",
"-0.075522850883977",
"-0.06987154923504552",
"0.15185110812564112",
"-0.022032236870329274",
"0.007477922769896101",
"0.0457025058116597",
"0.07994351418020901",
"0.005736246886137931",
"0.03209222613704549",
"-0.019636893073062434"
],
[
"23",
"105038",
"Casa Prieta",
"casa prieta",
"11",
"2",
"2",
"0.00833919328416586",
"-0.03278268995755237",
"-0.000286235909429752",
"-0.01719667352971122",
"-0.0024463937540799754",
"0.005318977501265963",
"-0.06018469418609671",
"0.02737170641307136",
"-0.03883186523284652",
"0.8825292227416641",
"0.43296326469419283",
"-0.009506828680307082",
"-0.07218032749992064",
"0.045356926476299575",
"0.0013101185109432218",
"-0.04169698164412125",
"-0.008905818293634571",
"0.03296062345028381",
"-0.014074450513948749",
"-0.010259462101118586",
"-0.0007853519779285374",
"-0.0024170040787492705",
"0.01957309267931248",
"-0.00895385126435955",
"0.017673141492270665",
"-0.00488709030316019",
"0.008702225600353242",
"0.01122997891016899",
"-0.007553132909815195",
"-0.007814798648288476"
],
[
"24",
"105039",
"Casa Ironbark",
"casa ironbark",
"13",
"2",
"2",
"0.00833919328416586",
"-0.03278268995755237",
"-0.000286235909429752",
"-0.01719667352971122",
"-0.0024463937540799754",
"0.005318977501265963",
"-0.06018469418609671",
"0.02737170641307136",
"-0.03883186523284652",
"0.8825292227416641",
"0.43296326469419283",
"-0.009506828680307082",
"-0.07218032749992064",
"0.045356926476299575",
"0.0013101185109432218",
"-0.04169698164412125",
"-0.008905818293634571",
"0.03296062345028381",
"-0.014074450513948749",
"-0.010259462101118586",
"-0.0007853519779285374",
"-0.0024170040787492705",
"0.01957309267931248",
"-0.00895385126435955",
"0.017673141492270665",
"-0.00488709030316019",
"0.008702225600353242",
"0.01122997891016899",
"-0.007553132909815195",
"-0.007814798648288476"
],
[
"25",
"105041",
"Villa Catalina #8",
"villa catalina 8",
"16",
"3",
"3",
"0.04699666563400539",
"-0.16688209087270223",
"-0.044143907433968885",
"-0.07642849061990847",
"0.8451438494775528",
"0.41608364295610645",
"0.10859592012190307",
"0.008645680502725094",
"0.007056500974092191",
"-0.02622418584914399",
"0.012139545929550969",
"-0.0014673961440761771",
"-0.09265229970152933",
"0.0865575023798689",
"0.06429413154090331",
"-0.024870748837655124",
"0.06327447140236153",
"0.014660908441987695",
"-0.06375456547067693",
"0.005962143514250229",
"0.027060573702001163",
"0.02084181544080372",
"-0.016893166896049493",
"-0.007367558589710238",
"0.004209683635210898",
"0.004567662668747518",
"-0.010630634713975415",
"0.004204430656115629",
"-0.027589738087181948",
"-0.012826344957506619"
],
[
"26",
"51921",
"Agriturismo Molino Verde, modern and comfortable",
"agriturismo molino verde modern and comfortable",
"47",
"6",
"6",
"0.057155139121545906",
"-0.10341748840170799",
"0.015145659334251736",
"-0.03431926324616981",
"0.016713497109304167",
"-0.10241230275353747",
"-0.047744528858601136",
"-0.08104724329462748",
"0.01962229580144628",
"0.010514091254433167",
"-0.03667768762662721",
"0.003110821889545816",
"-0.009352858197203365",
"-0.09699749551244438",
"0.03262130916983554",
"0.03229646114602426",
"0.03480197221405257",
"0.08673790866415203",
"-0.11209859880106532",
"-0.17922307171640636",
"0.22370950242333046",
"-0.12383560842032844",
"0.07036332706807497",
"-0.06887648849837463",
"-0.027645191677377133",
"0.0031917032644766044",
"0.15842142313111934",
"-0.027247896588485324",
"-0.046543495684190264",
"-0.14359184278242149"
],
[
"27",
"51926",
"Villa Farneta, large luxury villa with fenced pool",
"villa farneta large luxury villa with fenced pool",
"49",
"8",
"7",
"0.08664937149149775",
"-0.24356878164425347",
"-0.017018041543061736",
"-0.09038789927596155",
"0.5598632877235699",
"0.23491476448517012",
"0.028612233297507436",
"-0.06795386128335895",
"-0.00018537024945157574",
"-0.0059677913600019745",
"0.011184681379918853",
"-0.017862793718836967",
"0.04245463993591148",
"-0.11417888971398042",
"-0.0651731266684504",
"0.05694709382845593",
"-0.15138109531546945",
"-0.02054451823666379",
"0.026302037018176905",
"-0.0662359886639469",
"0.007729037568272841",
"0.02812162782428571",
"0.0033959114323068185",
"0.005400717135319007",
"0.018030795444814823",
"0.004889863238087341",
"0.06259856304247008",
"-0.007194474626262895",
"0.06066934975237882",
"0.017941254063468335"
],
[
"28",
"51928",
"Villa La Ginestra, private villa with pool",
"villa la ginestra private villa with pool",
"41",
"7",
"6",
"0.08410520354886748",
"-0.24422950198560744",
"-0.02186835233405029",
"-0.08857418361835667",
"0.577472521209152",
"0.23899345709027103",
"0.025377202948347985",
"-0.08291699566723962",
"-0.0011529236802836804",
"0.01037273111745643",
"0.017788628688232427",
"-0.008271110459901199",
"0.08447988672006658",
"-0.14511454021152964",
"-0.09333898524833117",
"0.07936711490608045",
"-0.18981761829378363",
"-0.04537379185889853",
"-0.053584800733158904",
"-0.016614367266523163",
"-0.06340465735028226",
"-0.061117017190598234",
"0.13126292111010732",
"-0.02088206437939681",
"0.0016259877513674229",
"0.04801604901875135",
"0.06164483636592453",
"-0.0007409218455970562",
"0.025269478929435013",
"-0.01670217565785082"
],
[
"29",
"51938",
"Villa Badia, spacious villa with private infinity pool",
"villa badia spacious villa with private infinity pool",
"53",
"8",
"7",
"0.09355454895627736",
"-0.26489730096193315",
"-0.01607867877354311",
"-0.1011724162578091",
"0.5811641521254505",
"0.2267695833012585",
"0.03164147279785071",
"-0.0931563279572158",
"-0.0022337680293293156",
"-0.009594474367037463",
"0.01936789360571811",
"-0.009488280322701782",
"0.07255809300013182",
"-0.14741588248771378",
"-0.06960239306676058",
"0.06526849077395726",
"-0.16956091460336775",
"-0.046957855950288466",
"-0.058467287725925646",
"-0.04186529740084405",
"-0.0788218606764386",
"-0.11385194984178695",
"0.1662873725119552",
"0.01203893457479645",
"0.009804132200456507",
"0.04085796884286511",
"0.034531681603910666",
"0.018095356668083148",
"0.04663524535664683",
"-0.012355836173820876"
],
[
"30",
"51941",
"Casa Paciano, near town, apartments with pool",
"casa paciano near town apartments with pool",
"43",
"7",
"7",
"0.06836640947155294",
"-0.17628319063448047",
"0.006703179446463787",
"-0.06392273943746733",
"0.05191695966059562",
"-0.04669374890914851",
"-0.05008318991885393",
"-0.04974395482388175",
"-0.030841992844479006",
"0.3847511012999523",
"0.1875048386006791",
"0.009176825810745503",
"0.15435548864445997",
"-0.16355795650723293",
"-0.1555279078884765",
"0.05533540525310022",
"-0.08293985417546151",
"-0.11587388987854183",
"-0.06958765712249364",
"0.054287484559972454",
"0.029352566760326933",
"-0.0054342274098185634",
"0.07992458471142498",
"0.005971814785003407",
"-0.07822790189680921",
"-0.014090349627387002",
"0.009244786625243662",
"-0.0519497788628174",
"0.049331005526974905",
"0.047046548204243935"
],
[
"31",
"51942",
"La Pergola, agriturismo with large pool",
"la pergola agriturismo with large pool",
"38",
"6",
"6",
"0.05782562037348448",
"-0.13727564351607552",
"0.002252319917822672",
"-0.03553967274812833",
"0.06061092509866268",
"-0.018582074149029394",
"-0.037459855975999205",
"-0.0787047736504046",
"-0.009134642233083362",
"0.023368074599935763",
"0.014244604632111638",
"-0.004351545844374194",
"0.13371920752053557",
"-0.16289868789582992",
"-0.12056806250088024",
"0.07347589840340751",
"-0.20111066096117902",
"-0.021308372488878013",
"-0.0023285534993019337",
"-0.0003759121000108715",
"-0.04929601215772942",
"-0.05569974005242786",
"0.09550524345646882",
"-0.03698151381013771",
"-0.023598449934915253",
"0.017290057629518338",
"0.022186576156993664",
"-0.014627174477775444",
"-0.009979796579890097",
"-0.017392515097789038"
],
[
"32",
"52082",
"The Pom Pom House - In the heart of Palm Springs",
"the pom pom house in the heart of palm springs",
"47",
"10",
"8",
"0.462992691502346",
"0.11582644054325131",
"0.10154228527330911",
"0.06130208430360438",
"0.01957041311201364",
"-0.027798441608803574",
"-0.026485869132176865",
"-0.14196617889072918",
"0.058617862093225524",
"0.04234987305738795",
"-0.0677329713909989",
"-0.07933344767472951",
"-0.09700372879425292",
"0.21529072733399424",
"-0.06452580401272921",
"-0.12962874548013192",
"-0.06180694440771004",
"-0.20257078315345975",
"0.07226475918145103",
"-0.03131351372403371",
"-0.0364272219536456",
"0.01659032598835906",
"-0.020246866850096024",
"-0.10575328039020865",
"-0.14374106215431398",
"-0.09021743539096966",
"0.11206581008222051",
"-0.14378295056942564",
"0.08410712365446539",
"0.04156903608790058"
],
[
"33",
"52083",
"Silver Lake Views! The Hummingbird - Guest Suite",
"silver lake views the hummingbird guest suite",
"46",
"7",
"7",
"0.27160400818720426",
"0.04158328308666479",
"0.0033358280709456876",
"-0.04746824848818895",
"-0.013100181274298761",
"0.014448752071562997",
"0.01802414500886173",
"-0.006284358367192607",
"0.006957938385986974",
"-0.0002764499454481049",
"0.004212076129629814",
"0.13814671507424778",
"0.019121259363642683",
"-0.07022037653745336",
"0.07564952639431878",
"0.049683579207873245",
"-0.09054441724871622",
"0.03858924999221755",
"0.09429773307843196",
"-0.02419632687750997",
"0.06497045022906768",
"0.006837747616573975",
"0.03650365686432133",
"0.007572264306124368",
"-0.1027414487175163",
"-0.011357646541977365",
"-0.20631105068375277",
"-0.0020453014222415935",
"0.14056101409632712",
"-0.017741748484825005"
],
[
"34",
"105055",
"Sunset Heights 500",
"sunset heights 500",
"18",
"3",
"3",
"0.0072592801490982024",
"-0.014597770014355473",
"-0.013882283688092727",
"0.006300716404457177",
"0.015713625352055872",
"-0.0036687868760685188",
"0.011848746676155415",
"-0.002139301060794825",
"-0.003157510052802002",
"-0.0017630110764596287",
"0.005508089147382907",
"0.0028210094591025513",
"0.004037554380301537",
"-0.006981207481571884",
"0.0045510861905560356",
"-0.0012982053172738844",
"0.002688794743569394",
"0.0001286530350316376",
"-0.004277784864282033",
"-0.01914254497213363",
"-0.0011269472524639496",
"0.007971010579221808",
"-0.018581236230782",
"-0.05697704505423247",
"-0.003076221695372708",
"-0.011871129844978508",
"-0.022301494601469796",
"-0.015650558924011196",
"-0.05103755943604431",
"0.002509402300183974"
],
[
"35",
"105056",
"Casa Mango",
"casa mango",
"10",
"2",
"2",
"0.00833919328416586",
"-0.03278268995755237",
"-0.000286235909429752",
"-0.01719667352971122",
"-0.0024463937540799754",
"0.005318977501265963",
"-0.06018469418609671",
"0.02737170641307136",
"-0.03883186523284652",
"0.8825292227416641",
"0.43296326469419283",
"-0.009506828680307082",
"-0.07218032749992064",
"0.045356926476299575",
"0.0013101185109432218",
"-0.04169698164412125",
"-0.008905818293634571",
"0.03296062345028381",
"-0.014074450513948749",
"-0.010259462101118586",
"-0.0007853519779285374",
"-0.0024170040787492705",
"0.01957309267931248",
"-0.00895385126435955",
"0.017673141492270665",
"-0.00488709030316019",
"0.008702225600353242",
"0.01122997891016899",
"-0.007553132909815195",
"-0.007814798648288476"
],
[
"36",
"105059",
"Marina Resort #502",
"marina resort 502",
"17",
"3",
"3",
"0.008171699583572396",
"-0.0227802970026047",
"-0.0005163782477629944",
"-0.011485139451001186",
"0.02025094618675094",
"-0.012406694027830725",
"0.0029792698227323194",
"-0.006822657273073952",
"0.011562357950796747",
"0.02692559560330674",
"-0.0027148747186378714",
"-0.008809656963187145",
"0.03246604554754141",
"0.02253309861734156",
"0.016990540282637856",
"0.10533585518524495",
"0.0427952974230254",
"-0.04587202775897223",
"0.010272769310035509",
"0.05757337749248109",
"-0.052974340790144694",
"-0.03797583241318291",
"-0.006340099953806995",
"0.03646973897951955",
"0.014392734644564652",
"-0.033584479812595235",
"0.02126952049732214",
"-0.0327923431253457",
"0.029107526379160774",
"-0.006864097669665933"
],
[
"37",
"105062",
"Casa Tranquility",
"casa tranquility",
"16",
"2",
"2",
"0.006473783230104777",
"-0.023454205467309114",
"-0.00034391697755243016",
"-0.012253041857486362",
"0.0015553166319760743",
"0.00021542493680614273",
"-0.03987092506597992",
"0.010469839211077053",
"-0.021813566472350096",
"0.5528210304353214",
"0.26949105865309086",
"-0.00913059106884039",
"-0.047932820987733035",
"0.030283945272475753",
"-0.0015207986551071901",
"-0.018976845604350877",
"-0.0037988500981941675",
"0.01534792605194138",
"-0.0049339058056842265",
"-0.007218054456388164",
"4.541889694994225e-05",
"-0.0006560524285581598",
"0.010107525988090614",
"-0.0026947242755388586",
"0.013696951552949659",
"-0.00042024384284062005",
"0.0041861703255469725",
"0.0036645502137903087",
"-0.00345405820824307",
"-0.011176834316694612"
],
[
"38",
"105073",
"La Antigua 36",
"la antigua 36",
"13",
"3",
"3",
"0.009299959586004072",
"-0.019090602381279253",
"-0.011920072386306864",
"0.00434740986655745",
"0.03310933627038319",
"0.004706275412120669",
"-0.012648733079146175",
"-0.006915236062084281",
"-0.009560553742609078",
"0.02195843645273749",
"0.02130755673567668",
"0.00037699925952072873",
"0.023269163220454508",
"0.009190557549054014",
"-0.002482976170051882",
"0.01666971959755067",
"-0.04622746497426695",
"0.02315701577718638",
"-0.013363337884269246",
"0.01045728799539511",
"0.012529237565662392",
"-0.0014316488185984165",
"-0.005537778218539432",
"-0.0280471607370384",
"-0.013238088960290335",
"0.04237538116227747",
"-0.000725689346784577",
"0.01813419284971483",
"-0.009187819194969006",
"-0.011503534162050971"
],
[
"39",
"105077",
"La Antigua 28",
"la antigua 28",
"13",
"3",
"3",
"0.009299959586004072",
"-0.019090602381279253",
"-0.011920072386306864",
"0.00434740986655745",
"0.03310933627038319",
"0.004706275412120669",
"-0.012648733079146175",
"-0.006915236062084281",
"-0.009560553742609078",
"0.02195843645273749",
"0.02130755673567668",
"0.00037699925952072873",
"0.023269163220454508",
"0.009190557549054014",
"-0.002482976170051882",
"0.01666971959755067",
"-0.04622746497426695",
"0.02315701577718638",
"-0.013363337884269246",
"0.01045728799539511",
"0.012529237565662392",
"-0.0014316488185984165",
"-0.005537778218539432",
"-0.0280471607370384",
"-0.013238088960290335",
"0.04237538116227747",
"-0.000725689346784577",
"0.01813419284971483",
"-0.009187819194969006",
"-0.011503534162050971"
],
[
"40",
"53623",
"2 Napolean Richmond",
"2 napolean richmond",
"19",
"3",
"3",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"41",
"53625",
"81 Miramar HiddenBay",
"81 miramar hiddenbay",
"20",
"3",
"3",
"0.0010663670604508682",
"-0.0029586712513799515",
"0.0009615538131952004",
"-0.00021658715526603483",
"0.0009301136332845276",
"-0.0013134252058103926",
"-0.0025941720658938104",
"0.009615401015214579",
"-0.0019312643086067387",
"0.0010786511108032187",
"-0.003126201823944803",
"0.0015323146786312438",
"0.006794725508029116",
"0.0029599992622450183",
"-0.00476244754260282",
"-0.005385952944201377",
"0.013072490071471593",
"0.001992747847287832",
"0.02603429352928908",
"0.0029045718654666926",
"3.063964016197705e-05",
"-0.006992245246129906",
"0.00859569632467258",
"-0.001214260764968624",
"-0.0007791388357198406",
"-6.975151471279348e-05",
"0.004852992365330324",
"-0.0008974312796723022",
"-0.0019505131029742434",
"-0.003593535754450526"
],
[
"42",
"53627",
"24 Marlborough S/Bay",
"24 marlborough sbay",
"19",
"3",
"3",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"43",
"53629",
"326 Churchill Ave SB",
"326 churchill ave sb",
"20",
"4",
"4",
"0.0011269114205392106",
"-0.002998995630020017",
"0.00037482429343037614",
"-0.0012686732414806609",
"8.476333617688126e-05",
"-0.004745210420509996",
"-0.003453466030396947",
"0.0023558213868859635",
"0.0034302687968961287",
"-0.00011832825560494523",
"-0.006527442045304243",
"-0.007286168757046415",
"-0.01202110177854905",
"-0.009792477840641857",
"0.0063689301800860625",
"0.006152363963803892",
"0.015844630538032492",
"-0.020031084892708294",
"0.0012182162557024972",
"0.007131311979581763",
"0.032693610298430174",
"-0.010502754460958004",
"-0.018258479733619017",
"0.08934258867078382",
"-0.034605348576859984",
"-0.035207194928904374",
"0.03385307763554009",
"0.11073877891294265",
"-0.0021206290463196727",
"-0.023230304318818774"
],
[
"44",
"53637",
"241 Bathurst BOSTANE",
"241 bathurst bostane",
"20",
"3",
"3",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"45",
"53638",
"3/5HomeAve/Modern",
"35homeavemodern",
"15",
"1",
"1",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"46",
"53639",
"2/165Camb Retreat",
"2165camb retreat",
"16",
"2",
"2",
"0.12712807066200066",
"-0.1763903756457614",
"-0.017468179096953546",
"-0.11664742816632509",
"-0.062010543262121715",
"-0.22369785465575848",
"0.8324984804953949",
"0.13073907874245835",
"-0.04922227295199571",
"-0.06097548223190742",
"0.20002228598763194",
"-0.1818511143633542",
"-0.024348961486471393",
"0.04457476957160595",
"0.0682176827086146",
"0.06869322574140582",
"-0.045144372344278094",
"0.016349348496155523",
"0.04221275941444426",
"0.019458348883628945",
"0.003916276919832094",
"-0.0824836052771082",
"-0.014283116400593591",
"-0.028798305722662475",
"0.020217895447678987",
"0.038501240505014765",
"0.08665684119845073",
"0.0011235182502828745",
"-0.027304348414792862",
"0.016188765623493355"
],
[
"47",
"53641",
"165 Cambr-Residence",
"165 cambrresidence",
"18",
"2",
"2",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"48",
"53643",
"87 Kingston View Dv",
"87 kingston view dv",
"19",
"4",
"4",
"0.03542620176362099",
"-0.0667898917268821",
"0.0013412516477135654",
"-0.021466231912998297",
"-0.003661078281108786",
"0.0035818995491241467",
"-0.014716596790311986",
"0.0022186290437056773",
"0.02208694476354252",
"0.026519192348046772",
"-0.041796560575133396",
"-0.00015559116263038128",
"0.039637035531767874",
"-0.020646582253160458",
"0.06020935384864864",
"0.12217902926363679",
"-0.031155034121617485",
"0.07481542581656286",
"0.09422164898392116",
"-0.10417925431691048",
"-0.009928543672222229",
"0.04842572160814778",
"0.08006395176614495",
"-0.33318348735937675",
"-0.3041531929703319",
"0.055161153621484424",
"-0.2631279722398142",
"0.26996825074616154",
"-0.23552874323187892",
"0.15948917767266918"
],
[
"49",
"53960",
"3-Bedroom Oasis in Miami ",
"3bedroom oasis in miami ",
"24",
"4",
"4",
"0.06306439591453775",
"-0.06730992335696277",
"-0.007876006635670632",
"-0.022711909241044752",
"0.015924506499772992",
"-0.07541134017397146",
"-0.016092336450849648",
"-0.11338989446946819",
"0.037654994375165375",
"0.02650575592057726",
"-0.0600623295560825",
"-0.038237035257278416",
"-0.02084605390547835",
"0.05146251605795095",
"-0.0937894249265446",
"-0.07088442908258527",
"-0.03676772577864177",
"-0.08504937266396354",
"0.03724340111990427",
"0.05551793630823731",
"0.030456209795851532",
"-0.03404769476484225",
"-0.10039583254180666",
"0.04422424541052972",
"-0.012261224742634531",
"0.005139052962858013",
"-0.14014202451314134",
"-0.032766731270003",
"-0.029972677854539626",
"0.014141670605375576"
]
],
"shape": {
"columns": 36,
"rows": 3632
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_accommodation</th>\n",
" <th>friendly_name</th>\n",
" <th>clean_name</th>\n",
" <th>char_count</th>\n",
" <th>word_count</th>\n",
" <th>unique_word_count</th>\n",
" <th>tfidf_svd_0</th>\n",
" <th>tfidf_svd_1</th>\n",
" <th>tfidf_svd_2</th>\n",
" <th>tfidf_svd_3</th>\n",
" <th>...</th>\n",
" <th>tfidf_svd_20</th>\n",
" <th>tfidf_svd_21</th>\n",
" <th>tfidf_svd_22</th>\n",
" <th>tfidf_svd_23</th>\n",
" <th>tfidf_svd_24</th>\n",
" <th>tfidf_svd_25</th>\n",
" <th>tfidf_svd_26</th>\n",
" <th>tfidf_svd_27</th>\n",
" <th>tfidf_svd_28</th>\n",
" <th>tfidf_svd_29</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10368</td>\n",
" <td>Maddox St</td>\n",
" <td>maddox st</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0.010855</td>\n",
" <td>-0.029763</td>\n",
" <td>-0.021051</td>\n",
" <td>0.009316</td>\n",
" <td>...</td>\n",
" <td>0.004607</td>\n",
" <td>-0.010669</td>\n",
" <td>0.022564</td>\n",
" <td>0.009577</td>\n",
" <td>-0.004964</td>\n",
" <td>0.002377</td>\n",
" <td>0.008182</td>\n",
" <td>0.020271</td>\n",
" <td>0.011362</td>\n",
" <td>0.014186</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>11059</td>\n",
" <td>HIL-1</td>\n",
" <td>hil1</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>14345</td>\n",
" <td>SUS-2</td>\n",
" <td>sus2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>277469</td>\n",
" <td>4000 sqft Lakefront Retreat | Private Hot Tub #NH</td>\n",
" <td>4000 sqft lakefront retreat private hot tub nh</td>\n",
" <td>47</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>0.118079</td>\n",
" <td>-0.331729</td>\n",
" <td>-0.036329</td>\n",
" <td>-0.111210</td>\n",
" <td>...</td>\n",
" <td>-0.036141</td>\n",
" <td>-0.052405</td>\n",
" <td>0.093227</td>\n",
" <td>0.019269</td>\n",
" <td>-0.006774</td>\n",
" <td>0.070027</td>\n",
" <td>0.102554</td>\n",
" <td>-0.007378</td>\n",
" <td>0.041281</td>\n",
" <td>0.015410</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28561</td>\n",
" <td>LAN-3</td>\n",
" <td>lan3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3627</th>\n",
" <td>197269</td>\n",
" <td>Luxury Glamping | Hot Tub, Firepit &amp; Grill</td>\n",
" <td>luxury glamping hot tub firepit grill</td>\n",
" <td>39</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>0.086114</td>\n",
" <td>-0.296208</td>\n",
" <td>-0.030307</td>\n",
" <td>-0.076856</td>\n",
" <td>...</td>\n",
" <td>0.100707</td>\n",
" <td>0.100131</td>\n",
" <td>-0.025985</td>\n",
" <td>0.049267</td>\n",
" <td>0.009551</td>\n",
" <td>0.043639</td>\n",
" <td>0.076095</td>\n",
" <td>-0.009465</td>\n",
" <td>0.057498</td>\n",
" <td>0.044279</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3628</th>\n",
" <td>198130</td>\n",
" <td>Brick Haven House: 10min Walk to Shakespeare F...</td>\n",
" <td>brick haven house 10min walk to shakespeare fest</td>\n",
" <td>48</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>0.152979</td>\n",
" <td>-0.182920</td>\n",
" <td>0.248066</td>\n",
" <td>0.178298</td>\n",
" <td>...</td>\n",
" <td>0.018835</td>\n",
" <td>0.034150</td>\n",
" <td>-0.033340</td>\n",
" <td>0.000108</td>\n",
" <td>-0.001094</td>\n",
" <td>0.028110</td>\n",
" <td>0.066771</td>\n",
" <td>-0.008312</td>\n",
" <td>0.007832</td>\n",
" <td>0.015454</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3629</th>\n",
" <td>205403</td>\n",
" <td>NO FEES! Pool+Hot Tub/Volley&amp;Bocce Ball+Firepit</td>\n",
" <td>no fees poolhot tubvolleybocce ballfirepit</td>\n",
" <td>42</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>0.005290</td>\n",
" <td>-0.007561</td>\n",
" <td>0.000197</td>\n",
" <td>-0.004329</td>\n",
" <td>...</td>\n",
" <td>0.004708</td>\n",
" <td>-0.005517</td>\n",
" <td>0.004417</td>\n",
" <td>-0.006661</td>\n",
" <td>-0.006363</td>\n",
" <td>-0.003132</td>\n",
" <td>-0.005854</td>\n",
" <td>0.012305</td>\n",
" <td>-0.005357</td>\n",
" <td>-0.006805</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3630</th>\n",
" <td>263762</td>\n",
" <td>Brasada Ranch | Hot Tub | Guest Casita | 5 Bed</td>\n",
" <td>brasada ranch hot tub guest casita 5 bed</td>\n",
" <td>43</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>0.076370</td>\n",
" <td>-0.220398</td>\n",
" <td>-0.006391</td>\n",
" <td>-0.042772</td>\n",
" <td>...</td>\n",
" <td>0.033248</td>\n",
" <td>0.035303</td>\n",
" <td>0.025965</td>\n",
" <td>0.063088</td>\n",
" <td>0.014874</td>\n",
" <td>0.003055</td>\n",
" <td>-0.013777</td>\n",
" <td>-0.012340</td>\n",
" <td>-0.018223</td>\n",
" <td>0.016823</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3631</th>\n",
" <td>267589</td>\n",
" <td>10% Off July 6-10 • Creek • 3 Dogs • Fenced Yard</td>\n",
" <td>10 off july 610 • creek • 3 dogs • fenced yard</td>\n",
" <td>46</td>\n",
" <td>12</td>\n",
" <td>10</td>\n",
" <td>0.015270</td>\n",
" <td>-0.046531</td>\n",
" <td>0.000051</td>\n",
" <td>-0.017550</td>\n",
" <td>...</td>\n",
" <td>-0.074968</td>\n",
" <td>-0.004176</td>\n",
" <td>-0.051906</td>\n",
" <td>-0.114376</td>\n",
" <td>-0.010725</td>\n",
" <td>-0.071384</td>\n",
" <td>0.044820</td>\n",
" <td>-0.035996</td>\n",
" <td>-0.038547</td>\n",
" <td>0.000560</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3632 rows × 36 columns</p>\n",
"</div>"
],
"text/plain": [
" id_accommodation friendly_name \\\n",
"0 10368 Maddox St \n",
"1 11059 HIL-1 \n",
"2 14345 SUS-2 \n",
"3 277469 4000 sqft Lakefront Retreat | Private Hot Tub #NH \n",
"4 28561 LAN-3 \n",
"... ... ... \n",
"3627 197269 Luxury Glamping | Hot Tub, Firepit & Grill \n",
"3628 198130 Brick Haven House: 10min Walk to Shakespeare F... \n",
"3629 205403 NO FEES! Pool+Hot Tub/Volley&Bocce Ball+Firepit \n",
"3630 263762 Brasada Ranch | Hot Tub | Guest Casita | 5 Bed \n",
"3631 267589 10% Off July 6-10 • Creek • 3 Dogs • Fenced Yard \n",
"\n",
" clean_name char_count \\\n",
"0 maddox st 9 \n",
"1 hil1 4 \n",
"2 sus2 4 \n",
"3 4000 sqft lakefront retreat private hot tub nh 47 \n",
"4 lan3 4 \n",
"... ... ... \n",
"3627 luxury glamping hot tub firepit grill 39 \n",
"3628 brick haven house 10min walk to shakespeare fest 48 \n",
"3629 no fees poolhot tubvolleybocce ballfirepit 42 \n",
"3630 brasada ranch hot tub guest casita 5 bed 43 \n",
"3631 10 off july 610 • creek • 3 dogs • fenced yard 46 \n",
"\n",
" word_count unique_word_count tfidf_svd_0 tfidf_svd_1 tfidf_svd_2 \\\n",
"0 2 2 0.010855 -0.029763 -0.021051 \n",
"1 1 1 0.000000 0.000000 0.000000 \n",
"2 1 1 0.000000 0.000000 0.000000 \n",
"3 8 8 0.118079 -0.331729 -0.036329 \n",
"4 1 1 0.000000 0.000000 0.000000 \n",
"... ... ... ... ... ... \n",
"3627 6 6 0.086114 -0.296208 -0.030307 \n",
"3628 8 8 0.152979 -0.182920 0.248066 \n",
"3629 5 5 0.005290 -0.007561 0.000197 \n",
"3630 8 8 0.076370 -0.220398 -0.006391 \n",
"3631 12 10 0.015270 -0.046531 0.000051 \n",
"\n",
" tfidf_svd_3 ... tfidf_svd_20 tfidf_svd_21 tfidf_svd_22 \\\n",
"0 0.009316 ... 0.004607 -0.010669 0.022564 \n",
"1 0.000000 ... 0.000000 0.000000 0.000000 \n",
"2 0.000000 ... 0.000000 0.000000 0.000000 \n",
"3 -0.111210 ... -0.036141 -0.052405 0.093227 \n",
"4 0.000000 ... 0.000000 0.000000 0.000000 \n",
"... ... ... ... ... ... \n",
"3627 -0.076856 ... 0.100707 0.100131 -0.025985 \n",
"3628 0.178298 ... 0.018835 0.034150 -0.033340 \n",
"3629 -0.004329 ... 0.004708 -0.005517 0.004417 \n",
"3630 -0.042772 ... 0.033248 0.035303 0.025965 \n",
"3631 -0.017550 ... -0.074968 -0.004176 -0.051906 \n",
"\n",
" tfidf_svd_23 tfidf_svd_24 tfidf_svd_25 tfidf_svd_26 tfidf_svd_27 \\\n",
"0 0.009577 -0.004964 0.002377 0.008182 0.020271 \n",
"1 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"2 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"3 0.019269 -0.006774 0.070027 0.102554 -0.007378 \n",
"4 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"... ... ... ... ... ... \n",
"3627 0.049267 0.009551 0.043639 0.076095 -0.009465 \n",
"3628 0.000108 -0.001094 0.028110 0.066771 -0.008312 \n",
"3629 -0.006661 -0.006363 -0.003132 -0.005854 0.012305 \n",
"3630 0.063088 0.014874 0.003055 -0.013777 -0.012340 \n",
"3631 -0.114376 -0.010725 -0.071384 0.044820 -0.035996 \n",
"\n",
" tfidf_svd_28 tfidf_svd_29 \n",
"0 0.011362 0.014186 \n",
"1 0.000000 0.000000 \n",
"2 0.000000 0.000000 \n",
"3 0.041281 0.015410 \n",
"4 0.000000 0.000000 \n",
"... ... ... \n",
"3627 0.057498 0.044279 \n",
"3628 0.007832 0.015454 \n",
"3629 -0.005357 -0.006805 \n",
"3630 -0.018223 0.016823 \n",
"3631 -0.038547 0.000560 \n",
"\n",
"[3632 rows x 36 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_listing_details"
]
},
{
"cell_type": "markdown",
"id": "18bbd6f2",
"metadata": {},
"source": [
"# Relating vars"
]
},
{
"cell_type": "markdown",
"id": "2d4d57c0",
"metadata": {},
"source": [
"## Time related vars"
]
},
{
"cell_type": "markdown",
"id": "5a701a5f",
"metadata": {},
"source": [
"### Boruta"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "63a15eb4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration: \t1 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t2 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t3 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t4 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t5 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t6 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t7 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t15\n",
"Rejected: \t0\n",
"Iteration: \t8 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t9 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t10 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t11 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t12 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t13 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t14 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t15 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t2\n",
"Rejected: \t13\n",
"Iteration: \t16 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t17 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t18 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t19 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t20 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t21 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t22 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t23 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t24 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t25 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t26 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t27 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t28 / 100\n",
"Confirmed: \t1\n",
"Tentative: \t1\n",
"Rejected: \t13\n",
"Iteration: \t29 / 100\n",
"Confirmed: \t2\n",
"Tentative: \t0\n",
"Rejected: \t13\n",
"\n",
"\n",
"BorutaPy finished running.\n",
"\n",
"Iteration: \t30 / 100\n",
"Confirmed: \t2\n",
"Tentative: \t0\n",
"Rejected: \t13\n",
"\n",
"📊 Boruta Feature Selection Results:\n",
" feature rank status\n",
"12 length_of_stay_days 1 Selected ✅\n",
"13 lead_time_to_checkin_days 1 Selected ✅\n",
"5 checkin_year_cycle_cos 2 Rejected ❌\n",
"14 gj_start_to_checkin_days 2 Rejected ❌\n",
"4 checkin_year_cycle_sin 3 Rejected ❌\n",
"11 checkout_year_cycle_cos 4 Rejected ❌\n",
"3 checkin_month_cycle_cos 6 Rejected ❌\n",
"10 checkout_year_cycle_sin 6 Rejected ❌\n",
"8 checkout_month_cycle_sin 7 Rejected ❌\n",
"2 checkin_month_cycle_sin 8 Rejected ❌\n",
"9 checkout_month_cycle_cos 9 Rejected ❌\n",
"1 checkin_week_cycle_cos 10 Rejected ❌\n",
"6 checkout_week_cycle_sin 11 Rejected ❌\n",
"0 checkin_week_cycle_sin 12 Rejected ❌\n",
"7 checkout_week_cycle_cos 13 Rejected ❌\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAJOCAYAAACqS2TfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAADYEUlEQVR4nOzdd1RU1/o38O/QhjI0URGUEgUUERBrlKgoKGKJGnuMgoqaWIixEa8F7CUaSxK9URO4GsUSey9EDBJjB40gKAKWkKBGQBRRmf3+4cv5OVJmREcwfj9r7bWcc3Z59jnkXh7OPntkQggBIiIiIiIiInrtdCo6ACIiIiIiIqJ/KybdRERERERERFrCpJuIiIiIiIhIS5h0ExEREREREWkJk24iIiIiIiIiLWHSTURERERERKQlTLqJiIiIiIiItIRJNxEREREREZGWMOkmIiIiIiIi0hIm3URERET0UhwdHdGlSxe19WQyGcLDw7UfEBFRJcakm4iI6C0TGRkJmUymUqpXr462bdti//79FRrbvn37tJJkBQUFFZtzUTlw4MBrHw8ANmzYgKVLl2ql71f14vWQy+VwcXHB9OnT8ejRo4oOj4iInqNX0QEQERFR+cycORPvvfcehBD4+++/ERkZiU6dOmH37t0aPYXUhn379uG7777TSuItl8uxZs2aYsc9PT1f+1jAs6T7jz/+wNixY7XS/6t6/nrk5ORg586dmDVrFlJTU7F+/foKju6Z/Px86Onx100ierfxfwWJiIjeUgEBAWjSpIn0eejQobC2tkZUVNRrSbqVSiUeP34MQ0PDV+7rddDT08Mnn3xS0WG8socPH8LY2PiV+3nxeowcORItW7ZEVFQUvv76a1hbW7/yGK+qsvzsEBFVJC4vJyIi+pewsLCAkZFRsSeLDx48wPjx42FnZwe5XI66deti0aJFEEKo1JPJZBg9ejTWr18PNzc3yOVyHDhwADExMZDJZIiJiVGpn56eDplMhsjISADPljx/9913Ul9FpciiRYvQsmVLWFlZwcjICI0bN8bPP//82uavVCqxdOlSuLm5wdDQENbW1hgxYgTu3bunUm/nzp3o3LkzbG1tIZfLUadOHcyaNQuFhYVSHR8fH+zduxcZGRnSPBwdHQH83/L+9PR0lX5Luk4+Pj5o0KABzp49i9atW8PY2Bj/+c9/AAAFBQUICwuDk5MT5HI57OzsMGnSJBQUFJRr/jKZDB988AGEELh27Zp0PCMjAyNHjkTdunVhZGQEKysr9O7du1j8RfOKi4vDuHHjUK1aNZiYmKBHjx64ffu22vH/97//QU9PDxMnTlSJ6flVD+Hh4ZDJZLh69SqCgoJgYWEBc3NzDB48GA8fPlTpLz8/HyEhIahatSpMTU3x4Ycf4tatW3xPnIjeOnzSTURE9JbKycnBnTt3IIRAVlYWvvnmG+Tl5ak8/RRC4MMPP8TRo0cxdOhQNGzYEAcPHsTEiRNx69YtLFmyRKXPX375BZs3b8bo0aNRtWpVODo6Ijs7W6N4RowYgT///BOHDx/GunXrip1ftmwZPvzwQwwYMACPHz/Gxo0b0bt3b+zZswedO3fWaIw7d+6ofNbX14e5ubk0fmRkJAYPHoyQkBCkpaXh22+/xfnz5xEXFwd9fX0Az5JLhUKBcePGQaFQ4JdffsH06dORm5uLr776CgAwZcoU5OTk4ObNm9I1UigUGsX4ort37yIgIAD9+vXDJ598AmtrayiVSnz44Yc4fvw4hg8fDldXV1y8eBFLlixBSkoKduzYUa6xihJpS0tL6djp06fx22+/oV+/fqhVqxbS09OxcuVK+Pj4IDExsdhT9zFjxsDS0hJhYWFIT0/H0qVLMXr0aGzatKnUcVetWoVPP/0U//nPfzB79my1cfbp0wfvvfce5s2bh3PnzmHNmjWoXr06FixYINUJCgrC5s2bMXDgQLz//vs4duyYxj8nRESViiAiIqK3Sk
REhABQrMjlchEZGalSd8eOHQKAmD17tsrxXr16CZlMJq5evSodAyB0dHTEpUuXVOoePXpUABBHjx5VOZ6WliYAiIiICOnYqFGjRGm/Xjx8+FDl8+PHj0WDBg1Eu3bt1M45MDCwxDm3adNGCCFEbGysACDWr1+v0u7AgQPFjr8YhxBCjBgxQhgbG4tHjx5Jxzp37iwcHByK1S26/mlpaSrHS7pObdq0EQDEf//7X5W669atEzo6OiI2Nlbl+H//+18BQMTFxZV1OURgYKAwMTERt2/fFrdv3xZXr14VixYtEjKZTDRo0EAolcoy53vixAkBQKxdu7bYvPz8/FTaf/HFF0JXV1dkZ2dLxxwcHETnzp2FEEIsW7ZMyGQyMWvWrGLjABBhYWHS57CwMAFADBkyRKVejx49hJWVlfT57NmzAoAYO3asSr2goKBifRIRVXZcXk5ERPSW+u6773D48GEcPnwYP/30E9q2bYvg4GBs27ZNqrNv3z7o6uoiJCREpe348eMhhCi223mbNm1Qv359rcRrZGQk/fvevXvIyclBq1atcO7cOY3aGxoaSvMtKosXLwYAbNmyBebm5mjfvj3u3LkjlcaNG0OhUODo0aMlxnH//n3cuXMHrVq1wsOHD3H58uXXNNv/I5fLMXjwYJVjW7ZsgaurK+rVq6cSb7t27QBAJd7SPHjwANWqVUO1atXg5OSECRMmwNvbGzt37lRZ1v/8fJ88eYK7d+/CyckJFhYWJV774cOHq7Rv1aoVCgsLkZGRUazuwoUL8fnnn2PBggWYOnWq+ovx/3366acqn1u1aoW7d+8iNzcXAKQd6UeOHKlSb8yYMRqPQURUWXB5ORER0VuqWbNmKhup9e/fH15eXhg9ejS6dOkCAwMDZGRkwNbWFqampiptXV1dAaBYIvXee+9pLd49e/Zg9uzZiI+PV3lv+fkEryy6urrw8/Mr8dyVK1eQk5OD6tWrl3g+KytL+velS5cwdepU/PLLL1KSVyQnJ0ejWF5GzZo1YWBgUCzepKQkVKtWTW28pTE0NMTu3bsBADdv3sTChQuRlZWlkmQDz96NnjdvHiIiInDr1i2Vd/lLmq+9vb3K56Kl6i++G3/s2DHs3bsXoaGhKu9xa6KsMczMzJCRkQEdHZ1iP49OTk4vNQ4RUWXApJuIiOhfQkdHB23btsWyZctw5coVuLm5vXQfLyZsQOlJ8fMbj6kTGxuLDz/8EK1bt8aKFStgY2MDfX19REREYMOGDS8d54uUSiWqV69e6ldlFSW32dnZaNOmDczMzDBz5kzUqVMHhoaGOHfuHEJDQ6FUKtWO9bLXo6RrqlQq4e7ujq+//rrENnZ2dmrjePGPEP7+/qhXrx5GjBiBXbt2ScfHjBmDiIgIjB07Fi1atIC5uTlkMhn69etX4nx1dXVLHE+8sPGem5sbsrOzsW7dOowYMeKl/mCj6RhERP8GTLqJiIj+RZ4+fQoAyMvLAwA4ODjgyJEjuH//vsrT7qJl1A4ODmr7LHoK+eKGaiUtNy4tId26dSsMDQ1x8OBByOVy6XhERITa8TVRp04dHDlyBN7e3iUmuUViYmJw9+5dbNu2Da1bt5aOp6WlFatb2lxe5nqUFW9CQgJ8fX01ftKvjo2NDb744gvMmDEDv//+O95//30AwM8//4zAwEBpKT4APHr0SOMN8kpTtWpV/Pzzz/jggw/g6+uL48ePw9bW9pX6LOLg4AClUom0tDQ4OztLx69evfpa+iciepP4TjcREdG/xJMnT3Do0CEYGBhIy8c7deqEwsJCfPvttyp1lyxZAplMhoCAALX9Ojg4QFdXF7/++qvK8RUrVhSra2JiAqB4QqqrqwuZTKbyNDg9Pb3cu3S/qE+fPigsLMSsWbOKnXv69KkUT9ET1uefqD5+/LjUuZS0/LpOnToAoHI9CgsLsWrVqpeK99atW1i9enWxc/n5+Xjw4IHGfT1vzJgxMDY2xvz586Vjurq6xZ4gf/
PNNy+1UqE0tWrVwpEjR5Cfn4/27dvj7t27r9wn8OypPVD8Z+ybb755Lf0TEb1JfNJNRET0ltq/f7/0xDorKwsbNmz
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Boruta for time related features\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from boruta import BorutaPy\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Prepare your feature matrix and target\n",
"# Only keep valid rows (no NaNs in features or target)\n",
"features = [\n",
" # check-in cyclical\n",
" 'checkin_week_cycle_sin', 'checkin_week_cycle_cos',\n",
" 'checkin_month_cycle_sin', 'checkin_month_cycle_cos',\n",
" 'checkin_year_cycle_sin', 'checkin_year_cycle_cos',\n",
" \n",
" # check-out cyclical\n",
" 'checkout_week_cycle_sin', 'checkout_week_cycle_cos',\n",
" 'checkout_month_cycle_sin', 'checkout_month_cycle_cos',\n",
" 'checkout_year_cycle_sin', 'checkout_year_cycle_cos',\n",
" \n",
" # scalar features\n",
" 'length_of_stay_days',\n",
" 'lead_time_to_checkin_days',\n",
" 'gj_start_to_checkin_days'\n",
"]\n",
"df_model = df_bookings_and_claims[features + ['has_resolution_incident']].dropna()\n",
"\n",
"X = df_model[features].values\n",
"y = df_model['has_resolution_incident'].values.astype(int)\n",
"# Random Forest\n",
"rf = RandomForestClassifier(\n",
" n_estimators=100,\n",
" max_depth=5,\n",
" random_state=42,\n",
" n_jobs=-1,\n",
" class_weight='balanced'\n",
")\n",
"\n",
"# Boruta setup\n",
"boruta_selector = BorutaPy(\n",
" estimator=rf,\n",
" n_estimators='auto',\n",
" verbose=2,\n",
" random_state=42\n",
")\n",
"\n",
"# Fit selector\n",
"boruta_selector.fit(X, y)\n",
"\n",
"# Prepare results\n",
"feature_rankings = pd.DataFrame({\n",
" 'feature': features,\n",
" 'rank': boruta_selector.ranking_,\n",
" 'selected': boruta_selector.support_,\n",
" 'tentative': boruta_selector.support_weak_,\n",
"})\n",
"\n",
"# Status column\n",
"def determine_status(row):\n",
" if row['selected']:\n",
" return 'Selected ✅'\n",
" elif row['tentative']:\n",
" return 'Tentative 🤔'\n",
" else:\n",
" return 'Rejected ❌'\n",
"\n",
"feature_rankings['status'] = feature_rankings.apply(determine_status, axis=1)\n",
"\n",
"# Sort by rank\n",
"feature_rankings = feature_rankings.sort_values(by='rank')\n",
"\n",
"# Show results\n",
"print(\"\\n📊 Boruta Feature Selection Results:\")\n",
"print(feature_rankings[['feature', 'rank', 'status']])\n",
"\n",
"# Optional plot\n",
"plt.figure(figsize=(10, 6))\n",
"colors = feature_rankings['status'].map({\n",
" 'Selected ✅': 'green',\n",
" 'Tentative 🤔': 'orange',\n",
" 'Rejected ❌': 'red'\n",
"})\n",
"\n",
"plt.barh(\n",
" feature_rankings['feature'],\n",
" -feature_rankings['rank'], # Negative to flip axis: rank 1 = top\n",
" color=colors\n",
")\n",
"plt.xlabel(\"Feature Rank (lower = better)\")\n",
"plt.title(\"Boruta Feature Ranking\")\n",
"plt.gca().invert_yaxis()\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "11deba4b",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABWgAAAPWCAYAAAB9Y3F7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1gUZ9cG8BsWRFBpChaUqJgFBERAQawRNdiigj2KGGNLLLHFFmOMXey9F7BFjWCLJZpYYsOCig2NnWABpUsRduf7w2/nZd2lLG1R7t91eSU788zMmZ3Z2eHsM+fREQRBABEREREREREREREVO11tB0BERERERERERERUWjFBS0RERERERERERKQlTNASERERERERERERaQkTtERERERERERERERawgQtERERERERERERkZYwQUtERERERERERESkJUzQEhEREREREREREWkJE7REREREREREREREWsIELREREREREREREZGWMEFLRIXGy8sLtra24j87Ozu4uLigefPm8PPzw7x58xAeHp7jOvz8/GBra4vQ0NBiijpnin3677//lKaXtDgBYOLEibC1tUVwcLC2Q/kk/ffff7C1tYWXl5dGy334uVD8c3FxQadOnbBw4ULExcUVUdRFKzg4GLa2tpg4caK2QylWiYmJ+PXXX9GyZUs4OjrC1tYWfn5+uS6neL9yOof27NkDe3t72NnZYfPmzYUZdoF9eA7b2dnBzc0NLVu2xLfffovFixfjwYMH2g6z0OX3s1/c1F1nPvx34sQJbYdJJYjimvTh59rV1RVdunTBwoULERsbq+0wNaKtz6vi/SvJFPcj2rxPzO/7VBLvu4mICpuetgMgok+Pq6srPvvsMwBAWloa4uLicPfuXVy6dAmbNm2Cu7s7Zs+ejRo1ahRZDF5eXoiKisJff/2F6tWrF9l2iktwcDAmTZoEHx8fzJ07V9vhUD5k/VzI5XJER0fj2rVrWLduHfbt24cdO3YU6WfiY7B8+XKsWLECw4cPx4gRI7QdTrZ+/vlnHD16FFZWVmjTpg0MDAxQu3btAq93/fr1WLBgASQSCWbNmgVfX99CiLbwNW3aFBYWFgCAlJQUxMbGIiwsDGfPnsWaNWvw5ZdfYtq0aahYsaKWI82bT+37Iuvx+VDVqlWLORogNDQU/fr1g7u7O7Zu3Vrs26fcGRkZwdvbGwAgk8nw/PlzXL9+HXfv3kVwcDC2b9+OmjVrajdILfLz88OlS5cQFBQEDw8PbYdDWvSpfV8QUcnCBC0RFbru3burJBYEQcCZM2cwe/ZsXLp0Cb169cJvv/2mkpCaN28eUlNTUa1ateIMOVtbtmxBRkYGKleurO1QcjVmzBgMGjQIlpaW2g6F1FD3uYiJiUHfvn3x5MkTzJ8/H8uWLdNSdJRXGRkZOHHiBAwMDHDgwAGUL1++UNa7YMECrF+/HmXKlMHixYvRunXrQllvURg8eLBKkiIzMxNHjhzB3Llz8eeff+LBgwf47bffYGJioqUoC0/lypVx+PBh6OvrazuUPFF3fIhyYmZmpvLj77///ou+ffvi9evXmD17NtatW6el6D4Ohw8f1nYIHwW+T0RE2WOJAyIqFjo6OmjRogX27NmDmjVr4vXr15gyZYpKu2rVqsHGxgaGhoZaiFKVtbU1bGxsPoo/zC0tLWFjY4MKFSpoOxTKIwsLCwwcOBAAcOHCBS1HQ3kRExODzMxMVKpUqVCSs3K5HD///DPWr1+PcuXKYcOGDSU6OZsdPT09fPXVV9izZw/MzMzw6NEjzJs3T9thFQp9fX3Y2NjA2tpa26EQFZvPP/8c33zzDQDg/PnzePfunZYjKtlsbGxgY2Oj7TBKPL5PRETZY4KWiIqVsbExJk+eDAC4ePEibt26pTQ/uxpT7969w4YNG+Dr6wsXFxc4OjqiSZMm6Nq1KwICAhAfHw/gf/XUoqKiAACtWrVSqq2mWG9oaKhYNzI1NR
VLly5Fu3bt4OzsrFS3LLsatFldunQJAwYMgLu7O5ydndGtWzfs27dPbdvcamgtX74ctra2WL58uVIMkyZNAgCEhIQo7U/Wupe51aD9448/4O/vD3d3dzg6OqJly5aYNGkSHj9+rLZ91n2/ePEiBgwYgIYNG6JevXrw8fHJdh+TkpKwePFifPXVV6hfvz4cHR3RtGlT9OrVC0uXLkVGRoba5dQJDw9HQEAAunXrhiZNmsDR0RGNGzfG0KFDcf78ebXLZK2LmpKSgoULF6JNmzbiOTNhwgS8evUq222ePHkSffv2hYuLC9zc3PD1118Xad3GSpUqAXjfA1Gd1NRUrFu3Dj4+PnBxcYGzszM6dOiAxYsXIyEhQant0aNHYWtri0aNGuHly5cq6/rnn39gb28PNzc3PHnyRJye9dyJiIjA8OHD0ahRI9SrVw9fffUVAgMDIZPJNN638PBw/PDDD2jatCkcHR3h6emJoUOH4ty5cyptbW1tsWLFCgDAihUrlM7zrDVuo6OjMXPmTHh7e8PJyQnOzs5o0aIF/P39sXHjRo1jfPjwISZNmiTWk3V3d4e/v7/aXj62trZo2bIlACAqKkrttUUT7969w5gxY7B7926Ym5vn+PhsWloaNm3ahB49eqBBgwZwcnKCt7c3AgICVGoYL1u2DLa2tpg6dWq22w4PD4etrS2aNWuW7bmXH9WqVRPLU+zfvx+vX78W5+WlNmR219ys00+cOCE+Mp/1vY+NjUVQUBAGDRoELy8v1KtXD66urvD19cW6deuQnp6utM68fl/kFvfLly8xY8YMfPnll3BycoKbm5v4lIi6z01Br1GF6cKFCxg+fLjSZ3TYsGG4du2a2vaaXpP9/PzQr18/AO+/K7O+v1nfz/x8N344/fnz55g8eTJatGgBBwcHldrYR48exbfffotGjRrB0dERzZo1w7hx47Ktm3zr1i2MGjUKzZs3h6OjI1xdXdGqVSuMGDEiz98JCxcuzPWzeP/+fdja2qJx48ZK34/nz5/H0KFD0bhxYzg4OKBhw4b48ssvMW7cOFy+fDlP2y8oRa3QjIwM8T4rq4SEBCxbtgydO3cWv5+++uorrFq1CqmpqSrt5XI5du3ahV69eqFBgwZwcHCAp6cnOnXqhBkzZqi914qPj8eiRYvQoUMHODs7w8XFBb6+vli/fj3S0tLyvC/5uf4o7hUvXboEAOjXr5/SOZz1fiun2qqa7kPWe9SMjAysW7cOHTp0QL169eDh4YHhw4fj4cOHed733GT9HMXGxuLXX39FixYt4OjoiBYtWmDGjBlITEzMdvnHjx9j2rRp8Pb2hrOzM1xdXdG+fXtMmzYN9+/fV2qb0/v04sULTJo0CU2bNoWTkxO+/PJLLF68OE/HWZPPd9ZzQRAE7Nq1C76+vqhfvz7c3NwwYMAAlWtgXr8viIgKgiUOiKjYNW/eHKampoiPj8f58+fh6OiYY3u5XI7BgwfjwoULKF++PBo0aABjY2PExsbi6dOn2LhxI7766iuYmprC2toaPj4+OHbsGFJSUuDt7Q0jIyNxXYpkmEJ6ejr8/Pzw8OFDNGjQAHZ2dmr/CMnO8ePHsX37dtSuXRtNmzZFdHQ0rl69igkTJiAiIqJQBk/y9vbG9evXERYWBmtra7i5uYnz8lL3UhAETJw4Efv27YOenh4aNGiAihUr4vbt2wgODsaRI0ewbNkyNG/eXO3ye/fuxerVq1G3bl00a9YMUVFRuH79OiZMmID4+Hj0799fbJuamoqvv/4a9+/fh7m5ORo1agQjIyPExMTg8ePHWLVqFb755ps890hetGgRQkNDUadOHTg4OMDQ0BCRkZE4efIkTp48icmTJ8Pf31/tsklJSejVqxdevHgBNzc3fP7557h+/Tr27duHy5cvY//+/Sq9jbds2YI5c+YAAOrVqwdra2s8efIEw4YNE3sSFTbFwHmff/65yjzF+3v37l2UL18ejRo1gr6+Pi5duoQ1a9
bg0KFDCAwMFOugtW3bFn5+fti6dSvGjBmDoKAg6Om9/6p/9eoVxo8fD7lcjhkzZqitJxgeHo5p06ahUqVK8PT0RGJ
"text/plain": [
"<Figure size 1400x1000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Ensure clean data\n",
"df_viz = df_bookings_and_claims[[\n",
" 'length_of_stay_days',\n",
" 'lead_time_to_checkin_days',\n",
" 'has_resolution_incident'\n",
"]].dropna()\n",
"\n",
"# Setup\n",
"sns.set(style=\"whitegrid\", palette=\"muted\")\n",
"\n",
"# Create subplots\n",
"fig, axes = plt.subplots(2, 2, figsize=(14, 10))\n",
"fig.suptitle(\"Distributions and Boxplots of Key Duration Features vs. Resolution Incident\", fontsize=16)\n",
"\n",
"# Length of Stay - KDE/Histogram\n",
"sns.histplot(\n",
" data=df_viz,\n",
" x='length_of_stay_days',\n",
" hue='has_resolution_incident',\n",
" element='step',\n",
" common_norm=False,\n",
" ax=axes[0, 0],\n",
" stat='probability'\n",
")\n",
"axes[0, 0].set_title(\"Length of Stay Distribution\")\n",
"\n",
"# Length of Stay - Boxplot\n",
"sns.boxplot(\n",
" data=df_viz,\n",
" x='has_resolution_incident',\n",
" y='length_of_stay_days',\n",
" ax=axes[0, 1]\n",
")\n",
"axes[0, 1].set_title(\"Length of Stay by Incident\")\n",
"\n",
"# Lead Time - KDE/Histogram\n",
"sns.histplot(\n",
" data=df_viz,\n",
" x='lead_time_to_checkin_days',\n",
" hue='has_resolution_incident',\n",
" element='step',\n",
" common_norm=False,\n",
" ax=axes[1, 0],\n",
" stat='probability'\n",
")\n",
"axes[1, 0].set_title(\"Lead Time Distribution\")\n",
"\n",
"# Lead Time - Boxplot\n",
"sns.boxplot(\n",
" data=df_viz,\n",
" x='has_resolution_incident',\n",
" y='lead_time_to_checkin_days',\n",
" ax=axes[1, 1]\n",
")\n",
"axes[1, 1].set_title(\"Lead Time by Incident\")\n",
"\n",
"# Adjust\n",
"for ax in axes.flat:\n",
" ax.set_xlabel(\"\")\n",
" ax.set_ylabel(\"\")\n",
"\n",
"plt.tight_layout(rect=[0, 0, 1, 0.96])\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"id": "7774f0eb",
"metadata": {},
"source": [
"It seems that:\n",
"- Longer stays (especially beyond two weeks) have a higher chance of a claim.\n",
"- Longer lead times have a lower chance of a claim (although it looks like we simply need more samples to confirm this)."
]
},
{
"cell_type": "markdown",
"id": "2a6fb142",
"metadata": {},
"source": [
"## Listing NLP features"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "b8d9a007",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration: \t1 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t2 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t3 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t4 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t5 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t6 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t7 / 100\n",
"Confirmed: \t0\n",
"Tentative: \t30\n",
"Rejected: \t0\n",
"Iteration: \t8 / 100\n",
"Confirmed: \t7\n",
"Tentative: \t18\n",
"Rejected: \t5\n",
"Iteration: \t9 / 100\n",
"Confirmed: \t7\n",
"Tentative: \t18\n",
"Rejected: \t5\n",
"Iteration: \t10 / 100\n",
"Confirmed: \t7\n",
"Tentative: \t18\n",
"Rejected: \t5\n",
"Iteration: \t11 / 100\n",
"Confirmed: \t7\n",
"Tentative: \t18\n",
"Rejected: \t5\n",
"Iteration: \t12 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t13 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t14 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t15 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t16 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t17 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t18 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t19 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t16\n",
"Rejected: \t5\n",
"Iteration: \t20 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t15\n",
"Rejected: \t6\n",
"Iteration: \t21 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t15\n",
"Rejected: \t6\n",
"Iteration: \t22 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t15\n",
"Rejected: \t6\n",
"Iteration: \t23 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t15\n",
"Rejected: \t6\n",
"Iteration: \t24 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t15\n",
"Rejected: \t6\n",
"Iteration: \t25 / 100\n",
"Confirmed: \t9\n",
"Tentative: \t15\n",
"Rejected: \t6\n",
"Iteration: \t26 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t27 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t28 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t29 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t30 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t31 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t32 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t33 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t34 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t35 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t36 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t37 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t38 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t39 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t40 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t41 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t42 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t43 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t44 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t45 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t46 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t47 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t48 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t13\n",
"Rejected: \t7\n",
"Iteration: \t49 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t50 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t51 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t52 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t53 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t54 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t55 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t56 / 100\n",
"Confirmed: \t10\n",
"Tentative: \t12\n",
"Rejected: \t8\n",
"Iteration: \t57 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t58 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t59 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t60 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t61 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t62 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t63 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t64 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t65 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t66 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t67 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t68 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t69 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t70 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t71 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t72 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t73 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t74 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t75 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t76 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t77 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t78 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t79 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t80 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t81 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t82 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t83 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t84 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t85 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t86 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t87 / 100\n",
"Confirmed: \t12\n",
"Tentative: \t10\n",
"Rejected: \t8\n",
"Iteration: \t88 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t89 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t90 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t91 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t92 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t93 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t94 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t95 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t96 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t97 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t98 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"Iteration: \t99 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t8\n",
"Rejected: \t8\n",
"\n",
"\n",
"BorutaPy finished running.\n",
"\n",
"Iteration: \t100 / 100\n",
"Confirmed: \t14\n",
"Tentative: \t7\n",
"Rejected: \t9\n",
"\n",
"📊 Boruta Feature Selection Results:\n",
" feature rank status\n",
"1 tfidf_svd_1 1 Selected ✅\n",
"7 tfidf_svd_7 1 Selected ✅\n",
"5 tfidf_svd_5 1 Selected ✅\n",
"4 tfidf_svd_4 1 Selected ✅\n",
"11 tfidf_svd_11 1 Selected ✅\n",
"10 tfidf_svd_10 1 Selected ✅\n",
"9 tfidf_svd_9 1 Selected ✅\n",
"8 tfidf_svd_8 1 Selected ✅\n",
"12 tfidf_svd_12 1 Selected ✅\n",
"19 tfidf_svd_19 1 Selected ✅\n",
"18 tfidf_svd_18 1 Selected ✅\n",
"29 tfidf_svd_29 1 Selected ✅\n",
"24 tfidf_svd_24 1 Selected ✅\n",
"17 tfidf_svd_17 1 Selected ✅\n",
"6 tfidf_svd_6 2 Tentative 🤔\n",
"13 tfidf_svd_13 2 Tentative 🤔\n",
"3 tfidf_svd_3 2 Tentative 🤔\n",
"26 tfidf_svd_26 2 Tentative 🤔\n",
"25 tfidf_svd_25 2 Tentative 🤔\n",
"14 tfidf_svd_14 2 Tentative 🤔\n",
"23 tfidf_svd_23 2 Tentative 🤔\n",
"15 tfidf_svd_15 3 Rejected ❌\n",
"16 tfidf_svd_16 4 Rejected ❌\n",
"2 tfidf_svd_2 4 Rejected ❌\n",
"0 tfidf_svd_0 6 Rejected ❌\n",
"21 tfidf_svd_21 7 Rejected ❌\n",
"27 tfidf_svd_27 8 Rejected ❌\n",
"28 tfidf_svd_28 9 Rejected ❌\n",
"22 tfidf_svd_22 10 Rejected ❌\n",
"20 tfidf_svd_20 11 Rejected ❌\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA9gAAAMQCAYAAADckc2oAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOz9fVhU17k//r9npjNhEB8AB21B0ACOPEUkEYhBBQuKSlq1qK1oEjVGHETPMWrR029ilBJFYhMhGmpQUquRHmVqZcQoPjREG5L2mIIWLEogiX4Ui4AQEIdx//7wx8QJoAOzYSb4fl1XLmXtvdbce3PLlZu99loSQRAEEBEREREREZFFpNYOgIiIiIiIiKgvYIFNREREREREJAIW2EREREREREQiYIFNREREREREJAIW2EREREREREQiYIFNREREREREJAIW2EREREREREQiYIFNREREREREJAIW2EREREREREQiYIFNRERkgyZNmoSlS5c+8jy1Wo309PReiKj3JSUlYdKkST0+Zl++h0RE1Lt+ZO0AiIhsUW5uLtatW2fS5uTkBC8vL7z88suYOHGilSID/vrXv6K4uBiJiYmijpuUlAStVtvhsV27dmHChAmifh4AHDlyBDU1NXjppZdEH9tS378fcrkcrq6umDZtGuLj4/HEE09YMTrr++abb/DTn/4Ua9euxeLFi60dzg/OggUL8Nlnnz3yvOXLl4v2b/29996Dl5cXIiMjzTr/1q1b2LFjBz755BNcu3YN/fr1g6urK0JCQqDRaKBQKBAWFoYnn3wSH374YYdjCIKA8PBwODk5QavVoqioCC+88ILxuFwux4ABA+Dp6YnnnnsOc+bMgZOT0yNja8u/jowePRp/+tOfzLrGrrhx4wb+9Kc/ITIyEj4+PqKPT0R9AwtsIqKHWLFiBdzc3CAIAmpqaqDVavHKK6/gvffeQ0REhFVi+utf/4p9+/aJXmADgEKhQHJycrv2UaNGif5ZAJCXl4fy8nKbLLAB0/vR2NiIkydPYseOHfjqq6/w1ltvWTm6+4qLiyGTyawdxg+aNe5hfHw8YmNjjV+XlJRg7969iI+Px5NPPmlsV6vVon1mZmYmpkyZYlaBXVdXh1/84hdobGzEL37xCzz55JOoq6vDpUuX8OGHH+JXv/oV3NzcEB0djZycHFy9ehWurq7txvn8889x/fr1dv/GFyxYgICAANy7dw+3bt3C+fPnkZ6ejj179uDtt9/Gs88+a9Y1xcTEtPvlnzkFendUV1cjIyMDrq6uLLCJqFMssImIHmLChAkICAgwfh0bG4vnnnsOeXl5ohTY9+7dg16vt5mnoT/60Y/w85//3NphWKy5uRlKpdLicb5/P+bNm4df/vKX0Ol0WLduHQYPHmzxZ1jKVnLnh8wa9/C5555rF8PevXsxbtw4hISE9Ho833fw4EFcu3YNH374IYKCgkyONTY2Qi6XAwCef/55HDhwADqdDq+88kq7cfLy8iCVSjFt2jST9meeeQbR0dEmbWVlZVi0aBFWrFgBnU4HFxeXR8bp6+v7g/+Z1dLSArlcDqmUb24S9QX8l0xE1AUDBgzAE088gR/9yPT3k01NTdi8eTMmTpwIf39/TJkyBVlZWRAEweQ8tVqNjRs34i9/+QumT5+OgIAAFBYWoqioCGq1GkVFRSbnf/PNN1Cr1cjNzQVwf9ryvn37jGO1/dcmKysLv/zlLxESEoKnnnoKs2bNwrFjx0S7/nv37iE7O9sY+7hx4/Daa6+hvr7e5LyCggK88sorCAsLg7+/PyIjI/Huu+/CYDAYz1mwYAHOnDmDq1evGq+j7d3Y3NxcqNVqfPPNNybjdnSfFixYgJiYGFy4cAFxcXEYPXo0tm3bBgC4e/cutm/fjqioKPj7+2PixIlITU3F3bt3u3X9EokEQUFBEAQBX3/9tbH96tWr2LBhA6ZMmYKnnnoKISEhWLFiRbv4267rH//4B9
58802EhoYiMDAQCQkJuHXr1iM/X6vVwtfXF1u2bDG2ff/94fT0dKjValRVVSEpKQnPPPMMnn76aaxbtw7Nzc0m4925cwfJyckICQnBmDFjEB8fjxs3bnT7neSuXt9f//pXzJ8/H2PGjEFQUBB+8Ytf4MiRI52Ob+6/kzYFBQWIiYlBQEAAYmJicOLEiQ7HtaV7+H1//etfMW/ePAQGBmLMmDF45ZVXUF5ebjz+t7/9DaNGjcI777xj0u/IkSNQq9XYv3+/8Rqbmpqg1WqN/96SkpI6/dyvvvoKMpkMgYGB7Y45ODgYfynx9NNPw9XVtcPvm16vx0cffYSQkBAMGTLkkdc6atQorF+/Hrdv3zb+nLPUlStXsGLFCgQHByMgIACzZs3CyZMnTc6pq6vDli1b8Pzzzxtz8eWXX0ZZWZnxnKKiIuOMg3Xr1hnvYVvOTZo0qcP7uWDBAixYsMBkHLVaDZ1Oh9/97ncYP348Ro8ejcbGRgDAP//5TyxevBhPP/00Ro8ejfnz5+Mf//iHyZiNjY347W9/i0mTJsHf3x/PPvssFi5ciIsXL4pyz4jIMnyCTUT0EI2NjcbCoKamBnv37kVTUxN+9rOfGc8RBAHLli0z/g+Yj48PCgsLkZqaihs3bmD9+vUmY3766afIz89HXFwcHB0d4erqitu3b5sVz9y5c1FdXY2zZ88iNTW13fE//OEPmDRpEp5//nno9XrodDqsXLkSmZmZCA8PN+szvl8IyeVy9O/fHwDw2muvQavVYtasWViwYAG++eYb7Nu3D//617/w4YcfGp9qabVa2NvbY+HChbC3t8enn36K7du3o7GxEb/+9a8B3J8i29DQgOvXrxvfd+/Xr59ZMX5fXV0dlixZgunTp+NnP/sZnJ2dce/ePSxbtgz/+Mc/MGfOHHh6euLf//43PvjgA1RWVmLHjh3d+qyrV68CuP/LljYlJSU4f/48pk+fjqFDh+Lq1av48MMP8cILL0Cn07V7mp6cnIwBAwZg+fLluHr1Kj744ANs3LgRb7/9dqefm5OTg9dffx1Lly7Ff//3fz8yzv/6r/+Cm5sbVq1ahX/961/43//9Xzg5OWHNmjXGc5KSkpCfn4+f//znGD16ND7//PMOn0J2lTnXl5ubi/Xr18Pb2xtLly5F//79UVpaisLCQjz//PMWx/DJJ58gMTERXl5eePXVV1FbW4t169Zh6NChZo9hzXsIAH/+85+RlJSEsLAwrF69Gs3Nzfjwww8xb948aLVauLm54dlnn8W8efPw+9//HpGRkfDz80N1dTWSk5Mxbtw4/OpXvwIApKam4je/+Q2eeuopzJkzBwDg7u7e6We7urrCYDDg8OHDmDlzZqfnSSQSPP/883jvvfdQXl4Ob29v47HCwkLU1dV16fs5ZcoU/M///A8++eQTs/K8ubm53c+s/v37Qy6Xo7y8HL/61a8wZMgQLFmyBPb29sjPz0dCQgLS09MRFRUFAPj6669RUFCA6OhouLm54T//+Q9ycnIwf/586HQ6DBkyBJ6enlixYgW2b9+OuXPn4umnnwaAdk/3zbVjxw7I5XIsXrwYd+/ehVwux9/+9jcsWbIE/v7+WL58OSQSCXJzc/Hiiy9i//79eOqppwAAr7/+Oj766CPMnz8fnp6eqKurwz/+8Q9cuXIFfn5+3YqHiEQkEBFRO4cOHRJGjhzZ7j9/f38hNzfX5NwTJ04II0eOFHbs2GHSnpiYKKjVaqGqqsrYNnLkSGHUqFFCeXm5ybmffvqpMHLkSOHTTz81af/666+FkSNHCocOHTK2vfHGG8LIkSM7jLu5udnk67t37woxMTHCCy+88Mhr/vWvf93hNc+fP18QBEH4/PPPhZEjRwp/+ctfTPp9/PHH7dq/H4cgCML/9//9f8Lo0aOFlpYWY9srr7wiREREtDu37f5//fXXJu0d3af58+cLI0eOFD788EOTc//85z8Lo0aNEj7//HOT9g8//FAYOXKk8I
9//OOR9yMwMFCoqakRampqhKqqKiErK0tQq9VCTEyMcO/evYde7/nz54WRI0cKWq223XW99NJLJv1TUlIEHx8f4fb
"text/plain": [
"<Figure size 1000x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df = df_bookings_and_claims.merge(\n",
" df_listing_details[['id_accommodation'] + [f'tfidf_svd_{i}' for i in range(30)]],\n",
" on='id_accommodation',\n",
" how='left'\n",
")\n",
"\n",
"\n",
"svd_features = [f'tfidf_svd_{i}' for i in range(30)]\n",
"all_features = svd_features\n",
"\n",
"df_model = df[all_features + ['has_resolution_incident']].dropna()\n",
"\n",
"X = df_model[all_features].values\n",
"y = df_model['has_resolution_incident'].values.astype(int)\n",
"\n",
"# --- Step 4: Boruta setup and fit ---\n",
"rf = RandomForestClassifier(\n",
" n_estimators=100,\n",
" max_depth=5,\n",
" random_state=42,\n",
" n_jobs=-1,\n",
" class_weight='balanced'\n",
")\n",
"\n",
"boruta_selector = BorutaPy(\n",
" estimator=rf,\n",
" n_estimators='auto',\n",
" verbose=2,\n",
" random_state=42\n",
")\n",
"\n",
"boruta_selector.fit(X, y)\n",
"\n",
"# --- Step 5: Prepare and show results ---\n",
"feature_rankings = pd.DataFrame({\n",
" 'feature': all_features,\n",
" 'rank': boruta_selector.ranking_,\n",
" 'selected': boruta_selector.support_,\n",
" 'tentative': boruta_selector.support_weak_,\n",
"})\n",
"\n",
"def determine_status(row):\n",
" if row['selected']:\n",
" return 'Selected ✅'\n",
" elif row['tentative']:\n",
" return 'Tentative 🤔'\n",
" else:\n",
" return 'Rejected ❌'\n",
"\n",
"feature_rankings['status'] = feature_rankings.apply(determine_status, axis=1)\n",
"feature_rankings = feature_rankings.sort_values(by='rank')\n",
"\n",
"print(\"\\n📊 Boruta Feature Selection Results:\")\n",
"print(feature_rankings[['feature', 'rank', 'status']])\n",
"\n",
"# Optional plot\n",
"plt.figure(figsize=(10, 8))\n",
"colors = feature_rankings['status'].map({\n",
" 'Selected ✅': 'green',\n",
" 'Tentative 🤔': 'orange',\n",
" 'Rejected ❌': 'red'\n",
"})\n",
"\n",
"plt.barh(\n",
" feature_rankings['feature'],\n",
" -feature_rankings['rank'], # Flip axis so rank 1 is top\n",
" color=colors\n",
")\n",
"plt.xlabel(\"Feature Rank (lower = better)\")\n",
"plt.title(\"Boruta Feature Ranking Including Text SVD Features\")\n",
"plt.gca().invert_yaxis()\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}