diff --git a/models/intermediate/cross/int_flagging_booking_categorisation.sql b/models/intermediate/cross/int_flagging_booking_categorisation.sql
new file mode 100644
index 0000000..679376e
--- /dev/null
+++ b/models/intermediate/cross/int_flagging_booking_categorisation.sql
@@ -0,0 +1,224 @@
+{% set risk_booking_status = ("NOTAPPROVED", "FLAGGED") %}
+{% set no_risk_booking_status = ("APPROVED", "NOFLAGS") %}
+{% set incident_duplicated_status = "CLOSED - DUPLICATE" %}
+{% set incident_finished_status = (
+    "RESOLVED",
+    "RESOLVED EXCEPTION",
+    "CLOSED - NO REPLY",
+    "CLOSED - OTHER",
+    "CLOSED - WAIVER CR",
+    "CLOSED - LATE REPORT",
+    "CLOSED - NOT COVERED",
+    "CLOSED - NOT LIABLE",
+    "CLOSED HOST REQUEST",
+    "CLOSED - NOT APPROVED",
+    "CLOSED - THIRD PARTY",
+) %}
+{% set days_from_checkout_to_completion = 14 %}
+
+{{ config(materialized="table") }}
+-- Single-row summary of New Dash protected bookings, counted by completion, claim,
+-- risk flag and payout status. Consumed by int_flagging_performance_analysis.
+with
+    int_core__booking_summary as (select * from {{ ref("int_core__booking_summary") }}),
+    int_resolutions__incidents as (
+        select * from {{ ref("int_resolutions__incidents") }}
+    ),
+    -- The same booking can have multiple Incidents
+    deduplicated_incidents as (
+        select
+            id_booking,
+            sum(accepted_amount_in_gbp) as booking_accepted_amount_in_gbp,
+            case
+                when sum(submitted_payout_amount_in_gbp) > 0 then true else false
+            end as has_submitted_payout,
+            case
+                when
+                    sum(
+                        case
+                            when
+                                upper(current_status_name)
+                                in {{ incident_finished_status }}
+                            then 1
+                            else 0
+                        end
+                    )
+                    > 0
+                then true
+                else false
+            end as is_incident_finished
+        from int_resolutions__incidents
+        -- NOTE(review): if current_status_name is NULL this predicate is not true
+        -- either, so such incidents are dropped as well -- confirm this is intended.
+        where upper(current_status_name) != '{{ incident_duplicated_status }}'
+        group by id_booking
+    ),
+    new_dash_protected_bookings as (
+        select
+            id_booking,
+            case
+                when
+                    (current_date - booking_check_out_date_utc)
+                    > {{ days_from_checkout_to_completion }}
+                then true
+                else false
+            end as is_booking_completed,
+            case
+                when upper(booking_status) in {{ risk_booking_status }}
+                then true
+                when upper(booking_status) in {{ no_risk_booking_status }}
+                then false
+                else null
+            end as is_booking_flagged_as_risk
+        from int_core__booking_summary
+        where
+            -- Bookings from New Dash users with Id Deal
+            is_user_in_new_dash = true
+            and is_missing_id_deal = false
+            -- Protected Bookings with a Protection or a Deposit Management service
+            and (
+                has_protection_service_business_type
+                or has_deposit_management_service_business_type
+            )
+            -- Bookings with relevant status (i.e. not cancelled, not pending)
+            and (
+                upper(booking_status) in {{ risk_booking_status }}
+                or upper(booking_status) in {{ no_risk_booking_status }}
+            )
+    ),
+    bookings_with_incidents as (
+        select
+            ndpb.id_booking,
+            ndpb.is_booking_completed,
+            ndpb.is_booking_flagged_as_risk,
+            case when di.id_booking is not null then true else false end as has_claim,
+            coalesce(di.has_submitted_payout, false) as has_submitted_payout,
+            coalesce(di.is_incident_finished, false) as is_incident_finished
+        from new_dash_protected_bookings ndpb
+        left join deduplicated_incidents di on ndpb.id_booking = di.id_booking
+    )
+select
+    -- High Level Bookings --
+    count(id_booking) as total_bookings,
+    count(id_booking) filter (where is_booking_completed) as completed_bookings,
+    count(id_booking) filter (where not is_booking_completed) as not_completed_bookings,
+
+    -- High Level Claims --
+    count(id_booking) filter (where has_claim) as total_with_claim_bookings,
+
+    -- Completed with Claim --
+    count(id_booking) filter (
+        where is_booking_completed and has_claim
+    ) as completed_with_claim_bookings,
+    -- Not Completed with Claim --
+    count(id_booking) filter (
+        where not is_booking_completed and has_claim
+    ) as not_completed_with_claim_bookings,
+    -- Completed without Claim --
+    count(id_booking) filter (
+        where is_booking_completed and not has_claim
+    ) as completed_without_claim_bookings,
+
+    -- Completed with Risk
+    count(id_booking) filter (
+        where is_booking_completed and is_booking_flagged_as_risk
+    ) as completed_risk_bookings,
+    -- Completed without Risk
+    count(id_booking) filter (
+        where is_booking_completed and not is_booking_flagged_as_risk
+    ) as completed_no_risk_bookings,
+
+    -- Completed awaiting resolution
+    count(id_booking) filter (
+        where is_booking_completed and has_claim and not is_incident_finished
+    ) as completed_awaiting_resolution_bookings,
+    -- Completed not awaiting resolution
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and ((has_claim and is_incident_finished) or (not has_claim))
+    ) as completed_not_awaiting_resolution_bookings,
+
+    -- Completed with Submitted Payout
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and has_submitted_payout
+            and is_incident_finished
+            and has_claim
+    ) as completed_with_submitted_payout_bookings,
+    -- Completed without Submitted Payout
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and (
+                (has_claim and not has_submitted_payout and is_incident_finished)
+                or (not has_claim)
+            )
+    ) as completed_without_submitted_payout_bookings,
+
+    -- CONFUSION MATRIX 1: RISK vs. CLAIM --
+    -- Completed with Risk and with Claim -- True Positives
+    count(id_booking) filter (
+        where is_booking_completed and is_booking_flagged_as_risk and has_claim
+    ) as completed_risk_with_claim_bookings,
+
+    -- Completed without Risk and without Claim -- True Negatives
+    count(id_booking) filter (
+        where is_booking_completed and not is_booking_flagged_as_risk and not has_claim
+    ) as completed_no_risk_without_claim_bookings,
+
+    -- Completed with Risk and without Claim -- False Positive
+    count(id_booking) filter (
+        where is_booking_completed and is_booking_flagged_as_risk and not has_claim
+    ) as completed_risk_without_claim_bookings,
+
+    -- Completed without Risk and with Claim -- False Negatives
+    count(id_booking) filter (
+        where is_booking_completed and not is_booking_flagged_as_risk and has_claim
+    ) as completed_no_risk_with_claim_bookings,
+
+    -- CONFUSION MATRIX 2: RISK vs. PAYOUT --
+    -- Completed with Risk, Incident is finished and with Submitted Payout --
+    -- True Positives
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and is_booking_flagged_as_risk
+            and has_claim
+            and has_submitted_payout
+            and is_incident_finished
+    ) as completed_risk_with_submitted_payout_bookings,
+
+    -- Completed without Risk and without Submitted Payout -- True Negatives
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and not is_booking_flagged_as_risk
+            and (
+                (has_claim and not has_submitted_payout and is_incident_finished)
+                or (not has_claim)
+            )
+    ) as completed_no_risk_without_submitted_payout_bookings,
+
+    -- Completed with Risk and without Submitted Payout -- False Positive
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and is_booking_flagged_as_risk
+            and (
+                (has_claim and not has_submitted_payout and is_incident_finished)
+                or (not has_claim)
+            )
+    ) as completed_risk_without_submitted_payout_bookings,
+
+    -- Completed without Risk and with Submitted Payout -- False Negative
+    count(id_booking) filter (
+        where
+            is_booking_completed
+            and not is_booking_flagged_as_risk
+            and has_submitted_payout
+            and has_claim
+            and is_incident_finished
+    ) as completed_no_risk_with_submitted_payout_bookings
+from bookings_with_incidents
diff --git a/models/intermediate/cross/int_flagging_performance_analysis.sql b/models/intermediate/cross/int_flagging_performance_analysis.sql
new file mode 100644
index 0000000..6b798db
--- /dev/null
+++ b/models/intermediate/cross/int_flagging_performance_analysis.sql
@@ -0,0 +1,120 @@
+-- Binary-classification metrics for booking flagging, one row per analysis type
+-- (RISK_VS_CLAIM and RISK_VS_SUBMITTED_PAYOUT).
+-- Every denominator is wrapped in nullif(..., 0) so that an empty bucket yields a
+-- NULL score instead of a division-by-zero error that would fail the whole model.
+with
+    int_flagging_booking_categorisation as (
+        select * from {{ ref("int_flagging_booking_categorisation") }}
+    )
+select
+    'RISK_VS_CLAIM' as flagging_analysis_type,
+    completed_bookings as count_total,
+    -- TP
+    completed_risk_with_claim_bookings as count_true_positive,
+    -- TN
+    completed_no_risk_without_claim_bookings as count_true_negative,
+    -- FP
+    completed_risk_without_claim_bookings as count_false_positive,
+    -- FN
+    completed_no_risk_with_claim_bookings as count_false_negative,
+    -- TP %
+    cast(completed_risk_with_claim_bookings as decimal)
+    / nullif(completed_bookings, 0) as true_positive_score,
+    -- TN %
+    cast(completed_no_risk_without_claim_bookings as decimal)
+    / nullif(completed_bookings, 0) as true_negative_score,
+    -- FP %
+    cast(completed_risk_without_claim_bookings as decimal)
+    / nullif(completed_bookings, 0) as false_positive_score,
+    -- FN %
+    cast(completed_no_risk_with_claim_bookings as decimal)
+    / nullif(completed_bookings, 0) as false_negative_score,
+    -- RECALL: TP / (TP + FN)
+    cast(completed_risk_with_claim_bookings as decimal) / nullif(
+        completed_risk_with_claim_bookings + completed_no_risk_with_claim_bookings,
+        0
+    ) as recall_score,
+    -- PRECISION: TP / (TP + FP)
+    cast(completed_risk_with_claim_bookings as decimal) / nullif(
+        completed_risk_with_claim_bookings + completed_risk_without_claim_bookings,
+        0
+    ) as precision_score,
+    -- FALSE POSITIVE RATE: FP / (FP + TN)
+    cast(completed_risk_without_claim_bookings as decimal) / nullif(
+        completed_risk_without_claim_bookings + completed_no_risk_without_claim_bookings,
+        0
+    ) as false_positive_rate_score,
+    -- F1 SCORE: 2*TP / (2*TP + FN + FP)
+    cast(2 * completed_risk_with_claim_bookings as decimal) / nullif(
+        2 * completed_risk_with_claim_bookings
+        + completed_no_risk_with_claim_bookings
+        + completed_risk_without_claim_bookings,
+        0
+    ) as f1_score,
+    -- F2 SCORE: 5*TP / (5*TP + 4*FN + FP)
+    cast(5 * completed_risk_with_claim_bookings as decimal) / nullif(
+        5 * completed_risk_with_claim_bookings
+        + 4 * completed_no_risk_with_claim_bookings
+        + completed_risk_without_claim_bookings,
+        0
+    ) as f2_score
+from int_flagging_booking_categorisation
+
+union all
+
+select
+    'RISK_VS_SUBMITTED_PAYOUT' as flagging_analysis_type,
+    completed_not_awaiting_resolution_bookings as count_total,
+    -- TP
+    completed_risk_with_submitted_payout_bookings as count_true_positive,
+    -- TN
+    completed_no_risk_without_submitted_payout_bookings as count_true_negative,
+    -- FP
+    completed_risk_without_submitted_payout_bookings as count_false_positive,
+    -- FN
+    completed_no_risk_with_submitted_payout_bookings as count_false_negative,
+    -- TP %
+    cast(completed_risk_with_submitted_payout_bookings as decimal)
+    / nullif(completed_not_awaiting_resolution_bookings, 0) as true_positive_score,
+    -- TN %
+    cast(completed_no_risk_without_submitted_payout_bookings as decimal)
+    / nullif(completed_not_awaiting_resolution_bookings, 0) as true_negative_score,
+    -- FP %
+    cast(completed_risk_without_submitted_payout_bookings as decimal)
+    / nullif(completed_not_awaiting_resolution_bookings, 0) as false_positive_score,
+    -- FN %
+    cast(completed_no_risk_with_submitted_payout_bookings as decimal)
+    / nullif(completed_not_awaiting_resolution_bookings, 0) as false_negative_score,
+    -- RECALL: TP / (TP + FN)
+    cast(completed_risk_with_submitted_payout_bookings as decimal) / nullif(
+        completed_risk_with_submitted_payout_bookings
+        + completed_no_risk_with_submitted_payout_bookings,
+        0
+    ) as recall_score,
+    -- PRECISION: TP / (TP + FP)
+    cast(completed_risk_with_submitted_payout_bookings as decimal) / nullif(
+        completed_risk_with_submitted_payout_bookings
+        + completed_risk_without_submitted_payout_bookings,
+        0
+    ) as precision_score,
+    -- FALSE POSITIVE RATE: FP / (FP + TN)
+    cast(completed_risk_without_submitted_payout_bookings as decimal) / nullif(
+        completed_risk_without_submitted_payout_bookings
+        + completed_no_risk_without_submitted_payout_bookings,
+        0
+    ) as false_positive_rate_score,
+    -- F1 SCORE: 2*TP / (2*TP + FN + FP)
+    cast(2 * completed_risk_with_submitted_payout_bookings as decimal) / nullif(
+        2 * completed_risk_with_submitted_payout_bookings
+        + completed_no_risk_with_submitted_payout_bookings
+        + completed_risk_without_submitted_payout_bookings,
+        0
+    ) as f1_score,
+    -- F2 SCORE: 5*TP / (5*TP + 4*FN + FP)
+    cast(5 * completed_risk_with_submitted_payout_bookings as decimal) / nullif(
+        5 * completed_risk_with_submitted_payout_bookings
+        + 4 * completed_no_risk_with_submitted_payout_bookings
+        + completed_risk_without_submitted_payout_bookings,
+        0
+    ) as f2_score
+from int_flagging_booking_categorisation
diff --git a/models/intermediate/cross/schema.yml b/models/intermediate/cross/schema.yml
index 63ad28d..204595e 100644
--- a/models/intermediate/cross/schema.yml
+++ b/models/intermediate/cross/schema.yml
@@ -2800,3 +2800,371 @@ models:
                 - NONE
                 - INVOICING
                 - ONGOING_MONTH
+
+  - name: int_flagging_booking_categorisation
+    description: |
+      A model that computes different Booking counts depending on whether these
+      had claims or not, if these were categorised at risk or not, and if there
+      was a submitted payout or not.
+      This only applies for Bookings:
+      - that come from New Dash users
+      - that are protected, either by a protection or a deposit management service
+
+      Additionally, we track Completed Bookings as those Bookings which, as of today,
+      have been checked out for more than 14 calendar days.
+
+      From these Bookings, we check if these had an incident related in Resolution
+      Center:
+      - that is linked to a Booking
+      - that is not in a duplicated status
+
+      Since Bookings can be duplicated in the incidents data, we effectively consider:
+      - Bookings with "any" claim
+      - Bookings with a finished claim, either with a payout or not
+      - Bookings with a finished claim and a submitted amount for payout
+
+    data_tests:
+      - dbt_expectations.expect_table_row_count_to_equal:
+          value: 1
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: total_bookings
+          column_B: completed_bookings + not_completed_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: total_with_claim_bookings
+          column_B: completed_with_claim_bookings + not_completed_with_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_bookings
+          column_B: completed_with_claim_bookings + completed_without_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_bookings
+          column_B: completed_risk_bookings + completed_no_risk_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_risk_bookings
+          column_B: completed_risk_with_claim_bookings + completed_risk_without_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_with_claim_bookings
+          column_B: completed_risk_with_claim_bookings + completed_no_risk_with_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_no_risk_bookings
+          column_B: completed_no_risk_with_claim_bookings + completed_no_risk_without_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_without_claim_bookings
+          column_B: completed_risk_without_claim_bookings + completed_no_risk_without_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_bookings
+          column_B: completed_awaiting_resolution_bookings + completed_not_awaiting_resolution_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_not_awaiting_resolution_bookings
+          column_B: completed_with_submitted_payout_bookings + completed_without_submitted_payout_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_with_submitted_payout_bookings
+          column_B: completed_risk_with_submitted_payout_bookings + completed_no_risk_with_submitted_payout_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_without_submitted_payout_bookings
+          column_B: completed_risk_without_submitted_payout_bookings + completed_no_risk_without_submitted_payout_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_bookings
+          column_B: completed_risk_with_claim_bookings + completed_no_risk_without_claim_bookings + completed_risk_without_claim_bookings + completed_no_risk_with_claim_bookings
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: completed_not_awaiting_resolution_bookings
+          column_B: completed_risk_with_submitted_payout_bookings + completed_no_risk_without_submitted_payout_bookings + completed_risk_without_submitted_payout_bookings + completed_no_risk_with_submitted_payout_bookings
+
+    columns:
+      - name: total_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected Bookings, either a Protection Service
+          or a Deposit Management service, for reference.
+
+      - name: completed_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected Bookings with a Checkout happening
+          more than 14 days ago.
+
+      - name: not_completed_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected Bookings with a Checkout happening
+          between 14 days ago and today, or in the future.
+
+      - name: total_with_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected Bookings that have had a claim,
+          regardless of whether these bookings are considered completed or not.
+
+      - name: completed_with_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          had a claim.
+
+      - name: not_completed_with_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected, NOT Completed Bookings that have
+          had a claim.
+
+      - name: completed_without_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT had a claim.
+
+      - name: completed_risk_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          been flagged as at Risk.
+
+      - name: completed_no_risk_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT been flagged as at Risk.
+
+      - name: completed_awaiting_resolution_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          a claim and are in a resolution status that is not finished. These
+          Bookings are excluded for the submitted payout-based performance
+          analysis, as we don't know if the claim will be paid out or not.
+
+      - name: completed_not_awaiting_resolution_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that are
+          not awaiting resolution, either because they have a claim in a finished
+          status or because they don't have a claim at all.
+
+      - name: completed_with_submitted_payout_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          had a submitted payout, with the claim being in a finished status.
+
+      - name: completed_without_submitted_payout_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT had a submitted payout, either because there's a claim being in
+          a finished status without a payout or because there's no claim at all.
+
+      - name: completed_risk_with_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          been flagged as at Risk AND that have had a claim.
+          For the claim-based performance analysis, this would be the true positive.
+
+      - name: completed_no_risk_without_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT been flagged as at Risk AND that have NOT had a claim.
+          For the claim-based performance analysis, this would be the true negative.
+
+      - name: completed_risk_without_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          been flagged as at Risk AND that have NOT had a claim.
+          For the claim-based performance analysis, this would be the false positive.
+
+      - name: completed_no_risk_with_claim_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT been flagged as at Risk AND that have had a claim.
+          For the claim-based performance analysis, this would be the false negative.
+
+      - name: completed_risk_with_submitted_payout_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          been flagged as at Risk AND that have had a submitted payout, with
+          the claim being in a finished status.
+          For the submitted payout-based performance analysis, this would be
+          the true positive.
+
+      - name: completed_no_risk_without_submitted_payout_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT been flagged as at Risk AND that have NOT had a submitted payout,
+          either because there's a claim being in a finished status without a
+          payout or because there's no claim at all.
+          For the submitted payout-based performance analysis, this would be
+          the true negative.
+
+      - name: completed_risk_without_submitted_payout_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          been flagged as at Risk AND that have NOT had a submitted payout,
+          either because there's a claim being in a finished status without a
+          payout or because there's no claim at all.
+          For the submitted payout-based performance analysis, this would be
+          the false positive.
+
+      - name: completed_no_risk_with_submitted_payout_bookings
+        data_type: integer
+        description: |
+          Current count of New Dash Protected and Completed Bookings that have
+          NOT been flagged as at Risk AND that have had a submitted payout, with
+          the claim being in a finished status.
+          For the submitted payout-based performance analysis, this would be
+          the false negative.
+
+  - name: int_flagging_performance_analysis
+    description: |
+      Provides a basic statistical analysis with binary classification metrics
+      on the flagging performance for New Dash Protected bookings, in the scope
+      of claims raised or submitted payouts.
+      Scores are NULL when their denominator is zero (for instance, when no
+      booking has been flagged as at risk yet).
+    data_tests:
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: count_total
+          column_B: count_true_positive + count_true_negative + count_false_positive + count_false_negative
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: recall_score
+          column_B: 1.0 * count_true_positive / nullif(count_true_positive + count_false_negative, 0)
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: precision_score
+          column_B: 1.0 * count_true_positive / nullif(count_true_positive + count_false_positive, 0)
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: false_positive_rate_score
+          column_B: 1.0 * count_false_positive / nullif(count_false_positive + count_true_negative, 0)
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: f1_score
+          column_B: 2.0 * count_true_positive / nullif(2 * count_true_positive + count_false_negative + count_false_positive, 0)
+      - dbt_expectations.expect_column_pair_values_to_be_equal:
+          column_A: f2_score
+          column_B: 5.0 * count_true_positive / nullif(5 * count_true_positive + 4 * count_false_negative + count_false_positive, 0)
+
+    columns:
+      - name: flagging_analysis_type
+        data_type: string
+        description: |
+          Type of the analysis conducted, i.e., what do we consider as a
+          positive - predicted (flagged) vs. actual (claim, payout).
+        data_tests:
+          - not_null
+          - unique
+          - accepted_values:
+              values:
+                - RISK_VS_CLAIM
+                - RISK_VS_SUBMITTED_PAYOUT
+
+      - name: count_total
+        data_type: integer
+        description: |
+          Total count of bookings considered for the flagging performance analysis.
+
+      - name: count_true_positive
+        data_type: integer
+        description: |
+          Count of True Positives: predicted positives that are also an actual positive.
+
+      - name: count_true_negative
+        data_type: integer
+        description: |
+          Count of True Negatives: predicted negatives that are also an actual negative.
+
+      - name: count_false_positive
+        data_type: integer
+        description: |
+          Count of False Positives: predicted positives that are not an actual positive.
+
+      - name: count_false_negative
+        data_type: integer
+        description: |
+          Count of False Negatives: predicted negatives that are not an actual negative.
+
+      - name: true_positive_score
+        data_type: decimal
+        description: |
+          True Positives as a ratio over 1. This is the count of true positives divided
+          by the total count of bookings considered for the flagging performance analysis.
+
+      - name: true_negative_score
+        data_type: decimal
+        description: |
+          True Negatives, as a ratio over 1. This is the count of true negatives divided
+          by the total count of bookings considered for the flagging performance analysis.
+
+      - name: false_positive_score
+        data_type: decimal
+        description: |
+          False Positives, as a ratio over 1. This is the count of false positives divided
+          by the total count of bookings considered for the flagging performance analysis.
+
+      - name: false_negative_score
+        data_type: decimal
+        description: |
+          False Negative, as a ratio over 1. This is the count of false negatives divided
+          by the total count of bookings considered for the flagging performance analysis.
+
+      - name: recall_score
+        data_type: decimal
+        description: |
+          Recall score, or true positive rate. This corresponds to the proportion of all
+          actual positives that were classified correctly as a positive. It can be seen
+          as a probability of detection: in our case, it answers the question "what
+          fraction of claims/payouts were flagged as at risk?".
+          This is the count of true positives divided by the sum of true positives and
+          false negatives. Recall improves when false negatives decrease.
+          A hypothetical perfect model would have zero false negatives, and thus a
+          recall of 1.0, or 100% detection rate.
+
+      - name: precision_score
+        data_type: decimal
+        description: |
+          Precision score, or positive predictive value. This corresponds to the
+          proportion of all predicted positives that were classified correctly as a
+          positive. In our case, it answers the question "what fraction of
+          bookings flagged as at risk actually had a claim/payout?".
+          This is the count of true positives divided by the sum of true positives and
+          false positives. Precision improves when false positives decrease.
+          A hypothetical perfect model would have zero false positives, and thus a
+          precision of 1.0, or 100% precision rate.
+
+      - name: false_positive_rate_score
+        data_type: decimal
+        description: |
+          False positive rate, or fall-out. This corresponds to the proportion of all
+          actual negatives that were classified incorrectly as a positive. It can be seen
+          as a probability of false alarm: in our case, it answers the question "what
+          fraction of non-claims/payouts were flagged as at risk?".
+          This is the count of false positives divided by the sum of false positives and
+          true negatives.
+          A hypothetical perfect model would have zero false positives, and thus a
+          false positive rate of 0.0, or 0% false alarm rate.
+
+      - name: f1_score
+        data_type: decimal
+        description: |
+          F1 score, which computes the harmonic mean of precision and recall.
+          This metric balances the trade-off between precision and recall, and is useful
+          when we want to find an optimal balance between the two.
+          It is defined as 2 * (precision * recall) / (precision + recall).
+          A hypothetical perfect model would have an F1 score of 1.0, or 100%.
+          When precision and recall are far apart, the F1 score will be closer to the
+          lower of the two.
+
+      - name: f2_score
+        data_type: decimal
+        description: |
+          F2 score, which computes the harmonic mean of precision and recall, but
+          with a twice higher weight on recall. In our case, it effectively means
+          that we want to reduce the number of false negatives, meaning reducing
+          the number of claims/payouts that are not flagged as at risk, while still
+          keeping a good precision.
+          This metric is useful when we want to prioritize recall over precision,
+          and is defined as 5 * (precision * recall) / (4 * precision + recall).
+          A hypothetical perfect model would have an F2 score of 1.0, or 100%.
+          When precision and recall are far apart, the F2 score will be closer to the
+          lower of the two.