Coverage for functions\flipdare\analysis\fee\future_fee_estimator.py

1#!/usr/bin/env python

4# This file is part of Flipdare's proprietary software and contains

5# confidential and copyrighted material. Unauthorised copying,

6# modification, distribution, or use of this file is strictly

7# prohibited without prior written permission from Flipdare Pty Ltd.

9# This software includes third-party components licensed under MIT,

10# BSD, and Apache 2.0 licences. See THIRD_PARTY_NOTICES for details.

11#

13import statistics

14import numpy as np

15from scipy import stats

17from flipdare.generated.shared.model.user.app_fee_type import AppFeeType

18from flipdare.generated.shared.stripe.stripe_currency_code import StripeCurrencyCode

19from flipdare.payments.data.fee_calculator import FeeCalculator

21__all__ = ["FutureFeeEstimator"]

class FutureFeeEstimator:
    """
    Used to remove outliers from pledges to determine the future pledge amount for a dare.

    The estimator holds a list of fee calculators and reports the largest
    ``amount`` that survives outlier filtering, using either the Median
    Absolute Deviation (MAD) or the Inter-quartile Range (IQR).
    """

    # Number of (normal-scaled) MADs around the median that still count as inliers.
    _MAD_CUTOFF: float = 3.0
    # Tukey fence multiplier applied to the IQR on either side of Q1 / Q3.
    _IQR_FENCE: float = 1.5

    def __init__(self, values: list[FeeCalculator]) -> None:
        """Store the fee calculators whose ``amount`` values will be analysed."""
        self._values: list[FeeCalculator] = values

    @classmethod
    def from_raw(
        cls,
        values: list[int],
        currency: StripeCurrencyCode,
        fee_type: AppFeeType,
    ) -> "FutureFeeEstimator":
        """
        Build an estimator from raw amounts.

        Each amount is wrapped in a ``FeeCalculator`` with the given ``fee_type``,
        using ``currency`` as both the source and the target currency.
        """
        fee_calculators = [
            FeeCalculator(
                amount=v, fee_type=fee_type, from_currency=currency, to_currency=currency
            )
            for v in values
        ]
        return cls(fee_calculators)

    def _unit_values(self) -> list[float]:
        """
        Return the ``amount`` of every stored calculator as a float.

        Raises:
            ValueError: If the estimator holds no values.
        """
        values = [float(v.amount) for v in self._values]
        if not values:
            raise ValueError("Input list must contain at least one numeric value.")
        return values

    def highest(self) -> int:
        """
        Return the highest non-outlier amount.

        Raises:
            ValueError: If the estimator holds no values, or if both the MAD
                and the IQR filters reject every value.
        """
        # Validate up front so an empty list is reported directly instead of
        # being caught below and retried through the IQR fallback.
        self._unit_values()
        # try mad first since it is more robust to outliers, then fallback to iqr if mad fails
        try:
            return self.highest_mad()
        except ValueError:
            return self.highest_iqr()

    def highest_mad(self) -> int:
        """
        Return the highest amount within ``median +/- 3 * MAD``.

        Note that when the MAD is 0 the bounds collapse onto the median, so only
        values equal to the median are kept.

        Raises:
            ValueError: If the estimator holds no values or no value lies
                inside the bounds.
        """
        data = np.array(self._unit_values())

        # scale='normal' makes it comparable to standard deviation for normal data
        mad = stats.median_abs_deviation(data, scale="normal")
        median = np.median(data)

        spread = self._MAD_CUTOFF * mad
        lower_bound = median - spread
        upper_bound = median + spread

        filtered_data = data[(data >= lower_bound) & (data <= upper_bound)]
        if filtered_data.size == 0:
            raise ValueError("All values are outliers or list is empty after filtering.")

        return int(np.max(filtered_data))

    def highest_iqr(self) -> int:
        """
        Return the highest amount after removing statistical outliers
        using the Inter-quartile Range (IQR) method.

        Alternatives were:
            1. Z-score based version (better for normally distributed data)
            2. Median Absolute Deviation (MAD) based version (more robust to outliers)

        IQR method:
            1. Calculate Q1 and Q3 (25th and 75th percentiles).
            2. Compute IQR = Q3 - Q1.
            3. Define bounds:
                   Lower bound = Q1 - 1.5 x IQR
                   Upper bound = Q3 + 1.5 x IQR
            4. Filter out values outside the bounds.
            5. Return the maximum of the filtered values.

        Advantages:
            - Works well for skewed data.
            - Removes extreme values before finding the max.
            - No external libraries needed.

        Raises:
            ValueError: If the estimator holds no values or no value lies
                inside the bounds.
        """
        values = self._unit_values()

        # A single observation has no spread and cannot be an outlier. Handle it
        # here because statistics.quantiles() rejects fewer than two data points
        # on Python versions before 3.13.
        if len(values) == 1:
            return int(values[0])

        # quantiles() sorts its input internally, so no explicit sort is needed;
        # with n=4 it returns the three cut points [Q1, Q2, Q3].
        q1, _, q3 = statistics.quantiles(values, n=4)
        iqr = q3 - q1

        # Define bounds for non-outliers
        lower_bound = q1 - self._IQR_FENCE * iqr
        upper_bound = q3 + self._IQR_FENCE * iqr

        # Filter out outliers
        filtered_vals = [v for v in values if lower_bound <= v <= upper_bound]
        if not filtered_vals:
            raise ValueError("All values are outliers or list is empty after filtering.")

        return int(max(filtered_vals))

Coverage for functions \ flipdare \ analysis \ fee \ future_fee_estimator.py: 96%

48 statements