Coverage for functions \ flipdare \ analysis \ fee \ future_fee_estimator.py: 96%

48 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-05-08 12:22 +1000

1#!/usr/bin/env python 

2# Copyright (c) 2026 Flipdare Pty Ltd. All rights reserved. 

3# 

4# This file is part of Flipdare's proprietary software and contains 

5# confidential and copyrighted material. Unauthorised copying, 

6# modification, distribution, or use of this file is strictly 

7# prohibited without prior written permission from Flipdare Pty Ltd. 

8# 

9# This software includes third-party components licensed under MIT, 

10# BSD, and Apache 2.0 licences. See THIRD_PARTY_NOTICES for details. 

11# 

12 

13import statistics 

14import numpy as np 

15from scipy import stats 

16 

17from flipdare.generated.shared.model.user.app_fee_type import AppFeeType 

18from flipdare.generated.shared.stripe.stripe_currency_code import StripeCurrencyCode 

19from flipdare.payments.data.fee_calculator import FeeCalculator 

20 

21__all__ = ["FutureFeeEstimator"] 

22 

23 

class FutureFeeEstimator:
    """
    Used to remove outliers from pledges to determine the future pledge amount for a dare.

    The estimator holds a list of ``FeeCalculator`` objects and only reads the
    ``amount`` of each one. Two outlier filters are offered:

    * ``highest_mad`` - median +/- 3 x (normal-scaled) Median Absolute Deviation.
    * ``highest_iqr`` - the classic 1.5 x IQR fences.

    ``highest`` tries MAD first and falls back to IQR if MAD raises ``ValueError``.
    """

    def __init__(self, values: list[FeeCalculator]) -> None:
        """Store the fee calculators whose amounts will be analysed."""
        self._values: list[FeeCalculator] = values

    @classmethod
    def from_raw(
        cls,
        values: list[int],
        currency: StripeCurrencyCode,
        fee_type: AppFeeType,
    ) -> "FutureFeeEstimator":
        """
        Build an estimator from raw amounts.

        Each amount is wrapped in a ``FeeCalculator`` that uses ``currency`` as both
        the source and the target currency, and ``fee_type`` as its fee type.
        """
        fee_calculators = [
            FeeCalculator(
                amount=v, fee_type=fee_type, from_currency=currency, to_currency=currency
            )
            for v in values
        ]
        return cls(fee_calculators)

    def _unit_values(self) -> list[float]:
        """
        Return every stored amount as a float.

        Raises:
            ValueError: if the estimator holds no values.
        """
        values = [float(v.amount) for v in self._values]
        if not values:
            raise ValueError("Input list must contain at least one numeric value.")
        return values

    def highest(self) -> int:
        """
        Return the highest amount that is not a statistical outlier.

        Raises:
            ValueError: if the estimator holds no values, or if both filters
                reject every value.
        """
        # Validate up-front so an empty input fails here instead of triggering
        # the IQR fallback below.
        self._unit_values()
        # try mad first since it is more robust to outliers, then fallback to iqr if mad fails
        try:
            return self.highest_mad()
        except ValueError:
            return self.highest_iqr()

    def highest_mad(self) -> int:
        """
        Return the highest amount within median +/- 3 x MAD.

        Raises:
            ValueError: if the estimator holds no values, or no value survives
                the filter.
        """
        data = np.array(self._unit_values())

        # scale='normal' makes it comparable to standard deviation for normal data
        mad = stats.median_abs_deviation(data, scale="normal")

        # NOTE(review): when more than half of the values are identical the MAD is 0,
        # so both bounds collapse onto the median and every other value is treated
        # as an outlier - confirm this is the intended behaviour.
        median = np.median(data)
        lower_bound = median - 3 * mad
        upper_bound = median + 3 * mad
        filtered_data = data[(data >= lower_bound) & (data <= upper_bound)]
        if filtered_data.size == 0:
            raise ValueError("All values are outliers or list is empty after filtering.")

        return int(np.max(filtered_data))

    def highest_iqr(self) -> int:
        """
        Returns the highest value in a list after removing statistical outliers
        using the Inter-quartile Range (IQR) method.

        Alternatives were:
            1. Z-score based version (better for normally distributed data)
            2. Median Absolute Deviation (MAD) based version (more robust to outliers)

        IQR method:
            1. Sort the list to compute quartiles.
            2. Calculate Q1 and Q3 (25th and 75th percentiles).
            3. Compute IQR = Q3 - Q1.
            4. Define bounds:
                   Lower bound = Q1 - 1.5 x IQR
                   Upper bound = Q3 + 1.5 x IQR
            5. Filter out values outside the bounds.
            6. Return the maximum of the filtered list.

        Advantages:
            * Works well for skewed data.
            * Removes extreme values before finding the max.
            * No external libraries needed.

        A single value is returned as-is: quartiles are undefined for one data
        point, and a lone value cannot be an outlier relative to itself.

        Raises:
            ValueError: if the estimator holds no values, or no value survives
                the filter.
        """
        # Sort values for quartile calculation
        sorted_vals = sorted(self._unit_values())

        # statistics.quantiles() raises StatisticsError for fewer than two data
        # points on Python < 3.13, so handle the single-value case explicitly.
        if len(sorted_vals) == 1:
            return int(sorted_vals[0])

        # Calculate Q1 (25th percentile) and Q3 (75th percentile) in one pass
        q1, _, q3 = statistics.quantiles(sorted_vals, n=4)

        # Calculate IQR
        iqr = q3 - q1

        # Define bounds for non-outliers
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        # Filter out outliers
        filtered_vals = [v for v in sorted_vals if lower_bound <= v <= upper_bound]

        if not filtered_vals:
            raise ValueError("All values are outliers or list is empty after filtering.")

        return int(max(filtered_vals))

128 return int(max_value)