@@ -15,6 +15,8 @@
 #include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
 
+#define BUSY_MAX_EI	20u /* ms */
+
 /*
  * Lock protecting IPS related data structures
  */
@@ -45,6 +47,100 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
 	intel_uncore_write_fw(uncore, reg, val);
 }
 
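+/*
+ * Software evaluation of RPS busyness: sample per-engine busy time on a
+ * timer, compare against the current power thresholds, and kick rps->work
+ * with the corresponding pm_iir event when a frequency change is wanted.
+ * Used instead of the hardware up/down interrupts when engine busy stats
+ * are available (see intel_rps_enable()).
+ */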
+static void rps_timer(struct timer_list *t)
+{
+	struct intel_rps *rps = from_timer(rps, t, timer);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	s64 max_busy[3] = {};
+	ktime_t dt, last;
+
+	for_each_engine(engine, rps_to_gt(rps), id) {
+		s64 busy;
+		int i;
+
+		dt = intel_engine_get_busy_time(engine);
+		last = engine->stats.rps;
+		engine->stats.rps = dt;
+
+		busy = ktime_to_ns(ktime_sub(dt, last));
+		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
+			if (busy > max_busy[i])
+				swap(busy, max_busy[i]);
+		}
+	}
+
+	dt = ktime_get();
+	last = rps->pm_timestamp;
+	rps->pm_timestamp = dt;
+
+	if (intel_rps_is_active(rps)) {
+		s64 busy;
+		int i;
+
+		dt = ktime_sub(dt, last);
+
+		/*
+		 * Our goal is to evaluate each engine independently, so we run
+		 * at the lowest clocks required to sustain the heaviest
+		 * workload. However, a task may be split into sequential
+		 * dependent operations across a set of engines, such that
+		 * the independent contributions do not account for high load,
+		 * but overall the task is GPU bound. For example, consider
+		 * video decode on vcs followed by colour post-processing
+		 * on vecs, followed by general post-processing on rcs.
+		 * Since multi-engines being active does imply a single
+		 * continuous workload across all engines, we hedge our
+		 * bets by only contributing a factor of the distributed
+		 * load into our busyness calculation.
+		 */
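+		/*
+		 * For instance, per-engine busy times of 8ms, 3ms and 1ms in
+		 * one interval yield a weighted busyness of
+		 * 8 + 3/2 + 1/4 = 9.75ms.
+		 */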
+		busy = max_busy[0];
+		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
+			if (!max_busy[i])
+				break;
+
+			busy += div_u64(max_busy[i], 1 << i);
+		}
+		GT_TRACE(rps_to_gt(rps),
+			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
+			 busy, (int)div64_u64(100 * busy, dt),
+			 max_busy[0], max_busy[1], max_busy[2],
+			 rps->pm_interval);
+
+		if (100 * busy > rps->power.up_threshold * dt &&
+		    rps->cur_freq < rps->max_freq_softlimit) {
+			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
+			rps->pm_interval = 1;
+			schedule_work(&rps->work);
+		} else if (100 * busy < rps->power.down_threshold * dt &&
+			   rps->cur_freq > rps->min_freq_softlimit) {
+			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
+			rps->pm_interval = 1;
+			schedule_work(&rps->work);
+		} else {
+			rps->last_adj = 0;
+		}
+
+		mod_timer(&rps->timer,
+			  jiffies + msecs_to_jiffies(rps->pm_interval));
+		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
+	}
+}
+
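+/*
+ * Across park/unpark, pm_timestamp flips between an absolute timestamp of
+ * the last evaluation and the time already consumed of the current interval,
+ * so the parked period is excluded from the next busyness evaluation.
+ */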
+static void rps_start_timer(struct intel_rps *rps)
+{
+	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+	rps->pm_interval = 1;
+	mod_timer(&rps->timer, jiffies + 1);
+}
+
+static void rps_stop_timer(struct intel_rps *rps)
+{
+	del_timer_sync(&rps->timer);
+	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
+	cancel_work_sync(&rps->work);
+}
+
 static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
 {
 	u32 mask = 0;
@@ -535,36 +631,24 @@ static void rps_set_power(struct intel_rps *rps, int new_power)
 	if (new_power == rps->power.mode)
 		return;
 
+	threshold_up = 95;
+	threshold_down = 85;
+
 	/* Note the units here are not exactly 1us, but 1280ns. */
 	switch (new_power) {
 	case LOW_POWER:
-		/* Upclock if more than 95% busy over 16ms */
 		ei_up = 16000;
-		threshold_up = 95;
-
-		/* Downclock if less than 85% busy over 32ms */
 		ei_down = 32000;
-		threshold_down = 85;
 		break;
 
 	case BETWEEN:
-		/* Upclock if more than 90% busy over 13ms */
 		ei_up = 13000;
-		threshold_up = 90;
-
-		/* Downclock if less than 75% busy over 32ms */
 		ei_down = 32000;
-		threshold_down = 75;
 		break;
 
 	case HIGH_POWER:
-		/* Upclock if more than 85% busy over 10ms */
 		ei_up = 10000;
-		threshold_up = 85;
-
-		/* Downclock if less than 60% busy over 32ms */
 		ei_down = 32000;
-		threshold_down = 60;
 		break;
 	}
 
@@ -742,8 +826,11 @@ void intel_rps_unpark(struct intel_rps *rps)
 
 	mutex_unlock(&rps->lock);
 
+	rps->pm_iir = 0;
 	if (intel_rps_has_interrupts(rps))
 		rps_enable_interrupts(rps);
+	if (intel_rps_uses_timer(rps))
+		rps_start_timer(rps);
 
 	if (IS_GEN(rps_to_i915(rps), 5))
 		gen5_rps_update(rps);
@@ -754,6 +841,8 @@ void intel_rps_park(struct intel_rps *rps)
 	if (!intel_rps_clear_active(rps))
 		return;
 
+	if (intel_rps_uses_timer(rps))
+		rps_stop_timer(rps);
 	if (intel_rps_has_interrupts(rps))
 		rps_disable_interrupts(rps);
 
@@ -1211,6 +1300,19 @@ static unsigned long __ips_gfx_val(struct intel_ips *ips)
 	return ips->gfx_power + state2;
 }
 
+static bool has_busy_stats(struct intel_rps *rps)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, rps_to_gt(rps), id) {
+		if (!intel_engine_supports_stats(engine))
+			return false;
+	}
+
+	return true;
+}
+
 void intel_rps_enable(struct intel_rps *rps)
 {
 	struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -1255,7 +1357,9 @@ void intel_rps_enable(struct intel_rps *rps)
 	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
 	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
 
-	if (INTEL_GEN(i915) >= 6)
+	if (has_busy_stats(rps))
+		intel_rps_set_timer(rps);
+	else if (INTEL_GEN(i915) >= 6)
 		intel_rps_set_interrupts(rps);
 	else
 		/* Ironlake currently uses intel_ips.ko */ {}
@@ -1274,6 +1378,7 @@ void intel_rps_disable(struct intel_rps *rps)
 
 	intel_rps_clear_enabled(rps);
 	intel_rps_clear_interrupts(rps);
+	intel_rps_clear_timer(rps);
 
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_disable(rps);
@@ -1689,6 +1794,7 @@ void intel_rps_init_early(struct intel_rps *rps)
 	mutex_init(&rps->power.mutex);
 
 	INIT_WORK(&rps->work, rps_work);
+	timer_setup(&rps->timer, rps_timer, 0);
 
 	atomic_set(&rps->num_waiters, 0);
 }