@@ -1735,6 +1735,72 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
1735
1735
false, BFQQE_PREEMPTED );
1736
1736
}
1737
1737
1738
+ static void bfq_reset_inject_limit (struct bfq_data * bfqd ,
1739
+ struct bfq_queue * bfqq )
1740
+ {
1741
+ /* invalidate baseline total service time */
1742
+ bfqq -> last_serv_time_ns = 0 ;
1743
+
1744
+ /*
1745
+ * Reset pointer in case we are waiting for
1746
+ * some request completion.
1747
+ */
1748
+ bfqd -> waited_rq = NULL ;
1749
+
1750
+ /*
1751
+ * If bfqq has a short think time, then start by setting the
1752
+ * inject limit to 0 prudentially, because the service time of
1753
+ * an injected I/O request may be higher than the think time
1754
+ * of bfqq, and therefore, if one request was injected when
1755
+ * bfqq remains empty, this injected request might delay the
1756
+ * service of the next I/O request for bfqq significantly. In
1757
+ * case bfqq can actually tolerate some injection, then the
1758
+ * adaptive update will however raise the limit soon. This
1759
+ * lucky circumstance holds exactly because bfqq has a short
1760
+ * think time, and thus, after remaining empty, is likely to
1761
+ * get new I/O enqueued---and then completed---before being
1762
+ * expired. This is the very pattern that gives the
1763
+ * limit-update algorithm the chance to measure the effect of
1764
+ * injection on request service times, and then to update the
1765
+ * limit accordingly.
1766
+ *
1767
+ * However, in the following special case, the inject limit is
1768
+ * left to 1 even if the think time is short: bfqq's I/O is
1769
+ * synchronized with that of some other queue, i.e., bfqq may
1770
+ * receive new I/O only after the I/O of the other queue is
1771
+ * completed. Keeping the inject limit to 1 allows the
1772
+ * blocking I/O to be served while bfqq is in service. And
1773
+ * this is very convenient both for bfqq and for overall
1774
+ * throughput, as explained in detail in the comments in
1775
+ * bfq_update_has_short_ttime().
1776
+ *
1777
+ * On the opposite end, if bfqq has a long think time, then
1778
+ * start directly by 1, because:
1779
+ * a) on the bright side, keeping at most one request in
1780
+ * service in the drive is unlikely to cause any harm to the
1781
+ * latency of bfqq's requests, as the service time of a single
1782
+ * request is likely to be lower than the think time of bfqq;
1783
+ * b) on the downside, after becoming empty, bfqq is likely to
1784
+ * expire before getting its next request. With this request
1785
+ * arrival pattern, it is very hard to sample total service
1786
+ * times and update the inject limit accordingly (see comments
1787
+ * on bfq_update_inject_limit()). So the limit is likely to be
1788
+ * never, or at least seldom, updated. As a consequence, by
1789
+ * setting the limit to 1, we avoid that no injection ever
1790
+ * occurs with bfqq. On the downside, this proactive step
1791
+ * further reduces chances to actually compute the baseline
1792
+ * total service time. Thus it reduces chances to execute the
1793
+ * limit-update algorithm and possibly raise the limit to more
1794
+ * than 1.
1795
+ */
1796
+ if (bfq_bfqq_has_short_ttime (bfqq ))
1797
+ bfqq -> inject_limit = 0 ;
1798
+ else
1799
+ bfqq -> inject_limit = 1 ;
1800
+
1801
+ bfqq -> decrease_time_jif = jiffies ;
1802
+ }
1803
+
1738
1804
static void bfq_add_request (struct request * rq )
1739
1805
{
1740
1806
struct bfq_queue * bfqq = RQ_BFQQ (rq );
@@ -1755,71 +1821,8 @@ static void bfq_add_request(struct request *rq)
1755
1821
* bfq_update_inject_limit().
1756
1822
*/
1757
1823
if (time_is_before_eq_jiffies (bfqq -> decrease_time_jif +
1758
- msecs_to_jiffies (1000 ))) {
1759
- /* invalidate baseline total service time */
1760
- bfqq -> last_serv_time_ns = 0 ;
1761
-
1762
- /*
1763
- * Reset pointer in case we are waiting for
1764
- * some request completion.
1765
- */
1766
- bfqd -> waited_rq = NULL ;
1767
-
1768
- /*
1769
- * If bfqq has a short think time, then start
1770
- * by setting the inject limit to 0
1771
- * prudentially, because the service time of
1772
- * an injected I/O request may be higher than
1773
- * the think time of bfqq, and therefore, if
1774
- * one request was injected when bfqq remains
1775
- * empty, this injected request might delay
1776
- * the service of the next I/O request for
1777
- * bfqq significantly. In case bfqq can
1778
- * actually tolerate some injection, then the
1779
- * adaptive update will however raise the
1780
- * limit soon. This lucky circumstance holds
1781
- * exactly because bfqq has a short think
1782
- * time, and thus, after remaining empty, is
1783
- * likely to get new I/O enqueued---and then
1784
- * completed---before being expired. This is
1785
- * the very pattern that gives the
1786
- * limit-update algorithm the chance to
1787
- * measure the effect of injection on request
1788
- * service times, and then to update the limit
1789
- * accordingly.
1790
- *
1791
- * On the opposite end, if bfqq has a long
1792
- * think time, then start directly by 1,
1793
- * because:
1794
- * a) on the bright side, keeping at most one
1795
- * request in service in the drive is unlikely
1796
- * to cause any harm to the latency of bfqq's
1797
- * requests, as the service time of a single
1798
- * request is likely to be lower than the
1799
- * think time of bfqq;
1800
- * b) on the downside, after becoming empty,
1801
- * bfqq is likely to expire before getting its
1802
- * next request. With this request arrival
1803
- * pattern, it is very hard to sample total
1804
- * service times and update the inject limit
1805
- * accordingly (see comments on
1806
- * bfq_update_inject_limit()). So the limit is
1807
- * likely to be never, or at least seldom,
1808
- * updated. As a consequence, by setting the
1809
- * limit to 1, we avoid that no injection ever
1810
- * occurs with bfqq. On the downside, this
1811
- * proactive step further reduces chances to
1812
- * actually compute the baseline total service
1813
- * time. Thus it reduces chances to execute the
1814
- * limit-update algorithm and possibly raise the
1815
- * limit to more than 1.
1816
- */
1817
- if (bfq_bfqq_has_short_ttime (bfqq ))
1818
- bfqq -> inject_limit = 0 ;
1819
- else
1820
- bfqq -> inject_limit = 1 ;
1821
- bfqq -> decrease_time_jif = jiffies ;
1822
- }
1824
+ msecs_to_jiffies (1000 )))
1825
+ bfq_reset_inject_limit (bfqd , bfqq );
1823
1826
1824
1827
/*
1825
1828
* The following conditions must hold to setup a new
@@ -4855,7 +4858,7 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
4855
4858
struct bfq_queue * bfqq ,
4856
4859
struct bfq_io_cq * bic )
4857
4860
{
4858
- bool has_short_ttime = true;
4861
+ bool has_short_ttime = true, state_changed ;
4859
4862
4860
4863
/*
4861
4864
* No need to update has_short_ttime if bfqq is async or in
@@ -4880,13 +4883,93 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
4880
4883
bfqq -> ttime .ttime_mean > bfqd -> bfq_slice_idle ))
4881
4884
has_short_ttime = false;
4882
4885
4883
- bfq_log_bfqq (bfqd , bfqq , "update_has_short_ttime: has_short_ttime %d" ,
4884
- has_short_ttime );
4886
+ state_changed = has_short_ttime != bfq_bfqq_has_short_ttime (bfqq );
4885
4887
4886
4888
if (has_short_ttime )
4887
4889
bfq_mark_bfqq_has_short_ttime (bfqq );
4888
4890
else
4889
4891
bfq_clear_bfqq_has_short_ttime (bfqq );
4892
+
4893
+ /*
4894
+ * Until the base value for the total service time gets
4895
+ * finally computed for bfqq, the inject limit does depend on
4896
+ * the think-time state (short|long). In particular, the limit
4897
+ * is 0 or 1 if the think time is deemed, respectively, as
4898
+ * short or long (details in the comments in
4899
+ * bfq_update_inject_limit()). Accordingly, the next
4900
+ * instructions reset the inject limit if the think-time state
4901
+ * has changed and the above base value is still to be
4902
+ * computed.
4903
+ *
4904
+ * However, the reset is performed only if more than 100 ms
4905
+ * have elapsed since the last update of the inject limit, or
4906
+ * (inclusive) if the change is from short to long think
4907
+ * time. The reason for this waiting is as follows.
4908
+ *
4909
+ * bfqq may have a long think time because of a
4910
+ * synchronization with some other queue, i.e., because the
4911
+ * I/O of some other queue may need to be completed for bfqq
4912
+ * to receive new I/O. This happens, e.g., if bfqq is
4913
+ * associated with a process that does some sync. A sync
4914
+ * generates extra blocking I/O, which must be completed
4915
+ * before the process associated with bfqq can go on with its
4916
+ * I/O.
4917
+ *
4918
+ * If such a synchronization is actually in place, then,
4919
+ * without injection on bfqq, the blocking I/O cannot happen
4920
+ * to served while bfqq is in service. As a consequence, if
4921
+ * bfqq is granted I/O-dispatch-plugging, then bfqq remains
4922
+ * empty, and no I/O is dispatched, until the idle timeout
4923
+ * fires. This is likely to result in lower bandwidth and
4924
+ * higher latencies for bfqq, and in a severe loss of total
4925
+ * throughput.
4926
+ *
4927
+ * On the opposite end, a non-zero inject limit may allow the
4928
+ * I/O that blocks bfqq to be executed soon, and therefore
4929
+ * bfqq to receive new I/O soon. But, if this actually
4930
+ * happens, then the next think-time sample for bfqq may be
4931
+ * very low. This in turn may cause bfqq's think time to be
4932
+ * deemed short. Without the 100 ms barrier, this new state
4933
+ * change would cause the body of the next if to be executed
4934
+ * immediately. But this would set to 0 the inject
4935
+ * limit. Without injection, the blocking I/O would cause the
4936
+ * think time of bfqq to become long again, and therefore the
4937
+ * inject limit to be raised again, and so on. The only effect
4938
+ * of such a steady oscillation between the two think-time
4939
+ * states would be to prevent effective injection on bfqq.
4940
+ *
4941
+ * In contrast, if the inject limit is not reset during such a
4942
+ * long time interval as 100 ms, then the number of short
4943
+ * think time samples can grow significantly before the reset
4944
+ * is allowed. As a consequence, the think time state can
4945
+ * become stable before the reset. There will be no state
4946
+ * change when the 100 ms elapse, and therefore no reset of
4947
+ * the inject limit. The inject limit remains steadily equal
4948
+ * to 1 both during and after the 100 ms. So injection can be
4949
+ * performed at all times, and throughput gets boosted.
4950
+ *
4951
+ * An inject limit equal to 1 is however in conflict, in
4952
+ * general, with the fact that the think time of bfqq is
4953
+ * short, because injection may be likely to delay bfqq's I/O
4954
+ * (as explained in the comments in
4955
+ * bfq_update_inject_limit()). But this does not happen in
4956
+ * this special case, because bfqq's low think time is due to
4957
+ * an effective handling of a synchronization, through
4958
+ * injection. In this special case, bfqq's I/O does not get
4959
+ * delayed by injection; on the contrary, bfqq's I/O is
4960
+ * brought forward, because it is not blocked for
4961
+ * milliseconds.
4962
+ *
4963
+ * In addition, during the 100 ms, the base value for the
4964
+ * total service time is likely to get finally computed,
4965
+ * freeing the inject limit from its relation with the think
4966
+ * time.
4967
+ */
4968
+ if (state_changed && bfqq -> last_serv_time_ns == 0 &&
4969
+ (time_is_before_eq_jiffies (bfqq -> decrease_time_jif +
4970
+ msecs_to_jiffies (100 )) ||
4971
+ !has_short_ttime ))
4972
+ bfq_reset_inject_limit (bfqd , bfqq );
4890
4973
}
4891
4974
4892
4975
/*
0 commit comments