@@ -61,6 +61,7 @@ unsigned int rds_ib_active_bonding_enabled = 0;
61
61
unsigned int rds_ib_active_bonding_fallback = 1 ;
62
62
unsigned int rds_ib_active_bonding_reconnect_delay = 1 ;
63
63
unsigned int rds_ib_active_bonding_trigger_delay_max_msecs ; /* = 0; */
64
+ unsigned int rds_ib_active_bonding_trigger_delay_min_msecs ; /* = 0; */
64
65
#if RDMA_RDS_APM_SUPPORTED
65
66
unsigned int rds_ib_apm_timeout = RDS_IB_DEFAULT_TIMEOUT ;
66
67
#endif
@@ -104,6 +105,10 @@ MODULE_PARM_DESC(rds_ib_active_bonding_reconnect_delay, " Active Bonding reconne
104
105
module_param (rds_ib_active_bonding_trigger_delay_max_msecs , int , 0444 );
105
106
MODULE_PARM_DESC (rds_ib_active_bonding_trigger_delay_max_msecs ,
106
107
" Active Bonding Max delay before active bonding is triggered(msecs)" );
108
+ module_param (rds_ib_active_bonding_trigger_delay_min_msecs , int , 0444 );
109
+ MODULE_PARM_DESC (rds_ib_active_bonding_trigger_delay_min_msecs ,
110
+ " Active Bonding Min delay before active "
111
+ "bonding is triggered(msecs)" );
107
112
#if IB_RDS_CQ_VECTOR_SUPPORTED
108
113
module_param (rds_ib_cq_balance_enabled , int , 0444 );
109
114
MODULE_PARM_DESC (rds_ib_cq_balance_enabled , " CQ load balance Enabled" );
@@ -138,7 +143,7 @@ static struct rds_ib_excl_ips excl_ips_tbl[RDS_IB_MAX_EXCL_IPS];
138
143
static u8 excl_ips_cnt = 0 ;
139
144
140
145
static int ip_config_init_phase_flag ; /* = 0 */
141
-
146
+ static int initial_failovers_all_ports_deactivated_flag ; /* = 0 */
142
147
static int initial_failovers_iterations ; /* = 0 */
143
148
144
149
/*
@@ -1571,6 +1576,7 @@ rds_ib_do_initial_failovers(struct work_struct *workarg)
1571
1576
container_of (workarg , struct rds_ib_initial_failovers_work ,
1572
1577
dlywork .work );
1573
1578
unsigned int ii ;
1579
+ unsigned int ports_deactivated = 0 ;
1574
1580
int ret = 0 ;
1575
1581
1576
1582
/*
@@ -1632,11 +1638,15 @@ rds_ib_do_initial_failovers(struct work_struct *workarg)
1632
1638
ret = rds_ib_set_ip (NULL , NULL ,
1633
1639
ip_config [ii ].if_name ,
1634
1640
0 , 0 , 0 );
1641
+ ports_deactivated ++ ;
1635
1642
1636
1643
}
1637
1644
}
1638
-
1639
1645
}
1646
+
1647
+ if (ports_deactivated == ip_port_cnt )
1648
+ initial_failovers_all_ports_deactivated_flag = 1 ;
1649
+
1640
1650
ip_config_init_phase_flag = 0 ; /* done with initial phase! */
1641
1651
kfree (riif_work );
1642
1652
}
@@ -1800,16 +1810,44 @@ sched_initial_failovers(unsigned int tot_devs,
1800
1810
* max time.
1801
1811
*
1802
1812
* Based on some empirical experiments, we put
1803
- * upper bound to be 30sec(30000msecs ) and up.
1804
- * And we put min to be 10sec (10000msecs ).
1813
+ * upper bound to be 60sec(60000msecs ) and up.
1814
+ * And we put min to be 20sec (20000msecs ).
1805
1815
*/
1806
- rds_ib_active_bonding_trigger_delay_max_msecs = 30000 +
1816
+ rds_ib_active_bonding_trigger_delay_max_msecs = 60000 +
1807
1817
tot_ibdevs * 1200 + (tot_devs - tot_ibdevs )* 1000 ;
1808
1818
}
1809
1819
1810
- trigger_delay_max_jiffies =
1811
- msecs_to_jiffies (rds_ib_active_bonding_trigger_delay_max_msecs );
1812
- trigger_delay_min_jiffies = msecs_to_jiffies (10000 ); /* 10 sec */
1820
+ if (rds_ib_active_bonding_trigger_delay_min_msecs == 0 ) {
1821
+ /*
1822
+ * Derive guesstimate of minimum time (in msecs) before we
1823
+ * trigger the initial failovers for devices.
1824
+ */
1825
+ rds_ib_active_bonding_trigger_delay_min_msecs =
1826
+ 20000 ; /* 20 sec, kept in msecs; converted to jiffies at use */
1827
+ }
1828
+
1829
+ if (rds_ib_active_bonding_trigger_delay_min_msecs >=
1830
+ rds_ib_active_bonding_trigger_delay_max_msecs ) {
1831
+ /*
1832
+ * If these parameters are set inconsistently using
1833
+ * module parameters, try to recover from it by deriving
1834
+ * reasonable values such that max > min and log
1835
+ * a warning.
1836
+ */
1837
+ printk (KERN_WARNING
1838
+ "RDS/IB: rds active bonding trigger max delay(%u msecs)"
1839
+ " is not greater than the minimum delay(%u msecs).\n" ,
1840
+ rds_ib_active_bonding_trigger_delay_max_msecs ,
1841
+ rds_ib_active_bonding_trigger_delay_min_msecs );
1842
+
1843
+ /* set max slightly higher than min! */
1844
+ rds_ib_active_bonding_trigger_delay_max_msecs =
1845
+ rds_ib_active_bonding_trigger_delay_min_msecs + 10 ;
1846
+
1847
+ printk (KERN_WARNING "RDS/IB: rds active bonding trigger max "
1848
+ "delay adjusted to %u msecs.\n" ,
1849
+ rds_ib_active_bonding_trigger_delay_max_msecs );
1850
+ }
1813
1851
1814
1852
riif_work = kzalloc (sizeof (struct rds_ib_initial_failovers_work ),
1815
1853
GFP_KERNEL );
@@ -1820,9 +1858,17 @@ sched_initial_failovers(unsigned int tot_devs,
1820
1858
return ;
1821
1859
}
1822
1860
1861
+ trigger_delay_max_jiffies =
1862
+ msecs_to_jiffies (rds_ib_active_bonding_trigger_delay_max_msecs );
1823
1863
riif_work -> timeout = trigger_delay_max_jiffies ;
1824
1864
1865
+ trigger_delay_min_jiffies =
1866
+ msecs_to_jiffies (rds_ib_active_bonding_trigger_delay_min_msecs );
1867
+
1825
1868
INIT_DELAYED_WORK (& riif_work -> dlywork , rds_ib_initial_failovers );
1869
+
1870
+ riif_work -> timeout = trigger_delay_max_jiffies ;
1871
+
1826
1872
queue_delayed_work (rds_wq ,
1827
1873
& riif_work -> dlywork ,
1828
1874
trigger_delay_min_jiffies );
@@ -2585,11 +2631,14 @@ static int rds_ib_netdev_callback(struct notifier_block *self, unsigned long eve
2585
2631
if (rds_ib_active_bonding_fallback ) {
2586
2632
/*
2587
2633
* Special case:
2588
- * If all interfaces were down OR
2634
+ * If all interfaces were down
2635
+ * (but NOT deactivated during initial failovers) OR
2589
2636
* transitioning port_state was in INIT
2590
2637
* use a larger timeout.
2591
2638
*/
2592
- if (all_ports_were_down || port_state_was_init ) {
2639
+ if ((all_ports_were_down &&
2640
+ !initial_failovers_all_ports_deactivated_flag )
2641
+ || port_state_was_init ) {
2593
2642
INIT_DELAYED_WORK (& work -> work ,
2594
2643
rds_ib_net_failback );
2595
2644
work -> timeout = msecs_to_jiffies (10000 );
@@ -2603,7 +2652,16 @@ static int rds_ib_netdev_callback(struct notifier_block *self, unsigned long eve
2603
2652
} else
2604
2653
kfree (work );
2605
2654
2655
+ /*
2656
+ * clear this state - onetime use only to
2657
+ * exclude the deactivation of ports
2658
+ * during initial failovers from the
2659
+ * 'special case' logic above!
2660
+ */
2661
+ initial_failovers_all_ports_deactivated_flag = 0 ;
2662
+
2606
2663
break ;
2664
+
2607
2665
case RDSIBP_TRANSITION_DOWN :
2608
2666
if (rds_ib_sysctl_active_bonding ) {
2609
2667
INIT_DELAYED_WORK (& work -> work , rds_ib_failover );
0 commit comments