#define NUM_RDS_RECV_SG (PAGE_ALIGN(RDS_MAX_FRAG_SIZE) / PAGE_SIZE)
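For orientation, a worked instance of the define above, assuming a 16 KiB RDS_MAX_FRAG_SIZE and a 4 KiB PAGE_SIZE (both values are assumptions for illustration, not taken from this patch):

/* NUM_RDS_RECV_SG = PAGE_ALIGN(16384) / 4096 = 4, i.e. four scatter/gather
 * entries are enough to map one maximally sized receive fragment.
 */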

- #define RDS_IB_CQ_ERR 2
- #define RDS_IB_NEED_SHUTDOWN 3
static inline void set_bit_mb(long nr, unsigned long *flags)
{
/* set_bit() does not imply a memory barrier */
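The body of set_bit_mb() falls outside this hunk; a minimal sketch of what it presumably does, pairing set_bit() with explicit barriers since set_bit() alone gives no ordering (the exact smp_mb__* placement is an assumption, not shown in the diff):

static inline void set_bit_mb(long nr, unsigned long *flags)
{
        /* set_bit() does not imply a memory barrier */
        smp_mb__before_atomic();
        set_bit(nr, flags);
        smp_mb__after_atomic();
}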
@@ -80,9 +78,12 @@ static inline void clear_bit_mb(long nr, unsigned long *flags)
}

enum rds_ib_conn_flags {
- RDS_IB_CLEAN_CACHE,
- RDS_IB_CQ_ERR,
- RDS_IB_NEED_SHUTDOWN
+ RDS_IB_CLEAN_CACHE, /* 0x01 */
+ RDS_IB_CQ_ERR, /* 0x02 */
+ RDS_IB_NEED_SHUTDOWN, /* 0x04 */
+ RDS_IB_SRQ_NEED_FLUSH, /* 0x08 */
+ RDS_IB_SRQ_LAST_WQE_REACHED, /* 0x10 */
+ RDS_IB_SRQ_CQ_FLUSHED /* 0x20 */
};
#define RDS_IB_DEFAULT_FREG_PORT_NUM 1
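The enumerators above are bit numbers for the connection's i_flags word; the /* 0x.. */ comments give the resulting masks. A hedged usage sketch (the call site and the handle_cq_error() helper are hypothetical; only the flag names and set_bit_mb() come from this header):

        set_bit_mb(RDS_IB_NEED_SHUTDOWN, &ic->i_flags);   /* sets mask 0x04 */

        if (test_bit(RDS_IB_CQ_ERR, &ic->i_flags))        /* tests mask 0x02 */
                handle_cq_error(ic);                      /* hypothetical helper */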
@@ -130,6 +131,15 @@ struct rds_ib_refill_cache {
};

+ struct rds_ib_cache_info {
+ u16 ci_frag_sz; /* IB fragment size */
+ u8 ci_frag_pages;
+ u16 ci_frag_cache_inx;
+ uint ci_irq_local_cpu;
+ atomic_t ci_cache_allocs;
+ struct rds_transport *ci_trans;
+ };
+
struct rds_ib_conn_priv_cmn {
u8 ricpc_protocol_major;
u8 ricpc_protocol_minor;
@@ -185,8 +195,8 @@ struct rds_ib_recv_work {
struct rds_page_frag *r_frag;
struct ib_recv_wr r_wr;
struct ib_sge r_sge[RDS_IB_MAX_SGE];
- struct rds_ib_connection *r_ic;
- int r_posted;
+ unsigned long r_posted;
+ struct lfstack_el r_stack_entry;
};
struct rds_ib_work_ring {
@@ -335,11 +345,8 @@ struct rds_ib_connection {
/* Protocol version specific information */
unsigned int i_flowctl:1; /* enable/disable flow ctl */
- u16 i_frag_sz; /* IB fragment size */
- u16 i_frag_cache_sz;
- u8 i_frag_pages;
+ struct rds_ib_cache_info i_cache_info;
unsigned long i_flags;
- u16 i_frag_cache_inx;
u16 i_hca_sge;

/* Batched completions */
@@ -349,8 +356,6 @@ struct rds_ib_connection {
unsigned int i_unsolicited_wrs;
u8 i_sl;

- atomic_t i_cache_allocs;
-
struct completion i_last_wqe_complete;
/* Active Bonding */
@@ -364,7 +369,6 @@ struct rds_ib_connection {
spinlock_t i_rx_lock;
unsigned int i_rx_wait_for_handler;
atomic_t i_worker_has_rx;
- uint i_irq_local_cpu;

/* For handling delayed release of device related resource. */
struct mutex i_delayed_free_lock;
@@ -399,18 +403,28 @@ struct rds_ib_ipaddr {
struct rcu_head rcu_head;
};

+ enum rds_ib_srq_flags {
+ RDS_SRQ_REFILL, /* 0x01 */
+ };
+
+ #define RDS_SRQ_NMBR_STACKS 8 /* must be 2^n */
struct rds_ib_srq {
struct rds_ib_device *rds_ibdev;
struct ib_srq *s_srq;
struct ib_event_handler s_event_handler;
struct rds_ib_recv_work *s_recvs;
u32 s_n_wr;
- struct rds_header *s_recv_hdrs;
- u64 s_recv_hdrs_dma;
+ struct rds_header **s_recv_hdrs;
+ dma_addr_t *s_recv_hdrs_dma;
+ struct scatterlist *s_recv_hdrs_sg;
atomic_t s_num_posted;
- unsigned long s_refill_gate;
+ unsigned long s_flags;
struct delayed_work s_refill_w;
struct delayed_work s_rearm_w;
+ atomic_t s_refill_ix;
+ atomic_t s_release_ix;
+ struct rds_ib_cache_info s_cache_info;
+ union lfstack s_stack[RDS_SRQ_NMBR_STACKS];
};
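The /* must be 2^n */ requirement on RDS_SRQ_NMBR_STACKS suggests that a stack is selected by masking a running counter rather than by a modulo. A minimal sketch of that idea, assuming s_release_ix is the counter used when returning receive work entries (the helper name is hypothetical; the real selection code is not part of this hunk):

/* Pick one of the lock-free stacks; the power-of-two count turns the
 * modulo into a cheap mask.
 */
static inline union lfstack *rds_ib_srq_pick_stack(struct rds_ib_srq *srq)
{
        unsigned int ix = atomic_inc_return(&srq->s_release_ix);

        return &srq->s_stack[ix & (RDS_SRQ_NMBR_STACKS - 1)];
}

Freed rds_ib_recv_work entries would then be pushed onto the chosen stack through their new r_stack_entry field.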
@@ -436,6 +450,7 @@ enum {
};

#define RDS_FRAG_CACHE_ENTRIES (ilog2(RDS_MAX_FRAG_SIZE / PAGE_SIZE) + 1)
+ #define NMBR_QOS 256

/* Each RDMA device maintains a list of RDS sockets associated with it. The
* following struct is used to represent this association. This struct is
@@ -484,7 +499,14 @@ struct rds_ib_device {
unsigned int max_initiator_depth;
unsigned int max_responder_resources;
spinlock_t spinlock; /* protect the above */
- struct rds_ib_srq *srq;
+ atomic_t refcount;
+ struct work_struct free_work;
+ struct rds_ib_srq *srqs[NMBR_QOS];
+ /* Several QOS connections may invoke rds_ib_srq_get
+ * concurrently, hence we need protection for rds_ib_srq_get
+ */
+ struct mutex srq_get_lock;
+
struct rds_ib_port *ports;
struct ib_event_handler event_handler;
int *vector_load;
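Given the comment above and the per-QoS srqs[NMBR_QOS] array, rds_ib_srq_get() (declared later in this header) presumably creates a device's SRQ for a given QoS lazily under srq_get_lock. A hedged sketch of that shape; conn->c_tos as the QoS index and the rds_ib_srq_alloc() helper are assumptions, not shown in this patch:

struct rds_ib_srq *rds_ib_srq_get(struct rds_ib_device *rds_ibdev,
                                  struct rds_connection *conn)
{
        u8 qos = conn->c_tos;   /* assumed QoS key, 0..NMBR_QOS-1 */
        struct rds_ib_srq *srq;

        mutex_lock(&rds_ibdev->srq_get_lock);
        srq = rds_ibdev->srqs[qos];
        if (!srq) {
                srq = rds_ib_srq_alloc(rds_ibdev, qos);   /* hypothetical helper */
                if (!IS_ERR_OR_NULL(srq))
                        rds_ibdev->srqs[qos] = srq;
                else
                        srq = NULL;
        }
        mutex_unlock(&rds_ibdev->srq_get_lock);

        return srq;
}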
@@ -551,6 +573,16 @@ struct rds_ib_statistics {
uint64_t s_ib_rx_refill_from_cq;
uint64_t s_ib_rx_refill_from_thread;
uint64_t s_ib_rx_refill_lock_taken;
+ uint64_t s_ib_srq_refill_from_cm;
+ uint64_t s_ib_srq_refill_from_rx;
+ uint64_t s_ib_srq_refill_from_event;
+ uint64_t s_ib_srq_limit_reached_event;
+ uint64_t s_ib_srq_refills;
+ uint64_t s_ib_srq_empty_refills;
+ uint64_t s_ib_srq_entries_refilled;
+ uint64_t s_ib_srq_entries_from_stacks;
+ uint64_t s_ib_srq_jiffies_refilled;
+ uint64_t s_ib_srq_jiffies_from_stacks;
uint64_t s_ib_rx_alloc_limit;
uint64_t s_ib_rx_total_frags;
uint64_t s_ib_rx_total_incs;
@@ -585,9 +617,6 @@ struct rds_ib_statistics {
uint64_t s_ib_rdma_flush_mr_pool_avoided;
uint64_t s_ib_atomic_cswp;
uint64_t s_ib_atomic_fadd;
- uint64_t s_ib_srq_lows;
- uint64_t s_ib_srq_refills;
- uint64_t s_ib_srq_empty_refills;
uint64_t s_ib_recv_added_to_cache;
uint64_t s_ib_recv_removed_from_cache;
uint64_t s_ib_recv_nmb_added_to_cache;
@@ -708,7 +737,19 @@ u32 __rds_find_ifindex_v4(struct net *net, __be32 addr);
#if IS_ENABLED(CONFIG_IPV6)
u32 __rds_find_ifindex_v6(struct net *net, const struct in6_addr *addr);
#endif
-
+ void rds_ib_free_unmap_hdrs(struct ib_device *dev,
+ struct rds_header ***_hdrs,
+ dma_addr_t **_dma,
+ struct scatterlist **_sg,
+ const int n,
+ enum dma_data_direction direction);
+ int rds_ib_alloc_map_hdrs(struct ib_device *dev,
+ struct rds_header ***_hdrs,
+ dma_addr_t **_dma,
+ struct scatterlist **_sg,
+ char **reason,
+ const int n,
+ enum dma_data_direction direction);
/* ib_rdma.c */
struct rds_ib_device *rds_ib_get_device(const struct in6_addr *ipaddr);
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
@@ -754,19 +795,16 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
struct ib_wc *wc,
struct rds_ib_ack_state *state);
void rds_ib_recv_tasklet_fn(unsigned long data);
- void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
+ void rds_ib_recv_init_ring(struct rds_ib_connection *ic, struct rds_ib_srq *srq);
void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
void rds_ib_attempt_ack(struct rds_ib_connection *ic);
void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
- void rds_ib_srq_refill(struct work_struct *work);
- int rds_ib_srq_prefill_ring(struct rds_ib_device *rds_ibdev);
+ void rds_ib_srq_refill(struct rds_ib_srq *srq, bool prefill, gfp_t gfp, bool use_worker);
void rds_ib_srq_rearm(struct work_struct *work);
void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required);
- void rds_ib_srq_process_recv(struct rds_connection *conn,
- struct rds_ib_recv_work *recv, u32 data_len,
- struct rds_ib_ack_state *state);
+ struct rds_ib_srq *rds_ib_srq_get(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
static inline int rds_ib_recv_acquire_refill(struct rds_connection *conn)
{
return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0;
@@ -786,6 +824,18 @@ static inline int rds_ib_recv_acquire_refill(struct rds_connection *conn)
} \
} while (false)

+ /* The goal here is to just make sure that someone, somewhere
+ * is posting buffers. If we can't get the refill lock,
+ * let them do their thing
+ */
+ #define RDS_IB_SRQ_REFILL(srq, prefill, gfp, where, use_worker) do { \
+ struct rds_ib_srq *s = (srq); \
+ int np = atomic_read(&s->s_num_posted); \
+ if (np < rds_ib_srq_hwm_refill) { \
+ rds_ib_stats_inc(where); \
+ rds_ib_srq_refill(s, prefill, gfp, use_worker); \
+ } \
+ } while (false)
/* ib_ring.c */
void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr);
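For illustration, a call site of RDS_IB_SRQ_REFILL from the receive completion path might look as follows; the GFP flags, the choice of the s_ib_srq_refill_from_rx counter, and the srq variable itself are assumptions, not taken from this patch:

        struct rds_ib_srq *srq = ...;   /* e.g. obtained via rds_ib_srq_get() */

        /* Top up the SRQ when the posted count has dropped below the
         * high-water mark; the refill may be handed off to a worker.
         */
        RDS_IB_SRQ_REFILL(srq, false, GFP_ATOMIC, s_ib_srq_refill_from_rx, true);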