Skip to content

Commit 540eed5

Browse files
htejun authored and Jens Axboe committed
block: make partition array dynamic
disk->__part used to be statically allocated to the maximum possible number of partitions. This patch makes partition array allocation dynamic. The added overhead is minimal, as the only real change is that one memory dereference becomes an RCU dereference. This saves a bit of memory, saves the CPU cycles spent iterating through unoccupied slots, and makes increasing the partition limit easier. Signed-off-by: Tejun Heo <[email protected]> Signed-off-by: Jens Axboe <[email protected]>
1 parent 074a7ac commit 540eed5

File tree

4 files changed

+154
-27
lines changed

4 files changed

+154
-27
lines changed

block/genhd.c

Lines changed: 109 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,21 @@ static struct device_type disk_type;
5252
*/
5353
struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
5454
{
55-
struct hd_struct *part;
55+
struct hd_struct *part = NULL;
56+
struct disk_part_tbl *ptbl;
5657

57-
if (unlikely(partno < 0 || partno >= disk_max_parts(disk)))
58+
if (unlikely(partno < 0))
5859
return NULL;
60+
5961
rcu_read_lock();
60-
part = rcu_dereference(disk->__part[partno]);
61-
if (part)
62-
get_device(part_to_dev(part));
62+
63+
ptbl = rcu_dereference(disk->part_tbl);
64+
if (likely(partno < ptbl->len)) {
65+
part = rcu_dereference(ptbl->part[partno]);
66+
if (part)
67+
get_device(part_to_dev(part));
68+
}
69+
6370
rcu_read_unlock();
6471

6572
return part;
@@ -80,17 +87,24 @@ EXPORT_SYMBOL_GPL(disk_get_part);
8087
void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
8188
unsigned int flags)
8289
{
90+
struct disk_part_tbl *ptbl;
91+
92+
rcu_read_lock();
93+
ptbl = rcu_dereference(disk->part_tbl);
94+
8395
piter->disk = disk;
8496
piter->part = NULL;
8597

8698
if (flags & DISK_PITER_REVERSE)
87-
piter->idx = disk_max_parts(piter->disk) - 1;
99+
piter->idx = ptbl->len - 1;
88100
else if (flags & DISK_PITER_INCL_PART0)
89101
piter->idx = 0;
90102
else
91103
piter->idx = 1;
92104

93105
piter->flags = flags;
106+
107+
rcu_read_unlock();
94108
}
95109
EXPORT_SYMBOL_GPL(disk_part_iter_init);
96110

@@ -105,13 +119,16 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init);
105119
*/
106120
struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
107121
{
122+
struct disk_part_tbl *ptbl;
108123
int inc, end;
109124

110125
/* put the last partition */
111126
disk_put_part(piter->part);
112127
piter->part = NULL;
113128

129+
/* get part_tbl */
114130
rcu_read_lock();
131+
ptbl = rcu_dereference(piter->disk->part_tbl);
115132

116133
/* determine iteration parameters */
117134
if (piter->flags & DISK_PITER_REVERSE) {
@@ -122,14 +139,14 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
122139
end = 0;
123140
} else {
124141
inc = 1;
125-
end = disk_max_parts(piter->disk);
142+
end = ptbl->len;
126143
}
127144

128145
/* iterate to the next partition */
129146
for (; piter->idx != end; piter->idx += inc) {
130147
struct hd_struct *part;
131148

132-
part = rcu_dereference(piter->disk->__part[piter->idx]);
149+
part = rcu_dereference(ptbl->part[piter->idx]);
133150
if (!part)
134151
continue;
135152
if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
@@ -180,10 +197,13 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
180197
*/
181198
struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
182199
{
200+
struct disk_part_tbl *ptbl;
183201
int i;
184202

185-
for (i = 1; i < disk_max_parts(disk); i++) {
186-
struct hd_struct *part = rcu_dereference(disk->__part[i]);
203+
ptbl = rcu_dereference(disk->part_tbl);
204+
205+
for (i = 1; i < ptbl->len; i++) {
206+
struct hd_struct *part = rcu_dereference(ptbl->part[i]);
187207

188208
if (part && part->start_sect <= sector &&
189209
sector < part->start_sect + part->nr_sects)
@@ -798,12 +818,86 @@ static struct attribute_group *disk_attr_groups[] = {
798818
NULL
799819
};
800820

821+
static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
822+
{
823+
struct disk_part_tbl *ptbl =
824+
container_of(head, struct disk_part_tbl, rcu_head);
825+
826+
kfree(ptbl);
827+
}
828+
829+
/**
830+
* disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
831+
* @disk: disk to replace part_tbl for
832+
* @new_ptbl: new part_tbl to install
833+
*
834+
* Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
835+
* original ptbl is freed using RCU callback.
836+
*
837+
* LOCKING:
838+
* Matching bd_mutex locked.
839+
*/
840+
static void disk_replace_part_tbl(struct gendisk *disk,
841+
struct disk_part_tbl *new_ptbl)
842+
{
843+
struct disk_part_tbl *old_ptbl = disk->part_tbl;
844+
845+
rcu_assign_pointer(disk->part_tbl, new_ptbl);
846+
if (old_ptbl)
847+
call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
848+
}
849+
850+
/**
851+
* disk_expand_part_tbl - expand disk->part_tbl
852+
* @disk: disk to expand part_tbl for
853+
* @partno: expand such that this partno can fit in
854+
*
855+
* Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
856+
* uses RCU to allow unlocked dereferencing for stats and other stuff.
857+
*
858+
* LOCKING:
859+
* Matching bd_mutex locked, might sleep.
860+
*
861+
* RETURNS:
862+
* 0 on success, -errno on failure.
863+
*/
864+
int disk_expand_part_tbl(struct gendisk *disk, int partno)
865+
{
866+
struct disk_part_tbl *old_ptbl = disk->part_tbl;
867+
struct disk_part_tbl *new_ptbl;
868+
int len = old_ptbl ? old_ptbl->len : 0;
869+
int target = partno + 1;
870+
size_t size;
871+
int i;
872+
873+
/* disk_max_parts() is zero during initialization, ignore if so */
874+
if (disk_max_parts(disk) && target > disk_max_parts(disk))
875+
return -EINVAL;
876+
877+
if (target <= len)
878+
return 0;
879+
880+
size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
881+
new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
882+
if (!new_ptbl)
883+
return -ENOMEM;
884+
885+
INIT_RCU_HEAD(&new_ptbl->rcu_head);
886+
new_ptbl->len = target;
887+
888+
for (i = 0; i < len; i++)
889+
rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
890+
891+
disk_replace_part_tbl(disk, new_ptbl);
892+
return 0;
893+
}
894+
801895
static void disk_release(struct device *dev)
802896
{
803897
struct gendisk *disk = dev_to_disk(dev);
804898

805899
kfree(disk->random);
806-
kfree(disk->__part);
900+
disk_replace_part_tbl(disk, NULL);
807901
free_part_stats(&disk->part0);
808902
kfree(disk);
809903
}
@@ -948,22 +1042,16 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id)
9481042
disk = kmalloc_node(sizeof(struct gendisk),
9491043
GFP_KERNEL | __GFP_ZERO, node_id);
9501044
if (disk) {
951-
int tot_minors = minors + ext_minors;
952-
int size = tot_minors * sizeof(struct hd_struct *);
953-
9541045
if (!init_part_stats(&disk->part0)) {
9551046
kfree(disk);
9561047
return NULL;
9571048
}
958-
959-
disk->__part = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO,
960-
node_id);
961-
if (!disk->__part) {
962-
free_part_stats(&disk->part0);
1049+
if (disk_expand_part_tbl(disk, 0)) {
1050+
free_part_stats(&disk->part0);
9631051
kfree(disk);
9641052
return NULL;
9651053
}
966-
disk->__part[0] = &disk->part0;
1054+
disk->part_tbl->part[0] = &disk->part0;
9671055

9681056
disk->minors = minors;
9691057
disk->ext_minors = ext_minors;
@@ -973,6 +1061,7 @@ struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id)
9731061
device_initialize(disk_to_dev(disk));
9741062
INIT_WORK(&disk->async_notify,
9751063
media_change_notify_thread);
1064+
disk->node_id = node_id;
9761065
}
9771066
return disk;
9781067
}

block/ioctl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
3030
if (bdev != bdev->bd_contains)
3131
return -EINVAL;
3232
partno = p.pno;
33-
if (partno <= 0 || partno >= disk_max_parts(disk))
33+
if (partno <= 0)
3434
return -EINVAL;
3535
switch (a.op) {
3636
case BLKPG_ADD_PARTITION:

fs/partitions/check.c

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -312,14 +312,18 @@ static void delete_partition_rcu_cb(struct rcu_head *head)
312312

313313
void delete_partition(struct gendisk *disk, int partno)
314314
{
315+
struct disk_part_tbl *ptbl = disk->part_tbl;
315316
struct hd_struct *part;
316317

317-
part = disk->__part[partno];
318+
if (partno >= ptbl->len)
319+
return;
320+
321+
part = ptbl->part[partno];
318322
if (!part)
319323
return;
320324

321325
blk_free_devt(part_devt(part));
322-
rcu_assign_pointer(disk->__part[partno], NULL);
326+
rcu_assign_pointer(ptbl->part[partno], NULL);
323327
kobject_put(part->holder_dir);
324328
device_del(part_to_dev(part));
325329

@@ -341,10 +345,16 @@ int add_partition(struct gendisk *disk, int partno,
341345
dev_t devt = MKDEV(0, 0);
342346
struct device *ddev = disk_to_dev(disk);
343347
struct device *pdev;
348+
struct disk_part_tbl *ptbl;
344349
const char *dname;
345350
int err;
346351

347-
if (disk->__part[partno])
352+
err = disk_expand_part_tbl(disk, partno);
353+
if (err)
354+
return err;
355+
ptbl = disk->part_tbl;
356+
357+
if (ptbl->part[partno])
348358
return -EBUSY;
349359

350360
p = kzalloc(sizeof(*p), GFP_KERNEL);
@@ -398,7 +408,7 @@ int add_partition(struct gendisk *disk, int partno,
398408

399409
/* everything is up and running, commence */
400410
INIT_RCU_HEAD(&p->rcu_head);
401-
rcu_assign_pointer(disk->__part[partno], p);
411+
rcu_assign_pointer(ptbl->part[partno], p);
402412

403413
/* suppress uevent if the disk suppresses it */
404414
if (!ddev->uevent_suppress)
@@ -487,7 +497,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
487497
struct disk_part_iter piter;
488498
struct hd_struct *part;
489499
struct parsed_partitions *state;
490-
int p, res;
500+
int p, highest, res;
491501

492502
if (bdev->bd_part_count)
493503
return -EBUSY;
@@ -511,6 +521,17 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
511521
/* tell userspace that the media / partition table may have changed */
512522
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
513523

524+
/* Detect the highest partition number and preallocate
525+
* disk->part_tbl. This is an optimization and not strictly
526+
* necessary.
527+
*/
528+
for (p = 1, highest = 0; p < state->limit; p++)
529+
if (state->parts[p].size)
530+
highest = p;
531+
532+
disk_expand_part_tbl(disk, highest);
533+
534+
/* add partitions */
514535
for (p = 1; p < state->limit; p++) {
515536
sector_t size = state->parts[p].size;
516537
sector_t from = state->parts[p].from;

include/linux/genhd.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,21 @@ struct hd_struct {
113113
#define GENHD_FL_UP 16
114114
#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
115115

116+
#define BLK_SCSI_MAX_CMDS (256)
117+
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
118+
119+
struct blk_scsi_cmd_filter {
120+
unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
121+
unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
122+
struct kobject kobj;
123+
};
124+
125+
struct disk_part_tbl {
126+
struct rcu_head rcu_head;
127+
int len;
128+
struct hd_struct *part[];
129+
};
130+
116131
struct gendisk {
117132
/* major, first_minor, minors and ext_minors are input
118133
* parameters only, don't use directly. Use disk_devt() and
@@ -131,7 +146,7 @@ struct gendisk {
131146
* non-critical accesses use RCU. Always access through
132147
* helpers.
133148
*/
134-
struct hd_struct **__part;
149+
struct disk_part_tbl *part_tbl;
135150
struct hd_struct part0;
136151

137152
struct block_device_operations *fops;
@@ -149,6 +164,7 @@ struct gendisk {
149164
#ifdef CONFIG_BLK_DEV_INTEGRITY
150165
struct blk_integrity *integrity;
151166
#endif
167+
int node_id;
152168
};
153169

154170
static inline struct gendisk *part_to_disk(struct hd_struct *part)
@@ -503,6 +519,7 @@ extern void blk_free_devt(dev_t devt);
503519
extern dev_t blk_lookup_devt(const char *name, int partno);
504520
extern char *disk_name (struct gendisk *hd, int partno, char *buf);
505521

522+
extern int disk_expand_part_tbl(struct gendisk *disk, int target);
506523
extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
507524
extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
508525
extern void delete_partition(struct gendisk *, int);

0 commit comments

Comments
 (0)