Commit 1ec3b5f

Barry Song authored and torvalds committed
mm/zswap: move to use crypto_acomp API for hardware acceleration
Right now, all new ZIP drivers are adapted to the crypto_acomp APIs rather than the legacy crypto_comp APIs. Traditional compressors such as lz4 and lzo have also been wrapped into acomp via the scomp backend. But zswap.c still uses the old APIs, which means zswap cannot work with any new ZIP driver in the kernel. This patch moves zswap to the crypto_acomp APIs to fix the disconnect between new ZIP drivers and zswap.

zswap is probably the first real user of acomp, though perhaps not a good example of executing multiple acomp requests in parallel on one acomp instance: frontswap loads and stores pages one by one, synchronously, and swap_writepage() depends on the completion of frontswap_store() to decide whether to call __swap_writepage() to swap to disk. However, this patch creates one acomp instance per CPU, so multiple threads running on different CPUs can do (de)compression in parallel, leveraging the power of multiple ZIP hardware queues. This is also consistent with frontswap's page management model.

The old zswap code used atomic context to avoid races on shared resources such as zswap_dstmem. Since acomp can sleep, a per-cpu mutex is used here to replace preemption-disable.

While it is possible to make mm/page_io.c and mm/frontswap.c support asynchronous (de)compression in some way, that entire design requires careful thought and performance evaluation. As a first step, this builds the base with a fixed connection between ZIP drivers and zswap.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Barry Song <[email protected]>
Acked-by: Vitaly Wool <[email protected]>
Cc: Luis Claudio R. Goncalves <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Mahipal Challa <[email protected]>
Cc: Seth Jennings <[email protected]>
Cc: Dan Streetman <[email protected]>
Cc: Zhou Wang <[email protected]>
Cc: Colin Ian King <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
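[Editor's note] To illustrate the request/completion pattern the patch adopts (allocate an acomp transform and request, submit asynchronously, then block via crypto_wait_req()), here is a minimal sketch. It is not code from the patch: the function name compress_page_example(), the "lzo" algorithm name, and the per-call allocation are illustrative assumptions; zswap itself keeps one crypto_acomp_ctx per CPU, as the diff below shows.

#include <crypto/acompress.h>
#include <linux/scatterlist.h>
#include <linux/mm.h>
#include <linux/err.h>

/*
 * Illustrative only: compress one page into a caller-supplied buffer of at
 * least 2 * PAGE_SIZE bytes using the crypto_acomp API, sleeping until the
 * asynchronous request completes.
 */
static int compress_page_example(struct page *page, u8 *dst, unsigned int *dlen)
{
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	struct scatterlist input, output;
	int ret;

	/* "lzo" is just an example; zswap uses pool->tfm_name */
	acomp = crypto_alloc_acomp("lzo", 0, 0);
	if (IS_ERR(acomp))
		return PTR_ERR(acomp);

	req = acomp_request_alloc(acomp);
	if (!req) {
		crypto_free_acomp(acomp);
		return -ENOMEM;
	}

	/* async backends complete through crypto_req_done(); scomp returns inline */
	crypto_init_wait(&wait);
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);

	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(req, &input, &output, PAGE_SIZE, PAGE_SIZE * 2);

	/* submit the request, then wait for the backend to signal completion */
	ret = crypto_wait_req(crypto_acomp_compress(req), &wait);
	if (!ret)
		*dlen = req->dlen;

	acomp_request_free(req);
	crypto_free_acomp(acomp);
	return ret;
}

Because crypto_wait_req() may sleep, callers cannot hold spinlocks or disable preemption around it, which is why the patch replaces get_cpu_var()/put_cpu_var() with a per-cpu mutex.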
1 parent 42a4470 commit 1ec3b5f

1 file changed: +137 -46 lines changed

mm/zswap.c

Lines changed: 137 additions & 46 deletions
@@ -24,8 +24,10 @@
 #include <linux/rbtree.h>
 #include <linux/swap.h>
 #include <linux/crypto.h>
+#include <linux/scatterlist.h>
 #include <linux/mempool.h>
 #include <linux/zpool.h>
+#include <crypto/acompress.h>

 #include <linux/mm_types.h>
 #include <linux/page-flags.h>
@@ -127,9 +129,17 @@ module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
 * data structures
 **********************************/

+struct crypto_acomp_ctx {
+	struct crypto_acomp *acomp;
+	struct acomp_req *req;
+	struct crypto_wait wait;
+	u8 *dstmem;
+	struct mutex *mutex;
+};
+
 struct zswap_pool {
 	struct zpool *zpool;
-	struct crypto_comp * __percpu *tfm;
+	struct crypto_acomp_ctx __percpu *acomp_ctx;
 	struct kref kref;
 	struct list_head list;
 	struct work_struct release_work;
@@ -388,23 +398,43 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
 * per-cpu code
 **********************************/
 static DEFINE_PER_CPU(u8 *, zswap_dstmem);
+/*
+ * If users dynamically change the zpool type and compressor at runtime, i.e.
+ * zswap is running, zswap can have more than one zpool on one cpu, but they
+ * are sharing dstmem. So we need this mutex to be per-cpu.
+ */
+static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

 static int zswap_dstmem_prepare(unsigned int cpu)
 {
+	struct mutex *mutex;
 	u8 *dst;

 	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
 	if (!dst)
 		return -ENOMEM;

+	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
+	if (!mutex) {
+		kfree(dst);
+		return -ENOMEM;
+	}
+
+	mutex_init(mutex);
 	per_cpu(zswap_dstmem, cpu) = dst;
+	per_cpu(zswap_mutex, cpu) = mutex;
 	return 0;
 }

 static int zswap_dstmem_dead(unsigned int cpu)
 {
+	struct mutex *mutex;
 	u8 *dst;

+	mutex = per_cpu(zswap_mutex, cpu);
+	kfree(mutex);
+	per_cpu(zswap_mutex, cpu) = NULL;
+
 	dst = per_cpu(zswap_dstmem, cpu);
 	kfree(dst);
 	per_cpu(zswap_dstmem, cpu) = NULL;
@@ -415,30 +445,54 @@ static int zswap_dstmem_dead(unsigned int cpu)
 static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
-	struct crypto_comp *tfm;
-
-	if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu)))
-		return 0;
+	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+	struct crypto_acomp *acomp;
+	struct acomp_req *req;
+
+	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+	if (IS_ERR(acomp)) {
+		pr_err("could not alloc crypto acomp %s : %ld\n",
+				pool->tfm_name, PTR_ERR(acomp));
+		return PTR_ERR(acomp);
+	}
+	acomp_ctx->acomp = acomp;

-	tfm = crypto_alloc_comp(pool->tfm_name, 0, 0);
-	if (IS_ERR(tfm)) {
-		pr_err("could not alloc crypto comp %s : %ld\n",
-		       pool->tfm_name, PTR_ERR(tfm));
+	req = acomp_request_alloc(acomp_ctx->acomp);
+	if (!req) {
+		pr_err("could not alloc crypto acomp_request %s\n",
+		       pool->tfm_name);
+		crypto_free_acomp(acomp_ctx->acomp);
 		return -ENOMEM;
 	}
-	*per_cpu_ptr(pool->tfm, cpu) = tfm;
+	acomp_ctx->req = req;
+
+	crypto_init_wait(&acomp_ctx->wait);
+	/*
+	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
+	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
+	 * won't be called, crypto_wait_req() will return without blocking.
+	 */
+	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   crypto_req_done, &acomp_ctx->wait);
+
+	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
+	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);
+
 	return 0;
 }

 static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
-	struct crypto_comp *tfm;
+	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+
+	if (!IS_ERR_OR_NULL(acomp_ctx)) {
+		if (!IS_ERR_OR_NULL(acomp_ctx->req))
+			acomp_request_free(acomp_ctx->req);
+		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
+			crypto_free_acomp(acomp_ctx->acomp);
+	}

-	tfm = *per_cpu_ptr(pool->tfm, cpu);
-	if (!IS_ERR_OR_NULL(tfm))
-		crypto_free_comp(tfm);
-	*per_cpu_ptr(pool->tfm, cpu) = NULL;
 	return 0;
 }

@@ -561,8 +615,9 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

 	strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
-	pool->tfm = alloc_percpu(struct crypto_comp *);
-	if (!pool->tfm) {
+
+	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
+	if (!pool->acomp_ctx) {
 		pr_err("percpu alloc failed\n");
 		goto error;
 	}
@@ -585,7 +640,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 	return pool;

 error:
-	free_percpu(pool->tfm);
+	if (pool->acomp_ctx)
+		free_percpu(pool->acomp_ctx);
 	if (pool->zpool)
 		zpool_destroy_pool(pool->zpool);
 	kfree(pool);
@@ -596,14 +652,14 @@ static __init struct zswap_pool *__zswap_pool_create_fallback(void)
 {
 	bool has_comp, has_zpool;

-	has_comp = crypto_has_comp(zswap_compressor, 0, 0);
+	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
 	if (!has_comp && strcmp(zswap_compressor,
 				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
 		pr_err("compressor %s not available, using default %s\n",
 		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
 		param_free_charp(&zswap_compressor);
 		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
-		has_comp = crypto_has_comp(zswap_compressor, 0, 0);
+		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
 	}
 	if (!has_comp) {
 		pr_err("default compressor %s not available\n",
@@ -639,7 +695,7 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
 	zswap_pool_debug("destroying", pool);

 	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
-	free_percpu(pool->tfm);
+	free_percpu(pool->acomp_ctx);
 	zpool_destroy_pool(pool->zpool);
 	kfree(pool);
 }
@@ -723,7 +779,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
 		}
 		type = s;
 	} else if (!compressor) {
-		if (!crypto_has_comp(s, 0, 0)) {
+		if (!crypto_has_acomp(s, 0, 0)) {
 			pr_err("compressor %s not available\n", s);
 			return -ENOENT;
 		}
@@ -774,7 +830,7 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
 		 * failed, maybe both compressor and zpool params were bad.
 		 * Allow changing this param, so pool creation will succeed
 		 * when the other param is changed. We already verified this
-		 * param is ok in the zpool_has_pool() or crypto_has_comp()
+		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
 		 * checks above.
 		 */
 		ret = param_set_charp(s, kp);
@@ -876,8 +932,10 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	pgoff_t offset;
 	struct zswap_entry *entry;
 	struct page *page;
-	struct crypto_comp *tfm;
-	u8 *src, *dst;
+	struct scatterlist input, output;
+	struct crypto_acomp_ctx *acomp_ctx;
+
+	u8 *src;
 	unsigned int dlen;
 	int ret;
 	struct writeback_control wbc = {
@@ -916,14 +974,20 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)

 	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
 		/* decompress */
+		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+
 		dlen = PAGE_SIZE;
 		src = (u8 *)zhdr + sizeof(struct zswap_header);
-		dst = kmap_atomic(page);
-		tfm = *get_cpu_ptr(entry->pool->tfm);
-		ret = crypto_comp_decompress(tfm, src, entry->length,
-					     dst, &dlen);
-		put_cpu_ptr(entry->pool->tfm);
-		kunmap_atomic(dst);
+
+		mutex_lock(acomp_ctx->mutex);
+		sg_init_one(&input, src, entry->length);
+		sg_init_table(&output, 1);
+		sg_set_page(&output, page, PAGE_SIZE, 0);
+		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
+		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
+		dlen = acomp_ctx->req->dlen;
+		mutex_unlock(acomp_ctx->mutex);
+
 		BUG_ON(ret);
 		BUG_ON(dlen != PAGE_SIZE);

@@ -1004,7 +1068,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 {
 	struct zswap_tree *tree = zswap_trees[type];
 	struct zswap_entry *entry, *dupentry;
-	struct crypto_comp *tfm;
+	struct scatterlist input, output;
+	struct crypto_acomp_ctx *acomp_ctx;
 	int ret;
 	unsigned int hlen, dlen = PAGE_SIZE;
 	unsigned long handle, value;
@@ -1074,12 +1139,32 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	}

 	/* compress */
-	dst = get_cpu_var(zswap_dstmem);
-	tfm = *get_cpu_ptr(entry->pool->tfm);
-	src = kmap_atomic(page);
-	ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen);
-	kunmap_atomic(src);
-	put_cpu_ptr(entry->pool->tfm);
+	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+
+	mutex_lock(acomp_ctx->mutex);
+
+	dst = acomp_ctx->dstmem;
+	sg_init_table(&input, 1);
+	sg_set_page(&input, page, PAGE_SIZE, 0);
+
+	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
+	sg_init_one(&output, dst, PAGE_SIZE * 2);
+	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
+	/*
+	 * It may look a little silly that we send an asynchronous request and
+	 * then wait for its completion synchronously; this makes the process
+	 * synchronous in fact.
+	 * Theoretically, acomp supports sending multiple requests on one acomp
+	 * instance and getting them done simultaneously. But frontswap stores
+	 * and loads page by page, so one thread doing frontswap has no way to
+	 * send a second page before the first page is done.
+	 * In different threads running on different cpus, however, we have
+	 * different acomp instances, so multiple threads can do (de)compression
+	 * in parallel.
+	 */
+	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
+	dlen = acomp_ctx->req->dlen;
+
 	if (ret) {
 		ret = -EINVAL;
 		goto put_dstmem;
@@ -1103,7 +1188,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	memcpy(buf, &zhdr, hlen);
 	memcpy(buf + hlen, dst, dlen);
 	zpool_unmap_handle(entry->pool->zpool, handle);
-	put_cpu_var(zswap_dstmem);
+	mutex_unlock(acomp_ctx->mutex);

 	/* populate entry */
 	entry->offset = offset;
@@ -1131,7 +1216,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	return 0;

 put_dstmem:
-	put_cpu_var(zswap_dstmem);
+	mutex_unlock(acomp_ctx->mutex);
 	zswap_pool_put(entry->pool);
 freepage:
 	zswap_entry_cache_free(entry);
@@ -1148,7 +1233,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 {
 	struct zswap_tree *tree = zswap_trees[type];
 	struct zswap_entry *entry;
-	struct crypto_comp *tfm;
+	struct scatterlist input, output;
+	struct crypto_acomp_ctx *acomp_ctx;
 	u8 *src, *dst;
 	unsigned int dlen;
 	int ret;
@@ -1175,11 +1261,16 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
 	if (zpool_evictable(entry->pool->zpool))
 		src += sizeof(struct zswap_header);
-	dst = kmap_atomic(page);
-	tfm = *get_cpu_ptr(entry->pool->tfm);
-	ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen);
-	put_cpu_ptr(entry->pool->tfm);
-	kunmap_atomic(dst);
+
+	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+	mutex_lock(acomp_ctx->mutex);
+	sg_init_one(&input, src, entry->length);
+	sg_init_table(&output, 1);
+	sg_set_page(&output, page, PAGE_SIZE, 0);
+	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
+	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
+	mutex_unlock(acomp_ctx->mutex);
+
 	zpool_unmap_handle(entry->pool->zpool, entry->handle);
 	BUG_ON(ret);
