--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 29 Apr 2024 14:43:58 +0200
+Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
+ fleecing
+
+With fleecing, failure for copy-before-write does not fail the guest
+write, but only sets the snapshot error that is associated to the
+copy-before-write filter, making further requests to the snapshot
+access fail with EACCES, which then also fails the job. But that error
+code is not the root cause of why the backup failed, so bubble up the
+original snapshot error instead.
+
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+---
+ block/copy-before-write.c | 18 ++++++++++++------
+ block/copy-before-write.h | 1 +
+ pve-backup.c | 9 +++++++++
+ 3 files changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index bba58326d7..50cc4c7aae 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -27,6 +27,7 @@
+ #include "qapi/qmp/qjson.h"
+
+ #include "sysemu/block-backend.h"
++#include "qemu/atomic.h"
+ #include "qemu/cutils.h"
+ #include "qapi/error.h"
+ #include "block/block_int.h"
+@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
+ * @snapshot_error is normally zero. But on first copy-before-write failure
+ * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
+ * value of this error (<0). After that all in-flight and further
+- * snapshot-API requests will fail with that error.
++ * snapshot-API requests will fail with that error. To be accessed with
++ * atomics.
+ */
+ int snapshot_error;
+ } BDRVCopyBeforeWriteState;
+@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+ return 0;
+ }
+
+- if (s->snapshot_error) {
++ if (qatomic_read(&s->snapshot_error)) {
+ return 0;
+ }
+
+@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
+ if (ret < 0) {
+ assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
+- if (!s->snapshot_error) {
+- s->snapshot_error = ret;
+- }
++ qatomic_cmpxchg(&s->snapshot_error, 0, ret);
+ } else {
+ bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
+ }
+@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
+
+ QEMU_LOCK_GUARD(&s->lock);
+
+- if (s->snapshot_error) {
++ if (qatomic_read(&s->snapshot_error)) {
+ g_free(req);
+ return NULL;
+ }
+@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
+ bdrv_unref(bs);
+ }
+
++int bdrv_cbw_snapshot_error(BlockDriverState *bs)
++{
++ BDRVCopyBeforeWriteState *s = bs->opaque;
++ return qatomic_read(&s->snapshot_error);
++}
++
+ static void cbw_init(void)
+ {
+ bdrv_register(&bdrv_cbw_filter);
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index dc6cafe7fa..a27d2d7d9f 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockCopyState **bcs,
+ Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
++int bdrv_cbw_snapshot_error(BlockDriverState *bs);
+
+ #endif /* COPY_BEFORE_WRITE_H */
+diff --git a/pve-backup.c b/pve-backup.c
+index 7cc1dd3724..07709aa350 100644
+--- a/pve-backup.c
++++ b/pve-backup.c
+@@ -379,6 +379,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+ di->fleecing.snapshot_access = NULL;
+ }
+ if (di->fleecing.cbw) {
++ /*
++ * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
++ * error, making further requests to the snapshot fail with EACCES, which then also fail the
++ * job. But that code is not the root cause and just confusing, so update it.
++ */
++ int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
++ if (di->completed_ret == -EACCES && snapshot_error) {
++ di->completed_ret = snapshot_error;
++ }
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+ }