]> git.proxmox.com Git - pve-qemu.git/commitdiff
some more stable fixes for QEMU 9.0.2
author Fiona Ebner <f.ebner@proxmox.com>
Thu, 25 Jul 2024 09:45:54 +0000 (11:45 +0200)
committer Thomas Lamprecht <t.lamprecht@proxmox.com>
Mon, 29 Jul 2024 16:56:46 +0000 (18:56 +0200)
Fix the two issues reported in the community forum[0][1], i.e.
regression in LSI-53c895a controller and ignored boot order for USB
storage (only possible via custom arguments in Proxmox VE), both
causing boot failures, and pick up fixes for VirtIO, ARM emulation,
char IO device and a graph lock fix for the block layer.

The block-copy patches that serve as a preparation for fleecing are
moved to the extra folder, because the graph lock fix requires them
to be present first. They have been applied upstream in the meantime
and should drop out with the rebase on 9.1.

[0]: https://forum.proxmox.com/threads/149772/post-679433
[1]: https://forum.proxmox.com/threads/149772/post-683459

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
33 files changed:
debian/patches/bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
debian/patches/bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
debian/patches/bitmap-mirror/0006-mirror-move-some-checks-to-qmp.patch
debian/patches/extra/0007-block-copy-before-write-fix-permission.patch [new file with mode: 0644]
debian/patches/extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch [new file with mode: 0644]
debian/patches/extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch [new file with mode: 0644]
debian/patches/extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch [new file with mode: 0644]
debian/patches/extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch [new file with mode: 0644]
debian/patches/extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch [new file with mode: 0644]
debian/patches/extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch [new file with mode: 0644]
debian/patches/extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch [new file with mode: 0644]
debian/patches/extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch [new file with mode: 0644]
debian/patches/extra/0016-block-copy-Fix-missing-graph-lock.patch [new file with mode: 0644]
debian/patches/extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch [new file with mode: 0644]
debian/patches/pve/0022-PVE-Up-Config-file-posix-make-locking-optiono-on-cre.patch
debian/patches/pve/0026-block-backup-move-bcs-bitmap-initialization-to-job-c.patch
debian/patches/pve/0028-PVE-Backup-add-backup-dump-block-driver.patch
debian/patches/pve/0030-PVE-Backup-Proxmox-backup-patches-for-QEMU.patch
debian/patches/pve/0032-PVE-Add-PBS-block-driver-to-map-backup-archives-into.patch
debian/patches/pve/0034-PVE-Migrate-dirty-bitmap-state-via-savevm.patch
debian/patches/pve/0044-block-copy-before-write-fix-permission.patch [deleted file]
debian/patches/pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch [new file with mode: 0644]
debian/patches/pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch [new file with mode: 0644]
debian/patches/pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch [deleted file]
debian/patches/pve/0046-PVE-backup-add-fleecing-option.patch [new file with mode: 0644]
debian/patches/pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch [deleted file]
debian/patches/pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch [new file with mode: 0644]
debian/patches/pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch [deleted file]
debian/patches/pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch [deleted file]
debian/patches/pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch [deleted file]
debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch [deleted file]
debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch [deleted file]
debian/patches/series

index 392b8a2628f1008acb9211f0b3b4222a3f70d2e3..0532896cc786fbbea3312040a250b9d27ae234c0 100644 (file)
@@ -258,7 +258,7 @@ index 1bdce3b657..0c5c72df2e 100644
                       errp);
      if (!job) {
 diff --git a/blockdev.c b/blockdev.c
-index 057601dcf0..8682814a7a 100644
+index 4c33c3f5f0..f3e508a6a7 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -2776,6 +2776,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
@@ -349,7 +349,7 @@ index 057601dcf0..8682814a7a 100644
                             has_granularity, granularity,
                             has_buf_size, buf_size,
 diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
-index d2201e27f4..cc1387ae02 100644
+index eb2d92a226..f0c642b194 100644
 --- a/include/block/block_int-global-state.h
 +++ b/include/block/block_int-global-state.h
 @@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -364,10 +364,10 @@ index d2201e27f4..cc1387ae02 100644
                    BlockdevOnError on_source_error,
                    BlockdevOnError on_target_error,
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 4b18e01b85..0902b0a024 100644
+index b179d65520..905da8be72 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -2170,6 +2170,15 @@
+@@ -2174,6 +2174,15 @@
  #     destination (all the disk, only the sectors allocated in the
  #     topmost image, or only new I/O).
  #
@@ -383,7 +383,7 @@ index 4b18e01b85..0902b0a024 100644
  # @granularity: granularity of the dirty bitmap, default is 64K if the
  #     image format doesn't have clusters, 4K if the clusters are
  #     smaller than that, else the cluster size.  Must be a power of 2
-@@ -2212,7 +2221,9 @@
+@@ -2216,7 +2225,9 @@
  { 'struct': 'DriveMirror',
    'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
              '*format': 'str', '*node-name': 'str', '*replaces': 'str',
@@ -394,7 +394,7 @@ index 4b18e01b85..0902b0a024 100644
              '*speed': 'int', '*granularity': 'uint32',
              '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
              '*on-target-error': 'BlockdevOnError',
-@@ -2492,6 +2503,15 @@
+@@ -2496,6 +2507,15 @@
  #     destination (all the disk, only the sectors allocated in the
  #     topmost image, or only new I/O).
  #
@@ -410,7 +410,7 @@ index 4b18e01b85..0902b0a024 100644
  # @granularity: granularity of the dirty bitmap, default is 64K if the
  #     image format doesn't have clusters, 4K if the clusters are
  #     smaller than that, else the cluster size.  Must be a power of 2
-@@ -2540,7 +2560,8 @@
+@@ -2544,7 +2564,8 @@
  { 'command': 'blockdev-mirror',
    'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
              '*replaces': 'str',
index ddaf702893f8c0bf251ef57c3bfb272d852a7ad3..d1e0fb045c685e138eccd63c357c3989a2741dd5 100644 (file)
@@ -16,7 +16,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
  1 file changed, 3 insertions(+)
 
 diff --git a/blockdev.c b/blockdev.c
-index 8682814a7a..5b75a085ee 100644
+index f3e508a6a7..37b8437f3e 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -2873,6 +2873,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
index 05a73d16c286ad5cb720a742c33668cf22c797dd..9f68e4fd2f803e4cec7dc482f546dd65267d174d 100644 (file)
@@ -62,7 +62,7 @@ index 6b3cce1007..2f1223852b 100644
  
          if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
 diff --git a/blockdev.c b/blockdev.c
-index 5b75a085ee..d27d8c38ec 100644
+index 37b8437f3e..ed8198f351 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -2852,7 +2852,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
diff --git a/debian/patches/extra/0007-block-copy-before-write-fix-permission.patch b/debian/patches/extra/0007-block-copy-before-write-fix-permission.patch
new file mode 100644 (file)
index 0000000..6a759a4
--- /dev/null
@@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:22 +0200
+Subject: [PATCH] block/copy-before-write: fix permission
+
+In the case where the source node does not have any parents, the condition
+still works as required: the backup job does create the parent by
+
+  block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
+
+Still, in this case checking the @perm variable doesn't work, as the backup
+job creates the root blk with empty permissions (as it relies on the CBW
+filter to require correct permissions and doesn't want to create extra
+conflicts).
+
+So, we should not check @perm.
+
+The hack may be dropped entirely once transactional insertion of the
+filter (where we don't try to recalculate permissions in an intermediate
+state, in which the filter conflicts with the original parent of the
+source node) is merged (old big series
+"[PATCH v5 00/45] Transactional block-graph modifying API"[1]; its
+current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
+
+[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
+[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 026fa9840f..5a9456d426 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                            perm, shared, nperm, nshared);
+         if (!QLIST_EMPTY(&bs->parents)) {
+-            if (perm & BLK_PERM_WRITE) {
+-                *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+-            }
++            /*
++             * Note, that source child may be shared with backup job. Backup job
++             * does create own blk parent on copy-before-write node, so this
++             * works even if source node does not have any parents before backup
++             * start
++             */
++            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+         }
+     }
diff --git a/debian/patches/extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch b/debian/patches/extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch
new file mode 100644 (file)
index 0000000..f651c58
--- /dev/null
@@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:23 +0200
+Subject: [PATCH] block/copy-before-write: support unligned snapshot-discard
+
+The first thing that crashes on unaligned access here is
+bdrv_reset_dirty_bitmap(). Correct way is to align-down the
+snapshot-discard request.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 5a9456d426..c0e70669a2 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
+ cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+ {
+     BDRVCopyBeforeWriteState *s = bs->opaque;
++    uint32_t cluster_size = block_copy_cluster_size(s->bcs);
++    int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
++    int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
++    int64_t aligned_bytes;
++
++    if (aligned_end <= aligned_offset) {
++        return 0;
++    }
++    aligned_bytes = aligned_end - aligned_offset;
+     WITH_QEMU_LOCK_GUARD(&s->lock) {
+-        bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
++        bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
++                                aligned_bytes);
+     }
+-    block_copy_reset(s->bcs, offset, bytes);
++    block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
+-    return bdrv_co_pdiscard(s->target, offset, bytes);
++    return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
+ }
+ static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
diff --git a/debian/patches/extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch b/debian/patches/extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch
new file mode 100644 (file)
index 0000000..7cd24d0
--- /dev/null
@@ -0,0 +1,373 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:24 +0200
+Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
+ node
+
+Currently block_copy creates copy_bitmap in the source node. But that
+interacts badly with .independent_close=true of the copy-before-write
+filter: the source node may be detached and removed before the
+.bdrv_close() handler is called, which should call
+block_copy_state_free(), which in turn should remove copy_bitmap.
+
+That's all not ideal: it would be better if internal bitmap of
+block-copy object is not attached to any node. But that is not possible
+now.
+
+The simplest solution is just create copy_bitmap in filter node, where
+anyway two other bitmaps are created.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c         |   3 +-
+ block/copy-before-write.c  |   2 +-
+ include/block/block-copy.h |   1 +
+ tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
+ 4 files changed, 60 insertions(+), 58 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 9ee3dd7ef5..8fca2c3698 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ }
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
++                                     BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      Error **errp)
+ {
+@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+         return NULL;
+     }
+-    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
++    copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
+                                            errp);
+     if (!copy_bitmap) {
+         return NULL;
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index c0e70669a2..94db31512d 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+              bs->file->bs->supported_zero_flags);
+-    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
++    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 0700953ab8..8b41643bfa 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
+ typedef struct BlockCopyCallState BlockCopyCallState;
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
++                                     BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      Error **errp);
+diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
+index aa76131ca9..c33dd7f3a9 100644
+--- a/tests/qemu-iotests/257.out
++++ b/tests/qemu-iotests/257.out
+@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
+@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      }
+-    ],
+-    "drive0": [
++      },
+       {
+         "busy": false,
+         "count": 0,
+         "granularity": 65536,
+         "persistent": false,
+         "recording": false
+-      },
++      }
++    ],
++    "drive0": [
+       {
+         "busy": false,
+         "count": 458752,
diff --git a/debian/patches/extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch b/debian/patches/extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch
new file mode 100644 (file)
index 0000000..e11a37d
--- /dev/null
@@ -0,0 +1,277 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:25 +0200
+Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
+
+Add a parameter that enables discard-after-copy. That is mostly useful
+in "push backup with fleecing" scheme, when source is snapshot-access
+format driver node, based on copy-before-write filter snapshot-access
+API:
+
+[guest]      [snapshot-access] ~~ blockdev-backup ~~> [backup target]
+   |            |
+   | root       | file
+   v            v
+[copy-before-write]
+   |             |
+   | file        | target
+   v             v
+[active disk]   [temp.img]
+
+In this case discard-after-copy does two things:
+
+ - discard data in temp.img to save disk space
+ - avoid further copy-before-write operation in discarded area
+
+Note that we have to declare WRITE permission on source in
+copy-before-write filter, for discard to work. Still we can't take it
+unconditionally, as it will break normal backup from RO source. So, we
+have to add a parameter and pass it through bdrv_open flags.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c                         |  5 +++--
+ block/block-copy.c                     |  9 +++++++++
+ block/copy-before-write.c              | 15 +++++++++++++--
+ block/copy-before-write.h              |  1 +
+ block/replication.c                    |  4 ++--
+ blockdev.c                             |  2 +-
+ include/block/block-common.h           |  2 ++
+ include/block/block-copy.h             |  1 +
+ include/block/block_int-global-state.h |  2 +-
+ qapi/block-core.json                   |  4 ++++
+ 10 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index ec29d6b810..3dd2e229d2 100644
+--- a/block/backup.c
++++ b/block/backup.c
+@@ -356,7 +356,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                   BlockDriverState *target, int64_t speed,
+                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+                   BitmapSyncMode bitmap_mode,
+-                  bool compress,
++                  bool compress, bool discard_source,
+                   const char *filter_node_name,
+                   BackupPerf *perf,
+                   BlockdevOnError on_source_error,
+@@ -457,7 +457,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+         goto error;
+     }
+-    cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
++    cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
++                          &bcs, errp);
+     if (!cbw) {
+         goto error;
+     }
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 8fca2c3698..7e3b378528 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
+     CoMutex lock;
+     int64_t in_flight_bytes;
+     BlockCopyMethod method;
++    bool discard_source;
+     BlockReqList reqs;
+     QLIST_HEAD(, BlockCopyCallState) calls;
+     /*
+@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
++                                     bool discard_source,
+                                      Error **errp)
+ {
+     ERRP_GUARD();
+@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     cluster_size),
+     };
++    s->discard_source = discard_source;
+     block_copy_set_copy_opts(s, false, false);
+     ratelimit_init(&s->rate_limit);
+@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+     co_put_to_shres(s->mem, t->req.bytes);
+     block_copy_task_end(t, ret);
++    if (s->discard_source && ret == 0) {
++        int64_t nbytes =
++            MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
++        bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++    }
++
+     return ret;
+ }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 94db31512d..853e01a1eb 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
+     BdrvChild *target;
+     OnCbwError on_cbw_error;
+     uint64_t cbw_timeout_ns;
++    bool discard_source;
+     /*
+      * @lock: protects access to @access_bitmap, @done_bitmap and
+@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+                uint64_t perm, uint64_t shared,
+                uint64_t *nperm, uint64_t *nshared)
+ {
++    BDRVCopyBeforeWriteState *s = bs->opaque;
++
+     if (!(role & BDRV_CHILD_FILTERED)) {
+         /*
+          * Target child
+@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+              * start
+              */
+             *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
++            if (s->discard_source) {
++                *nperm = *nperm | BLK_PERM_WRITE;
++            }
++
+             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+         }
+     }
+@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+              bs->file->bs->supported_zero_flags);
+-    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
++    s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
++    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
++                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
++                                  bool discard_source,
+                                   BlockCopyState **bcs,
+                                   Error **errp)
+ {
+     BDRVCopyBeforeWriteState *state;
+     BlockDriverState *top;
+     QDict *opts;
++    int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
+     assert(source->total_sectors == target->total_sectors);
+     GLOBAL_STATE_CODE();
+@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+     qdict_put_str(opts, "file", bdrv_get_node_name(source));
+     qdict_put_str(opts, "target", bdrv_get_node_name(target));
+-    top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
++    top = bdrv_insert_node(source, opts, flags, errp);
+     if (!top) {
+         return NULL;
+     }
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 6e72bb25e9..01af0cd3c4 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -39,6 +39,7 @@
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
++                                  bool discard_source,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/block/replication.c b/block/replication.c
+index ca6bd0a720..0415a5e8b7 100644
+--- a/block/replication.c
++++ b/block/replication.c
+@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
+         s->backup_job = backup_job_create(
+                                 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
+-                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
+-                                &perf,
++                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
++                                NULL, &perf,
+                                 BLOCKDEV_ON_ERROR_REPORT,
+                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
+                                 backup_job_completed, bs, NULL, &local_err);
+diff --git a/blockdev.c b/blockdev.c
+index 057601dcf0..4c33c3f5f0 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2726,7 +2726,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+     job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+                             backup->sync, bmap, backup->bitmap_mode,
+-                            backup->compress,
++                            backup->compress, backup->discard_source,
+                             backup->filter_node_name,
+                             &perf,
+                             backup->on_source_error,
+diff --git a/include/block/block-common.h b/include/block/block-common.h
+index a846023a09..338fe5ff7a 100644
+--- a/include/block/block-common.h
++++ b/include/block/block-common.h
+@@ -243,6 +243,8 @@ typedef enum {
+                                       read-write fails */
+ #define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
++#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
++
+ #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 8b41643bfa..bdc703bacd 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
++                                     bool discard_source,
+                                      Error **errp);
+ /* Function should be called prior any actual copy request */
+diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
+index d2201e27f4..eb2d92a226 100644
+--- a/include/block/block_int-global-state.h
++++ b/include/block/block_int-global-state.h
+@@ -193,7 +193,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+                             MirrorSyncMode sync_mode,
+                             BdrvDirtyBitmap *sync_bitmap,
+                             BitmapSyncMode bitmap_mode,
+-                            bool compress,
++                            bool compress, bool discard_source,
+                             const char *filter_node_name,
+                             BackupPerf *perf,
+                             BlockdevOnError on_source_error,
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 4b18e01b85..b179d65520 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -1610,6 +1610,9 @@
+ #     node specified by @drive.  If this option is not given, a node
+ #     name is autogenerated.  (Since: 4.2)
+ #
++# @discard-source: Discard blocks on source which are already copied
++#     to the target.  (Since 9.0)
++#
+ # @x-perf: Performance options.  (Since 6.0)
+ #
+ # Features:
+@@ -1631,6 +1634,7 @@
+             '*on-target-error': 'BlockdevOnError',
+             '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
+             '*filter-node-name': 'str',
++            '*discard-source': 'bool',
+             '*x-perf': { 'type': 'BackupPerf',
+                          'features': [ 'unstable' ] } } }
diff --git a/debian/patches/extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch b/debian/patches/extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
new file mode 100644 (file)
index 0000000..c5a3e92
--- /dev/null
@@ -0,0 +1,92 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Tue, 18 Jun 2024 14:19:58 +0200
+Subject: [PATCH] hw/virtio: Fix the de-initialization of vhost-user devices
+
+The unrealize functions of the various vhost-user devices are
+calling the corresponding vhost_*_set_status() functions with a
+status of 0 to shut down the device correctly.
+
+Now these vhost_*_set_status() functions all follow this scheme:
+
+    bool should_start = virtio_device_should_start(vdev, status);
+
+    if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
+        return;
+    }
+
+    if (should_start) {
+        /* ... do the initialization stuff ... */
+    } else {
+        /* ... do the cleanup stuff ... */
+    }
+
+The problem here is virtio_device_should_start(vdev, 0) currently
+always returns "true" since it internally only looks at vdev->started
+instead of looking at the "status" parameter. Thus once the device
+got started once, virtio_device_should_start() always returns true
+and thus the vhost_*_set_status() functions return early, without
+ever doing any clean-up when being called with status == 0. This
+causes e.g. problems when trying to hot-plug and hot-unplug a vhost
+user device multiple times since the de-initialization step is
+completely skipped during the unplug operation.
+
+This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move
+vm_running check to virtio_device_started") which replaced
+
+ should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
+
+with
+
+ should_start = virtio_device_started(vdev, status);
+
+which later got replaced by virtio_device_should_start(). This blocked
+the possibility to set should_start to false in case the status flag
+VIRTIO_CONFIG_S_DRIVER_OK was not set.
+
+Fix it by adjusting the virtio_device_should_start() function to
+only consider the status flag instead of vdev->started. Since this
+function is only used in the various vhost_*_set_status() functions
+for exactly the same purpose, it should be fine to fix it in this
+central place there without any risk to change the behavior of other
+code.
+
+Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started")
+Buglink: https://issues.redhat.com/browse/RHEL-40708
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+Message-Id: <20240618121958.88673-1-thuth@redhat.com>
+Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ include/hw/virtio/virtio.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
+index 7d5ffdc145..2eafad17b8 100644
+--- a/include/hw/virtio/virtio.h
++++ b/include/hw/virtio/virtio.h
+@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
+  * @vdev - the VirtIO device
+  * @status - the devices status bits
+  *
+- * This is similar to virtio_device_started() but also encapsulates a
+- * check on the VM status which would prevent a device starting
+- * anyway.
++ * This is similar to virtio_device_started() but ignores vdev->started
++ * and also encapsulates a check on the VM status which would prevent a
++ * device from starting anyway.
+  */
+ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
+ {
+@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status
+         return false;
+     }
+-    return virtio_device_started(vdev, status);
++    return status & VIRTIO_CONFIG_S_DRIVER_OK;
+ }
+ static inline void virtio_set_started(VirtIODevice *vdev, bool started)
diff --git a/debian/patches/extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch b/debian/patches/extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
new file mode 100644 (file)
index 0000000..3ca2147
--- /dev/null
@@ -0,0 +1,43 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniyal Khan <danikhan632@gmail.com>
+Date: Wed, 17 Jul 2024 16:01:47 +1000
+Subject: [PATCH] target/arm: Use float_status copy in sme_fmopa_s
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We made a copy above because the fp exception flags
+are not propagated back to the FPST register, but
+then failed to use the copy.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)")
+Signed-off-by: Daniyal Khan <danikhan632@gmail.com>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-id: 20240717060149.204788-2-richard.henderson@linaro.org
+[rth: Split from a larger patch]
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+(cherry picked from commit 31d93fedf41c24b0badb38cd9317590d1ef74e37)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/sme_helper.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
+index e2e0575039..5a6dd76489 100644
+--- a/target/arm/tcg/sme_helper.c
++++ b/target/arm/tcg/sme_helper.c
+@@ -916,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
+                         if (pb & 1) {
+                             uint32_t *a = vza_row + H1_4(col);
+                             uint32_t *m = vzm + H1_4(col);
+-                            *a = float32_muladd(n, *m, *a, 0, vst);
++                            *a = float32_muladd(n, *m, *a, 0, &fpst);
+                         }
+                         col += 4;
+                         pb >>= 4;
diff --git a/debian/patches/extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch b/debian/patches/extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
new file mode 100644 (file)
index 0000000..56f24fc
--- /dev/null
@@ -0,0 +1,62 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Wed, 17 Jul 2024 16:01:48 +1000
+Subject: [PATCH] target/arm: Use FPST_F16 for SME FMOPA (widening)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This operation has float16 inputs and thus must use
+the FZ16 control not the FZ control.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)")
+Reported-by: Daniyal Khan <danikhan632@gmail.com>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-id: 20240717060149.204788-3-richard.henderson@linaro.org
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2374
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+(cherry picked from commit 207d30b5fdb5b45a36f26eefcf52fe2c1714dd4f)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/translate-sme.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
+index 46c7fce8b4..185a8a917b 100644
+--- a/target/arm/tcg/translate-sme.c
++++ b/target/arm/tcg/translate-sme.c
+@@ -304,6 +304,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
+ }
+ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
++                            ARMFPStatusFlavour e_fpst,
+                             gen_helper_gvec_5_ptr *fn)
+ {
+     int svl = streaming_vec_reg_size(s);
+@@ -319,15 +320,18 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+     zm = vec_full_reg_ptr(s, a->zm);
+     pn = pred_full_reg_ptr(s, a->pn);
+     pm = pred_full_reg_ptr(s, a->pm);
+-    fpst = fpstatus_ptr(FPST_FPCR);
++    fpst = fpstatus_ptr(e_fpst);
+     fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
+     return true;
+ }
+-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
+-TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
+-TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
++TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
++           MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
++TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
++           MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
++TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
++           MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
+ /* TODO: FEAT_EBF16 */
+ TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
diff --git a/debian/patches/extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch b/debian/patches/extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
new file mode 100644 (file)
index 0000000..6fad4dc
--- /dev/null
@@ -0,0 +1,60 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Wed, 10 Jul 2024 17:25:29 +0200
+Subject: [PATCH] scsi: fix regression and honor bootindex again for legacy
+ drives
+
+Commit 3089637461 ("scsi: Don't ignore most usb-storage properties")
+removed the call to object_property_set_int() and thus the 'set'
+method for the bootindex property was also not called anymore. Here
+that method is device_set_bootindex() (as configured by
+scsi_dev_instance_init() -> device_add_bootindex_property()) which as
+a side effect registers the device via add_boot_device_path().
+
+As reported by a downstream user [0], the bootindex property did not
+have the desired effect anymore for legacy drives. Fix the regression
+by explicitly calling the add_boot_device_path() function after
+checking that the bootindex is not yet used (to avoid
+add_boot_device_path() calling exit()).
+
+[0]: https://forum.proxmox.com/threads/149772/post-679433
+
+Cc: qemu-stable@nongnu.org
+Fixes: 3089637461 ("scsi: Don't ignore most usb-storage properties")
+Suggested-by: Kevin Wolf <kwolf@redhat.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Link: https://lore.kernel.org/r/20240710152529.1737407-1-f.ebner@proxmox.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 57a8a80d1a5b28797b21d30bfc60601945820e51)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/scsi-bus.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
+index 9e40b0c920..53eff5dd3d 100644
+--- a/hw/scsi/scsi-bus.c
++++ b/hw/scsi/scsi-bus.c
+@@ -384,6 +384,7 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+     DeviceState *dev;
+     SCSIDevice *s;
+     DriveInfo *dinfo;
++    Error *local_err = NULL;
+     if (blk_is_sg(blk)) {
+         driver = "scsi-generic";
+@@ -403,6 +404,14 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+     s = SCSI_DEVICE(dev);
+     s->conf = *conf;
++    check_boot_index(conf->bootindex, &local_err);
++    if (local_err) {
++        object_unparent(OBJECT(dev));
++        error_propagate(errp, local_err);
++        return NULL;
++    }
++    add_boot_device_path(conf->bootindex, dev, NULL);
++
+     qdev_prop_set_uint32(dev, "scsi-id", unit);
+     if (object_property_find(OBJECT(dev), "removable")) {
+         qdev_prop_set_bit(dev, "removable", removable);
diff --git a/debian/patches/extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch b/debian/patches/extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
new file mode 100644 (file)
index 0000000..e118289
--- /dev/null
@@ -0,0 +1,48 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 15 Jul 2024 15:14:03 +0200
+Subject: [PATCH] hw/scsi/lsi53c895a: bump instruction limit in scripts
+ processing to fix regression
+
+Commit 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts
+processing") reduced the maximum allowed instruction count by
+a factor of 100 all the way down to 100.
+
+This causes the "Check Point R81.20 Gaia" appliance [0] to fail to
+boot after fully finishing the installation via the appliance's web
+interface (there is already one reboot before that).
+
+With a limit of 150, the appliance still fails to boot, while with a
+limit of 200, it works. Bump to 500 to fix the regression and be on
+the safe side.
+
+Originally reported in the Proxmox community forum[1].
+
+[0]: https://support.checkpoint.com/results/download/124397
+[1]: https://forum.proxmox.com/threads/149772/post-683459
+
+Cc: qemu-stable@nongnu.org
+Fixes: 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts processing")
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Acked-by: Sven Schnelle <svens@stackframe.org>
+Link: https://lore.kernel.org/r/20240715131403.223239-1-f.ebner@proxmox.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit a4975023fb13cf229bd59c9ceec1b8cbdc5b9a20)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/lsi53c895a.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
+index eb9828dd5e..f1935e5328 100644
+--- a/hw/scsi/lsi53c895a.c
++++ b/hw/scsi/lsi53c895a.c
+@@ -188,7 +188,7 @@ static const char *names[] = {
+ #define LSI_TAG_VALID     (1 << 16)
+ /* Maximum instructions to process. */
+-#define LSI_MAX_INSN    100
++#define LSI_MAX_INSN    500
+ typedef struct lsi_request {
+     SCSIRequest *req;
diff --git a/debian/patches/extra/0016-block-copy-Fix-missing-graph-lock.patch b/debian/patches/extra/0016-block-copy-Fix-missing-graph-lock.patch
new file mode 100644 (file)
index 0000000..dc1d2c1
--- /dev/null
@@ -0,0 +1,38 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Thu, 27 Jun 2024 20:12:44 +0200
+Subject: [PATCH] block-copy: Fix missing graph lock
+
+The graph lock needs to be held when calling bdrv_co_pdiscard(). Fix
+block_copy_task_entry() to take it for the call.
+
+WITH_GRAPH_RDLOCK_GUARD() was implemented in a weak way because of
+limitations in clang's Thread Safety Analysis at the time, so that it
+only asserts that the lock is held (which allows calling functions that
+require the lock), but we never deal with the unlocking (so even after
+the scope of the guard, the compiler assumes that the lock is still
+held). This is why the compiler didn't catch this locking error.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+(picked from https://lore.kernel.org/qemu-devel/20240627181245.281403-2-kwolf@redhat.com/)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/block-copy.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 7e3b378528..cc618e4561 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -595,7 +595,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+     if (s->discard_source && ret == 0) {
+         int64_t nbytes =
+             MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+-        bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++        WITH_GRAPH_RDLOCK_GUARD() {
++            bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++        }
+     }
+     return ret;
diff --git a/debian/patches/extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch b/debian/patches/extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
new file mode 100644 (file)
index 0000000..088af84
--- /dev/null
@@ -0,0 +1,93 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli@nutanix.com>
+Date: Fri, 12 Jul 2024 09:26:59 +0000
+Subject: [PATCH] Revert "qemu-char: do not operate on sources from finalize
+ callbacks"
+
+This reverts commit 2b316774f60291f57ca9ecb6a9f0712c532cae34.
+
+After 038b4217884c ("Revert "chardev: use a child source for qio input
+source"") we've been observing the "iwp->src == NULL" assertion
+triggering periodically during the initial capabilities querying by
+libvirtd. One of the possible backtraces:
+
+Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)):
+0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
+1  0x00007f16c6c21e65 in __GI_abort () at abort.c:79
+2  0x00007f16c6c21d39 in __assert_fail_base  at assert.c:92
+3  0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at assert.c:101
+4  0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:99
+5  io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88
+6  0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
+7  0x00007f16c904baf9 in g_source_destroy_internal () from /lib64/libglib-2.0.so.0
+8  0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at ../chardev/char-io.c:147
+9  remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153
+10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at ../chardev/char-socket.c:592
+11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at ../chardev/char-fe.c:279
+12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304
+13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) at ../monitor/qmp.c:509
+14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at ../util/async.c:216
+15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, blocking=blocking@entry=true) at ../util/aio-posix.c:722
+16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at ../iothread.c:63
+17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at ../util/qemu-thread-posix.c:543
+18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at pthread_create.c:479
+19 0x00007f16c6c398d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
+
+io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls
+g_source_destroy() which finds that iwp->src is not NULL in the finalize
+callback. This can only happen if another thread has managed to trigger
+io_watch_poll_prepare() callback in the meantime.
+
+Move iwp->src destruction back to the finalize callback to prevent the
+described race, and also remove the stale comment. The deadlock glib bug
+was fixed back in 2010 by b35820285668 ("gmain: move finalization of
+GSource outside of context lock").
+
+Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com>
+Link: https://lore.kernel.org/r/20240712092659.216206-1-sergey.dyasli@nutanix.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit e0bf95443ee9326d44031373420cf9f3513ee255)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ chardev/char-io.c | 19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+diff --git a/chardev/char-io.c b/chardev/char-io.c
+index dab77b112e..3be17b51ca 100644
+--- a/chardev/char-io.c
++++ b/chardev/char-io.c
+@@ -87,16 +87,12 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
+ static void io_watch_poll_finalize(GSource *source)
+ {
+-    /*
+-     * Due to a glib bug, removing the last reference to a source
+-     * inside a finalize callback causes recursive locking (and a
+-     * deadlock).  This is not a problem inside other callbacks,
+-     * including dispatch callbacks, so we call io_remove_watch_poll
+-     * to remove this source.  At this point, iwp->src must
+-     * be NULL, or we would leak it.
+-     */
+     IOWatchPoll *iwp = io_watch_poll_from_source(source);
+-    assert(iwp->src == NULL);
++    if (iwp->src) {
++        g_source_destroy(iwp->src);
++        g_source_unref(iwp->src);
++        iwp->src = NULL;
++    }
+ }
+ static GSourceFuncs io_watch_poll_funcs = {
+@@ -139,11 +135,6 @@ static void io_remove_watch_poll(GSource *source)
+     IOWatchPoll *iwp;
+     iwp = io_watch_poll_from_source(source);
+-    if (iwp->src) {
+-        g_source_destroy(iwp->src);
+-        g_source_unref(iwp->src);
+-        iwp->src = NULL;
+-    }
+     g_source_destroy(&iwp->parent);
+ }
index fce068cb025db42b8391b71caf4cbf0c7170c58f..8b7439c9fc7aca24e593b4a1742261c6310d1e92 100644 (file)
@@ -119,10 +119,10 @@ index 43bc0bd520..60e98c87f1 100644
      };
      return raw_co_create(&options, errp);
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 0902b0a024..0653c244cf 100644
+index 905da8be72..3db587a6e4 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -4952,6 +4952,10 @@
+@@ -4956,6 +4956,10 @@
  # @extent-size-hint: Extent size hint to add to the image file; 0 for
  #     not adding an extent size hint (default: 1 MB, since 5.1)
  #
@@ -133,7 +133,7 @@ index 0902b0a024..0653c244cf 100644
  # Since: 2.12
  ##
  { 'struct': 'BlockdevCreateOptionsFile',
-@@ -4959,7 +4963,8 @@
+@@ -4963,7 +4967,8 @@
              'size':                 'size',
              '*preallocation':       'PreallocMode',
              '*nocow':               'bool',
index 8bff4f37bb9787b6cb0fbb59997f86e7aa945efb..df0672feda39fa1fdaf9804d9d60354ce46d6e54 100644 (file)
@@ -25,7 +25,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
  1 file changed, 4 insertions(+), 4 deletions(-)
 
 diff --git a/block/backup.c b/block/backup.c
-index ec29d6b810..270957c0cd 100644
+index 3dd2e229d2..eba5b11493 100644
 --- a/block/backup.c
 +++ b/block/backup.c
 @@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
@@ -48,7 +48,7 @@ index ec29d6b810..270957c0cd 100644
      if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
          int64_t offset = 0;
          int64_t count;
-@@ -501,6 +499,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+@@ -502,6 +500,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                         &error_abort);
      bdrv_graph_wrunlock();
  
index 357f9d69d8d1b40147ec975c421fa9aa7f77e6f5..722a22f77f21d845a679d734f08d37a04956a206 100644 (file)
@@ -199,7 +199,7 @@ index 0000000000..e46abf1070
 +    return bs;
 +}
 diff --git a/block/backup.c b/block/backup.c
-index 270957c0cd..16d611c4ca 100644
+index eba5b11493..1963e47ab9 100644
 --- a/block/backup.c
 +++ b/block/backup.c
 @@ -29,28 +29,6 @@
@@ -231,7 +231,7 @@ index 270957c0cd..16d611c4ca 100644
  static const BlockJobDriver backup_job_driver;
  
  static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
-@@ -461,6 +439,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+@@ -462,6 +440,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
      }
  
      cluster_size = block_copy_cluster_size(bcs);
index 7e09f6870797e968c4c669e4ad451b5304b64b09..fee57fa9902a2075ababc6e909416f2b9f2b0a62 100644 (file)
@@ -167,7 +167,7 @@ index d954bec6f1..5000c084c5 100644
 +    hmp_handle_error(mon, error);
 +}
 diff --git a/blockdev.c b/blockdev.c
-index d27d8c38ec..5e5dbc1da9 100644
+index ed8198f351..1054a69279 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -37,6 +37,7 @@
@@ -1683,7 +1683,7 @@ index 0000000000..c755bf302b
 +    return ret;
 +}
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 0653c244cf..dbd5d9b993 100644
+index 3db587a6e4..d05fffce1d 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
 @@ -851,6 +851,239 @@
index 859ac77a2236a431747ea7391e3c59de66c14aea..02efb582953b601422c3a990e6c3251d2f2285a8 100644 (file)
@@ -368,10 +368,10 @@ index 6de51c34cb..3bc039f60f 100644
  summary_info += {'libdaxctl support': libdaxctl}
  summary_info += {'libudev':           libudev}
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index dbd5d9b993..e79775656c 100644
+index d05fffce1d..e7cf3d94f3 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
-@@ -3453,6 +3453,7 @@
+@@ -3457,6 +3457,7 @@
              'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
              'raw', 'rbd',
              { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
@@ -379,7 +379,7 @@ index dbd5d9b993..e79775656c 100644
              'ssh', 'throttle', 'vdi', 'vhdx',
              { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
              { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
-@@ -3539,6 +3540,33 @@
+@@ -3543,6 +3544,33 @@
  { 'struct': 'BlockdevOptionsNull',
    'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
  
@@ -413,7 +413,7 @@ index dbd5d9b993..e79775656c 100644
  ##
  # @BlockdevOptionsNVMe:
  #
-@@ -4973,6 +5001,7 @@
+@@ -4977,6 +5005,7 @@
        'nfs':        'BlockdevOptionsNfs',
        'null-aio':   'BlockdevOptionsNull',
        'null-co':    'BlockdevOptionsNull',
index 6ba8425a735eccffa8fda9b1da7bc61c80542a16..388bd04c166ea13f60f38dd7444bf791e3793b12 100644 (file)
@@ -186,7 +186,7 @@ index c755bf302b..5ebb6a3947 100644
      ret->pbs_masterkey = true;
      ret->backup_max_workers = true;
 diff --git a/qapi/block-core.json b/qapi/block-core.json
-index e79775656c..cb58a664ef 100644
+index e7cf3d94f3..282e2e8a8c 100644
 --- a/qapi/block-core.json
 +++ b/qapi/block-core.json
 @@ -1004,6 +1004,11 @@
diff --git a/debian/patches/pve/0044-block-copy-before-write-fix-permission.patch b/debian/patches/pve/0044-block-copy-before-write-fix-permission.patch
deleted file mode 100644 (file)
index 6a759a4..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:22 +0200
-Subject: [PATCH] block/copy-before-write: fix permission
-
-In case when source node does not have any parents, the condition still
-works as required: backup job do create the parent by
-
-  block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
-
-Still, in this case checking @perm variable doesn't work, as backup job
-creates the root blk with empty permissions (as it rely on CBW filter
-to require correct permissions and don't want to create extra
-conflicts).
-
-So, we should not check @perm.
-
-The hack may be dropped entirely when transactional insertion of
-filter (when we don't try to recalculate permissions in intermediate
-state, when filter does conflict with original parent of the source
-node) merged (old big series
-"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and it's
-current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
-
-[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
-[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/copy-before-write.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 026fa9840f..5a9456d426 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
-                            perm, shared, nperm, nshared);
-         if (!QLIST_EMPTY(&bs->parents)) {
--            if (perm & BLK_PERM_WRITE) {
--                *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
--            }
-+            /*
-+             * Note, that source child may be shared with backup job. Backup job
-+             * does create own blk parent on copy-before-write node, so this
-+             * works even if source node does not have any parents before backup
-+             * start
-+             */
-+            *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
-         }
-     }
diff --git a/debian/patches/pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch b/debian/patches/pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
new file mode 100644 (file)
index 0000000..0282e71
--- /dev/null
@@ -0,0 +1,133 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:26 +0200
+Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Copy-before-write operations will use at least this granularity and in
+particular, discard requests to the source node will too. If the
+granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+The QAPI uses uint32 so the value will be non-negative, but still fit
+into an int64_t.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c         | 17 +++++++++++++----
+ block/copy-before-write.c  |  3 ++-
+ include/block/block-copy.h |  1 +
+ qapi/block-core.json       |  8 +++++++-
+ 4 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index cc618e4561..12d662e9d4 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+ }
+ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
++                                                 int64_t min_cluster_size,
+                                                  Error **errp)
+ {
+     int ret;
+@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                     "used. If the actual block size of the target exceeds "
+                     "this default, the backup may be unusable",
+                     BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
++        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+     } else if (ret < 0 && !target_does_cow) {
+         error_setg_errno(errp, -ret,
+             "Couldn't determine the cluster size of the target image, "
+@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+         return ret;
+     } else if (ret < 0 && target_does_cow) {
+         /* Not fatal; just trudge on ahead. */
+-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
++        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+     }
+-    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
++    return MAX(min_cluster_size,
++               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
+ }
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      bool discard_source,
++                                     int64_t min_cluster_size,
+                                      Error **errp)
+ {
+     ERRP_GUARD();
+@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+     GLOBAL_STATE_CODE();
+-    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
++    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
++        error_setg(errp, "min-cluster-size needs to be a power of 2");
++        return NULL;
++    }
++
++    cluster_size = block_copy_calculate_cluster_size(target->bs,
++                                                     min_cluster_size, errp);
+     if (cluster_size < 0) {
+         return NULL;
+     }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 853e01a1eb..47b3cdd09f 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+     s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+     s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+-                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
++                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
++                                  opts->min_cluster_size, errp);
+     if (!s->bcs) {
+         error_prepend(errp, "Cannot create block-copy-state: ");
+         return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index bdc703bacd..77857c6c68 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                      BlockDriverState *copy_bitmap_bs,
+                                      const BdrvDirtyBitmap *bitmap,
+                                      bool discard_source,
++                                     int64_t min_cluster_size,
+                                      Error **errp);
+ /* Function should be called prior any actual copy request */
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 282e2e8a8c..9caf04cbe9 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -4926,12 +4926,18 @@
+ #     @on-cbw-error parameter will decide how this failure is handled.
+ #     Default 0.  (Since 7.1)
+ #
++# @min-cluster-size: Minimum size of blocks used by copy-before-write
++#     operations.  Has to be a power of 2.  No effect if smaller than
++#     the maximum of the target's cluster size and 64 KiB.  Default 0.
++#     (Since 8.1)
++#
+ # Since: 6.2
+ ##
+ { 'struct': 'BlockdevOptionsCbw',
+   'base': 'BlockdevOptionsGenericFormat',
+   'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
+-            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
++            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
++            '*min-cluster-size': 'uint32' } }
+ ##
+ # @BlockdevOptions:
diff --git a/debian/patches/pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch b/debian/patches/pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
new file mode 100644 (file)
index 0000000..42c5ec5
--- /dev/null
@@ -0,0 +1,106 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:27 +0200
+Subject: [PATCH] backup: add minimum cluster size to performance options
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Backup/block-copy will use at least this granularity for copy operations
+and in particular, discard requests to the backup source will too. If
+the granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c            | 2 +-
+ block/copy-before-write.c | 2 ++
+ block/copy-before-write.h | 1 +
+ blockdev.c                | 3 +++
+ qapi/block-core.json      | 9 +++++++--
+ 5 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index 1963e47ab9..fe69723ada 100644
+--- a/block/backup.c
++++ b/block/backup.c
+@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+     }
+     cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+-                          &bcs, errp);
++                          perf->min_cluster_size, &bcs, errp);
+     if (!cbw) {
+         goto error;
+     }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 47b3cdd09f..bba58326d7 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                   bool discard_source,
++                                  int64_t min_cluster_size,
+                                   BlockCopyState **bcs,
+                                   Error **errp)
+ {
+@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+     }
+     qdict_put_str(opts, "file", bdrv_get_node_name(source));
+     qdict_put_str(opts, "target", bdrv_get_node_name(target));
++    qdict_put_int(opts, "min-cluster-size", min_cluster_size);
+     top = bdrv_insert_node(source, opts, flags, errp);
+     if (!top) {
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 01af0cd3c4..dc6cafe7fa 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockDriverState *target,
+                                   const char *filter_node_name,
+                                   bool discard_source,
++                                  int64_t min_cluster_size,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/blockdev.c b/blockdev.c
+index 1054a69279..cbe224387b 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+         if (backup->x_perf->has_max_chunk) {
+             perf.max_chunk = backup->x_perf->max_chunk;
+         }
++        if (backup->x_perf->has_min_cluster_size) {
++            perf.min_cluster_size = backup->x_perf->min_cluster_size;
++        }
+     }
+     if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 9caf04cbe9..df934647ed 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -1790,11 +1790,16 @@
+ #     it should not be less than job cluster size which is calculated
+ #     as maximum of target image cluster size and 64k.  Default 0.
+ #
++# @min-cluster-size: Minimum size of blocks used by copy-before-write
++#     and background copy operations.  Has to be a power of 2.  No
++#     effect if smaller than the maximum of the target's cluster size
++#     and 64 KiB.  Default 0. (Since 8.1)
++#
+ # Since: 6.0
+ ##
+ { 'struct': 'BackupPerf',
+-  'data': { '*use-copy-range': 'bool',
+-            '*max-workers': 'int', '*max-chunk': 'int64' } }
++  'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
++            '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
+ ##
+ # @BackupCommon:
diff --git a/debian/patches/pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch b/debian/patches/pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch
deleted file mode 100644 (file)
index f651c58..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:23 +0200
-Subject: [PATCH] block/copy-before-write: support unligned snapshot-discard
-
-First thing that crashes on unligned access here is
-bdrv_reset_dirty_bitmap(). Correct way is to align-down the
-snapshot-discard request.
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/copy-before-write.c | 16 +++++++++++++---
- 1 file changed, 13 insertions(+), 3 deletions(-)
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 5a9456d426..c0e70669a2 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
- cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
- {
-     BDRVCopyBeforeWriteState *s = bs->opaque;
-+    uint32_t cluster_size = block_copy_cluster_size(s->bcs);
-+    int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
-+    int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
-+    int64_t aligned_bytes;
-+
-+    if (aligned_end <= aligned_offset) {
-+        return 0;
-+    }
-+    aligned_bytes = aligned_end - aligned_offset;
-     WITH_QEMU_LOCK_GUARD(&s->lock) {
--        bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
-+        bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
-+                                aligned_bytes);
-     }
--    block_copy_reset(s->bcs, offset, bytes);
-+    block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
--    return bdrv_co_pdiscard(s->target, offset, bytes);
-+    return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
- }
- static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
diff --git a/debian/patches/pve/0046-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0046-PVE-backup-add-fleecing-option.patch
new file mode 100644 (file)
index 0000000..1af3e04
--- /dev/null
@@ -0,0 +1,337 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:28 +0200
+Subject: [PATCH] PVE backup: add fleecing option
+
+When a fleecing option is given, it is expected that each device has
+a corresponding "-fleecing" block device already attached, except for
+EFI disk and TPM state, where fleecing is never used.
+
+The following graph was adapted from [0] which also contains more
+details about fleecing.
+
+[guest]
+   |
+   | root
+   v                 file
+[copy-before-write]<------[snapshot-access]
+   |           |
+   | file      | target
+   v           v
+[source] [fleecing]
+
+For fleecing, a copy-before-write filter is inserted on top of the
+source node, as well as a snapshot-access node pointing to the filter
+node which allows to read the consistent state of the image at the
+time it was inserted. New guest writes are passed through the
+copy-before-write filter which will first copy over old data to the
+fleecing image in case that old data is still needed by the
+snapshot-access node.
+
+The backup process will sequentially read from the snapshot access,
+which has a bitmap and knows whether to read from the original image
+or the fleecing image to get the "snapshot" state, i.e. data from the
+source image at the time when the copy-before-write filter was
+inserted. After reading, the copied sections are discarded from the
+fleecing image to reduce space usage.
+
+All of this can be restricted by an initial dirty bitmap to parts of
+the source image that are required for an incremental backup.
+
+For discard to work, it is necessary that the fleecing image does not
+have a larger cluster size than the backup job granularity. Since
+querying that size does not always work (e.g. for RBD with krbd, the
+cluster size will not be reported), a minimum of 4 MiB is used. A job
+with PBS target already has at least this granularity, so it's just
+relevant for other targets. I.e. edge cases where this minimum is not
+enough should be very rare in practice. If ever necessary in the
+future, a passed-in value for the backup QMP command can still be
+added to override it.
+
+Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
+are set when installing the copy-before-write filter and
+snapshot-access. When an error or timeout occurs, the problematic (and
+each further) snapshot operation will fail and thus cancel the backup
+instead of breaking the guest write.
+
+Note that job_id cannot be inferred from the snapshot-access bs because
+it has no parent, so just pass the one from the original bs.
+
+[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/monitor/block-hmp-cmds.c |   1 +
+ pve-backup.c                   | 135 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json           |  10 ++-
+ 3 files changed, 142 insertions(+), 4 deletions(-)
+
+diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
+index 5000c084c5..70b3de4c7e 100644
+--- a/block/monitor/block-hmp-cmds.c
++++ b/block/monitor/block-hmp-cmds.c
+@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
+         NULL, NULL,
+         devlist, qdict_haskey(qdict, "speed"), speed,
+         false, 0, // BackupPerf max-workers
++        false, false, // fleecing
+         &error);
+     hmp_handle_error(mon, error);
+diff --git a/pve-backup.c b/pve-backup.c
+index 5ebb6a3947..a747d12d3d 100644
+--- a/pve-backup.c
++++ b/pve-backup.c
+@@ -7,9 +7,11 @@
+ #include "sysemu/blockdev.h"
+ #include "block/block_int-global-state.h"
+ #include "block/blockjob.h"
++#include "block/copy-before-write.h"
+ #include "block/dirty-bitmap.h"
+ #include "block/graph-lock.h"
+ #include "qapi/qapi-commands-block.h"
++#include "qapi/qmp/qdict.h"
+ #include "qapi/qmp/qerror.h"
+ #include "qemu/cutils.h"
+@@ -80,8 +82,15 @@ static void pvebackup_init(void)
+ // initialize PVEBackupState at startup
+ opts_init(pvebackup_init);
++typedef struct PVEBackupFleecingInfo {
++    BlockDriverState *bs;
++    BlockDriverState *cbw;
++    BlockDriverState *snapshot_access;
++} PVEBackupFleecingInfo;
++
+ typedef struct PVEBackupDevInfo {
+     BlockDriverState *bs;
++    PVEBackupFleecingInfo fleecing;
+     size_t size;
+     uint64_t block_size;
+     uint8_t dev_id;
+@@ -353,6 +362,22 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+     PVEBackupDevInfo *di = opaque;
+     di->completed_ret = ret;
++    /*
++     * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
++     * won't be done as a coroutine anyways:
++     * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
++     *   just spawn a BH calling bdrv_unref().
++     * - For cbw, draining would need to spawn a BH.
++     */
++    if (di->fleecing.snapshot_access) {
++        bdrv_unref(di->fleecing.snapshot_access);
++        di->fleecing.snapshot_access = NULL;
++    }
++    if (di->fleecing.cbw) {
++        bdrv_cbw_drop(di->fleecing.cbw);
++        di->fleecing.cbw = NULL;
++    }
++
+     /*
+      * Needs to happen outside of coroutine, because it takes the graph write lock.
+      */
+@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
+         }
+         bdrv_drained_begin(di->bs);
++        BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
++
++        BlockDriverState *source_bs = di->bs;
++        bool discard_source = false;
++        bdrv_graph_co_rdlock();
++        const char *job_id = bdrv_get_device_name(di->bs);
++        bdrv_graph_co_rdunlock();
++        if (di->fleecing.bs) {
++            QDict *cbw_opts = qdict_new();
++            qdict_put_str(cbw_opts, "driver", "copy-before-write");
++            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++            if (di->bitmap) {
++                /*
++                 * Only guest writes to parts relevant for the backup need to be intercepted with
++                 * old data being copied to the fleecing image.
++                 */
++                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++            }
++            /*
++             * Fleecing storage is supposed to be fast and it's better to break backup than guest
++             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++             * abort a bit before that.
++             */
++            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++            qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++            if (!di->fleecing.cbw) {
++                error_setg(errp, "appending cbw node for fleecing failed: %s",
++                           local_err ? error_get_pretty(local_err) : "unknown error");
++                break;
++            }
++
++            QDict *snapshot_access_opts = qdict_new();
++            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++            di->fleecing.snapshot_access =
++                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++            if (!di->fleecing.snapshot_access) {
++                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++                           local_err ? error_get_pretty(local_err) : "unknown error");
++                break;
++            }
++            source_bs = di->fleecing.snapshot_access;
++            discard_source = true;
++
++            /*
+             * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
++             * on the fleecing image won't work if the backup job's granularity is less than the RBD
++             * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
++             * target, the backup job granularity would already be at least this much.
++             */
++            perf.min_cluster_size = 4 * 1024 * 1024;
++            /*
++             * For discard to work, cluster size for the backup job must be at least the same as for
++             * the fleecing image.
++             */
++            BlockDriverInfo bdi;
++            if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
++                perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
++            }
++        }
++
+         BlockJob *job = backup_job_create(
+-            NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+-            bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
++            job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
++            bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
+             BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+             &local_err);
+@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
+     aio_co_enter(data->ctx, data->co);
+ }
++/*
++ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
++ */
++static bool device_uses_fleecing(const char *device_id)
++{
++    return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
++}
++
+ /*
+  * Returns a list of device infos, which needs to be freed by the caller. In
+  * case of an error, errp will be set, but the returned value might still be a
+@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
+  */
+ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+     const char *devlist,
++    bool fleecing,
+     Error **errp)
+ {
+     gchar **devs = NULL;
+@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+             }
+             PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+             di->bs = bs;
++
++            if (fleecing && device_uses_fleecing(*d)) {
++                g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
++                BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
++                if (!fleecing_blk) {
++                    error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
++                              "Device '%s' not found", fleecing_devid);
++                    goto err;
++                }
++                BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
++                if (!bdrv_co_is_inserted(fleecing_bs)) {
++                    error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
++                    goto err;
++                }
++                /*
++                 * Fleecing image needs to be the same size to act as a cbw target.
++                 */
++                if (bs->total_sectors != fleecing_bs->total_sectors) {
++                    error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
++                               fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
++                    goto err;
++                }
++                di->fleecing.bs = fleecing_bs;
++            }
++
+             di_list = g_list_append(di_list, di);
+             d++;
+         }
+@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
+     const char *devlist,
+     bool has_speed, int64_t speed,
+     bool has_max_workers, int64_t max_workers,
++    bool has_fleecing, bool fleecing,
+     Error **errp)
+ {
+     assert(qemu_in_coroutine());
+@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
+     format = has_format ? format : BACKUP_FORMAT_VMA;
+     bdrv_graph_co_rdlock();
+-    di_list = get_device_info(devlist, &local_err);
++    di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
+     bdrv_graph_co_rdunlock();
+     if (local_err) {
+         error_propagate(errp, local_err);
+@@ -1089,5 +1217,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+     ret->query_bitmap_info = true;
+     ret->pbs_masterkey = true;
+     ret->backup_max_workers = true;
++    ret->backup_fleecing = true;
+     return ret;
+ }
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index df934647ed..ff441d4258 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -948,6 +948,10 @@
+ #
+ # @max-workers: see @BackupPerf for details. Default 16.
+ #
++# @fleecing: perform a backup with fleecing. For each device in @devlist, a
+#            corresponding '-fleecing' device with the same size already needs to
++#            be present.
++#
+ # Returns: the uuid of the backup job
+ #
+ ##
+@@ -968,7 +972,8 @@
+                                     '*firewall-file': 'str',
+                                     '*devlist': 'str',
+                                     '*speed': 'int',
+-                                    '*max-workers': 'int' },
++                                    '*max-workers': 'int',
++                                    '*fleecing': 'bool' },
+   'returns': 'UuidInfo', 'coroutine': true }
+ ##
+@@ -1014,6 +1019,8 @@
+ #
+ # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+ #
++# @backup-fleecing: Whether backup fleecing is supported or not.
++#
+ # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
+ #     supported or not.
+ #
+@@ -1025,6 +1032,7 @@
+             'pbs-dirty-bitmap-migration': 'bool',
+             'pbs-masterkey': 'bool',
+             'pbs-library-version': 'str',
++            'backup-fleecing': 'bool',
+             'backup-max-workers': 'bool' } }
+ ##
diff --git a/debian/patches/pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch b/debian/patches/pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch
deleted file mode 100644 (file)
index 7cd24d0..0000000
+++ /dev/null
@@ -1,373 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:24 +0200
-Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
- node
-
-Currently block_copy creates copy_bitmap in source node. But that is in
-bad relation with .independent_close=true of copy-before-write filter:
-source node may be detached and removed before .bdrv_close() handler
-called, which should call block_copy_state_free(), which in turn should
-remove copy_bitmap.
-
-That's all not ideal: it would be better if internal bitmap of
-block-copy object is not attached to any node. But that is not possible
-now.
-
-The simplest solution is just create copy_bitmap in filter node, where
-anyway two other bitmaps are created.
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/block-copy.c         |   3 +-
- block/copy-before-write.c  |   2 +-
- include/block/block-copy.h |   1 +
- tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
- 4 files changed, 60 insertions(+), 58 deletions(-)
-
-diff --git a/block/block-copy.c b/block/block-copy.c
-index 9ee3dd7ef5..8fca2c3698 100644
---- a/block/block-copy.c
-+++ b/block/block-copy.c
-@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
- }
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-+                                     BlockDriverState *copy_bitmap_bs,
-                                      const BdrvDirtyBitmap *bitmap,
-                                      Error **errp)
- {
-@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-         return NULL;
-     }
--    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
-+    copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
-                                            errp);
-     if (!copy_bitmap) {
-         return NULL;
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index c0e70669a2..94db31512d 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
-             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
-              bs->file->bs->supported_zero_flags);
--    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
-+    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
-     if (!s->bcs) {
-         error_prepend(errp, "Cannot create block-copy-state: ");
-         return -EINVAL;
-diff --git a/include/block/block-copy.h b/include/block/block-copy.h
-index 0700953ab8..8b41643bfa 100644
---- a/include/block/block-copy.h
-+++ b/include/block/block-copy.h
-@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
- typedef struct BlockCopyCallState BlockCopyCallState;
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-+                                     BlockDriverState *copy_bitmap_bs,
-                                      const BdrvDirtyBitmap *bitmap,
-                                      Error **errp);
-diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
-index aa76131ca9..c33dd7f3a9 100644
---- a/tests/qemu-iotests/257.out
-+++ b/tests/qemu-iotests/257.out
-@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
-@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      }
--    ],
--    "drive0": [
-+      },
-       {
-         "busy": false,
-         "count": 0,
-         "granularity": 65536,
-         "persistent": false,
-         "recording": false
--      },
-+      }
-+    ],
-+    "drive0": [
-       {
-         "busy": false,
-         "count": 458752,
diff --git a/debian/patches/pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch
new file mode 100644 (file)
index 0000000..4522d37
--- /dev/null
@@ -0,0 +1,117 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 29 Apr 2024 14:43:58 +0200
+Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
+ fleecing
+
+With fleecing, failure for copy-before-write does not fail the guest
+write, but only sets the snapshot error that is associated with the
+copy-before-write filter, making further requests to the snapshot
+access fail with EACCES, which then also fails the job. But that error
+code is not the root cause of why the backup failed, so bubble up the
+original snapshot error instead.
+
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+---
+ block/copy-before-write.c | 18 ++++++++++++------
+ block/copy-before-write.h |  1 +
+ pve-backup.c              |  9 +++++++++
+ 3 files changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index bba58326d7..50cc4c7aae 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -27,6 +27,7 @@
+ #include "qapi/qmp/qjson.h"
+ #include "sysemu/block-backend.h"
++#include "qemu/atomic.h"
+ #include "qemu/cutils.h"
+ #include "qapi/error.h"
+ #include "block/block_int.h"
+@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
+      * @snapshot_error is normally zero. But on first copy-before-write failure
+      * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
+      * value of this error (<0). After that all in-flight and further
+-     * snapshot-API requests will fail with that error.
++     * snapshot-API requests will fail with that error. To be accessed with
++     * atomics.
+      */
+     int snapshot_error;
+ } BDRVCopyBeforeWriteState;
+@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+         return 0;
+     }
+-    if (s->snapshot_error) {
++    if (qatomic_read(&s->snapshot_error)) {
+         return 0;
+     }
+@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+     WITH_QEMU_LOCK_GUARD(&s->lock) {
+         if (ret < 0) {
+             assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
+-            if (!s->snapshot_error) {
+-                s->snapshot_error = ret;
+-            }
++            qatomic_cmpxchg(&s->snapshot_error, 0, ret);
+         } else {
+             bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
+         }
+@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
+     QEMU_LOCK_GUARD(&s->lock);
+-    if (s->snapshot_error) {
++    if (qatomic_read(&s->snapshot_error)) {
+         g_free(req);
+         return NULL;
+     }
+@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
+     bdrv_unref(bs);
+ }
++int bdrv_cbw_snapshot_error(BlockDriverState *bs)
++{
++    BDRVCopyBeforeWriteState *s = bs->opaque;
++    return qatomic_read(&s->snapshot_error);
++}
++
+ static void cbw_init(void)
+ {
+     bdrv_register(&bdrv_cbw_filter);
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index dc6cafe7fa..a27d2d7d9f 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+                                   BlockCopyState **bcs,
+                                   Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
++int bdrv_cbw_snapshot_error(BlockDriverState *bs);
+ #endif /* COPY_BEFORE_WRITE_H */
+diff --git a/pve-backup.c b/pve-backup.c
+index a747d12d3d..4e730aa3da 100644
+--- a/pve-backup.c
++++ b/pve-backup.c
+@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+         di->fleecing.snapshot_access = NULL;
+     }
+     if (di->fleecing.cbw) {
++        /*
++         * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
++         * error, making further requests to the snapshot fail with EACCES, which then also fail the
++         * job. But that code is not the root cause and just confusing, so update it.
++         */
++        int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
++        if (di->completed_ret == -EACCES && snapshot_error) {
++            di->completed_ret = snapshot_error;
++        }
+         bdrv_cbw_drop(di->fleecing.cbw);
+         di->fleecing.cbw = NULL;
+     }
diff --git a/debian/patches/pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch b/debian/patches/pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch
deleted file mode 100644 (file)
index 5bf9efc..0000000
+++ /dev/null
@@ -1,277 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:25 +0200
-Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
-
-Add a parameter that enables discard-after-copy. That is mostly useful
-in "push backup with fleecing" scheme, when source is snapshot-access
-format driver node, based on copy-before-write filter snapshot-access
-API:
-
-[guest]      [snapshot-access] ~~ blockdev-backup ~~> [backup target]
-   |            |
-   | root       | file
-   v            v
-[copy-before-write]
-   |             |
-   | file        | target
-   v             v
-[active disk]   [temp.img]
-
-In this case discard-after-copy does two things:
-
- - discard data in temp.img to save disk space
- - avoid further copy-before-write operation in discarded area
-
-Note that we have to declare WRITE permission on source in
-copy-before-write filter, for discard to work. Still we can't take it
-unconditionally, as it will break normal backup from RO source. So, we
-have to add a parameter and pass it thorough bdrv_open flags.
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/backup.c                         |  5 +++--
- block/block-copy.c                     |  9 +++++++++
- block/copy-before-write.c              | 15 +++++++++++++--
- block/copy-before-write.h              |  1 +
- block/replication.c                    |  4 ++--
- blockdev.c                             |  2 +-
- include/block/block-common.h           |  2 ++
- include/block/block-copy.h             |  1 +
- include/block/block_int-global-state.h |  2 +-
- qapi/block-core.json                   |  4 ++++
- 10 files changed, 37 insertions(+), 8 deletions(-)
-
-diff --git a/block/backup.c b/block/backup.c
-index 16d611c4ca..1963e47ab9 100644
---- a/block/backup.c
-+++ b/block/backup.c
-@@ -332,7 +332,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-                   BlockDriverState *target, int64_t speed,
-                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
-                   BitmapSyncMode bitmap_mode,
--                  bool compress,
-+                  bool compress, bool discard_source,
-                   const char *filter_node_name,
-                   BackupPerf *perf,
-                   BlockdevOnError on_source_error,
-@@ -433,7 +433,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-         goto error;
-     }
--    cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
-+    cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
-+                          &bcs, errp);
-     if (!cbw) {
-         goto error;
-     }
-diff --git a/block/block-copy.c b/block/block-copy.c
-index 8fca2c3698..7e3b378528 100644
---- a/block/block-copy.c
-+++ b/block/block-copy.c
-@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
-     CoMutex lock;
-     int64_t in_flight_bytes;
-     BlockCopyMethod method;
-+    bool discard_source;
-     BlockReqList reqs;
-     QLIST_HEAD(, BlockCopyCallState) calls;
-     /*
-@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                      BlockDriverState *copy_bitmap_bs,
-                                      const BdrvDirtyBitmap *bitmap,
-+                                     bool discard_source,
-                                      Error **errp)
- {
-     ERRP_GUARD();
-@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                     cluster_size),
-     };
-+    s->discard_source = discard_source;
-     block_copy_set_copy_opts(s, false, false);
-     ratelimit_init(&s->rate_limit);
-@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
-     co_put_to_shres(s->mem, t->req.bytes);
-     block_copy_task_end(t, ret);
-+    if (s->discard_source && ret == 0) {
-+        int64_t nbytes =
-+            MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
-+        bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
-+    }
-+
-     return ret;
- }
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 94db31512d..853e01a1eb 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
-     BdrvChild *target;
-     OnCbwError on_cbw_error;
-     uint64_t cbw_timeout_ns;
-+    bool discard_source;
-     /*
-      * @lock: protects access to @access_bitmap, @done_bitmap and
-@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
-                uint64_t perm, uint64_t shared,
-                uint64_t *nperm, uint64_t *nshared)
- {
-+    BDRVCopyBeforeWriteState *s = bs->opaque;
-+
-     if (!(role & BDRV_CHILD_FILTERED)) {
-         /*
-          * Target child
-@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
-              * start
-              */
-             *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-+            if (s->discard_source) {
-+                *nperm = *nperm | BLK_PERM_WRITE;
-+            }
-+
-             *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
-         }
-     }
-@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
-             ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
-              bs->file->bs->supported_zero_flags);
--    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
-+    s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
-+    s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
-+                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
-     if (!s->bcs) {
-         error_prepend(errp, "Cannot create block-copy-state: ");
-         return -EINVAL;
-@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
- BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                   BlockDriverState *target,
-                                   const char *filter_node_name,
-+                                  bool discard_source,
-                                   BlockCopyState **bcs,
-                                   Error **errp)
- {
-     BDRVCopyBeforeWriteState *state;
-     BlockDriverState *top;
-     QDict *opts;
-+    int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
-     assert(source->total_sectors == target->total_sectors);
-     GLOBAL_STATE_CODE();
-@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-     qdict_put_str(opts, "file", bdrv_get_node_name(source));
-     qdict_put_str(opts, "target", bdrv_get_node_name(target));
--    top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
-+    top = bdrv_insert_node(source, opts, flags, errp);
-     if (!top) {
-         return NULL;
-     }
-diff --git a/block/copy-before-write.h b/block/copy-before-write.h
-index 6e72bb25e9..01af0cd3c4 100644
---- a/block/copy-before-write.h
-+++ b/block/copy-before-write.h
-@@ -39,6 +39,7 @@
- BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                   BlockDriverState *target,
-                                   const char *filter_node_name,
-+                                  bool discard_source,
-                                   BlockCopyState **bcs,
-                                   Error **errp);
- void bdrv_cbw_drop(BlockDriverState *bs);
-diff --git a/block/replication.c b/block/replication.c
-index ca6bd0a720..0415a5e8b7 100644
---- a/block/replication.c
-+++ b/block/replication.c
-@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
-         s->backup_job = backup_job_create(
-                                 NULL, s->secondary_disk->bs, s->hidden_disk->bs,
--                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
--                                &perf,
-+                                0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
-+                                NULL, &perf,
-                                 BLOCKDEV_ON_ERROR_REPORT,
-                                 BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
-                                 backup_job_completed, bs, NULL, &local_err);
-diff --git a/blockdev.c b/blockdev.c
-index 5e5dbc1da9..1054a69279 100644
---- a/blockdev.c
-+++ b/blockdev.c
-@@ -2727,7 +2727,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
-     job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
-                             backup->sync, bmap, backup->bitmap_mode,
--                            backup->compress,
-+                            backup->compress, backup->discard_source,
-                             backup->filter_node_name,
-                             &perf,
-                             backup->on_source_error,
-diff --git a/include/block/block-common.h b/include/block/block-common.h
-index a846023a09..338fe5ff7a 100644
---- a/include/block/block-common.h
-+++ b/include/block/block-common.h
-@@ -243,6 +243,8 @@ typedef enum {
-                                       read-write fails */
- #define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */
-+#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
-+
- #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
-diff --git a/include/block/block-copy.h b/include/block/block-copy.h
-index 8b41643bfa..bdc703bacd 100644
---- a/include/block/block-copy.h
-+++ b/include/block/block-copy.h
-@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                      BlockDriverState *copy_bitmap_bs,
-                                      const BdrvDirtyBitmap *bitmap,
-+                                     bool discard_source,
-                                      Error **errp);
- /* Function should be called prior any actual copy request */
-diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
-index cc1387ae02..f0c642b194 100644
---- a/include/block/block_int-global-state.h
-+++ b/include/block/block_int-global-state.h
-@@ -195,7 +195,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-                             MirrorSyncMode sync_mode,
-                             BdrvDirtyBitmap *sync_bitmap,
-                             BitmapSyncMode bitmap_mode,
--                            bool compress,
-+                            bool compress, bool discard_source,
-                             const char *filter_node_name,
-                             BackupPerf *perf,
-                             BlockdevOnError on_source_error,
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index cb58a664ef..282e2e8a8c 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -1849,6 +1849,9 @@
- #     node specified by @drive.  If this option is not given, a node
- #     name is autogenerated.  (Since: 4.2)
- #
-+# @discard-source: Discard blocks on source which are already copied
-+#     to the target.  (Since 9.0)
-+#
- # @x-perf: Performance options.  (Since 6.0)
- #
- # Features:
-@@ -1870,6 +1873,7 @@
-             '*on-target-error': 'BlockdevOnError',
-             '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
-             '*filter-node-name': 'str',
-+            '*discard-source': 'bool',
-             '*x-perf': { 'type': 'BackupPerf',
-                          'features': [ 'unstable' ] } } }
diff --git a/debian/patches/pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch b/debian/patches/pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch
deleted file mode 100644 (file)
index 26bbdd0..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 11 Apr 2024 11:29:26 +0200
-Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
-
-Useful to make discard-source work in the context of backup fleecing
-when the fleecing image has a larger granularity than the backup
-target.
-
-Copy-before-write operations will use at least this granularity and in
-particular, discard requests to the source node will too. If the
-granularity is too small, they will just be aligned down in
-cbw_co_pdiscard_snapshot() and thus effectively ignored.
-
-The QAPI uses uint32 so the value will be non-negative, but still fit
-into a uint64_t.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/block-copy.c         | 17 +++++++++++++----
- block/copy-before-write.c  |  3 ++-
- include/block/block-copy.h |  1 +
- qapi/block-core.json       |  8 +++++++-
- 4 files changed, 23 insertions(+), 6 deletions(-)
-
-diff --git a/block/block-copy.c b/block/block-copy.c
-index 7e3b378528..adb1cbb440 100644
---- a/block/block-copy.c
-+++ b/block/block-copy.c
-@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
- }
- static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
-+                                                 int64_t min_cluster_size,
-                                                  Error **errp)
- {
-     int ret;
-@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
-                     "used. If the actual block size of the target exceeds "
-                     "this default, the backup may be unusable",
-                     BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
--        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
-+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
-     } else if (ret < 0 && !target_does_cow) {
-         error_setg_errno(errp, -ret,
-             "Couldn't determine the cluster size of the target image, "
-@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
-         return ret;
-     } else if (ret < 0 && target_does_cow) {
-         /* Not fatal; just trudge on ahead. */
--        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
-+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
-     }
--    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-+    return MAX(min_cluster_size,
-+               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
- }
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                      BlockDriverState *copy_bitmap_bs,
-                                      const BdrvDirtyBitmap *bitmap,
-                                      bool discard_source,
-+                                     int64_t min_cluster_size,
-                                      Error **errp)
- {
-     ERRP_GUARD();
-@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-     GLOBAL_STATE_CODE();
--    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
-+    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
-+        error_setg(errp, "min-cluster-size needs to be a power of 2");
-+        return NULL;
-+    }
-+
-+    cluster_size = block_copy_calculate_cluster_size(target->bs,
-+                                                     min_cluster_size, errp);
-     if (cluster_size < 0) {
-         return NULL;
-     }
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 853e01a1eb..47b3cdd09f 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
-     s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
-     s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
--                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
-+                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
-+                                  opts->min_cluster_size, errp);
-     if (!s->bcs) {
-         error_prepend(errp, "Cannot create block-copy-state: ");
-         return -EINVAL;
-diff --git a/include/block/block-copy.h b/include/block/block-copy.h
-index bdc703bacd..77857c6c68 100644
---- a/include/block/block-copy.h
-+++ b/include/block/block-copy.h
-@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-                                      BlockDriverState *copy_bitmap_bs,
-                                      const BdrvDirtyBitmap *bitmap,
-                                      bool discard_source,
-+                                     int64_t min_cluster_size,
-                                      Error **errp);
- /* Function should be called prior any actual copy request */
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 282e2e8a8c..9caf04cbe9 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -4926,12 +4926,18 @@
- #     @on-cbw-error parameter will decide how this failure is handled.
- #     Default 0.  (Since 7.1)
- #
-+# @min-cluster-size: Minimum size of blocks used by copy-before-write
-+#     operations.  Has to be a power of 2.  No effect if smaller than
-+#     the maximum of the target's cluster size and 64 KiB.  Default 0.
-+#     (Since 8.1)
-+#
- # Since: 6.2
- ##
- { 'struct': 'BlockdevOptionsCbw',
-   'base': 'BlockdevOptionsGenericFormat',
-   'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
--            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
-+            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
-+            '*min-cluster-size': 'uint32' } }
- ##
- # @BlockdevOptions:
diff --git a/debian/patches/pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch b/debian/patches/pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch
deleted file mode 100644 (file)
index 42c5ec5..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 11 Apr 2024 11:29:27 +0200
-Subject: [PATCH] backup: add minimum cluster size to performance options
-
-Useful to make discard-source work in the context of backup fleecing
-when the fleecing image has a larger granularity than the backup
-target.
-
-Backup/block-copy will use at least this granularity for copy operations
-and in particular, discard requests to the backup source will too. If
-the granularity is too small, they will just be aligned down in
-cbw_co_pdiscard_snapshot() and thus effectively ignored.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/backup.c            | 2 +-
- block/copy-before-write.c | 2 ++
- block/copy-before-write.h | 1 +
- blockdev.c                | 3 +++
- qapi/block-core.json      | 9 +++++++--
- 5 files changed, 14 insertions(+), 3 deletions(-)
-
-diff --git a/block/backup.c b/block/backup.c
-index 1963e47ab9..fe69723ada 100644
---- a/block/backup.c
-+++ b/block/backup.c
-@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-     }
-     cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
--                          &bcs, errp);
-+                          perf->min_cluster_size, &bcs, errp);
-     if (!cbw) {
-         goto error;
-     }
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 47b3cdd09f..bba58326d7 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                   BlockDriverState *target,
-                                   const char *filter_node_name,
-                                   bool discard_source,
-+                                  int64_t min_cluster_size,
-                                   BlockCopyState **bcs,
-                                   Error **errp)
- {
-@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-     }
-     qdict_put_str(opts, "file", bdrv_get_node_name(source));
-     qdict_put_str(opts, "target", bdrv_get_node_name(target));
-+    qdict_put_int(opts, "min-cluster-size", min_cluster_size);
-     top = bdrv_insert_node(source, opts, flags, errp);
-     if (!top) {
-diff --git a/block/copy-before-write.h b/block/copy-before-write.h
-index 01af0cd3c4..dc6cafe7fa 100644
---- a/block/copy-before-write.h
-+++ b/block/copy-before-write.h
-@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                   BlockDriverState *target,
-                                   const char *filter_node_name,
-                                   bool discard_source,
-+                                  int64_t min_cluster_size,
-                                   BlockCopyState **bcs,
-                                   Error **errp);
- void bdrv_cbw_drop(BlockDriverState *bs);
-diff --git a/blockdev.c b/blockdev.c
-index 1054a69279..cbe224387b 100644
---- a/blockdev.c
-+++ b/blockdev.c
-@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
-         if (backup->x_perf->has_max_chunk) {
-             perf.max_chunk = backup->x_perf->max_chunk;
-         }
-+        if (backup->x_perf->has_min_cluster_size) {
-+            perf.min_cluster_size = backup->x_perf->min_cluster_size;
-+        }
-     }
-     if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 9caf04cbe9..df934647ed 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -1790,11 +1790,16 @@
- #     it should not be less than job cluster size which is calculated
- #     as maximum of target image cluster size and 64k.  Default 0.
- #
-+# @min-cluster-size: Minimum size of blocks used by copy-before-write
-+#     and background copy operations.  Has to be a power of 2.  No
-+#     effect if smaller than the maximum of the target's cluster size
-+#     and 64 KiB.  Default 0. (Since 8.1)
-+#
- # Since: 6.0
- ##
- { 'struct': 'BackupPerf',
--  'data': { '*use-copy-range': 'bool',
--            '*max-workers': 'int', '*max-chunk': 'int64' } }
-+  'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
-+            '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
- ##
- # @BackupCommon:
diff --git a/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch b/debian/patches/pve/0050-PVE-backup-add-fleecing-option.patch
deleted file mode 100644 (file)
index 1af3e04..0000000
+++ /dev/null
@@ -1,337 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 11 Apr 2024 11:29:28 +0200
-Subject: [PATCH] PVE backup: add fleecing option
-
-When a fleecing option is given, it is expected that each device has
-a corresponding "-fleecing" block device already attached, except for
-EFI disk and TPM state, where fleecing is never used.
-
-The following graph was adapted from [0] which also contains more
-details about fleecing.
-
-[guest]
-   |
-   | root
-   v                 file
-[copy-before-write]<------[snapshot-access]
-   |           |
-   | file      | target
-   v           v
-[source] [fleecing]
-
-For fleecing, a copy-before-write filter is inserted on top of the
-source node, as well as a snapshot-access node pointing to the filter
-node which allows reading the consistent state of the image at the
-time it was inserted. New guest writes are passed through the
-copy-before-write filter which will first copy over old data to the
-fleecing image in case that old data is still needed by the
-snapshot-access node.
-
-The backup process will sequentially read from the snapshot access,
-which has a bitmap and knows whether to read from the original image
-or the fleecing image to get the "snapshot" state, i.e. data from the
-source image at the time when the copy-before-write filter was
-inserted. After reading, the copied sections are discarded from the
-fleecing image to reduce space usage.
-
-All of this can be restricted by an initial dirty bitmap to parts of
-the source image that are required for an incremental backup.
-
-For discard to work, it is necessary that the fleecing image does not
-have a larger cluster size than the backup job granularity. Since
-querying that size does not always work (e.g. for RBD with krbd, the
-cluster size is not reported), a minimum of 4 MiB is used. A job
-with PBS target already has at least this granularity, so it's just
-relevant for other targets. I.e. edge cases where this minimum is not
-enough should be very rare in practice. If ever necessary in the
-future, a passed-in value can still be added to the backup QMP command
-to override it.
-
-Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
-are set when installing the copy-before-write filter and
-snapshot-access. When an error or timeout occurs, the problematic (and
-each further) snapshot operation will fail and thus cancel the backup
-instead of breaking the guest write.
-
-Note that job_id cannot be inferred from the snapshot-access bs because
-it has no parent, so just pass the one from the original bs.
-
-[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/monitor/block-hmp-cmds.c |   1 +
- pve-backup.c                   | 135 ++++++++++++++++++++++++++++++++-
- qapi/block-core.json           |  10 ++-
- 3 files changed, 142 insertions(+), 4 deletions(-)
-
-diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
-index 5000c084c5..70b3de4c7e 100644
---- a/block/monitor/block-hmp-cmds.c
-+++ b/block/monitor/block-hmp-cmds.c
-@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
-         NULL, NULL,
-         devlist, qdict_haskey(qdict, "speed"), speed,
-         false, 0, // BackupPerf max-workers
-+        false, false, // fleecing
-         &error);
-     hmp_handle_error(mon, error);
-diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..a747d12d3d 100644
---- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -7,9 +7,11 @@
- #include "sysemu/blockdev.h"
- #include "block/block_int-global-state.h"
- #include "block/blockjob.h"
-+#include "block/copy-before-write.h"
- #include "block/dirty-bitmap.h"
- #include "block/graph-lock.h"
- #include "qapi/qapi-commands-block.h"
-+#include "qapi/qmp/qdict.h"
- #include "qapi/qmp/qerror.h"
- #include "qemu/cutils.h"
-@@ -80,8 +82,15 @@ static void pvebackup_init(void)
- // initialize PVEBackupState at startup
- opts_init(pvebackup_init);
-+typedef struct PVEBackupFleecingInfo {
-+    BlockDriverState *bs;
-+    BlockDriverState *cbw;
-+    BlockDriverState *snapshot_access;
-+} PVEBackupFleecingInfo;
-+
- typedef struct PVEBackupDevInfo {
-     BlockDriverState *bs;
-+    PVEBackupFleecingInfo fleecing;
-     size_t size;
-     uint64_t block_size;
-     uint8_t dev_id;
-@@ -353,6 +362,22 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-     PVEBackupDevInfo *di = opaque;
-     di->completed_ret = ret;
-+    /*
-+     * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
-+     * won't be done as a coroutine anyways:
-+     * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
-+     *   just spawn a BH calling bdrv_unref().
-+     * - For cbw, draining would need to spawn a BH.
-+     */
-+    if (di->fleecing.snapshot_access) {
-+        bdrv_unref(di->fleecing.snapshot_access);
-+        di->fleecing.snapshot_access = NULL;
-+    }
-+    if (di->fleecing.cbw) {
-+        bdrv_cbw_drop(di->fleecing.cbw);
-+        di->fleecing.cbw = NULL;
-+    }
-+
-     /*
-      * Needs to happen outside of coroutine, because it takes the graph write lock.
-      */
-@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
-         }
-         bdrv_drained_begin(di->bs);
-+        BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
-+
-+        BlockDriverState *source_bs = di->bs;
-+        bool discard_source = false;
-+        bdrv_graph_co_rdlock();
-+        const char *job_id = bdrv_get_device_name(di->bs);
-+        bdrv_graph_co_rdunlock();
-+        if (di->fleecing.bs) {
-+            QDict *cbw_opts = qdict_new();
-+            qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+            qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+            qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+            if (di->bitmap) {
-+                /*
-+                 * Only guest writes to parts relevant for the backup need to be intercepted with
-+                 * old data being copied to the fleecing image.
-+                 */
-+                qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+                qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+            }
-+            /*
-+             * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+             * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+             * abort a bit before that.
-+             */
-+            qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+            qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+            di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+            if (!di->fleecing.cbw) {
-+                error_setg(errp, "appending cbw node for fleecing failed: %s",
-+                           local_err ? error_get_pretty(local_err) : "unknown error");
-+                break;
-+            }
-+
-+            QDict *snapshot_access_opts = qdict_new();
-+            qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+            qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+            di->fleecing.snapshot_access =
-+                bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+            if (!di->fleecing.snapshot_access) {
-+                error_setg(errp, "setting up snapshot access for fleecing failed: %s",
-+                           local_err ? error_get_pretty(local_err) : "unknown error");
-+                break;
-+            }
-+            source_bs = di->fleecing.snapshot_access;
-+            discard_source = true;
-+
-+            /*
-+             * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
-+             * on the fleecing image won't work if the backup job's granularity is less than the RBD
-+             * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
-+             * target, the backup job granularity would already be at least this much.
-+             */
-+            perf.min_cluster_size = 4 * 1024 * 1024;
-+            /*
-+             * For discard to work, cluster size for the backup job must be at least the same as for
-+             * the fleecing image.
-+             */
-+            BlockDriverInfo bdi;
-+            if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
-+                perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
-+            }
-+        }
-+
-         BlockJob *job = backup_job_create(
--            NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
--            bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
-+            job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
-+            bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
-             BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
-             &local_err);
-@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
-     aio_co_enter(data->ctx, data->co);
- }
-+/*
-+ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
-+ */
-+static bool device_uses_fleecing(const char *device_id)
-+{
-+    return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
-+}
-+
- /*
-  * Returns a list of device infos, which needs to be freed by the caller. In
-  * case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
-  */
- static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
-     const char *devlist,
-+    bool fleecing,
-     Error **errp)
- {
-     gchar **devs = NULL;
-@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
-             }
-             PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
-             di->bs = bs;
-+
-+            if (fleecing && device_uses_fleecing(*d)) {
-+                g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
-+                BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
-+                if (!fleecing_blk) {
-+                    error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-+                              "Device '%s' not found", fleecing_devid);
-+                    goto err;
-+                }
-+                BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
-+                if (!bdrv_co_is_inserted(fleecing_bs)) {
-+                    error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
-+                    goto err;
-+                }
-+                /*
-+                 * Fleecing image needs to be the same size to act as a cbw target.
-+                 */
-+                if (bs->total_sectors != fleecing_bs->total_sectors) {
-+                    error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
-+                               fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
-+                    goto err;
-+                }
-+                di->fleecing.bs = fleecing_bs;
-+            }
-+
-             di_list = g_list_append(di_list, di);
-             d++;
-         }
-@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
-     const char *devlist,
-     bool has_speed, int64_t speed,
-     bool has_max_workers, int64_t max_workers,
-+    bool has_fleecing, bool fleecing,
-     Error **errp)
- {
-     assert(qemu_in_coroutine());
-@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
-     format = has_format ? format : BACKUP_FORMAT_VMA;
-     bdrv_graph_co_rdlock();
--    di_list = get_device_info(devlist, &local_err);
-+    di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
-     bdrv_graph_co_rdunlock();
-     if (local_err) {
-         error_propagate(errp, local_err);
-@@ -1089,5 +1217,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
-     ret->query_bitmap_info = true;
-     ret->pbs_masterkey = true;
-     ret->backup_max_workers = true;
-+    ret->backup_fleecing = true;
-     return ret;
- }
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index df934647ed..ff441d4258 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -948,6 +948,10 @@
- #
- # @max-workers: see @BackupPerf for details. Default 16.
- #
-+# @fleecing: perform a backup with fleecing. For each device in @devlist, a
-+#            corresponding '-fleecing' device with the same size already needs to
-+#            be present.
-+#
- # Returns: the uuid of the backup job
- #
- ##
-@@ -968,7 +972,8 @@
-                                     '*firewall-file': 'str',
-                                     '*devlist': 'str',
-                                     '*speed': 'int',
--                                    '*max-workers': 'int' },
-+                                    '*max-workers': 'int',
-+                                    '*fleecing': 'bool' },
-   'returns': 'UuidInfo', 'coroutine': true }
- ##
-@@ -1014,6 +1019,8 @@
- #
- # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
- #
-+# @backup-fleecing: Whether backup fleecing is supported or not.
-+#
- # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
- #     supported or not.
- #
-@@ -1025,6 +1032,7 @@
-             'pbs-dirty-bitmap-migration': 'bool',
-             'pbs-masterkey': 'bool',
-             'pbs-library-version': 'str',
-+            'backup-fleecing': 'bool',
-             'backup-max-workers': 'bool' } }
- ##
diff --git a/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch b/debian/patches/pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
deleted file mode 100644 (file)
index 4522d37..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Mon, 29 Apr 2024 14:43:58 +0200
-Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
- fleecing
-
-With fleecing, failure for copy-before-write does not fail the guest
-write, but only sets the snapshot error that is associated with the
-copy-before-write filter, making further requests to the snapshot
-access fail with EACCES, which then also fails the job. But that error
-code is not the root cause of why the backup failed, so bubble up the
-original snapshot error instead.
-
-Reported-by: Friedrich Weber <f.weber@proxmox.com>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Tested-by: Friedrich Weber <f.weber@proxmox.com>
----
- block/copy-before-write.c | 18 ++++++++++++------
- block/copy-before-write.h |  1 +
- pve-backup.c              |  9 +++++++++
- 3 files changed, 22 insertions(+), 6 deletions(-)
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index bba58326d7..50cc4c7aae 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -27,6 +27,7 @@
- #include "qapi/qmp/qjson.h"
- #include "sysemu/block-backend.h"
-+#include "qemu/atomic.h"
- #include "qemu/cutils.h"
- #include "qapi/error.h"
- #include "block/block_int.h"
-@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
-      * @snapshot_error is normally zero. But on first copy-before-write failure
-      * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
-      * value of this error (<0). After that all in-flight and further
--     * snapshot-API requests will fail with that error.
-+     * snapshot-API requests will fail with that error. To be accessed with
-+     * atomics.
-      */
-     int snapshot_error;
- } BDRVCopyBeforeWriteState;
-@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
-         return 0;
-     }
--    if (s->snapshot_error) {
-+    if (qatomic_read(&s->snapshot_error)) {
-         return 0;
-     }
-@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
-     WITH_QEMU_LOCK_GUARD(&s->lock) {
-         if (ret < 0) {
-             assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
--            if (!s->snapshot_error) {
--                s->snapshot_error = ret;
--            }
-+            qatomic_cmpxchg(&s->snapshot_error, 0, ret);
-         } else {
-             bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
-         }
-@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
-     QEMU_LOCK_GUARD(&s->lock);
--    if (s->snapshot_error) {
-+    if (qatomic_read(&s->snapshot_error)) {
-         g_free(req);
-         return NULL;
-     }
-@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
-     bdrv_unref(bs);
- }
-+int bdrv_cbw_snapshot_error(BlockDriverState *bs)
-+{
-+    BDRVCopyBeforeWriteState *s = bs->opaque;
-+    return qatomic_read(&s->snapshot_error);
-+}
-+
- static void cbw_init(void)
- {
-     bdrv_register(&bdrv_cbw_filter);
-diff --git a/block/copy-before-write.h b/block/copy-before-write.h
-index dc6cafe7fa..a27d2d7d9f 100644
---- a/block/copy-before-write.h
-+++ b/block/copy-before-write.h
-@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
-                                   BlockCopyState **bcs,
-                                   Error **errp);
- void bdrv_cbw_drop(BlockDriverState *bs);
-+int bdrv_cbw_snapshot_error(BlockDriverState *bs);
- #endif /* COPY_BEFORE_WRITE_H */
-diff --git a/pve-backup.c b/pve-backup.c
-index a747d12d3d..4e730aa3da 100644
---- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
-         di->fleecing.snapshot_access = NULL;
-     }
-     if (di->fleecing.cbw) {
-+        /*
-+         * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
-+         * error, making further requests to the snapshot fail with EACCES, which then also fails the
-+         * job. But that code is not the root cause and just confusing, so update it.
-+         */
-+        int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
-+        if (di->completed_ret == -EACCES && snapshot_error) {
-+            di->completed_ret = snapshot_error;
-+        }
-         bdrv_cbw_drop(di->fleecing.cbw);
-         di->fleecing.cbw = NULL;
-     }
index 763dfc1083037c7d84a5cbfbf6ceec487155989d..c3b3117d937bf19720b08f2b7d0d8e275ea1d265 100644 (file)
@@ -4,6 +4,17 @@ extra/0003-ide-avoid-potential-deadlock-when-draining-during-tr.patch
 extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
 extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
 extra/0006-Revert-virtio-pci-fix-use-of-a-released-vector.patch
+extra/0007-block-copy-before-write-fix-permission.patch
+extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch
+extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch
+extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch
+extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
+extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
+extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
+extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
+extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
+extra/0016-block-copy-Fix-missing-graph-lock.patch
+extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
@@ -53,11 +64,7 @@ pve/0040-Revert-block-rbd-fix-handling-of-holes-in-.bdrv_co_b.patch
 pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
 pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
 pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
-pve/0044-block-copy-before-write-fix-permission.patch
-pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch
-pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch
-pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch
-pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch
-pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch
-pve/0050-PVE-backup-add-fleecing-option.patch
-pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
+pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
+pve/0046-PVE-backup-add-fleecing-option.patch
+pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch