hc
2024-12-19 9370bb92b2d16684ee45cf24e879c93c509162da
kernel/fs/xfs/xfs_icreate_item.c
....@@ -9,13 +9,16 @@
99 #include "xfs_format.h"
1010 #include "xfs_log_format.h"
1111 #include "xfs_trans_resv.h"
12
-#include "xfs_bit.h"
1312 #include "xfs_mount.h"
13
+#include "xfs_inode.h"
1414 #include "xfs_trans.h"
1515 #include "xfs_trans_priv.h"
16
-#include "xfs_error.h"
1716 #include "xfs_icreate_item.h"
1817 #include "xfs_log.h"
18
+#include "xfs_log_priv.h"
19
+#include "xfs_log_recover.h"
20
+#include "xfs_ialloc.h"
21
+#include "xfs_trace.h"
1922
2023 kmem_zone_t *xfs_icreate_zone; /* inode create item zone */
2124
....@@ -56,80 +59,18 @@
5659 sizeof(struct xfs_icreate_log));
5760 }
5861
59
-
60
-/* Pinning has no meaning for the create item, so just return. */
6162 STATIC void
62
-xfs_icreate_item_pin(
63
+xfs_icreate_item_release(
6364 struct xfs_log_item *lip)
6465 {
66
+ kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip));
6567 }
6668
67
-
68
-/* pinning has no meaning for the create item, so just return. */
69
-STATIC void
70
-xfs_icreate_item_unpin(
71
- struct xfs_log_item *lip,
72
- int remove)
73
-{
74
-}
75
-
76
-STATIC void
77
-xfs_icreate_item_unlock(
78
- struct xfs_log_item *lip)
79
-{
80
- struct xfs_icreate_item *icp = ICR_ITEM(lip);
81
-
82
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
83
- kmem_zone_free(xfs_icreate_zone, icp);
84
- return;
85
-}
86
-
87
-/*
88
- * Because we have ordered buffers being tracked in the AIL for the inode
89
- * creation, we don't need the create item after this. Hence we can free
90
- * the log item and return -1 to tell the caller we're done with the item.
91
- */
92
-STATIC xfs_lsn_t
93
-xfs_icreate_item_committed(
94
- struct xfs_log_item *lip,
95
- xfs_lsn_t lsn)
96
-{
97
- struct xfs_icreate_item *icp = ICR_ITEM(lip);
98
-
99
- kmem_zone_free(xfs_icreate_zone, icp);
100
- return (xfs_lsn_t)-1;
101
-}
102
-
103
-/* item can never get into the AIL */
104
-STATIC uint
105
-xfs_icreate_item_push(
106
- struct xfs_log_item *lip,
107
- struct list_head *buffer_list)
108
-{
109
- ASSERT(0);
110
- return XFS_ITEM_SUCCESS;
111
-}
112
-
113
-/* Ordered buffers do the dependency tracking here, so this does nothing. */
114
-STATIC void
115
-xfs_icreate_item_committing(
116
- struct xfs_log_item *lip,
117
- xfs_lsn_t lsn)
118
-{
119
-}
120
-
121
-/*
122
- * This is the ops vector shared by all buf log items.
123
- */
12469 static const struct xfs_item_ops xfs_icreate_item_ops = {
70
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
12571 .iop_size = xfs_icreate_item_size,
12672 .iop_format = xfs_icreate_item_format,
127
- .iop_pin = xfs_icreate_item_pin,
128
- .iop_unpin = xfs_icreate_item_unpin,
129
- .iop_push = xfs_icreate_item_push,
130
- .iop_unlock = xfs_icreate_item_unlock,
131
- .iop_committed = xfs_icreate_item_committed,
132
- .iop_committing = xfs_icreate_item_committing,
73
+ .iop_release = xfs_icreate_item_release,
13374 };
13475
13576
....@@ -156,7 +97,7 @@
15697 {
15798 struct xfs_icreate_item *icp;
15899
159
- icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP);
100
+ icp = kmem_cache_zalloc(xfs_icreate_zone, GFP_KERNEL | __GFP_NOFAIL);
160101
161102 xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE,
162103 &xfs_icreate_item_ops);
....@@ -174,3 +115,147 @@
174115 tp->t_flags |= XFS_TRANS_DIRTY;
175116 set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags);
176117 }
118
+
119
+static enum xlog_recover_reorder
120
+xlog_recover_icreate_reorder(
121
+ struct xlog_recover_item *item)
122
+{
123
+ /*
124
+ * Inode allocation buffers must be replayed before subsequent inode
125
+ * items try to modify those buffers. ICREATE items are the logical
126
+ * equivalent of logging a newly initialized inode buffer, so recover
127
+ * these at the same time that we recover logged buffers.
128
+ */
129
+ return XLOG_REORDER_BUFFER_LIST;
130
+}
131
+
132
+/*
133
+ * This routine is called when an inode create format structure is found in a
134
+ * committed transaction in the log. It's purpose is to initialise the inodes
135
+ * being allocated on disk. This requires us to get inode cluster buffers that
136
+ * match the range to be initialised, stamped with inode templates and written
137
+ * by delayed write so that subsequent modifications will hit the cached buffer
138
+ * and only need writing out at the end of recovery.
139
+ */
140
+STATIC int
141
+xlog_recover_icreate_commit_pass2(
142
+ struct xlog *log,
143
+ struct list_head *buffer_list,
144
+ struct xlog_recover_item *item,
145
+ xfs_lsn_t lsn)
146
+{
147
+ struct xfs_mount *mp = log->l_mp;
148
+ struct xfs_icreate_log *icl;
149
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
150
+ xfs_agnumber_t agno;
151
+ xfs_agblock_t agbno;
152
+ unsigned int count;
153
+ unsigned int isize;
154
+ xfs_agblock_t length;
155
+ int bb_per_cluster;
156
+ int cancel_count;
157
+ int nbufs;
158
+ int i;
159
+
160
+ icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
161
+ if (icl->icl_type != XFS_LI_ICREATE) {
162
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
163
+ return -EINVAL;
164
+ }
165
+
166
+ if (icl->icl_size != 1) {
167
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
168
+ return -EINVAL;
169
+ }
170
+
171
+ agno = be32_to_cpu(icl->icl_ag);
172
+ if (agno >= mp->m_sb.sb_agcount) {
173
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
174
+ return -EINVAL;
175
+ }
176
+ agbno = be32_to_cpu(icl->icl_agbno);
177
+ if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
178
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
179
+ return -EINVAL;
180
+ }
181
+ isize = be32_to_cpu(icl->icl_isize);
182
+ if (isize != mp->m_sb.sb_inodesize) {
183
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
184
+ return -EINVAL;
185
+ }
186
+ count = be32_to_cpu(icl->icl_count);
187
+ if (!count) {
188
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
189
+ return -EINVAL;
190
+ }
191
+ length = be32_to_cpu(icl->icl_length);
192
+ if (!length || length >= mp->m_sb.sb_agblocks) {
193
+ xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
194
+ return -EINVAL;
195
+ }
196
+
197
+ /*
198
+ * The inode chunk is either full or sparse and we only support
199
+ * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
200
+ */
201
+ if (length != igeo->ialloc_blks &&
202
+ length != igeo->ialloc_min_blks) {
203
+ xfs_warn(log->l_mp,
204
+ "%s: unsupported chunk length", __FUNCTION__);
205
+ return -EINVAL;
206
+ }
207
+
208
+ /* verify inode count is consistent with extent length */
209
+ if ((count >> mp->m_sb.sb_inopblog) != length) {
210
+ xfs_warn(log->l_mp,
211
+ "%s: inconsistent inode count and chunk length",
212
+ __FUNCTION__);
213
+ return -EINVAL;
214
+ }
215
+
216
+ /*
217
+ * The icreate transaction can cover multiple cluster buffers and these
218
+ * buffers could have been freed and reused. Check the individual
219
+ * buffers for cancellation so we don't overwrite anything written after
220
+ * a cancellation.
221
+ */
222
+ bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
223
+ nbufs = length / igeo->blocks_per_cluster;
224
+ for (i = 0, cancel_count = 0; i < nbufs; i++) {
225
+ xfs_daddr_t daddr;
226
+
227
+ daddr = XFS_AGB_TO_DADDR(mp, agno,
228
+ agbno + i * igeo->blocks_per_cluster);
229
+ if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
230
+ cancel_count++;
231
+ }
232
+
233
+ /*
234
+ * We currently only use icreate for a single allocation at a time. This
235
+ * means we should expect either all or none of the buffers to be
236
+ * cancelled. Be conservative and skip replay if at least one buffer is
237
+ * cancelled, but warn the user that something is awry if the buffers
238
+ * are not consistent.
239
+ *
240
+ * XXX: This must be refined to only skip cancelled clusters once we use
241
+ * icreate for multiple chunk allocations.
242
+ */
243
+ ASSERT(!cancel_count || cancel_count == nbufs);
244
+ if (cancel_count) {
245
+ if (cancel_count != nbufs)
246
+ xfs_warn(mp,
247
+ "WARNING: partial inode chunk cancellation, skipped icreate.");
248
+ trace_xfs_log_recover_icreate_cancel(log, icl);
249
+ return 0;
250
+ }
251
+
252
+ trace_xfs_log_recover_icreate_recover(log, icl);
253
+ return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
254
+ length, be32_to_cpu(icl->icl_gen));
255
+}
256
+
257
+const struct xlog_recover_item_ops xlog_icreate_item_ops = {
258
+ .item_type = XFS_LI_ICREATE,
259
+ .reorder = xlog_recover_icreate_reorder,
260
+ .commit_pass2 = xlog_recover_icreate_commit_pass2,
261
+};