Diff markup
1 /* 1 /*
2 * linux/fs/jbd2/checkpoint.c 2 * linux/fs/jbd2/checkpoint.c
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.c 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
6 * Copyright 1999 Red Hat Software --- All Rig 6 * Copyright 1999 Red Hat Software --- All Rights Reserved
7 * 7 *
8 * This file is part of the Linux kernel and i 8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License 9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated her 10 * option, any later version, incorporated herein by reference.
11 * 11 *
12 * Checkpoint routines for the generic filesys 12 * Checkpoint routines for the generic filesystem journaling code.
13 * Part of the ext2fs journaling system. 13 * Part of the ext2fs journaling system.
14 * 14 *
15 * Checkpointing is the process of ensuring th 15 * Checkpointing is the process of ensuring that a section of the log is
16 * committed fully to disk, so that that porti 16 * committed fully to disk, so that that portion of the log can be
17 * reused. 17 * reused.
18 */ 18 */
19 19
20 #include <linux/time.h> 20 #include <linux/time.h>
21 #include <linux/fs.h> 21 #include <linux/fs.h>
22 #include <linux/jbd2.h> 22 #include <linux/jbd2.h>
23 #include <linux/errno.h> 23 #include <linux/errno.h>
24 #include <linux/slab.h> 24 #include <linux/slab.h>
25 #include <trace/events/jbd2.h> <<
26 25
27 /* 26 /*
28 * Unlink a buffer from a transaction checkpoi 27 * Unlink a buffer from a transaction checkpoint list.
29 * 28 *
30 * Called with j_list_lock held. 29 * Called with j_list_lock held.
31 */ 30 */
32 static inline void __buffer_unlink_first(struc 31 static inline void __buffer_unlink_first(struct journal_head *jh)
33 { 32 {
34 transaction_t *transaction = jh->b_cp_ 33 transaction_t *transaction = jh->b_cp_transaction;
35 34
36 jh->b_cpnext->b_cpprev = jh->b_cpprev; 35 jh->b_cpnext->b_cpprev = jh->b_cpprev;
37 jh->b_cpprev->b_cpnext = jh->b_cpnext; 36 jh->b_cpprev->b_cpnext = jh->b_cpnext;
38 if (transaction->t_checkpoint_list == 37 if (transaction->t_checkpoint_list == jh) {
39 transaction->t_checkpoint_list 38 transaction->t_checkpoint_list = jh->b_cpnext;
40 if (transaction->t_checkpoint_ 39 if (transaction->t_checkpoint_list == jh)
41 transaction->t_checkpo 40 transaction->t_checkpoint_list = NULL;
42 } 41 }
43 } 42 }
44 43
45 /* 44 /*
46 * Unlink a buffer from a transaction checkpoi 45 * Unlink a buffer from a transaction checkpoint(io) list.
47 * 46 *
48 * Called with j_list_lock held. 47 * Called with j_list_lock held.
49 */ 48 */
50 static inline void __buffer_unlink(struct jour 49 static inline void __buffer_unlink(struct journal_head *jh)
51 { 50 {
52 transaction_t *transaction = jh->b_cp_ 51 transaction_t *transaction = jh->b_cp_transaction;
53 52
54 __buffer_unlink_first(jh); 53 __buffer_unlink_first(jh);
55 if (transaction->t_checkpoint_io_list 54 if (transaction->t_checkpoint_io_list == jh) {
56 transaction->t_checkpoint_io_l 55 transaction->t_checkpoint_io_list = jh->b_cpnext;
57 if (transaction->t_checkpoint_ 56 if (transaction->t_checkpoint_io_list == jh)
58 transaction->t_checkpo 57 transaction->t_checkpoint_io_list = NULL;
59 } 58 }
60 } 59 }
61 60
62 /* 61 /*
63 * Move a buffer from the checkpoint list to t 62 * Move a buffer from the checkpoint list to the checkpoint io list
64 * 63 *
65 * Called with j_list_lock held 64 * Called with j_list_lock held
66 */ 65 */
67 static inline void __buffer_relink_io(struct j 66 static inline void __buffer_relink_io(struct journal_head *jh)
68 { 67 {
69 transaction_t *transaction = jh->b_cp_ 68 transaction_t *transaction = jh->b_cp_transaction;
70 69
71 __buffer_unlink_first(jh); 70 __buffer_unlink_first(jh);
72 71
73 if (!transaction->t_checkpoint_io_list 72 if (!transaction->t_checkpoint_io_list) {
74 jh->b_cpnext = jh->b_cpprev = 73 jh->b_cpnext = jh->b_cpprev = jh;
75 } else { 74 } else {
76 jh->b_cpnext = transaction->t_ 75 jh->b_cpnext = transaction->t_checkpoint_io_list;
77 jh->b_cpprev = transaction->t_ 76 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
78 jh->b_cpprev->b_cpnext = jh; 77 jh->b_cpprev->b_cpnext = jh;
79 jh->b_cpnext->b_cpprev = jh; 78 jh->b_cpnext->b_cpprev = jh;
80 } 79 }
81 transaction->t_checkpoint_io_list = jh 80 transaction->t_checkpoint_io_list = jh;
82 } 81 }
83 82
84 /* 83 /*
85 * Try to release a checkpointed buffer from i 84 * Try to release a checkpointed buffer from its transaction.
86 * Returns 1 if we released it and 2 if we als 85 * Returns 1 if we released it and 2 if we also released the
87 * whole transaction. 86 * whole transaction.
88 * 87 *
89 * Requires j_list_lock 88 * Requires j_list_lock
90 * Called under jbd_lock_bh_state(jh2bh(jh)), 89 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
91 */ 90 */
92 static int __try_to_free_cp_buf(struct journal 91 static int __try_to_free_cp_buf(struct journal_head *jh)
93 { 92 {
94 int ret = 0; 93 int ret = 0;
95 struct buffer_head *bh = jh2bh(jh); 94 struct buffer_head *bh = jh2bh(jh);
96 95
97 if (jh->b_jlist == BJ_None && !buffer_ !! 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
98 !buffer_dirty(bh) && !buffer_write <<
99 JBUFFER_TRACE(jh, "remove from 97 JBUFFER_TRACE(jh, "remove from checkpoint list");
100 ret = __jbd2_journal_remove_ch 98 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
101 jbd_unlock_bh_state(bh); 99 jbd_unlock_bh_state(bh);
102 jbd2_journal_remove_journal_he 100 jbd2_journal_remove_journal_head(bh);
103 BUFFER_TRACE(bh, "release"); 101 BUFFER_TRACE(bh, "release");
104 __brelse(bh); 102 __brelse(bh);
105 } else { 103 } else {
106 jbd_unlock_bh_state(bh); 104 jbd_unlock_bh_state(bh);
107 } 105 }
108 return ret; 106 return ret;
109 } 107 }
110 108
111 /* 109 /*
112 * __jbd2_log_wait_for_space: wait until there 110 * __jbd2_log_wait_for_space: wait until there is space in the journal.
113 * 111 *
114 * Called under j_state_lock *only*. It will 112 * Called under j_state_lock *only*. It will be unlocked if we have to wait
115 * for a checkpoint to free up some space in t 113 * for a checkpoint to free up some space in the log.
116 */ 114 */
117 void __jbd2_log_wait_for_space(journal_t *jour 115 void __jbd2_log_wait_for_space(journal_t *journal)
118 { 116 {
119 int nblocks, space_left; !! 117 int nblocks;
120 assert_spin_locked(&journal->j_state_l 118 assert_spin_locked(&journal->j_state_lock);
121 119
122 nblocks = jbd_space_needed(journal); 120 nblocks = jbd_space_needed(journal);
123 while (__jbd2_log_space_left(journal) 121 while (__jbd2_log_space_left(journal) < nblocks) {
124 if (journal->j_flags & JBD2_AB 122 if (journal->j_flags & JBD2_ABORT)
125 return; 123 return;
126 spin_unlock(&journal->j_state_ 124 spin_unlock(&journal->j_state_lock);
127 mutex_lock(&journal->j_checkpo 125 mutex_lock(&journal->j_checkpoint_mutex);
128 126
129 /* 127 /*
130 * Test again, another process 128 * Test again, another process may have checkpointed while we
131 * were waiting for the checkp !! 129 * were waiting for the checkpoint lock
132 * transactions ready to be ch <<
133 * journal space by calling cl <<
134 * that doesn't work, by waiti <<
135 * transaction to complete. I <<
136 * to make progress, this is e <<
137 * filesystem, so abort the jo <<
138 * trace for forensic evidence <<
139 */ 130 */
140 spin_lock(&journal->j_state_lo 131 spin_lock(&journal->j_state_lock);
141 spin_lock(&journal->j_list_loc <<
142 nblocks = jbd_space_needed(jou 132 nblocks = jbd_space_needed(journal);
143 space_left = __jbd2_log_space_ !! 133 if (__jbd2_log_space_left(journal) < nblocks) {
144 if (space_left < nblocks) { <<
145 int chkpt = journal->j <<
146 tid_t tid = 0; <<
147 <<
148 if (journal->j_committ <<
149 tid = journal- <<
150 spin_unlock(&journal-> <<
151 spin_unlock(&journal-> 134 spin_unlock(&journal->j_state_lock);
152 if (chkpt) { !! 135 jbd2_log_do_checkpoint(journal);
153 jbd2_log_do_ch <<
154 } else if (jbd2_cleanu <<
155 /* We were abl <<
156 ; <<
157 } else if (tid) { <<
158 jbd2_log_wait_ <<
159 } else { <<
160 printk(KERN_ER <<
161 "only h <<
162 __func_ <<
163 printk(KERN_ER <<
164 "journa <<
165 journal <<
166 WARN_ON(1); <<
167 jbd2_journal_a <<
168 } <<
169 spin_lock(&journal->j_ 136 spin_lock(&journal->j_state_lock);
170 } else { <<
171 spin_unlock(&journal-> <<
172 } 137 }
173 mutex_unlock(&journal->j_check 138 mutex_unlock(&journal->j_checkpoint_mutex);
174 } 139 }
175 } 140 }
176 141
177 /* 142 /*
178 * We were unable to perform jbd_trylock_bh_st 143 * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
179 * The caller must restart a list walk. Wait 144 * The caller must restart a list walk. Wait for someone else to run
180 * jbd_unlock_bh_state(). 145 * jbd_unlock_bh_state().
181 */ 146 */
182 static void jbd_sync_bh(journal_t *journal, st 147 static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
183 __releases(journal->j_list_lock) 148 __releases(journal->j_list_lock)
184 { 149 {
185 get_bh(bh); 150 get_bh(bh);
186 spin_unlock(&journal->j_list_lock); 151 spin_unlock(&journal->j_list_lock);
187 jbd_lock_bh_state(bh); 152 jbd_lock_bh_state(bh);
188 jbd_unlock_bh_state(bh); 153 jbd_unlock_bh_state(bh);
189 put_bh(bh); 154 put_bh(bh);
190 } 155 }
191 156
192 /* 157 /*
193 * Clean up transaction's list of buffers subm 158 * Clean up transaction's list of buffers submitted for io.
194 * We wait for any pending IO to complete and 159 * We wait for any pending IO to complete and remove any clean
195 * buffers. Note that we take the buffers in t 160 * buffers. Note that we take the buffers in the opposite ordering
196 * from the one in which they were submitted f 161 * from the one in which they were submitted for IO.
197 * 162 *
198 * Return 0 on success, and return <0 if some <<
199 * to be written out. <<
200 * <<
201 * Called with j_list_lock held. 163 * Called with j_list_lock held.
202 */ 164 */
203 static int __wait_cp_io(journal_t *journal, tr !! 165 static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
204 { 166 {
205 struct journal_head *jh; 167 struct journal_head *jh;
206 struct buffer_head *bh; 168 struct buffer_head *bh;
207 tid_t this_tid; 169 tid_t this_tid;
208 int released = 0; 170 int released = 0;
209 int ret = 0; <<
210 171
211 this_tid = transaction->t_tid; 172 this_tid = transaction->t_tid;
212 restart: 173 restart:
213 /* Did somebody clean up the transacti 174 /* Did somebody clean up the transaction in the meanwhile? */
214 if (journal->j_checkpoint_transactions 175 if (journal->j_checkpoint_transactions != transaction ||
215 transaction->t_tid != 176 transaction->t_tid != this_tid)
216 return ret; !! 177 return;
217 while (!released && transaction->t_che 178 while (!released && transaction->t_checkpoint_io_list) {
218 jh = transaction->t_checkpoint 179 jh = transaction->t_checkpoint_io_list;
219 bh = jh2bh(jh); 180 bh = jh2bh(jh);
220 if (!jbd_trylock_bh_state(bh)) 181 if (!jbd_trylock_bh_state(bh)) {
221 jbd_sync_bh(journal, b 182 jbd_sync_bh(journal, bh);
222 spin_lock(&journal->j_ 183 spin_lock(&journal->j_list_lock);
223 goto restart; 184 goto restart;
224 } 185 }
225 if (buffer_locked(bh)) { 186 if (buffer_locked(bh)) {
226 atomic_inc(&bh->b_coun 187 atomic_inc(&bh->b_count);
227 spin_unlock(&journal-> 188 spin_unlock(&journal->j_list_lock);
228 jbd_unlock_bh_state(bh 189 jbd_unlock_bh_state(bh);
229 wait_on_buffer(bh); 190 wait_on_buffer(bh);
230 /* the journal_head ma 191 /* the journal_head may have gone by now */
231 BUFFER_TRACE(bh, "brel 192 BUFFER_TRACE(bh, "brelse");
232 __brelse(bh); 193 __brelse(bh);
233 spin_lock(&journal->j_ 194 spin_lock(&journal->j_list_lock);
234 goto restart; 195 goto restart;
235 } 196 }
236 if (unlikely(buffer_write_io_e <<
237 ret = -EIO; <<
238 <<
239 /* 197 /*
240 * Now in whatever state the b 198 * Now in whatever state the buffer currently is, we know that
241 * it has been written out and 199 * it has been written out and so we can drop it from the list
242 */ 200 */
243 released = __jbd2_journal_remo 201 released = __jbd2_journal_remove_checkpoint(jh);
244 jbd_unlock_bh_state(bh); 202 jbd_unlock_bh_state(bh);
245 jbd2_journal_remove_journal_he 203 jbd2_journal_remove_journal_head(bh);
246 __brelse(bh); 204 __brelse(bh);
247 } 205 }
248 <<
249 return ret; <<
250 } 206 }
251 207
>> 208 #define NR_BATCH 64
>> 209
252 static void 210 static void
253 __flush_batch(journal_t *journal, int *batch_c !! 211 __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
254 { 212 {
255 int i; 213 int i;
256 214
257 ll_rw_block(SWRITE, *batch_count, jour !! 215 ll_rw_block(SWRITE, *batch_count, bhs);
258 for (i = 0; i < *batch_count; i++) { 216 for (i = 0; i < *batch_count; i++) {
259 struct buffer_head *bh = journ !! 217 struct buffer_head *bh = bhs[i];
260 clear_buffer_jwrite(bh); 218 clear_buffer_jwrite(bh);
261 BUFFER_TRACE(bh, "brelse"); 219 BUFFER_TRACE(bh, "brelse");
262 __brelse(bh); 220 __brelse(bh);
263 } 221 }
264 *batch_count = 0; 222 *batch_count = 0;
265 } 223 }
266 224
267 /* 225 /*
268 * Try to flush one buffer from the checkpoint 226 * Try to flush one buffer from the checkpoint list to disk.
269 * 227 *
270 * Return 1 if something happened which requir 228 * Return 1 if something happened which requires us to abort the current
271 * scan of the checkpoint list. Return <0 if !! 229 * scan of the checkpoint list.
272 * be written out. <<
273 * 230 *
274 * Called with j_list_lock held and drops it i 231 * Called with j_list_lock held and drops it if 1 is returned
275 * Called under jbd_lock_bh_state(jh2bh(jh)), 232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
276 */ 233 */
277 static int __process_buffer(journal_t *journal 234 static int __process_buffer(journal_t *journal, struct journal_head *jh,
278 int *batch_count, !! 235 struct buffer_head **bhs, int *batch_count,
>> 236 transaction_t *transaction)
279 { 237 {
280 struct buffer_head *bh = jh2bh(jh); 238 struct buffer_head *bh = jh2bh(jh);
281 int ret = 0; 239 int ret = 0;
282 240
283 if (buffer_locked(bh)) { 241 if (buffer_locked(bh)) {
284 atomic_inc(&bh->b_count); 242 atomic_inc(&bh->b_count);
285 spin_unlock(&journal->j_list_l 243 spin_unlock(&journal->j_list_lock);
286 jbd_unlock_bh_state(bh); 244 jbd_unlock_bh_state(bh);
287 wait_on_buffer(bh); 245 wait_on_buffer(bh);
288 /* the journal_head may have g 246 /* the journal_head may have gone by now */
289 BUFFER_TRACE(bh, "brelse"); 247 BUFFER_TRACE(bh, "brelse");
290 __brelse(bh); 248 __brelse(bh);
291 ret = 1; 249 ret = 1;
292 } else if (jh->b_transaction != NULL) 250 } else if (jh->b_transaction != NULL) {
293 transaction_t *t = jh->b_trans 251 transaction_t *t = jh->b_transaction;
294 tid_t tid = t->t_tid; 252 tid_t tid = t->t_tid;
295 253
296 transaction->t_chp_stats.cs_fo 254 transaction->t_chp_stats.cs_forced_to_close++;
297 spin_unlock(&journal->j_list_l 255 spin_unlock(&journal->j_list_lock);
298 jbd_unlock_bh_state(bh); 256 jbd_unlock_bh_state(bh);
299 jbd2_log_start_commit(journal, 257 jbd2_log_start_commit(journal, tid);
300 jbd2_log_wait_commit(journal, 258 jbd2_log_wait_commit(journal, tid);
301 ret = 1; 259 ret = 1;
302 } else if (!buffer_dirty(bh)) { 260 } else if (!buffer_dirty(bh)) {
303 ret = 1; <<
304 if (unlikely(buffer_write_io_e <<
305 ret = -EIO; <<
306 J_ASSERT_JH(jh, !buffer_jbddir 261 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
307 BUFFER_TRACE(bh, "remove from 262 BUFFER_TRACE(bh, "remove from checkpoint");
308 __jbd2_journal_remove_checkpoi 263 __jbd2_journal_remove_checkpoint(jh);
309 spin_unlock(&journal->j_list_l 264 spin_unlock(&journal->j_list_lock);
310 jbd_unlock_bh_state(bh); 265 jbd_unlock_bh_state(bh);
311 jbd2_journal_remove_journal_he 266 jbd2_journal_remove_journal_head(bh);
312 __brelse(bh); 267 __brelse(bh);
>> 268 ret = 1;
313 } else { 269 } else {
314 /* 270 /*
315 * Important: we are about to 271 * Important: we are about to write the buffer, and
316 * possibly block, while still 272 * possibly block, while still holding the journal lock.
317 * We cannot afford to let the 273 * We cannot afford to let the transaction logic start
318 * messing around with this bu 274 * messing around with this buffer before we write it to
319 * disk, as that would break r 275 * disk, as that would break recoverability.
320 */ 276 */
321 BUFFER_TRACE(bh, "queue"); 277 BUFFER_TRACE(bh, "queue");
322 get_bh(bh); 278 get_bh(bh);
323 J_ASSERT_BH(bh, !buffer_jwrite 279 J_ASSERT_BH(bh, !buffer_jwrite(bh));
324 set_buffer_jwrite(bh); 280 set_buffer_jwrite(bh);
325 journal->j_chkpt_bhs[*batch_co !! 281 bhs[*batch_count] = bh;
326 __buffer_relink_io(jh); 282 __buffer_relink_io(jh);
327 jbd_unlock_bh_state(bh); 283 jbd_unlock_bh_state(bh);
328 transaction->t_chp_stats.cs_wr 284 transaction->t_chp_stats.cs_written++;
329 (*batch_count)++; 285 (*batch_count)++;
330 if (*batch_count == JBD2_NR_BA !! 286 if (*batch_count == NR_BATCH) {
331 spin_unlock(&journal-> 287 spin_unlock(&journal->j_list_lock);
332 __flush_batch(journal, !! 288 __flush_batch(journal, bhs, batch_count);
333 ret = 1; 289 ret = 1;
334 } 290 }
335 } 291 }
336 return ret; 292 return ret;
337 } 293 }
338 294
339 /* 295 /*
340 * Perform an actual checkpoint. We take the f 296 * Perform an actual checkpoint. We take the first transaction on the
341 * list of transactions to be checkpointed and 297 * list of transactions to be checkpointed and send all its buffers
342 * to disk. We submit larger chunks of data at 298 * to disk. We submit larger chunks of data at once.
343 * 299 *
344 * The journal should be locked before calling 300 * The journal should be locked before calling this function.
345 * Called with j_checkpoint_mutex held. <<
346 */ 301 */
347 int jbd2_log_do_checkpoint(journal_t *journal) 302 int jbd2_log_do_checkpoint(journal_t *journal)
348 { 303 {
349 transaction_t *transaction; 304 transaction_t *transaction;
350 tid_t this_tid; 305 tid_t this_tid;
351 int result; 306 int result;
352 307
353 jbd_debug(1, "Start checkpoint\n"); 308 jbd_debug(1, "Start checkpoint\n");
354 309
355 /* 310 /*
356 * First thing: if there are any trans 311 * First thing: if there are any transactions in the log which
357 * don't need checkpointing, just elim 312 * don't need checkpointing, just eliminate them from the
358 * journal straight away. 313 * journal straight away.
359 */ 314 */
360 result = jbd2_cleanup_journal_tail(jou 315 result = jbd2_cleanup_journal_tail(journal);
361 trace_jbd2_checkpoint(journal, result) <<
362 jbd_debug(1, "cleanup_journal_tail ret 316 jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
363 if (result <= 0) 317 if (result <= 0)
364 return result; 318 return result;
365 319
366 /* 320 /*
367 * OK, we need to start writing disk b 321 * OK, we need to start writing disk blocks. Take one transaction
368 * and write it. 322 * and write it.
369 */ 323 */
370 result = 0; <<
371 spin_lock(&journal->j_list_lock); 324 spin_lock(&journal->j_list_lock);
372 if (!journal->j_checkpoint_transaction 325 if (!journal->j_checkpoint_transactions)
373 goto out; 326 goto out;
374 transaction = journal->j_checkpoint_tr 327 transaction = journal->j_checkpoint_transactions;
375 if (transaction->t_chp_stats.cs_chp_ti 328 if (transaction->t_chp_stats.cs_chp_time == 0)
376 transaction->t_chp_stats.cs_ch 329 transaction->t_chp_stats.cs_chp_time = jiffies;
377 this_tid = transaction->t_tid; 330 this_tid = transaction->t_tid;
378 restart: 331 restart:
379 /* 332 /*
380 * If someone cleaned up this transact 333 * If someone cleaned up this transaction while we slept, we're
381 * done (maybe it's a new transaction, 334 * done (maybe it's a new transaction, but it fell at the same
382 * address). 335 * address).
383 */ 336 */
384 if (journal->j_checkpoint_transactions 337 if (journal->j_checkpoint_transactions == transaction &&
385 transaction->t_tid == 338 transaction->t_tid == this_tid) {
386 int batch_count = 0; 339 int batch_count = 0;
>> 340 struct buffer_head *bhs[NR_BATCH];
387 struct journal_head *jh; 341 struct journal_head *jh;
388 int retry = 0, err; !! 342 int retry = 0;
389 343
390 while (!retry && transaction-> 344 while (!retry && transaction->t_checkpoint_list) {
391 struct buffer_head *bh 345 struct buffer_head *bh;
392 346
393 jh = transaction->t_ch 347 jh = transaction->t_checkpoint_list;
394 bh = jh2bh(jh); 348 bh = jh2bh(jh);
395 if (!jbd_trylock_bh_st 349 if (!jbd_trylock_bh_state(bh)) {
396 jbd_sync_bh(jo 350 jbd_sync_bh(journal, bh);
397 retry = 1; 351 retry = 1;
398 break; 352 break;
399 } 353 }
400 retry = __process_buff !! 354 retry = __process_buffer(journal, jh, bhs, &batch_count,
401 355 transaction);
402 if (retry < 0 && !resu <<
403 result = retry <<
404 if (!retry && (need_re 356 if (!retry && (need_resched() ||
405 spin_needbreak 357 spin_needbreak(&journal->j_list_lock))) {
406 spin_unlock(&j 358 spin_unlock(&journal->j_list_lock);
407 retry = 1; 359 retry = 1;
408 break; 360 break;
409 } 361 }
410 } 362 }
411 363
412 if (batch_count) { 364 if (batch_count) {
413 if (!retry) { 365 if (!retry) {
414 spin_unlock(&j 366 spin_unlock(&journal->j_list_lock);
415 retry = 1; 367 retry = 1;
416 } 368 }
417 __flush_batch(journal, !! 369 __flush_batch(journal, bhs, &batch_count);
418 } 370 }
419 371
420 if (retry) { 372 if (retry) {
421 spin_lock(&journal->j_ 373 spin_lock(&journal->j_list_lock);
422 goto restart; 374 goto restart;
423 } 375 }
424 /* 376 /*
425 * Now we have cleaned up the 377 * Now we have cleaned up the first transaction's checkpoint
426 * list. Let's clean up the se 378 * list. Let's clean up the second one
427 */ 379 */
428 err = __wait_cp_io(journal, tr !! 380 __wait_cp_io(journal, transaction);
429 if (!result) <<
430 result = err; <<
431 } 381 }
432 out: 382 out:
433 spin_unlock(&journal->j_list_lock); 383 spin_unlock(&journal->j_list_lock);
>> 384 result = jbd2_cleanup_journal_tail(journal);
434 if (result < 0) 385 if (result < 0)
435 jbd2_journal_abort(journal, re !! 386 return result;
436 else !! 387 return 0;
437 result = jbd2_cleanup_journal_ <<
438 <<
439 return (result < 0) ? result : 0; <<
440 } 388 }
441 389
442 /* 390 /*
443 * Check the list of checkpoint transactions f 391 * Check the list of checkpoint transactions for the journal to see if
444 * we have already got rid of any since the la 392 * we have already got rid of any since the last update of the log tail
445 * in the journal superblock. If so, we can i 393 * in the journal superblock. If so, we can instantly roll the
446 * superblock forward to remove those transact 394 * superblock forward to remove those transactions from the log.
447 * 395 *
448 * Return <0 on error, 0 on success, 1 if ther 396 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
449 * 397 *
450 * Called with the journal lock held. 398 * Called with the journal lock held.
451 * 399 *
452 * This is the only part of the journaling cod 400 * This is the only part of the journaling code which really needs to be
453 * aware of transaction aborts. Checkpointing 401 * aware of transaction aborts. Checkpointing involves writing to the
454 * main filesystem area rather than to the jou 402 * main filesystem area rather than to the journal, so it can proceed
455 * even in abort state, but we must not update !! 403 * even in abort state, but we must not update the journal superblock if
456 * checkpointing may have failed. Otherwise, !! 404 * we have an abort error outstanding.
457 * buffers which should be written-back to the <<
458 */ 405 */
459 406
460 int jbd2_cleanup_journal_tail(journal_t *journ 407 int jbd2_cleanup_journal_tail(journal_t *journal)
461 { 408 {
462 transaction_t * transaction; 409 transaction_t * transaction;
463 tid_t first_tid; 410 tid_t first_tid;
464 unsigned long blocknr, freed; 411 unsigned long blocknr, freed;
465 412
466 if (is_journal_aborted(journal)) <<
467 return 1; <<
468 <<
469 /* OK, work out the oldest transaction 413 /* OK, work out the oldest transaction remaining in the log, and
470 * the log block it starts at. 414 * the log block it starts at.
471 * 415 *
472 * If the log is now empty, we need to 416 * If the log is now empty, we need to work out which is the
473 * next transaction ID we will write, 417 * next transaction ID we will write, and where it will
474 * start. */ 418 * start. */
475 419
476 spin_lock(&journal->j_state_lock); 420 spin_lock(&journal->j_state_lock);
477 spin_lock(&journal->j_list_lock); 421 spin_lock(&journal->j_list_lock);
478 transaction = journal->j_checkpoint_tr 422 transaction = journal->j_checkpoint_transactions;
479 if (transaction) { 423 if (transaction) {
480 first_tid = transaction->t_tid 424 first_tid = transaction->t_tid;
481 blocknr = transaction->t_log_s 425 blocknr = transaction->t_log_start;
482 } else if ((transaction = journal->j_c 426 } else if ((transaction = journal->j_committing_transaction) != NULL) {
483 first_tid = transaction->t_tid 427 first_tid = transaction->t_tid;
484 blocknr = transaction->t_log_s 428 blocknr = transaction->t_log_start;
485 } else if ((transaction = journal->j_r 429 } else if ((transaction = journal->j_running_transaction) != NULL) {
486 first_tid = transaction->t_tid 430 first_tid = transaction->t_tid;
487 blocknr = journal->j_head; 431 blocknr = journal->j_head;
488 } else { 432 } else {
489 first_tid = journal->j_transac 433 first_tid = journal->j_transaction_sequence;
490 blocknr = journal->j_head; 434 blocknr = journal->j_head;
491 } 435 }
492 spin_unlock(&journal->j_list_lock); 436 spin_unlock(&journal->j_list_lock);
493 J_ASSERT(blocknr != 0); 437 J_ASSERT(blocknr != 0);
494 438
495 /* If the oldest pinned transaction is 439 /* If the oldest pinned transaction is at the tail of the log
496 already then there's not much we ca 440 already then there's not much we can do right now. */
497 if (journal->j_tail_sequence == first_ 441 if (journal->j_tail_sequence == first_tid) {
498 spin_unlock(&journal->j_state_ 442 spin_unlock(&journal->j_state_lock);
499 return 1; 443 return 1;
500 } 444 }
501 445
502 /* OK, update the superblock to recove 446 /* OK, update the superblock to recover the freed space.
503 * Physical blocks come first: have we 447 * Physical blocks come first: have we wrapped beyond the end of
504 * the log? */ 448 * the log? */
505 freed = blocknr - journal->j_tail; 449 freed = blocknr - journal->j_tail;
506 if (blocknr < journal->j_tail) 450 if (blocknr < journal->j_tail)
507 freed = freed + journal->j_las 451 freed = freed + journal->j_last - journal->j_first;
508 452
509 jbd_debug(1, 453 jbd_debug(1,
510 "Cleaning journal tail from 454 "Cleaning journal tail from %d to %d (offset %lu), "
511 "freeing %lu\n", 455 "freeing %lu\n",
512 journal->j_tail_sequence, fi 456 journal->j_tail_sequence, first_tid, blocknr, freed);
513 457
514 journal->j_free += freed; 458 journal->j_free += freed;
515 journal->j_tail_sequence = first_tid; 459 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 460 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 461 spin_unlock(&journal->j_state_lock);
518 if (!(journal->j_flags & JBD2_ABORT)) 462 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock 463 jbd2_journal_update_superblock(journal, 1);
520 return 0; 464 return 0;
521 } 465 }
522 466
523 467
524 /* Checkpoint list management */ 468 /* Checkpoint list management */
525 469
526 /* 470 /*
527 * journal_clean_one_cp_list 471 * journal_clean_one_cp_list
528 * 472 *
529 * Find all the written-back checkpoint buffer 473 * Find all the written-back checkpoint buffers in the given list and release them.
530 * 474 *
531 * Called with the journal locked. 475 * Called with the journal locked.
532 * Called with j_list_lock held. 476 * Called with j_list_lock held.
533 * Returns number of buffers reaped (for debug) 477 * Returns number of buffers reaped (for debug)
534 */ 478 */
535 479
536 static int journal_clean_one_cp_list(struct jo 480 static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
537 { 481 {
538 struct journal_head *last_jh; 482 struct journal_head *last_jh;
539 struct journal_head *next_jh = jh; 483 struct journal_head *next_jh = jh;
540 int ret, freed = 0; 484 int ret, freed = 0;
541 485
542 *released = 0; 486 *released = 0;
543 if (!jh) 487 if (!jh)
544 return 0; 488 return 0;
545 489
546 last_jh = jh->b_cpprev; 490 last_jh = jh->b_cpprev;
547 do { 491 do {
548 jh = next_jh; 492 jh = next_jh;
549 next_jh = jh->b_cpnext; 493 next_jh = jh->b_cpnext;
550 /* Use trylock because of the 494 /* Use trylock because of the ranking */
551 if (jbd_trylock_bh_state(jh2bh 495 if (jbd_trylock_bh_state(jh2bh(jh))) {
552 ret = __try_to_free_cp 496 ret = __try_to_free_cp_buf(jh);
553 if (ret) { 497 if (ret) {
554 freed++; 498 freed++;
555 if (ret == 2) 499 if (ret == 2) {
556 *relea 500 *released = 1;
557 return 501 return freed;
558 } 502 }
559 } 503 }
560 } 504 }
561 /* 505 /*
562 * This function only frees up 506 * This function only frees up some memory
563 * if possible so we don't have 507 * if possible so we don't have an obligation
564 * to finish processing. Bail 508 * to finish processing. Bail out if preemption
565 * requested: 509 * requested:
566 */ 510 */
567 if (need_resched()) 511 if (need_resched())
568 return freed; 512 return freed;
569 } while (jh != last_jh); 513 } while (jh != last_jh);
570 514
571 return freed; 515 return freed;
572 } 516 }
573 517
574 /* 518 /*
575 * journal_clean_checkpoint_list 519 * journal_clean_checkpoint_list
576 * 520 *
577 * Find all the written-back checkpoint buffer 521 * Find all the written-back checkpoint buffers in the journal and release them.
578 * 522 *
579 * Called with the journal locked. 523 * Called with the journal locked.
580 * Called with j_list_lock held. 524 * Called with j_list_lock held.
581 * Returns number of buffers reaped (for debug 525 * Returns number of buffers reaped (for debug)
582 */ 526 */
583 527
584 int __jbd2_journal_clean_checkpoint_list(journ 528 int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
585 { 529 {
586 transaction_t *transaction, *last_tran 530 transaction_t *transaction, *last_transaction, *next_transaction;
587 int ret = 0; 531 int ret = 0;
588 int released; 532 int released;
589 533
590 transaction = journal->j_checkpoint_tr 534 transaction = journal->j_checkpoint_transactions;
591 if (!transaction) 535 if (!transaction)
592 goto out; 536 goto out;
593 537
594 last_transaction = transaction->t_cppr 538 last_transaction = transaction->t_cpprev;
595 next_transaction = transaction; 539 next_transaction = transaction;
596 do { 540 do {
597 transaction = next_transaction 541 transaction = next_transaction;
598 next_transaction = transaction 542 next_transaction = transaction->t_cpnext;
599 ret += journal_clean_one_cp_li 543 ret += journal_clean_one_cp_list(transaction->
600 t_checkpoint_l 544 t_checkpoint_list, &released);
601 /* 545 /*
602 * This function only frees up 546 * This function only frees up some memory if possible so we
603 * don't have an obligation to 547 * don't have an obligation to finish processing. Bail out if
604 * preemption requested: 548 * preemption requested:
605 */ 549 */
606 if (need_resched()) 550 if (need_resched())
607 goto out; 551 goto out;
608 if (released) 552 if (released)
609 continue; 553 continue;
610 /* 554 /*
611 * It is essential that we are 555 * It is essential that we are as careful as in the case of
612 * t_checkpoint_list with remo 556 * t_checkpoint_list with removing the buffer from the list as
613 * we can possibly see not yet 557 * we can possibly see not yet submitted buffers on io_list
614 */ 558 */
615 ret += journal_clean_one_cp_li 559 ret += journal_clean_one_cp_list(transaction->
616 t_checkpoint_i 560 t_checkpoint_io_list, &released);
617 if (need_resched()) 561 if (need_resched())
618 goto out; 562 goto out;
619 } while (transaction != last_transacti 563 } while (transaction != last_transaction);
620 out: 564 out:
621 return ret; 565 return ret;
622 } 566 }
623 567
624 /* 568 /*
625 * journal_remove_checkpoint: called after a b 569 * journal_remove_checkpoint: called after a buffer has been committed
626 * to disk (either by being write-back flushed 570 * to disk (either by being write-back flushed to disk, or being
627 * committed to the log). 571 * committed to the log).
628 * 572 *
629 * We cannot safely clean a transaction out of 573 * We cannot safely clean a transaction out of the log until all of the
630 * buffer updates committed in that transactio 574 * buffer updates committed in that transaction have safely been stored
631 * elsewhere on disk. To achieve this, all of 575 * elsewhere on disk. To achieve this, all of the buffers in a
632 * transaction need to be maintained on the tr 576 * transaction need to be maintained on the transaction's checkpoint
633 * lists until they have been rewritten, at wh 577 * lists until they have been rewritten, at which point this function is
634 * called to remove the buffer from the existi 578 * called to remove the buffer from the existing transaction's
635 * checkpoint lists. 579 * checkpoint lists.
636 * 580 *
637 * The function returns 1 if it frees the tran 581 * The function returns 1 if it frees the transaction, 0 otherwise.
638 * 582 *
639 * This function is called with the journal lo 583 * This function is called with the journal locked.
640 * This function is called with j_list_lock he 584 * This function is called with j_list_lock held.
641 * This function is called with jbd_lock_bh_st 585 * This function is called with jbd_lock_bh_state(jh2bh(jh))
642 */ 586 */
643 587
644 int __jbd2_journal_remove_checkpoint(struct jo 588 int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
645 { 589 {
646 transaction_t *transaction; 590 transaction_t *transaction;
647 journal_t *journal; 591 journal_t *journal;
648 int ret = 0; 592 int ret = 0;
649 593
650 JBUFFER_TRACE(jh, "entry"); 594 JBUFFER_TRACE(jh, "entry");
651 595
652 if ((transaction = jh->b_cp_transactio 596 if ((transaction = jh->b_cp_transaction) == NULL) {
653 JBUFFER_TRACE(jh, "not on tran 597 JBUFFER_TRACE(jh, "not on transaction");
654 goto out; 598 goto out;
655 } 599 }
656 journal = transaction->t_journal; 600 journal = transaction->t_journal;
657 601
658 __buffer_unlink(jh); 602 __buffer_unlink(jh);
659 jh->b_cp_transaction = NULL; 603 jh->b_cp_transaction = NULL;
660 604
661 if (transaction->t_checkpoint_list != 605 if (transaction->t_checkpoint_list != NULL ||
662 transaction->t_checkpoint_io_list 606 transaction->t_checkpoint_io_list != NULL)
663 goto out; 607 goto out;
664 JBUFFER_TRACE(jh, "transaction has no 608 JBUFFER_TRACE(jh, "transaction has no more buffers");
665 609
666 /* 610 /*
667 * There is one special case to worry 611 * There is one special case to worry about: if we have just pulled the
668 * buffer off a running or committing 612 * buffer off a running or committing transaction's checkpoint list,
669 * then even if the checkpoint list is 613 * then even if the checkpoint list is empty, the transaction obviously
670 * cannot be dropped! 614 * cannot be dropped!
671 * 615 *
672 * The locking here around t_state is 616 * The locking here around t_state is a bit sleazy.
673 * See the comment at the end of jbd2_ 617 * See the comment at the end of jbd2_journal_commit_transaction().
674 */ 618 */
675 if (transaction->t_state != T_FINISHED 619 if (transaction->t_state != T_FINISHED) {
676 JBUFFER_TRACE(jh, "belongs to 620 JBUFFER_TRACE(jh, "belongs to running/committing transaction");
677 goto out; 621 goto out;
678 } 622 }
679 623
680 /* OK, that was the last buffer for th 624 /* OK, that was the last buffer for the transaction: we can now
681 safely remove this transaction from 625 safely remove this transaction from the log */
682 626
683 __jbd2_journal_drop_transaction(journa 627 __jbd2_journal_drop_transaction(journal, transaction);
684 kfree(transaction); <<
685 628
686 /* Just in case anybody was waiting fo 629 /* Just in case anybody was waiting for more transactions to be
687 checkpointed... */ 630 checkpointed... */
688 wake_up(&journal->j_wait_logspace); 631 wake_up(&journal->j_wait_logspace);
689 ret = 1; 632 ret = 1;
690 out: 633 out:
691 JBUFFER_TRACE(jh, "exit"); 634 JBUFFER_TRACE(jh, "exit");
692 return ret; 635 return ret;
693 } 636 }
694 637
695 /* 638 /*
696 * journal_insert_checkpoint: put a committed 639 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
697 * list so that we know when it is safe to cle 640 * list so that we know when it is safe to clean the transaction out of
698 * the log. 641 * the log.
699 * 642 *
700 * Called with the journal locked. 643 * Called with the journal locked.
701 * Called with j_list_lock held. 644 * Called with j_list_lock held.
702 */ 645 */
703 void __jbd2_journal_insert_checkpoint(struct j 646 void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
704 transaction_t * 647 transaction_t *transaction)
705 { 648 {
706 JBUFFER_TRACE(jh, "entry"); 649 JBUFFER_TRACE(jh, "entry");
707 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh) 650 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
708 J_ASSERT_JH(jh, jh->b_cp_transaction = 651 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
709 652
710 jh->b_cp_transaction = transaction; 653 jh->b_cp_transaction = transaction;
711 654
712 if (!transaction->t_checkpoint_list) { 655 if (!transaction->t_checkpoint_list) {
713 jh->b_cpnext = jh->b_cpprev = 656 jh->b_cpnext = jh->b_cpprev = jh;
714 } else { 657 } else {
715 jh->b_cpnext = transaction->t_ 658 jh->b_cpnext = transaction->t_checkpoint_list;
716 jh->b_cpprev = transaction->t_ 659 jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
717 jh->b_cpprev->b_cpnext = jh; 660 jh->b_cpprev->b_cpnext = jh;
718 jh->b_cpnext->b_cpprev = jh; 661 jh->b_cpnext->b_cpprev = jh;
719 } 662 }
720 transaction->t_checkpoint_list = jh; 663 transaction->t_checkpoint_list = jh;
721 } 664 }
722 665
723 /* 666 /*
724 * We've finished with this transaction struct 667 * We've finished with this transaction structure: adios...
725 * 668 *
726 * The transaction must have no links except f 669 * The transaction must have no links except for the checkpoint by this
727 * point. 670 * point.
728 * 671 *
729 * Called with the journal locked. 672 * Called with the journal locked.
730 * Called with j_list_lock held. 673 * Called with j_list_lock held.
731 */ 674 */
732 675
733 void __jbd2_journal_drop_transaction(journal_t 676 void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
734 { 677 {
735 assert_spin_locked(&journal->j_list_lo 678 assert_spin_locked(&journal->j_list_lock);
736 if (transaction->t_cpnext) { 679 if (transaction->t_cpnext) {
737 transaction->t_cpnext->t_cppre 680 transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
738 transaction->t_cpprev->t_cpnex 681 transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
739 if (journal->j_checkpoint_tran 682 if (journal->j_checkpoint_transactions == transaction)
740 journal->j_checkpoint_ 683 journal->j_checkpoint_transactions =
741 transaction->t 684 transaction->t_cpnext;
742 if (journal->j_checkpoint_tran 685 if (journal->j_checkpoint_transactions == transaction)
743 journal->j_checkpoint_ 686 journal->j_checkpoint_transactions = NULL;
744 } 687 }
745 688
746 J_ASSERT(transaction->t_state == T_FIN 689 J_ASSERT(transaction->t_state == T_FINISHED);
747 J_ASSERT(transaction->t_buffers == NUL 690 J_ASSERT(transaction->t_buffers == NULL);
>> 691 J_ASSERT(transaction->t_sync_datalist == NULL);
748 J_ASSERT(transaction->t_forget == NULL 692 J_ASSERT(transaction->t_forget == NULL);
749 J_ASSERT(transaction->t_iobuf_list == 693 J_ASSERT(transaction->t_iobuf_list == NULL);
750 J_ASSERT(transaction->t_shadow_list == 694 J_ASSERT(transaction->t_shadow_list == NULL);
751 J_ASSERT(transaction->t_log_list == NU 695 J_ASSERT(transaction->t_log_list == NULL);
752 J_ASSERT(transaction->t_checkpoint_lis 696 J_ASSERT(transaction->t_checkpoint_list == NULL);
753 J_ASSERT(transaction->t_checkpoint_io_ 697 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
754 J_ASSERT(transaction->t_updates == 0); 698 J_ASSERT(transaction->t_updates == 0);
755 J_ASSERT(journal->j_committing_transac 699 J_ASSERT(journal->j_committing_transaction != transaction);
756 J_ASSERT(journal->j_running_transactio 700 J_ASSERT(journal->j_running_transaction != transaction);
757 701
758 jbd_debug(1, "Dropping transaction %d, 702 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
>> 703 kfree(transaction);
759 } 704 }
760 705
|
This page was automatically generated by the
LXR engine.
|