Diff markup
1 /* handling of writes to regular files and wri 1 /* handling of writes to regular files and writing back to the server
2 * 2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.c 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redi 6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Genera 7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundatio 8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any l 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 #include <linux/backing-dev.h> 11 #include <linux/backing-dev.h>
12 #include <linux/slab.h> 12 #include <linux/slab.h>
13 #include <linux/fs.h> 13 #include <linux/fs.h>
14 #include <linux/pagemap.h> 14 #include <linux/pagemap.h>
15 #include <linux/writeback.h> 15 #include <linux/writeback.h>
16 #include <linux/pagevec.h> 16 #include <linux/pagevec.h>
17 #include "internal.h" 17 #include "internal.h"
18 18
19 static int afs_write_back_from_locked_page(str 19 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
20 str 20 struct page *page);
21 21
/*
 * note that a page has been dirtied and so will need writing back
 * - hands the page to __set_page_dirty_nobuffers() and passes its result
 *   back to the caller
 */
int afs_set_page_dirty(struct page *page)
{
	int ret;

	_enter("");
	ret = __set_page_dirty_nobuffers(page);
	return ret;
}
30 30
31 /* 31 /*
32 * unlink a writeback record because its usage 32 * unlink a writeback record because its usage has reached zero
33 * - must be called with the wb->vnode->writeb 33 * - must be called with the wb->vnode->writeback_lock held
34 */ 34 */
35 static void afs_unlink_writeback(struct afs_wr 35 static void afs_unlink_writeback(struct afs_writeback *wb)
36 { 36 {
37 struct afs_writeback *front; 37 struct afs_writeback *front;
38 struct afs_vnode *vnode = wb->vnode; 38 struct afs_vnode *vnode = wb->vnode;
39 39
40 list_del_init(&wb->link); 40 list_del_init(&wb->link);
41 if (!list_empty(&vnode->writebacks)) { 41 if (!list_empty(&vnode->writebacks)) {
42 /* if an fsync rises to the fr 42 /* if an fsync rises to the front of the queue then wake it
43 * up */ 43 * up */
44 front = list_entry(vnode->writ 44 front = list_entry(vnode->writebacks.next,
45 struct afs_ 45 struct afs_writeback, link);
46 if (front->state == AFS_WBACK_ 46 if (front->state == AFS_WBACK_SYNCING) {
47 _debug("wake up sync") 47 _debug("wake up sync");
48 front->state = AFS_WBA 48 front->state = AFS_WBACK_COMPLETE;
49 wake_up(&front->waitq) 49 wake_up(&front->waitq);
50 } 50 }
51 } 51 }
52 } 52 }
53 53
54 /* 54 /*
55 * free a writeback record 55 * free a writeback record
56 */ 56 */
57 static void afs_free_writeback(struct afs_writ 57 static void afs_free_writeback(struct afs_writeback *wb)
58 { 58 {
59 _enter(""); 59 _enter("");
60 key_put(wb->key); 60 key_put(wb->key);
61 kfree(wb); 61 kfree(wb);
62 } 62 }
63 63
64 /* 64 /*
65 * dispose of a reference to a writeback recor 65 * dispose of a reference to a writeback record
66 */ 66 */
67 void afs_put_writeback(struct afs_writeback *w 67 void afs_put_writeback(struct afs_writeback *wb)
68 { 68 {
69 struct afs_vnode *vnode = wb->vnode; 69 struct afs_vnode *vnode = wb->vnode;
70 70
71 _enter("{%d}", wb->usage); 71 _enter("{%d}", wb->usage);
72 72
73 spin_lock(&vnode->writeback_lock); 73 spin_lock(&vnode->writeback_lock);
74 if (--wb->usage == 0) 74 if (--wb->usage == 0)
75 afs_unlink_writeback(wb); 75 afs_unlink_writeback(wb);
76 else 76 else
77 wb = NULL; 77 wb = NULL;
78 spin_unlock(&vnode->writeback_lock); 78 spin_unlock(&vnode->writeback_lock);
79 if (wb) 79 if (wb)
80 afs_free_writeback(wb); 80 afs_free_writeback(wb);
81 } 81 }
82 82
83 /* 83 /*
84 * partly or wholly fill a page that's under p 84 * partly or wholly fill a page that's under preparation for writing
85 */ 85 */
86 static int afs_fill_page(struct afs_vnode *vno 86 static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
87 unsigned start, unsig 87 unsigned start, unsigned len, struct page *page)
88 { 88 {
89 int ret; 89 int ret;
90 90
91 _enter(",,%u,%u", start, len); 91 _enter(",,%u,%u", start, len);
92 92
93 ASSERTCMP(start + len, <=, PAGE_SIZE); 93 ASSERTCMP(start + len, <=, PAGE_SIZE);
94 94
95 ret = afs_vnode_fetch_data(vnode, key, 95 ret = afs_vnode_fetch_data(vnode, key, start, len, page);
96 if (ret < 0) { 96 if (ret < 0) {
97 if (ret == -ENOENT) { 97 if (ret == -ENOENT) {
98 _debug("got NOENT from 98 _debug("got NOENT from server"
99 " - marking fil 99 " - marking file deleted and stale");
100 set_bit(AFS_VNODE_DELE 100 set_bit(AFS_VNODE_DELETED, &vnode->flags);
101 ret = -ESTALE; 101 ret = -ESTALE;
102 } 102 }
103 } 103 }
104 104
105 _leave(" = %d", ret); 105 _leave(" = %d", ret);
106 return ret; 106 return ret;
107 } 107 }
108 108
109 /* 109 /*
110 * prepare a page for being written to 110 * prepare a page for being written to
111 */ 111 */
112 static int afs_prepare_page(struct afs_vnode * 112 static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
113 struct key *key, u 113 struct key *key, unsigned offset, unsigned to)
114 { 114 {
115 unsigned eof, tail, start, stop, len; 115 unsigned eof, tail, start, stop, len;
116 loff_t i_size, pos; 116 loff_t i_size, pos;
117 void *p; 117 void *p;
118 int ret; 118 int ret;
119 119
120 _enter(""); 120 _enter("");
121 121
122 if (offset == 0 && to == PAGE_SIZE) 122 if (offset == 0 && to == PAGE_SIZE)
123 return 0; 123 return 0;
124 124
125 p = kmap_atomic(page, KM_USER0); 125 p = kmap_atomic(page, KM_USER0);
126 126
127 i_size = i_size_read(&vnode->vfs_inode 127 i_size = i_size_read(&vnode->vfs_inode);
128 pos = (loff_t) page->index << PAGE_SHI 128 pos = (loff_t) page->index << PAGE_SHIFT;
129 if (pos >= i_size) { 129 if (pos >= i_size) {
130 /* partial write, page beyond 130 /* partial write, page beyond EOF */
131 _debug("beyond"); 131 _debug("beyond");
132 if (offset > 0) 132 if (offset > 0)
133 memset(p, 0, offset); 133 memset(p, 0, offset);
134 if (to < PAGE_SIZE) 134 if (to < PAGE_SIZE)
135 memset(p + to, 0, PAGE 135 memset(p + to, 0, PAGE_SIZE - to);
136 kunmap_atomic(p, KM_USER0); 136 kunmap_atomic(p, KM_USER0);
137 return 0; 137 return 0;
138 } 138 }
139 139
140 if (i_size - pos >= PAGE_SIZE) { 140 if (i_size - pos >= PAGE_SIZE) {
141 /* partial write, page entirel 141 /* partial write, page entirely before EOF */
142 _debug("before"); 142 _debug("before");
143 tail = eof = PAGE_SIZE; 143 tail = eof = PAGE_SIZE;
144 } else { 144 } else {
145 /* partial write, page overlap 145 /* partial write, page overlaps EOF */
146 eof = i_size - pos; 146 eof = i_size - pos;
147 _debug("overlap %u", eof); 147 _debug("overlap %u", eof);
148 tail = max(eof, to); 148 tail = max(eof, to);
149 if (tail < PAGE_SIZE) 149 if (tail < PAGE_SIZE)
150 memset(p + tail, 0, PA 150 memset(p + tail, 0, PAGE_SIZE - tail);
151 if (offset > eof) 151 if (offset > eof)
152 memset(p + eof, 0, PAG 152 memset(p + eof, 0, PAGE_SIZE - eof);
153 } 153 }
154 154
155 kunmap_atomic(p, KM_USER0); 155 kunmap_atomic(p, KM_USER0);
156 156
157 ret = 0; 157 ret = 0;
158 if (offset > 0 || eof > to) { 158 if (offset > 0 || eof > to) {
159 /* need to fill one or two bit 159 /* need to fill one or two bits that aren't going to be written
160 * (cover both fillers in one 160 * (cover both fillers in one read if there are two) */
161 start = (offset > 0) ? 0 : to; 161 start = (offset > 0) ? 0 : to;
162 stop = (eof > to) ? eof : offs 162 stop = (eof > to) ? eof : offset;
163 len = stop - start; 163 len = stop - start;
164 _debug("wr=%u-%u av=0-%u rd=%u 164 _debug("wr=%u-%u av=0-%u rd=%u@%u",
165 offset, to, eof, start, 165 offset, to, eof, start, len);
166 ret = afs_fill_page(vnode, key 166 ret = afs_fill_page(vnode, key, start, len, page);
167 } 167 }
168 168
169 _leave(" = %d", ret); 169 _leave(" = %d", ret);
170 return ret; 170 return ret;
171 } 171 }
172 172
173 /* 173 /*
174 * prepare to perform part of a write to a pag 174 * prepare to perform part of a write to a page
175 * - the caller holds the page locked, prevent 175 * - the caller holds the page locked, preventing it from being written out or
176 * modified by anyone else 176 * modified by anyone else
177 */ 177 */
178 int afs_prepare_write(struct file *file, struc 178 int afs_prepare_write(struct file *file, struct page *page,
179 unsigned offset, unsigne 179 unsigned offset, unsigned to)
180 { 180 {
181 struct afs_writeback *candidate, *wb; 181 struct afs_writeback *candidate, *wb;
182 struct afs_vnode *vnode = AFS_FS_I(fil 182 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
183 struct key *key = file->private_data; 183 struct key *key = file->private_data;
184 pgoff_t index; 184 pgoff_t index;
185 int ret; 185 int ret;
186 186
187 _enter("{%x:%u},{%lx},%u,%u", 187 _enter("{%x:%u},{%lx},%u,%u",
188 vnode->fid.vid, vnode->fid.vnod 188 vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
189 189
190 candidate = kzalloc(sizeof(*candidate) 190 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
191 if (!candidate) 191 if (!candidate)
192 return -ENOMEM; 192 return -ENOMEM;
193 candidate->vnode = vnode; 193 candidate->vnode = vnode;
194 candidate->first = candidate->last = p 194 candidate->first = candidate->last = page->index;
195 candidate->offset_first = offset; 195 candidate->offset_first = offset;
196 candidate->to_last = to; 196 candidate->to_last = to;
197 candidate->usage = 1; 197 candidate->usage = 1;
198 candidate->state = AFS_WBACK_PENDING; 198 candidate->state = AFS_WBACK_PENDING;
199 init_waitqueue_head(&candidate->waitq) 199 init_waitqueue_head(&candidate->waitq);
200 200
201 if (!PageUptodate(page)) { 201 if (!PageUptodate(page)) {
202 _debug("not up to date"); 202 _debug("not up to date");
203 ret = afs_prepare_page(vnode, 203 ret = afs_prepare_page(vnode, page, key, offset, to);
204 if (ret < 0) { 204 if (ret < 0) {
205 kfree(candidate); 205 kfree(candidate);
206 _leave(" = %d [prep]", 206 _leave(" = %d [prep]", ret);
207 return ret; 207 return ret;
208 } 208 }
209 } 209 }
210 210
211 try_again: 211 try_again:
212 index = page->index; 212 index = page->index;
213 spin_lock(&vnode->writeback_lock); 213 spin_lock(&vnode->writeback_lock);
214 214
215 /* see if this page is already pending 215 /* see if this page is already pending a writeback under a suitable key
216 * - if so we can just join onto that 216 * - if so we can just join onto that one */
217 wb = (struct afs_writeback *) page_pri 217 wb = (struct afs_writeback *) page_private(page);
218 if (wb) { 218 if (wb) {
219 if (wb->key == key && wb->stat 219 if (wb->key == key && wb->state == AFS_WBACK_PENDING)
220 goto subsume_in_curren 220 goto subsume_in_current_wb;
221 goto flush_conflicting_wb; 221 goto flush_conflicting_wb;
222 } 222 }
223 223
224 if (index > 0) { 224 if (index > 0) {
225 /* see if we can find an alrea 225 /* see if we can find an already pending writeback that we can
226 * append this page to */ 226 * append this page to */
227 list_for_each_entry(wb, &vnode 227 list_for_each_entry(wb, &vnode->writebacks, link) {
228 if (wb->last == index 228 if (wb->last == index - 1 && wb->key == key &&
229 wb->state == AFS_W 229 wb->state == AFS_WBACK_PENDING)
230 goto append_to 230 goto append_to_previous_wb;
231 } 231 }
232 } 232 }
233 233
234 list_add_tail(&candidate->link, &vnode 234 list_add_tail(&candidate->link, &vnode->writebacks);
235 candidate->key = key_get(key); 235 candidate->key = key_get(key);
236 spin_unlock(&vnode->writeback_lock); 236 spin_unlock(&vnode->writeback_lock);
237 SetPagePrivate(page); 237 SetPagePrivate(page);
238 set_page_private(page, (unsigned long) 238 set_page_private(page, (unsigned long) candidate);
239 _leave(" = 0 [new]"); 239 _leave(" = 0 [new]");
240 return 0; 240 return 0;
241 241
242 subsume_in_current_wb: 242 subsume_in_current_wb:
243 _debug("subsume"); 243 _debug("subsume");
244 ASSERTRANGE(wb->first, <=, index, <=, 244 ASSERTRANGE(wb->first, <=, index, <=, wb->last);
245 if (index == wb->first && offset < wb- 245 if (index == wb->first && offset < wb->offset_first)
246 wb->offset_first = offset; 246 wb->offset_first = offset;
247 if (index == wb->last && to > wb->to_l 247 if (index == wb->last && to > wb->to_last)
248 wb->to_last = to; 248 wb->to_last = to;
249 spin_unlock(&vnode->writeback_lock); 249 spin_unlock(&vnode->writeback_lock);
250 kfree(candidate); 250 kfree(candidate);
251 _leave(" = 0 [sub]"); 251 _leave(" = 0 [sub]");
252 return 0; 252 return 0;
253 253
254 append_to_previous_wb: 254 append_to_previous_wb:
255 _debug("append into %lx-%lx", wb->firs 255 _debug("append into %lx-%lx", wb->first, wb->last);
256 wb->usage++; 256 wb->usage++;
257 wb->last++; 257 wb->last++;
258 wb->to_last = to; 258 wb->to_last = to;
259 spin_unlock(&vnode->writeback_lock); 259 spin_unlock(&vnode->writeback_lock);
260 SetPagePrivate(page); 260 SetPagePrivate(page);
261 set_page_private(page, (unsigned long) 261 set_page_private(page, (unsigned long) wb);
262 kfree(candidate); 262 kfree(candidate);
263 _leave(" = 0 [app]"); 263 _leave(" = 0 [app]");
264 return 0; 264 return 0;
265 265
266 /* the page is currently bound to anot 266 /* the page is currently bound to another context, so if it's dirty we
267 * need to flush it before we can use 267 * need to flush it before we can use the new context */
268 flush_conflicting_wb: 268 flush_conflicting_wb:
269 _debug("flush conflict"); 269 _debug("flush conflict");
270 if (wb->state == AFS_WBACK_PENDING) 270 if (wb->state == AFS_WBACK_PENDING)
271 wb->state = AFS_WBACK_CONFLICT 271 wb->state = AFS_WBACK_CONFLICTING;
272 spin_unlock(&vnode->writeback_lock); 272 spin_unlock(&vnode->writeback_lock);
273 if (PageDirty(page)) { 273 if (PageDirty(page)) {
274 ret = afs_write_back_from_lock 274 ret = afs_write_back_from_locked_page(wb, page);
275 if (ret < 0) { 275 if (ret < 0) {
276 afs_put_writeback(cand 276 afs_put_writeback(candidate);
277 _leave(" = %d", ret); 277 _leave(" = %d", ret);
278 return ret; 278 return ret;
279 } 279 }
280 } 280 }
281 281
282 /* the page holds a ref on the writeba 282 /* the page holds a ref on the writeback record */
283 afs_put_writeback(wb); 283 afs_put_writeback(wb);
284 set_page_private(page, 0); 284 set_page_private(page, 0);
285 ClearPagePrivate(page); 285 ClearPagePrivate(page);
286 goto try_again; 286 goto try_again;
287 } 287 }
288 288
289 /* 289 /*
290 * finalise part of a write to a page 290 * finalise part of a write to a page
291 */ 291 */
292 int afs_commit_write(struct file *file, struct 292 int afs_commit_write(struct file *file, struct page *page,
293 unsigned offset, unsigned 293 unsigned offset, unsigned to)
294 { 294 {
295 struct afs_vnode *vnode = AFS_FS_I(fil 295 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
296 loff_t i_size, maybe_i_size; 296 loff_t i_size, maybe_i_size;
297 297
298 _enter("{%x:%u},{%lx},%u,%u", 298 _enter("{%x:%u},{%lx},%u,%u",
299 vnode->fid.vid, vnode->fid.vnod 299 vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
300 300
301 maybe_i_size = (loff_t) page->index << 301 maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
302 maybe_i_size += to; 302 maybe_i_size += to;
303 303
304 i_size = i_size_read(&vnode->vfs_inode 304 i_size = i_size_read(&vnode->vfs_inode);
305 if (maybe_i_size > i_size) { 305 if (maybe_i_size > i_size) {
306 spin_lock(&vnode->writeback_lo 306 spin_lock(&vnode->writeback_lock);
307 i_size = i_size_read(&vnode->v 307 i_size = i_size_read(&vnode->vfs_inode);
308 if (maybe_i_size > i_size) 308 if (maybe_i_size > i_size)
309 i_size_write(&vnode->v 309 i_size_write(&vnode->vfs_inode, maybe_i_size);
310 spin_unlock(&vnode->writeback_ 310 spin_unlock(&vnode->writeback_lock);
311 } 311 }
312 312
313 SetPageUptodate(page); 313 SetPageUptodate(page);
314 set_page_dirty(page); 314 set_page_dirty(page);
315 if (PageDirty(page)) 315 if (PageDirty(page))
316 _debug("dirtied"); 316 _debug("dirtied");
317 317
318 return 0; 318 return 0;
319 } 319 }
320 320
321 /* 321 /*
322 * kill all the pages in the given range 322 * kill all the pages in the given range
323 */ 323 */
324 static void afs_kill_pages(struct afs_vnode *v 324 static void afs_kill_pages(struct afs_vnode *vnode, bool error,
325 pgoff_t first, pgof 325 pgoff_t first, pgoff_t last)
326 { 326 {
327 struct pagevec pv; 327 struct pagevec pv;
328 unsigned count, loop; 328 unsigned count, loop;
329 329
330 _enter("{%x:%u},%lx-%lx", 330 _enter("{%x:%u},%lx-%lx",
331 vnode->fid.vid, vnode->fid.vnod 331 vnode->fid.vid, vnode->fid.vnode, first, last);
332 332
333 pagevec_init(&pv, 0); 333 pagevec_init(&pv, 0);
334 334
335 do { 335 do {
336 _debug("kill %lx-%lx", first, 336 _debug("kill %lx-%lx", first, last);
337 337
338 count = last - first + 1; 338 count = last - first + 1;
339 if (count > PAGEVEC_SIZE) 339 if (count > PAGEVEC_SIZE)
340 count = PAGEVEC_SIZE; 340 count = PAGEVEC_SIZE;
341 pv.nr = find_get_pages_contig( 341 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
342 342 first, count, pv.pages);
343 ASSERTCMP(pv.nr, ==, count); 343 ASSERTCMP(pv.nr, ==, count);
344 344
345 for (loop = 0; loop < count; l 345 for (loop = 0; loop < count; loop++) {
346 ClearPageUptodate(pv.p 346 ClearPageUptodate(pv.pages[loop]);
347 if (error) 347 if (error)
348 SetPageError(p 348 SetPageError(pv.pages[loop]);
349 end_page_writeback(pv. 349 end_page_writeback(pv.pages[loop]);
350 } 350 }
351 351
352 __pagevec_release(&pv); 352 __pagevec_release(&pv);
353 } while (first < last); 353 } while (first < last);
354 354
355 _leave(""); 355 _leave("");
356 } 356 }
357 357
358 /* 358 /*
359 * synchronously write back the locked page an 359 * synchronously write back the locked page and any subsequent non-locked dirty
360 * pages also covered by the same writeback re 360 * pages also covered by the same writeback record
361 */ 361 */
362 static int afs_write_back_from_locked_page(str 362 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
363 str 363 struct page *primary_page)
364 { 364 {
365 struct page *pages[8], *page; 365 struct page *pages[8], *page;
366 unsigned long count; 366 unsigned long count;
367 unsigned n, offset, to; 367 unsigned n, offset, to;
368 pgoff_t start, first, last; 368 pgoff_t start, first, last;
369 int loop, ret; 369 int loop, ret;
370 370
371 _enter(",%lx", primary_page->index); 371 _enter(",%lx", primary_page->index);
372 372
373 count = 1; 373 count = 1;
374 if (!clear_page_dirty_for_io(primary_p 374 if (!clear_page_dirty_for_io(primary_page))
375 BUG(); 375 BUG();
376 if (test_set_page_writeback(primary_pa 376 if (test_set_page_writeback(primary_page))
377 BUG(); 377 BUG();
378 378
379 /* find all consecutive lockable dirty 379 /* find all consecutive lockable dirty pages, stopping when we find a
380 * page that is not immediately lockab 380 * page that is not immediately lockable, is not dirty or is missing,
381 * or we reach the end of the range */ 381 * or we reach the end of the range */
382 start = primary_page->index; 382 start = primary_page->index;
383 if (start >= wb->last) 383 if (start >= wb->last)
384 goto no_more; 384 goto no_more;
385 start++; 385 start++;
386 do { 386 do {
387 _debug("more %lx [%lx]", start 387 _debug("more %lx [%lx]", start, count);
388 n = wb->last - start + 1; 388 n = wb->last - start + 1;
389 if (n > ARRAY_SIZE(pages)) 389 if (n > ARRAY_SIZE(pages))
390 n = ARRAY_SIZE(pages); 390 n = ARRAY_SIZE(pages);
391 n = find_get_pages_contig(wb-> 391 n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
392 star 392 start, n, pages);
393 _debug("fgpc %u", n); 393 _debug("fgpc %u", n);
394 if (n == 0) 394 if (n == 0)
395 goto no_more; 395 goto no_more;
396 if (pages[0]->index != start) 396 if (pages[0]->index != start) {
397 do { 397 do {
398 put_page(pages 398 put_page(pages[--n]);
399 } while (n > 0); 399 } while (n > 0);
400 goto no_more; 400 goto no_more;
401 } 401 }
402 402
403 for (loop = 0; loop < n; loop+ 403 for (loop = 0; loop < n; loop++) {
404 page = pages[loop]; 404 page = pages[loop];
405 if (page->index > wb-> 405 if (page->index > wb->last)
406 break; 406 break;
407 if (TestSetPageLocked( 407 if (TestSetPageLocked(page))
408 break; 408 break;
409 if (!PageDirty(page) | 409 if (!PageDirty(page) ||
410 page_private(page) 410 page_private(page) != (unsigned long) wb) {
411 unlock_page(pa 411 unlock_page(page);
412 break; 412 break;
413 } 413 }
414 if (!clear_page_dirty_ 414 if (!clear_page_dirty_for_io(page))
415 BUG(); 415 BUG();
416 if (test_set_page_writ 416 if (test_set_page_writeback(page))
417 BUG(); 417 BUG();
418 unlock_page(page); 418 unlock_page(page);
419 put_page(page); 419 put_page(page);
420 } 420 }
421 count += loop; 421 count += loop;
422 if (loop < n) { 422 if (loop < n) {
423 for (; loop < n; loop+ 423 for (; loop < n; loop++)
424 put_page(pages 424 put_page(pages[loop]);
425 goto no_more; 425 goto no_more;
426 } 426 }
427 427
428 start += loop; 428 start += loop;
429 } while (start <= wb->last && count < 429 } while (start <= wb->last && count < 65536);
430 430
431 no_more: 431 no_more:
432 /* we now have a contiguous set of dir 432 /* we now have a contiguous set of dirty pages, each with writeback set
433 * and the dirty mark cleared; the fir 433 * and the dirty mark cleared; the first page is locked and must remain
434 * so, all the rest are unlocked */ 434 * so, all the rest are unlocked */
435 first = primary_page->index; 435 first = primary_page->index;
436 last = first + count - 1; 436 last = first + count - 1;
437 437
438 offset = (first == wb->first) ? wb->of 438 offset = (first == wb->first) ? wb->offset_first : 0;
439 to = (last == wb->last) ? wb->to_last 439 to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
440 440
441 _debug("write back %lx[%u..] to %lx[.. 441 _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
442 442
443 ret = afs_vnode_store_data(wb, first, 443 ret = afs_vnode_store_data(wb, first, last, offset, to);
444 if (ret < 0) { 444 if (ret < 0) {
445 switch (ret) { 445 switch (ret) {
446 case -EDQUOT: 446 case -EDQUOT:
447 case -ENOSPC: 447 case -ENOSPC:
448 set_bit(AS_ENOSPC, 448 set_bit(AS_ENOSPC,
449 &wb->vnode->vf 449 &wb->vnode->vfs_inode.i_mapping->flags);
450 break; 450 break;
451 case -EROFS: 451 case -EROFS:
452 case -EIO: 452 case -EIO:
453 case -EREMOTEIO: 453 case -EREMOTEIO:
454 case -EFBIG: 454 case -EFBIG:
455 case -ENOENT: 455 case -ENOENT:
456 case -ENOMEDIUM: 456 case -ENOMEDIUM:
457 case -ENXIO: 457 case -ENXIO:
458 afs_kill_pages(wb->vno 458 afs_kill_pages(wb->vnode, true, first, last);
459 set_bit(AS_EIO, &wb->v 459 set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
460 break; 460 break;
461 case -EACCES: 461 case -EACCES:
462 case -EPERM: 462 case -EPERM:
463 case -ENOKEY: 463 case -ENOKEY:
464 case -EKEYEXPIRED: 464 case -EKEYEXPIRED:
465 case -EKEYREJECTED: 465 case -EKEYREJECTED:
466 case -EKEYREVOKED: 466 case -EKEYREVOKED:
467 afs_kill_pages(wb->vno 467 afs_kill_pages(wb->vnode, false, first, last);
468 break; 468 break;
469 default: 469 default:
470 break; 470 break;
471 } 471 }
472 } else { 472 } else {
473 ret = count; 473 ret = count;
474 } 474 }
475 475
476 _leave(" = %d", ret); 476 _leave(" = %d", ret);
477 return ret; 477 return ret;
478 } 478 }
479 479
480 /* 480 /*
481 * write a page back to the server 481 * write a page back to the server
482 * - the caller locked the page for us 482 * - the caller locked the page for us
483 */ 483 */
484 int afs_writepage(struct page *page, struct wr 484 int afs_writepage(struct page *page, struct writeback_control *wbc)
485 { 485 {
486 struct backing_dev_info *bdi = page->m 486 struct backing_dev_info *bdi = page->mapping->backing_dev_info;
487 struct afs_writeback *wb; 487 struct afs_writeback *wb;
488 int ret; 488 int ret;
489 489
490 _enter("{%lx},", page->index); 490 _enter("{%lx},", page->index);
491 491
492 wb = (struct afs_writeback *) page_pri 492 wb = (struct afs_writeback *) page_private(page);
493 ASSERT(wb != NULL); 493 ASSERT(wb != NULL);
494 494
495 ret = afs_write_back_from_locked_page( 495 ret = afs_write_back_from_locked_page(wb, page);
496 unlock_page(page); 496 unlock_page(page);
497 if (ret < 0) { 497 if (ret < 0) {
498 _leave(" = %d", ret); 498 _leave(" = %d", ret);
499 return 0; 499 return 0;
500 } 500 }
501 501
502 wbc->nr_to_write -= ret; 502 wbc->nr_to_write -= ret;
503 if (wbc->nonblocking && bdi_write_cong 503 if (wbc->nonblocking && bdi_write_congested(bdi))
504 wbc->encountered_congestion = 504 wbc->encountered_congestion = 1;
505 505
506 _leave(" = 0"); 506 _leave(" = 0");
507 return 0; 507 return 0;
508 } 508 }
509 509
510 /* 510 /*
511 * write a region of pages back to the server 511 * write a region of pages back to the server
512 */ 512 */
513 static int afs_writepages_region(struct addres 513 static int afs_writepages_region(struct address_space *mapping,
514 struct writeb 514 struct writeback_control *wbc,
515 pgoff_t index 515 pgoff_t index, pgoff_t end, pgoff_t *_next)
516 { 516 {
517 struct backing_dev_info *bdi = mapping 517 struct backing_dev_info *bdi = mapping->backing_dev_info;
518 struct afs_writeback *wb; 518 struct afs_writeback *wb;
519 struct page *page; 519 struct page *page;
520 int ret, n; 520 int ret, n;
521 521
522 _enter(",,%lx,%lx,", index, end); 522 _enter(",,%lx,%lx,", index, end);
523 523
524 do { 524 do {
525 n = find_get_pages_tag(mapping 525 n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
526 1, &pag 526 1, &page);
527 if (!n) 527 if (!n)
528 break; 528 break;
529 529
530 _debug("wback %lx", page->inde 530 _debug("wback %lx", page->index);
531 531
532 if (page->index > end) { 532 if (page->index > end) {
533 *_next = index; 533 *_next = index;
534 page_cache_release(pag 534 page_cache_release(page);
535 _leave(" = 0 [%lx]", * 535 _leave(" = 0 [%lx]", *_next);
536 return 0; 536 return 0;
537 } 537 }
538 538
539 /* at this point we hold neith 539 /* at this point we hold neither mapping->tree_lock nor lock on
540 * the page itself: the page m 540 * the page itself: the page may be truncated or invalidated
541 * (changing page->mapping to 541 * (changing page->mapping to NULL), or even swizzled back from
542 * swapper_space to tmpfs file 542 * swapper_space to tmpfs file mapping
543 */ 543 */
544 lock_page(page); 544 lock_page(page);
545 545
546 if (page->mapping != mapping) 546 if (page->mapping != mapping) {
547 unlock_page(page); 547 unlock_page(page);
548 page_cache_release(pag 548 page_cache_release(page);
549 continue; 549 continue;
550 } 550 }
551 551
552 if (wbc->sync_mode != WB_SYNC_ 552 if (wbc->sync_mode != WB_SYNC_NONE)
553 wait_on_page_writeback 553 wait_on_page_writeback(page);
554 554
555 if (PageWriteback(page) || !Pa 555 if (PageWriteback(page) || !PageDirty(page)) {
556 unlock_page(page); 556 unlock_page(page);
557 continue; 557 continue;
558 } 558 }
559 559
560 wb = (struct afs_writeback *) 560 wb = (struct afs_writeback *) page_private(page);
561 ASSERT(wb != NULL); 561 ASSERT(wb != NULL);
562 562
563 spin_lock(&wb->vnode->writebac 563 spin_lock(&wb->vnode->writeback_lock);
564 wb->state = AFS_WBACK_WRITING; 564 wb->state = AFS_WBACK_WRITING;
565 spin_unlock(&wb->vnode->writeb 565 spin_unlock(&wb->vnode->writeback_lock);
566 566
567 ret = afs_write_back_from_lock 567 ret = afs_write_back_from_locked_page(wb, page);
568 unlock_page(page); 568 unlock_page(page);
569 page_cache_release(page); 569 page_cache_release(page);
570 if (ret < 0) { 570 if (ret < 0) {
571 _leave(" = %d", ret); 571 _leave(" = %d", ret);
572 return ret; 572 return ret;
573 } 573 }
574 574
575 wbc->nr_to_write -= ret; 575 wbc->nr_to_write -= ret;
576 576
577 if (wbc->nonblocking && bdi_wr 577 if (wbc->nonblocking && bdi_write_congested(bdi)) {
578 wbc->encountered_conge 578 wbc->encountered_congestion = 1;
579 break; 579 break;
580 } 580 }
581 581
582 cond_resched(); 582 cond_resched();
583 } while (index < end && wbc->nr_to_wri 583 } while (index < end && wbc->nr_to_write > 0);
584 584
585 *_next = index; 585 *_next = index;
586 _leave(" = 0 [%lx]", *_next); 586 _leave(" = 0 [%lx]", *_next);
587 return 0; 587 return 0;
588 } 588 }
589 589
590 /* 590 /*
591 * write some of the pending data back to the 591 * write some of the pending data back to the server
592 */ 592 */
593 int afs_writepages(struct address_space *mappi 593 int afs_writepages(struct address_space *mapping,
594 struct writeback_control *w 594 struct writeback_control *wbc)
595 { 595 {
596 struct backing_dev_info *bdi = mapping 596 struct backing_dev_info *bdi = mapping->backing_dev_info;
597 pgoff_t start, end, next; 597 pgoff_t start, end, next;
598 int ret; 598 int ret;
599 599
600 _enter(""); 600 _enter("");
601 601
602 if (wbc->nonblocking && bdi_write_cong 602 if (wbc->nonblocking && bdi_write_congested(bdi)) {
603 wbc->encountered_congestion = 603 wbc->encountered_congestion = 1;
604 _leave(" = 0 [congest]"); 604 _leave(" = 0 [congest]");
605 return 0; 605 return 0;
606 } 606 }
607 607
608 if (wbc->range_cyclic) { 608 if (wbc->range_cyclic) {
609 start = mapping->writeback_ind 609 start = mapping->writeback_index;
610 end = -1; 610 end = -1;
611 ret = afs_writepages_region(ma 611 ret = afs_writepages_region(mapping, wbc, start, end, &next);
612 if (start > 0 && wbc->nr_to_wr 612 if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
613 !(wbc->nonblocking && wbc- 613 !(wbc->nonblocking && wbc->encountered_congestion))
614 ret = afs_writepages_r 614 ret = afs_writepages_region(mapping, wbc, 0, start,
615 615 &next);
616 mapping->writeback_index = nex 616 mapping->writeback_index = next;
617 } else if (wbc->range_start == 0 && wb 617 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
618 end = (pgoff_t)(LLONG_MAX >> P 618 end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
619 ret = afs_writepages_region(ma 619 ret = afs_writepages_region(mapping, wbc, 0, end, &next);
620 if (wbc->nr_to_write > 0) 620 if (wbc->nr_to_write > 0)
621 mapping->writeback_ind 621 mapping->writeback_index = next;
622 } else { 622 } else {
623 start = wbc->range_start >> PA 623 start = wbc->range_start >> PAGE_CACHE_SHIFT;
624 end = wbc->range_end >> PAGE_C 624 end = wbc->range_end >> PAGE_CACHE_SHIFT;
625 ret = afs_writepages_region(ma 625 ret = afs_writepages_region(mapping, wbc, start, end, &next);
626 } 626 }
627 627
628 _leave(" = %d", ret); 628 _leave(" = %d", ret);
629 return ret; 629 return ret;
630 } 630 }
631 631
632 /* 632 /*
633 * write an inode back 633 * write an inode back
634 */ 634 */
635 int afs_write_inode(struct inode *inode, int s 635 int afs_write_inode(struct inode *inode, int sync)
636 { 636 {
637 struct afs_vnode *vnode = AFS_FS_I(ino 637 struct afs_vnode *vnode = AFS_FS_I(inode);
638 int ret; 638 int ret;
639 639
640 _enter("{%x:%u},", vnode->fid.vid, vno 640 _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
641 641
642 ret = 0; 642 ret = 0;
643 if (sync) { 643 if (sync) {
644 ret = filemap_fdatawait(inode- 644 ret = filemap_fdatawait(inode->i_mapping);
645 if (ret < 0) 645 if (ret < 0)
646 __mark_inode_dirty(ino 646 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
647 } 647 }
648 648
649 _leave(" = %d", ret); 649 _leave(" = %d", ret);
650 return ret; 650 return ret;
651 } 651 }
652 652
653 /* 653 /*
654 * completion of write to server 654 * completion of write to server
655 */ 655 */
656 void afs_pages_written_back(struct afs_vnode * 656 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
657 { 657 {
658 struct afs_writeback *wb = call->wb; 658 struct afs_writeback *wb = call->wb;
659 struct pagevec pv; 659 struct pagevec pv;
660 unsigned count, loop; 660 unsigned count, loop;
661 pgoff_t first = call->first, last = ca 661 pgoff_t first = call->first, last = call->last;
662 bool free_wb; 662 bool free_wb;
663 663
664 _enter("{%x:%u},{%lx-%lx}", 664 _enter("{%x:%u},{%lx-%lx}",
665 vnode->fid.vid, vnode->fid.vnod 665 vnode->fid.vid, vnode->fid.vnode, first, last);
666 666
667 ASSERT(wb != NULL); 667 ASSERT(wb != NULL);
668 668
669 pagevec_init(&pv, 0); 669 pagevec_init(&pv, 0);
670 670
671 do { 671 do {
672 _debug("done %lx-%lx", first, 672 _debug("done %lx-%lx", first, last);
673 673
674 count = last - first + 1; 674 count = last - first + 1;
675 if (count > PAGEVEC_SIZE) 675 if (count > PAGEVEC_SIZE)
676 count = PAGEVEC_SIZE; 676 count = PAGEVEC_SIZE;
677 pv.nr = find_get_pages_contig( 677 pv.nr = find_get_pages_contig(call->mapping, first, count,
678 678 pv.pages);
679 ASSERTCMP(pv.nr, ==, count); 679 ASSERTCMP(pv.nr, ==, count);
680 680
681 spin_lock(&vnode->writeback_lo 681 spin_lock(&vnode->writeback_lock);
682 for (loop = 0; loop < count; l 682 for (loop = 0; loop < count; loop++) {
683 struct page *page = pv 683 struct page *page = pv.pages[loop];
684 end_page_writeback(pag 684 end_page_writeback(page);
685 if (page_private(page) 685 if (page_private(page) == (unsigned long) wb) {
686 set_page_priva 686 set_page_private(page, 0);
687 ClearPagePriva 687 ClearPagePrivate(page);
688 wb->usage--; 688 wb->usage--;
689 } 689 }
690 } 690 }
691 free_wb = false; 691 free_wb = false;
692 if (wb->usage == 0) { 692 if (wb->usage == 0) {
693 afs_unlink_writeback(w 693 afs_unlink_writeback(wb);
694 free_wb = true; 694 free_wb = true;
695 } 695 }
696 spin_unlock(&vnode->writeback_ 696 spin_unlock(&vnode->writeback_lock);
697 first += count; 697 first += count;
698 if (free_wb) { 698 if (free_wb) {
699 afs_free_writeback(wb) 699 afs_free_writeback(wb);
700 wb = NULL; 700 wb = NULL;
701 } 701 }
702 702
703 __pagevec_release(&pv); 703 __pagevec_release(&pv);
704 } while (first <= last); 704 } while (first <= last);
705 705
706 _leave(""); 706 _leave("");
707 } 707 }
708 708
709 /* 709 /*
710 * write to an AFS file 710 * write to an AFS file
711 */ 711 */
712 ssize_t afs_file_write(struct kiocb *iocb, con 712 ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
713 unsigned long nr_segs, 713 unsigned long nr_segs, loff_t pos)
714 { 714 {
715 struct dentry *dentry = iocb->ki_filp- 715 struct dentry *dentry = iocb->ki_filp->f_path.dentry;
716 struct afs_vnode *vnode = AFS_FS_I(den 716 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
717 ssize_t result; 717 ssize_t result;
718 size_t count = iov_length(iov, nr_segs 718 size_t count = iov_length(iov, nr_segs);
719 int ret; 719 int ret;
720 720
721 _enter("{%x.%u},{%zu},%lu,", 721 _enter("{%x.%u},{%zu},%lu,",
722 vnode->fid.vid, vnode->fid.vnod 722 vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
723 723
724 if (IS_SWAPFILE(&vnode->vfs_inode)) { 724 if (IS_SWAPFILE(&vnode->vfs_inode)) {
725 printk(KERN_INFO 725 printk(KERN_INFO
726 "AFS: Attempt to write 726 "AFS: Attempt to write to active swap file!\n");
727 return -EBUSY; 727 return -EBUSY;
728 } 728 }
729 729
730 if (!count) 730 if (!count)
731 return 0; 731 return 0;
732 732
733 result = generic_file_aio_write(iocb, 733 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
734 if (IS_ERR_VALUE(result)) { 734 if (IS_ERR_VALUE(result)) {
735 _leave(" = %zd", result); 735 _leave(" = %zd", result);
736 return result; 736 return result;
737 } 737 }
738 738
739 /* return error values for O_SYNC and 739 /* return error values for O_SYNC and IS_SYNC() */
740 if (IS_SYNC(&vnode->vfs_inode) || iocb 740 if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
741 ret = afs_fsync(iocb->ki_filp, 741 ret = afs_fsync(iocb->ki_filp, dentry, 1);
742 if (ret < 0) 742 if (ret < 0)
743 result = ret; 743 result = ret;
744 } 744 }
745 745
746 _leave(" = %zd", result); 746 _leave(" = %zd", result);
747 return result; 747 return result;
748 } 748 }
749 749
750 /* 750 /*
751 * flush the vnode to the fileserver 751 * flush the vnode to the fileserver
752 */ 752 */
753 int afs_writeback_all(struct afs_vnode *vnode) 753 int afs_writeback_all(struct afs_vnode *vnode)
754 { 754 {
755 struct address_space *mapping = vnode- 755 struct address_space *mapping = vnode->vfs_inode.i_mapping;
756 struct writeback_control wbc = { 756 struct writeback_control wbc = {
757 .bdi = mapping->bac 757 .bdi = mapping->backing_dev_info,
758 .sync_mode = WB_SYNC_ALL, 758 .sync_mode = WB_SYNC_ALL,
759 .nr_to_write = LONG_MAX, 759 .nr_to_write = LONG_MAX,
760 .for_writepages = 1, 760 .for_writepages = 1,
761 .range_cyclic = 1, 761 .range_cyclic = 1,
762 }; 762 };
763 int ret; 763 int ret;
764 764
765 _enter(""); 765 _enter("");
766 766
767 ret = mapping->a_ops->writepages(mappi 767 ret = mapping->a_ops->writepages(mapping, &wbc);
768 __mark_inode_dirty(mapping->host, I_DI 768 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
769 769
770 _leave(" = %d", ret); 770 _leave(" = %d", ret);
771 return ret; 771 return ret;
772 } 772 }
773 773
774 /* 774 /*
775 * flush any dirty pages for this process, and 775 * flush any dirty pages for this process, and check for write errors.
776 * - the return status from this call provides 776 * - the return status from this call provides a reliable indication of
777 * whether any write errors occurred for thi 777 * whether any write errors occurred for this process.
778 */ 778 */
779 int afs_fsync(struct file *file, struct dentry 779 int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
780 { 780 {
781 struct afs_writeback *wb, *xwb; 781 struct afs_writeback *wb, *xwb;
782 struct afs_vnode *vnode = AFS_FS_I(den 782 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
783 int ret; 783 int ret;
784 784
785 _enter("{%x:%u},{n=%s},%d", 785 _enter("{%x:%u},{n=%s},%d",
786 vnode->fid.vid, vnode->fid.vnod 786 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
787 datasync); 787 datasync);
788 788
789 /* use a writeback record as a marker 789 /* use a writeback record as a marker in the queue - when this reaches
790 * the front of the queue, all the out 790 * the front of the queue, all the outstanding writes are either
791 * completed or rejected */ 791 * completed or rejected */
792 wb = kzalloc(sizeof(*wb), GFP_KERNEL); 792 wb = kzalloc(sizeof(*wb), GFP_KERNEL);
793 if (!wb) 793 if (!wb)
794 return -ENOMEM; 794 return -ENOMEM;
795 wb->vnode = vnode; 795 wb->vnode = vnode;
796 wb->first = 0; 796 wb->first = 0;
797 wb->last = -1; 797 wb->last = -1;
798 wb->offset_first = 0; 798 wb->offset_first = 0;
799 wb->to_last = PAGE_SIZE; 799 wb->to_last = PAGE_SIZE;
800 wb->usage = 1; 800 wb->usage = 1;
801 wb->state = AFS_WBACK_SYNCING; 801 wb->state = AFS_WBACK_SYNCING;
802 init_waitqueue_head(&wb->waitq); 802 init_waitqueue_head(&wb->waitq);
803 803
804 spin_lock(&vnode->writeback_lock); 804 spin_lock(&vnode->writeback_lock);
805 list_for_each_entry(xwb, &vnode->write 805 list_for_each_entry(xwb, &vnode->writebacks, link) {
806 if (xwb->state == AFS_WBACK_PE 806 if (xwb->state == AFS_WBACK_PENDING)
807 xwb->state = AFS_WBACK 807 xwb->state = AFS_WBACK_CONFLICTING;
808 } 808 }
809 list_add_tail(&wb->link, &vnode->write 809 list_add_tail(&wb->link, &vnode->writebacks);
810 spin_unlock(&vnode->writeback_lock); 810 spin_unlock(&vnode->writeback_lock);
811 811
812 /* push all the outstanding writebacks 812 /* push all the outstanding writebacks to the server */
813 ret = afs_writeback_all(vnode); 813 ret = afs_writeback_all(vnode);
814 if (ret < 0) { 814 if (ret < 0) {
815 afs_put_writeback(wb); 815 afs_put_writeback(wb);
816 _leave(" = %d [wb]", ret); 816 _leave(" = %d [wb]", ret);
817 return ret; 817 return ret;
818 } 818 }
819 819
820 /* wait for the preceding writes to ac 820 /* wait for the preceding writes to actually complete */
821 ret = wait_event_interruptible(wb->wai 821 ret = wait_event_interruptible(wb->waitq,
822 wb->sta 822 wb->state == AFS_WBACK_COMPLETE ||
823 vnode-> 823 vnode->writebacks.next == &wb->link);
824 afs_put_writeback(wb); 824 afs_put_writeback(wb);
825 _leave(" = %d", ret); 825 _leave(" = %d", ret);
826 return ret; 826 return ret;
827 } 827 }
828 828
|
This page was automatically generated by the
LXR engine.
|