Diff markup
1 /* 1 /*
2 * linux/mm/swap_state.c 2 * linux/mm/swap_state.c
3 * 3 *
4 * Copyright (C) 1991, 1992, 1993, 1994 Linu 4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 * Swap reorganised 29.12.95, Stephen Tweedie 5 * Swap reorganised 29.12.95, Stephen Tweedie
6 * 6 *
7 * Rewritten to use page cache, (C) 1998 Step 7 * Rewritten to use page cache, (C) 1998 Stephen Tweedie
8 */ 8 */
9 #include <linux/module.h> 9 #include <linux/module.h>
10 #include <linux/mm.h> 10 #include <linux/mm.h>
11 #include <linux/kernel_stat.h> 11 #include <linux/kernel_stat.h>
12 #include <linux/swap.h> 12 #include <linux/swap.h>
13 #include <linux/swapops.h> <<
14 #include <linux/init.h> 13 #include <linux/init.h>
15 #include <linux/pagemap.h> 14 #include <linux/pagemap.h>
16 #include <linux/buffer_head.h> 15 #include <linux/buffer_head.h>
17 #include <linux/backing-dev.h> 16 #include <linux/backing-dev.h>
18 #include <linux/pagevec.h> <<
19 #include <linux/migrate.h> <<
20 17
21 #include <asm/pgtable.h> 18 #include <asm/pgtable.h>
22 19
23 /* 20 /*
24 * swapper_space is a fiction, retained to sim 21 * swapper_space is a fiction, retained to simplify the path through
25 * vmscan's shrink_page_list, to make sync_pag !! 22 * vmscan's shrink_list, to make sync_page look nicer, and to allow
26 * future use of radix_tree tags in the swap c 23 * future use of radix_tree tags in the swap cache.
27 */ 24 */
28 static const struct address_space_operations s !! 25 static struct address_space_operations swap_aops = {
29 .writepage = swap_writepage, 26 .writepage = swap_writepage,
30 .sync_page = block_sync_page, 27 .sync_page = block_sync_page,
31 .set_page_dirty = __set_page_dirty_nob 28 .set_page_dirty = __set_page_dirty_nobuffers,
32 .migratepage = migrate_page, <<
33 }; 29 };
34 30
35 static struct backing_dev_info swap_backing_de 31 static struct backing_dev_info swap_backing_dev_info = {
36 .capabilities = BDI_CAP_NO_ACCT_DIRT !! 32 .memory_backed = 1, /* Does not contribute to dirty memory */
37 .unplug_io_fn = swap_unplug_io_fn, 33 .unplug_io_fn = swap_unplug_io_fn,
38 }; 34 };
39 35
40 struct address_space swapper_space = { 36 struct address_space swapper_space = {
41 .page_tree = RADIX_TREE_INIT(GFP_ 37 .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
42 .tree_lock = __RW_LOCK_UNLOCKED(s !! 38 .tree_lock = SPIN_LOCK_UNLOCKED,
43 .a_ops = &swap_aops, 39 .a_ops = &swap_aops,
44 .i_mmap_nonlinear = LIST_HEAD_INIT(swa 40 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
45 .backing_dev_info = &swap_backing_dev_ 41 .backing_dev_info = &swap_backing_dev_info,
46 }; 42 };
>> 43 EXPORT_SYMBOL(swapper_space);
47 44
48 #define INC_CACHE_INFO(x) do { swap_cach 45 #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
49 46
50 static struct { 47 static struct {
51 unsigned long add_total; 48 unsigned long add_total;
52 unsigned long del_total; 49 unsigned long del_total;
53 unsigned long find_success; 50 unsigned long find_success;
54 unsigned long find_total; 51 unsigned long find_total;
>> 52 unsigned long noent_race;
>> 53 unsigned long exist_race;
55 } swap_cache_info; 54 } swap_cache_info;
56 55
57 void show_swap_cache_info(void) 56 void show_swap_cache_info(void)
58 { 57 {
59 printk("Swap cache: add %lu, delete %l !! 58 printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
60 swap_cache_info.add_total, swa 59 swap_cache_info.add_total, swap_cache_info.del_total,
61 swap_cache_info.find_success, !! 60 swap_cache_info.find_success, swap_cache_info.find_total,
>> 61 swap_cache_info.noent_race, swap_cache_info.exist_race);
62 printk("Free swap = %lukB\n", nr_swap 62 printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
63 printk("Total swap = %lukB\n", total_s 63 printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
64 } 64 }
65 65
66 /* 66 /*
67 * add_to_swap_cache resembles add_to_page_cac !! 67 * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
68 * but sets SwapCache flag and private instead 68 * but sets SwapCache flag and private instead of mapping and index.
69 */ 69 */
70 int add_to_swap_cache(struct page *page, swp_e !! 70 static int __add_to_swap_cache(struct page *page,
>> 71 swp_entry_t entry, int gfp_mask)
71 { 72 {
72 int error; 73 int error;
73 74
74 BUG_ON(!PageLocked(page)); <<
75 BUG_ON(PageSwapCache(page)); 75 BUG_ON(PageSwapCache(page));
76 BUG_ON(PagePrivate(page)); 76 BUG_ON(PagePrivate(page));
77 error = radix_tree_preload(gfp_mask); 77 error = radix_tree_preload(gfp_mask);
78 if (!error) { 78 if (!error) {
79 write_lock_irq(&swapper_space. !! 79 spin_lock_irq(&swapper_space.tree_lock);
80 error = radix_tree_insert(&swa 80 error = radix_tree_insert(&swapper_space.page_tree,
81 81 entry.val, page);
82 if (!error) { 82 if (!error) {
83 page_cache_get(page); 83 page_cache_get(page);
>> 84 SetPageLocked(page);
84 SetPageSwapCache(page) 85 SetPageSwapCache(page);
85 set_page_private(page, !! 86 page->private = entry.val;
86 total_swapcache_pages+ 87 total_swapcache_pages++;
87 __inc_zone_page_state( !! 88 pagecache_acct(1);
88 INC_CACHE_INFO(add_tot <<
89 } 89 }
90 write_unlock_irq(&swapper_spac !! 90 spin_unlock_irq(&swapper_space.tree_lock);
91 radix_tree_preload_end(); 91 radix_tree_preload_end();
92 } 92 }
93 return error; 93 return error;
94 } 94 }
95 95
>> 96 static int add_to_swap_cache(struct page *page, swp_entry_t entry)
>> 97 {
>> 98 int error;
>> 99
>> 100 if (!swap_duplicate(entry)) {
>> 101 INC_CACHE_INFO(noent_race);
>> 102 return -ENOENT;
>> 103 }
>> 104 error = __add_to_swap_cache(page, entry, GFP_KERNEL);
>> 105 /*
>> 106 * Anon pages are already on the LRU, we don't run lru_cache_add here.
>> 107 */
>> 108 if (error) {
>> 109 swap_free(entry);
>> 110 if (error == -EEXIST)
>> 111 INC_CACHE_INFO(exist_race);
>> 112 return error;
>> 113 }
>> 114 INC_CACHE_INFO(add_total);
>> 115 return 0;
>> 116 }
>> 117
96 /* 118 /*
97 * This must be called only on pages that have 119 * This must be called only on pages that have
98 * been verified to be in the swap cache. 120 * been verified to be in the swap cache.
99 */ 121 */
100 void __delete_from_swap_cache(struct page *pag 122 void __delete_from_swap_cache(struct page *page)
101 { 123 {
102 BUG_ON(!PageLocked(page)); 124 BUG_ON(!PageLocked(page));
103 BUG_ON(!PageSwapCache(page)); 125 BUG_ON(!PageSwapCache(page));
104 BUG_ON(PageWriteback(page)); 126 BUG_ON(PageWriteback(page));
105 BUG_ON(PagePrivate(page)); <<
106 127
107 radix_tree_delete(&swapper_space.page_ !! 128 radix_tree_delete(&swapper_space.page_tree, page->private);
108 set_page_private(page, 0); !! 129 page->private = 0;
109 ClearPageSwapCache(page); 130 ClearPageSwapCache(page);
110 total_swapcache_pages--; 131 total_swapcache_pages--;
111 __dec_zone_page_state(page, NR_FILE_PA !! 132 pagecache_acct(-1);
112 INC_CACHE_INFO(del_total); 133 INC_CACHE_INFO(del_total);
113 } 134 }
114 135
115 /** 136 /**
116 * add_to_swap - allocate swap space for a pag 137 * add_to_swap - allocate swap space for a page
117 * @page: page we want to move to swap 138 * @page: page we want to move to swap
118 * @gfp_mask: memory allocation flags <<
119 * 139 *
120 * Allocate swap space for the page and add th 140 * Allocate swap space for the page and add the page to the
121 * swap cache. Caller needs to hold the page 141 * swap cache. Caller needs to hold the page lock.
122 */ 142 */
123 int add_to_swap(struct page * page, gfp_t gfp_ !! 143 int add_to_swap(struct page * page)
124 { 144 {
125 swp_entry_t entry; 145 swp_entry_t entry;
>> 146 int pf_flags;
126 int err; 147 int err;
127 148
128 BUG_ON(!PageLocked(page)); !! 149 if (!PageLocked(page))
129 BUG_ON(!PageUptodate(page)); !! 150 BUG();
130 151
131 for (;;) { 152 for (;;) {
132 entry = get_swap_page(); 153 entry = get_swap_page();
133 if (!entry.val) 154 if (!entry.val)
134 return 0; 155 return 0;
135 156
136 /* !! 157 /* Radix-tree node allocations are performing
137 * Radix-tree node allocations !! 158 * GFP_ATOMIC allocations under PF_MEMALLOC.
138 * completely exhaust the page !! 159 * They can completely exhaust the page allocator.
139 * stops emergency reserves fr !! 160 *
>> 161 * So PF_MEMALLOC is dropped here. This causes the slab
>> 162 * allocations to fail earlier, so radix-tree nodes will
>> 163 * then be allocated from the mempool reserves.
140 * 164 *
141 * TODO: this could cause a th !! 165 * We're still using __GFP_HIGH for radix-tree node
142 * deadlock in the swap out pa !! 166 * allocations, so some of the emergency pools are available,
>> 167 * just not all of them.
143 */ 168 */
>> 169
>> 170 pf_flags = current->flags;
>> 171 current->flags &= ~PF_MEMALLOC;
>> 172
144 /* 173 /*
145 * Add it to the swap cache an 174 * Add it to the swap cache and mark it dirty
146 */ 175 */
147 err = add_to_swap_cache(page, !! 176 err = __add_to_swap_cache(page, entry, GFP_ATOMIC|__GFP_NOWARN);
148 gfp_mask|__GFP !! 177
>> 178 if (pf_flags & PF_MEMALLOC)
>> 179 current->flags |= PF_MEMALLOC;
149 180
150 switch (err) { 181 switch (err) {
151 case 0: 182 case 0: /* Success */
>> 183 SetPageUptodate(page);
152 SetPageDirty(page); 184 SetPageDirty(page);
>> 185 INC_CACHE_INFO(add_total);
153 return 1; 186 return 1;
154 case -EEXIST: 187 case -EEXIST:
155 /* Raced with "specula 188 /* Raced with "speculative" read_swap_cache_async */
>> 189 INC_CACHE_INFO(exist_race);
156 swap_free(entry); 190 swap_free(entry);
157 continue; 191 continue;
158 default: 192 default:
159 /* -ENOMEM radix-tree 193 /* -ENOMEM radix-tree allocation failure */
160 swap_free(entry); 194 swap_free(entry);
161 return 0; 195 return 0;
162 } 196 }
163 } 197 }
164 } 198 }
165 199
166 /* 200 /*
167 * This must be called only on pages that have 201 * This must be called only on pages that have
168 * been verified to be in the swap cache and l 202 * been verified to be in the swap cache and locked.
169 * It will never put the page into the free li 203 * It will never put the page into the free list,
170 * the caller has a reference on the page. 204 * the caller has a reference on the page.
171 */ 205 */
172 void delete_from_swap_cache(struct page *page) 206 void delete_from_swap_cache(struct page *page)
173 { 207 {
174 swp_entry_t entry; 208 swp_entry_t entry;
175 209
176 entry.val = page_private(page); !! 210 BUG_ON(!PageSwapCache(page));
>> 211 BUG_ON(!PageLocked(page));
>> 212 BUG_ON(PageWriteback(page));
>> 213 BUG_ON(PagePrivate(page));
>> 214
>> 215 entry.val = page->private;
177 216
178 write_lock_irq(&swapper_space.tree_loc !! 217 spin_lock_irq(&swapper_space.tree_lock);
179 __delete_from_swap_cache(page); 218 __delete_from_swap_cache(page);
180 write_unlock_irq(&swapper_space.tree_l !! 219 spin_unlock_irq(&swapper_space.tree_lock);
181 220
182 swap_free(entry); 221 swap_free(entry);
183 page_cache_release(page); 222 page_cache_release(page);
184 } 223 }
185 224
>> 225 /*
>> 226 * Strange swizzling function only for use by shmem_writepage
>> 227 */
>> 228 int move_to_swap_cache(struct page *page, swp_entry_t entry)
>> 229 {
>> 230 int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
>> 231 if (!err) {
>> 232 remove_from_page_cache(page);
>> 233 page_cache_release(page); /* pagecache ref */
>> 234 if (!swap_duplicate(entry))
>> 235 BUG();
>> 236 SetPageDirty(page);
>> 237 INC_CACHE_INFO(add_total);
>> 238 } else if (err == -EEXIST)
>> 239 INC_CACHE_INFO(exist_race);
>> 240 return err;
>> 241 }
>> 242
>> 243 /*
>> 244 * Strange swizzling function for shmem_getpage (and shmem_unuse)
>> 245 */
>> 246 int move_from_swap_cache(struct page *page, unsigned long index,
>> 247 struct address_space *mapping)
>> 248 {
>> 249 int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
>> 250 if (!err) {
>> 251 delete_from_swap_cache(page);
>> 252 /* shift page from clean_pages to dirty_pages list */
>> 253 ClearPageDirty(page);
>> 254 set_page_dirty(page);
>> 255 }
>> 256 return err;
>> 257 }
>> 258
186 /* 259 /*
187 * If we are the only user, then try to free u 260 * If we are the only user, then try to free up the swap cache.
188 * 261 *
189 * Its ok to check for PageSwapCache without t 262 * Its ok to check for PageSwapCache without the page lock
190 * here because we are going to recheck again 263 * here because we are going to recheck again inside
191 * exclusive_swap_page() _with_ the lock. 264 * exclusive_swap_page() _with_ the lock.
192 * - Marc 265 * - Marcelo
193 */ 266 */
194 static inline void free_swap_cache(struct page 267 static inline void free_swap_cache(struct page *page)
195 { 268 {
196 if (PageSwapCache(page) && !TestSetPag 269 if (PageSwapCache(page) && !TestSetPageLocked(page)) {
197 remove_exclusive_swap_page(pag 270 remove_exclusive_swap_page(page);
198 unlock_page(page); 271 unlock_page(page);
199 } 272 }
200 } 273 }
201 274
202 /* 275 /*
203 * Perform a free_page(), also freeing any swa 276 * Perform a free_page(), also freeing any swap cache associated with
204 * this page if it is the last user of the pag !! 277 * this page if it is the last user of the page. Can not do a lock_page,
>> 278 * as we are holding the page_table_lock spinlock.
205 */ 279 */
206 void free_page_and_swap_cache(struct page *pag 280 void free_page_and_swap_cache(struct page *page)
207 { 281 {
208 free_swap_cache(page); 282 free_swap_cache(page);
209 page_cache_release(page); 283 page_cache_release(page);
210 } 284 }
211 285
212 /* 286 /*
213 * Passed an array of pages, drop them all fro 287 * Passed an array of pages, drop them all from swapcache and then release
214 * them. They are removed from the LRU and fr 288 * them. They are removed from the LRU and freed if this is their last use.
215 */ 289 */
216 void free_pages_and_swap_cache(struct page **p 290 void free_pages_and_swap_cache(struct page **pages, int nr)
217 { 291 {
>> 292 int chunk = 16;
218 struct page **pagep = pages; 293 struct page **pagep = pages;
219 294
220 lru_add_drain(); 295 lru_add_drain();
221 while (nr) { 296 while (nr) {
222 int todo = min(nr, PAGEVEC_SIZ !! 297 int todo = min(chunk, nr);
223 int i; 298 int i;
224 299
225 for (i = 0; i < todo; i++) 300 for (i = 0; i < todo; i++)
226 free_swap_cache(pagep[ 301 free_swap_cache(pagep[i]);
227 release_pages(pagep, todo, 0); 302 release_pages(pagep, todo, 0);
228 pagep += todo; 303 pagep += todo;
229 nr -= todo; 304 nr -= todo;
230 } 305 }
231 } 306 }
232 307
233 /* 308 /*
234 * Lookup a swap entry in the swap cache. A fo 309 * Lookup a swap entry in the swap cache. A found page will be returned
235 * unlocked and with its refcount incremented 310 * unlocked and with its refcount incremented - we rely on the kernel
236 * lock getting page table operations atomic e 311 * lock getting page table operations atomic even if we drop the page
237 * lock before returning. 312 * lock before returning.
238 */ 313 */
239 struct page * lookup_swap_cache(swp_entry_t en 314 struct page * lookup_swap_cache(swp_entry_t entry)
240 { 315 {
241 struct page *page; 316 struct page *page;
242 317
243 page = find_get_page(&swapper_space, e !! 318 spin_lock_irq(&swapper_space.tree_lock);
244 !! 319 page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
245 if (page) !! 320 if (page) {
>> 321 page_cache_get(page);
246 INC_CACHE_INFO(find_success); 322 INC_CACHE_INFO(find_success);
247 !! 323 }
>> 324 spin_unlock_irq(&swapper_space.tree_lock);
248 INC_CACHE_INFO(find_total); 325 INC_CACHE_INFO(find_total);
249 return page; 326 return page;
250 } 327 }
251 328
252 /* 329 /*
253 * Locate a page of swap in physical memory, r 330 * Locate a page of swap in physical memory, reserving swap cache space
254 * and reading the disk if it is not already c 331 * and reading the disk if it is not already cached.
255 * A failure return means that either the page 332 * A failure return means that either the page allocation failed or that
256 * the swap entry is no longer in use. 333 * the swap entry is no longer in use.
257 */ 334 */
258 struct page *read_swap_cache_async(swp_entry_t !! 335 struct page *read_swap_cache_async(swp_entry_t entry,
259 struct vm_area_struct 336 struct vm_area_struct *vma, unsigned long addr)
260 { 337 {
261 struct page *found_page, *new_page = N 338 struct page *found_page, *new_page = NULL;
262 int err; 339 int err;
263 340
264 do { 341 do {
265 /* 342 /*
266 * First check the swap cache. 343 * First check the swap cache. Since this is normally
267 * called after lookup_swap_ca 344 * called after lookup_swap_cache() failed, re-calling
268 * that would confuse statisti 345 * that would confuse statistics.
269 */ 346 */
270 found_page = find_get_page(&sw !! 347 spin_lock_irq(&swapper_space.tree_lock);
>> 348 found_page = radix_tree_lookup(&swapper_space.page_tree,
>> 349 entry.val);
>> 350 if (found_page)
>> 351 page_cache_get(found_page);
>> 352 spin_unlock_irq(&swapper_space.tree_lock);
271 if (found_page) 353 if (found_page)
272 break; 354 break;
273 355
274 /* 356 /*
275 * Get a new page to read into 357 * Get a new page to read into from swap.
276 */ 358 */
277 if (!new_page) { 359 if (!new_page) {
278 new_page = alloc_page_ !! 360 new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
279 if (!new_page) 361 if (!new_page)
280 break; 362 break; /* Out of memory */
281 } 363 }
282 364
283 /* 365 /*
284 * Swap entry may have been fr <<
285 */ <<
286 if (!swap_duplicate(entry)) <<
287 break; <<
288 <<
289 /* <<
290 * Associate the page with swa 366 * Associate the page with swap entry in the swap cache.
291 * May fail (-EEXIST) if there !! 367 * May fail (-ENOENT) if swap entry has been freed since
292 * with this entry in the swap !! 368 * our caller observed it. May fail (-EEXIST) if there
293 * read_swap_cache_async, or a !! 369 * is already a page associated with this entry in the
294 * re-using the just freed swa !! 370 * swap cache: added by a racing read_swap_cache_async,
>> 371 * or by try_to_swap_out (or shmem_writepage) re-using
>> 372 * the just freed swap entry for an existing page.
295 * May fail (-ENOMEM) if radix 373 * May fail (-ENOMEM) if radix-tree node allocation failed.
296 */ 374 */
297 SetPageLocked(new_page); !! 375 err = add_to_swap_cache(new_page, entry);
298 err = add_to_swap_cache(new_pa <<
299 if (!err) { 376 if (!err) {
300 /* 377 /*
301 * Initiate read into 378 * Initiate read into locked page and return.
302 */ 379 */
303 lru_cache_add_active(n 380 lru_cache_add_active(new_page);
304 swap_readpage(NULL, ne 381 swap_readpage(NULL, new_page);
305 return new_page; 382 return new_page;
306 } 383 }
307 ClearPageLocked(new_page); !! 384 } while (err != -ENOENT && err != -ENOMEM);
308 swap_free(entry); <<
309 } while (err != -ENOMEM); <<
310 385
311 if (new_page) 386 if (new_page)
312 page_cache_release(new_page); 387 page_cache_release(new_page);
313 return found_page; 388 return found_page;
314 } 389 }
315 <<
316 /** <<
317 * swapin_readahead - swap in pages in hope we <<
318 * @entry: swap entry of this memory <<
319 * @gfp_mask: memory allocation flags <<
320 * @vma: user vma this address belongs to <<
321 * @addr: target address for mempolicy <<
322 * <<
323 * Returns the struct page for entry and addr, <<
324 * <<
325 * Primitive swap readahead code. We simply re <<
326 * (1 << page_cluster) entries in the swap are <<
327 * because it doesn't cost us any seek time. <<
328 * the 'original' request together with the re <<
329 * <<
330 * This has been extended to use the NUMA poli <<
331 * the readahead. <<
332 * <<
333 * Caller must hold down_read on the vma->vm_m <<
334 */ <<
335 struct page *swapin_readahead(swp_entry_t entr <<
336 struct vm_area_struct <<
337 { <<
338 int nr_pages; <<
339 struct page *page; <<
340 unsigned long offset; <<
341 unsigned long end_offset; <<
342 <<
343 /* <<
344 * Get starting offset for readaround, <<
345 * Adjust starting address by readbehi <<
346 * No, it's very unlikely that swap la <<
347 * more likely that neighbouring swap <<
348 * so use the same "addr" to choose th <<
349 */ <<
350 nr_pages = valid_swaphandles(entry, &o <<
351 for (end_offset = offset + nr_pages; o <<
352 /* Ok, do the async read-ahead <<
353 page = read_swap_cache_async(s <<
354 <<
355 if (!page) <<
356 break; <<
357 page_cache_release(page); <<
358 } <<
359 lru_add_drain(); /* Push any ne <<
360 return read_swap_cache_async(entry, gf <<
361 } <<
362 390
|
This page was automatically generated by the
LXR engine.
|