| Linux kernel & device driver programming |
| [ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] |
1 /* 1 /*
2 * Copyright (C) 2001 Jens Axboe <axboe@kernel 2 * Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
3 * 3 *
4 * This program is free software; you can redi 4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Publi 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 * 7 *
8 * This program is distributed in the hope tha 8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details 11 * GNU General Public License for more details.
12 * 12 *
13 * You should have received a copy of the GNU 13 * You should have received a copy of the GNU General Public Licens
14 * along with this program; if not, write to t 14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 33 15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
16 * 16 *
17 */ 17 */
18 #include <linux/mm.h> 18 #include <linux/mm.h>
19 #include <linux/swap.h> 19 #include <linux/swap.h>
20 #include <linux/bio.h> 20 #include <linux/bio.h>
21 #include <linux/blkdev.h> 21 #include <linux/blkdev.h>
22 #include <linux/slab.h> 22 #include <linux/slab.h>
23 #include <linux/init.h> 23 #include <linux/init.h>
24 #include <linux/kernel.h> 24 #include <linux/kernel.h>
25 #include <linux/module.h> 25 #include <linux/module.h>
26 #include <linux/mempool.h> 26 #include <linux/mempool.h>
27 #include <linux/workqueue.h> 27 #include <linux/workqueue.h>
28 #include <linux/blktrace_api.h> <<
29 #include <scsi/sg.h> /* for struct 28 #include <scsi/sg.h> /* for struct sg_iovec */
30 29
31 #define BIO_POOL_SIZE 2 !! 30 #include <trace/events/block.h>
32 <<
33 static struct kmem_cache *bio_slab __read_most <<
34 <<
35 #define BIOVEC_NR_POOLS 6 <<
36 31
37 /* 32 /*
38 * a small number of entries is fine, not goin !! 33 * Test patch to inline a certain number of bi_io_vec's inside the bio
39 * basically we just need to survive !! 34 * itself, to shrink a bio data allocation from two mempool calls to one
40 */ 35 */
41 #define BIO_SPLIT_ENTRIES 2 !! 36 #define BIO_INLINE_VECS 4
42 mempool_t *bio_split_pool __read_mostly; <<
43 37
44 struct biovec_slab { !! 38 static mempool_t *bio_split_pool __read_mostly;
45 int nr_vecs; <<
46 char *name; <<
47 struct kmem_cache *slab; <<
48 }; <<
49 39
50 /* 40 /*
51 * if you change this list, also change bvec_a 41 * if you change this list, also change bvec_alloc or things will
52 * break badly! cannot be bigger than what you 42 * break badly! cannot be bigger than what you can fit into an
53 * unsigned short 43 * unsigned short
54 */ 44 */
55 <<
56 #define BV(x) { .nr_vecs = x, .name = "biovec- 45 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
57 static struct biovec_slab bvec_slabs[BIOVEC_NR !! 46 struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
58 BV(1), BV(4), BV(16), BV(64), BV(128), 47 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
59 }; 48 };
60 #undef BV 49 #undef BV
61 50
62 /* 51 /*
63 * bio_set is used to allow other portions of <<
64 * allocate their own private memory pools for <<
65 * These memory pools in turn all allocate fro <<
66 * and the bvec_slabs[]. <<
67 */ <<
68 struct bio_set { <<
69 mempool_t *bio_pool; <<
70 mempool_t *bvec_pools[BIOVEC_NR_POOLS] <<
71 }; <<
72 <<
73 /* <<
74 * fs_bio_set is the bio_set containing bio an 52 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
75 * IO code that does not need private memory p 53 * IO code that does not need private memory pools.
76 */ 54 */
77 static struct bio_set *fs_bio_set; !! 55 struct bio_set *fs_bio_set;
>> 56
/*
 * Slab cache bookkeeping.
 *
 * bio_slabs[] is a table of kmem caches keyed by object size; each entry
 * is shared, by reference count, between all users wanting that size.
 */
struct bio_slab {
	struct kmem_cache *slab;	/* the cache; NULL once destroyed */
	unsigned int slab_ref;		/* users currently holding this cache */
	unsigned int slab_size;		/* object size the cache was created with */
	char name[8];			/* cache name, formatted as "bio-<slot>" */
};

/* Held around every lookup in, and update of, the table below. */
static DEFINE_MUTEX(bio_slab_lock);

/* The table itself; grown with krealloc() when all slots are taken. */
static struct bio_slab *bio_slabs;

/* Slots handed out so far, and slots the table has room for. */
static unsigned int bio_slab_nr;
static unsigned int bio_slab_max;
>> 69
>> 70 static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
>> 71 {
>> 72 unsigned int sz = sizeof(struct bio) + extra_size;
>> 73 struct kmem_cache *slab = NULL;
>> 74 struct bio_slab *bslab;
>> 75 unsigned int i, entry = -1;
>> 76
>> 77 mutex_lock(&bio_slab_lock);
>> 78
>> 79 i = 0;
>> 80 while (i < bio_slab_nr) {
>> 81 struct bio_slab *bslab = &bio_slabs[i];
>> 82
>> 83 if (!bslab->slab && entry == -1)
>> 84 entry = i;
>> 85 else if (bslab->slab_size == sz) {
>> 86 slab = bslab->slab;
>> 87 bslab->slab_ref++;
>> 88 break;
>> 89 }
>> 90 i++;
>> 91 }
>> 92
>> 93 if (slab)
>> 94 goto out_unlock;
>> 95
>> 96 if (bio_slab_nr == bio_slab_max && entry == -1) {
>> 97 bio_slab_max <<= 1;
>> 98 bio_slabs = krealloc(bio_slabs,
>> 99 bio_slab_max * sizeof(struct bio_slab),
>> 100 GFP_KERNEL);
>> 101 if (!bio_slabs)
>> 102 goto out_unlock;
>> 103 }
>> 104 if (entry == -1)
>> 105 entry = bio_slab_nr++;
>> 106
>> 107 bslab = &bio_slabs[entry];
>> 108
>> 109 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
>> 110 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
>> 111 if (!slab)
>> 112 goto out_unlock;
>> 113
>> 114 printk("bio: create slab <%s> at %d\n", bslab->name, entry);
>> 115 bslab->slab = slab;
>> 116 bslab->slab_ref = 1;
>> 117 bslab->slab_size = sz;
>> 118 out_unlock:
>> 119 mutex_unlock(&bio_slab_lock);
>> 120 return slab;
>> 121 }
>> 122
>> 123 static void bio_put_slab(struct bio_set *bs)
>> 124 {
>> 125 struct bio_slab *bslab = NULL;
>> 126 unsigned int i;
>> 127
>> 128 mutex_lock(&bio_slab_lock);
>> 129
>> 130 for (i = 0; i < bio_slab_nr; i++) {
>> 131 if (bs->bio_slab == bio_slabs[i].slab) {
>> 132 bslab = &bio_slabs[i];
>> 133 break;
>> 134 }
>> 135 }
>> 136
>> 137 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
>> 138 goto out;
>> 139
>> 140 WARN_ON(!bslab->slab_ref);
>> 141
>> 142 if (--bslab->slab_ref)
>> 143 goto out;
>> 144
>> 145 kmem_cache_destroy(bslab->slab);
>> 146 bslab->slab = NULL;
78 147
79 static inline struct bio_vec *bvec_alloc_bs(gf !! 148 out:
>> 149 mutex_unlock(&bio_slab_lock);
>> 150 }
>> 151
>> 152 unsigned int bvec_nr_vecs(unsigned short idx)
>> 153 {
>> 154 return bvec_slabs[idx].nr_vecs;
>> 155 }
>> 156
>> 157 void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
>> 158 {
>> 159 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
>> 160
>> 161 if (idx == BIOVEC_MAX_IDX)
>> 162 mempool_free(bv, bs->bvec_pool);
>> 163 else {
>> 164 struct biovec_slab *bvs = bvec_slabs + idx;
>> 165
>> 166 kmem_cache_free(bvs->slab, bv);
>> 167 }
>> 168 }
>> 169
>> 170 struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
>> 171 struct bio_set *bs)
80 { 172 {
81 struct bio_vec *bvl; 173 struct bio_vec *bvl;
82 174
83 /* 175 /*
84 * see comment near bvec_array define! 176 * see comment near bvec_array define!
85 */ 177 */
86 switch (nr) { 178 switch (nr) {
87 case 1 : *idx = 0; br !! 179 case 1:
88 case 2 ... 4: *idx = 1; br !! 180 *idx = 0;
89 case 5 ... 16: *idx = 2; br !! 181 break;
90 case 17 ... 64: *idx = 3; br !! 182 case 2 ... 4:
91 case 65 ... 128: *idx = 4; br !! 183 *idx = 1;
92 case 129 ... BIO_MAX_PAGES: *i !! 184 break;
93 default: !! 185 case 5 ... 16:
94 return NULL; !! 186 *idx = 2;
>> 187 break;
>> 188 case 17 ... 64:
>> 189 *idx = 3;
>> 190 break;
>> 191 case 65 ... 128:
>> 192 *idx = 4;
>> 193 break;
>> 194 case 129 ... BIO_MAX_PAGES:
>> 195 *idx = 5;
>> 196 break;
>> 197 default:
>> 198 return NULL;
95 } 199 }
>> 200
96 /* 201 /*
97 * idx now points to the pool we want !! 202 * idx now points to the pool we want to allocate from. only the
>> 203 * 1-vec entry pool is mempool backed.
98 */ 204 */
>> 205 if (*idx == BIOVEC_MAX_IDX) {
>> 206 fallback:
>> 207 bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
>> 208 } else {
>> 209 struct biovec_slab *bvs = bvec_slabs + *idx;
>> 210 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
99 211
100 bvl = mempool_alloc(bs->bvec_pools[*id !! 212 /*
101 if (bvl) { !! 213 * Make this allocation restricted and don't dump info on
102 struct biovec_slab *bp = bvec_ !! 214 * allocation failures, since we'll fallback to the mempool
>> 215 * in case of failure.
>> 216 */
>> 217 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
103 218
104 memset(bvl, 0, bp->nr_vecs * s !! 219 /*
>> 220 * Try a slab allocation. If this fails and __GFP_WAIT
>> 221 * is set, retry with the 1-entry mempool
>> 222 */
>> 223 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
>> 224 if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
>> 225 *idx = BIOVEC_MAX_IDX;
>> 226 goto fallback;
>> 227 }
105 } 228 }
106 229
107 return bvl; 230 return bvl;
108 } 231 }
109 232
110 void bio_free(struct bio *bio, struct bio_set !! 233 void bio_free(struct bio *bio, struct bio_set *bs)
111 { 234 {
112 if (bio->bi_io_vec) { !! 235 void *p;
113 const int pool_idx = BIO_POOL_ <<
114 236
115 BIO_BUG_ON(pool_idx >= BIOVEC_ !! 237 if (bio_has_allocated_vec(bio))
>> 238 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
116 239
117 mempool_free(bio->bi_io_vec, b !! 240 if (bio_integrity(bio))
118 } !! 241 bio_integrity_free(bio, bs);
119 242
120 mempool_free(bio, bio_set->bio_pool); !! 243 /*
121 } !! 244 * If we have front padding, adjust the bio pointer before freeing
>> 245 */
>> 246 p = bio;
>> 247 if (bs->front_pad)
>> 248 p -= bs->front_pad;
122 249
123 /* !! 250 mempool_free(p, bs->bio_pool);
124 * default destructor for a bio allocated with <<
125 */ <<
126 static void bio_fs_destructor(struct bio *bio) <<
127 { <<
128 bio_free(bio, fs_bio_set); <<
129 } 251 }
130 252
131 void bio_init(struct bio *bio) 253 void bio_init(struct bio *bio)
132 { 254 {
133 memset(bio, 0, sizeof(*bio)); 255 memset(bio, 0, sizeof(*bio));
134 bio->bi_flags = 1 << BIO_UPTODATE; 256 bio->bi_flags = 1 << BIO_UPTODATE;
>> 257 bio->bi_comp_cpu = -1;
135 atomic_set(&bio->bi_cnt, 1); 258 atomic_set(&bio->bi_cnt, 1);
136 } 259 }
137 260
138 /** 261 /**
139 * bio_alloc_bioset - allocate a bio for I/O 262 * bio_alloc_bioset - allocate a bio for I/O
140 * @gfp_mask: the GFP_ mask given to the sla 263 * @gfp_mask: the GFP_ mask given to the slab allocator
141 * @nr_iovecs: number of iovecs to pre-alloca 264 * @nr_iovecs: number of iovecs to pre-allocate
142 * @bs: the bio_set to allocate from !! 265 * @bs: the bio_set to allocate from. If %NULL, just use kmalloc
143 * 266 *
144 * Description: 267 * Description:
145 * bio_alloc_bioset will first try it's on m !! 268 * bio_alloc_bioset will first try its own mempool to satisfy the allocation.
146 * If %__GFP_WAIT is set then we will block 269 * If %__GFP_WAIT is set then we will block on the internal pool waiting
147 * for a &struct bio to become free. !! 270 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
>> 271 * fall back to just using @kmalloc to allocate the required memory.
148 * 272 *
149 * allocate bio and iovecs from the memory p !! 273 * Note that the caller must set ->bi_destructor on succesful return
150 * bio_set structure. !! 274 * of a bio, to do the appropriate freeing of the bio once the reference
>> 275 * count drops to zero.
151 **/ 276 **/
152 struct bio *bio_alloc_bioset(gfp_t gfp_mask, i 277 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
153 { 278 {
154 struct bio *bio = mempool_alloc(bs->bi !! 279 unsigned long idx = BIO_POOL_NONE;
>> 280 struct bio_vec *bvl = NULL;
>> 281 struct bio *bio;
>> 282 void *p;
155 283
156 if (likely(bio)) { !! 284 p = mempool_alloc(bs->bio_pool, gfp_mask);
157 struct bio_vec *bvl = NULL; !! 285 if (unlikely(!p))
>> 286 return NULL;
>> 287 bio = p + bs->front_pad;
158 288
159 bio_init(bio); !! 289 bio_init(bio);
160 if (likely(nr_iovecs)) { !! 290
161 unsigned long idx = 0; !! 291 if (unlikely(!nr_iovecs))
162 !! 292 goto out_set;
163 bvl = bvec_alloc_bs(gf !! 293
164 if (unlikely(!bvl)) { !! 294 if (nr_iovecs <= BIO_INLINE_VECS) {
165 mempool_free(b !! 295 bvl = bio->bi_inline_vecs;
166 bio = NULL; !! 296 nr_iovecs = BIO_INLINE_VECS;
167 goto out; !! 297 } else {
168 } !! 298 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
169 bio->bi_flags |= idx < !! 299 if (unlikely(!bvl))
170 bio->bi_max_vecs = bve !! 300 goto err_free;
171 } !! 301
172 bio->bi_io_vec = bvl; !! 302 nr_iovecs = bvec_nr_vecs(idx);
173 } !! 303 }
174 out: !! 304 out_set:
>> 305 bio->bi_flags |= idx << BIO_POOL_OFFSET;
>> 306 bio->bi_max_vecs = nr_iovecs;
>> 307 bio->bi_io_vec = bvl;
175 return bio; 308 return bio;
>> 309
>> 310 err_free:
>> 311 mempool_free(p, bs->bio_pool);
>> 312 return NULL;
>> 313 }
>> 314
>> 315 static void bio_fs_destructor(struct bio *bio)
>> 316 {
>> 317 bio_free(bio, fs_bio_set);
176 } 318 }
177 319
>> 320 /**
>> 321 * bio_alloc - allocate a new bio, memory pool backed
>> 322 * @gfp_mask: allocation mask to use
>> 323 * @nr_iovecs: number of iovecs
>> 324 *
>> 325 * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask
>> 326 * contains __GFP_WAIT, the allocation is guaranteed to succeed.
>> 327 *
>> 328 * RETURNS:
>> 329 * Pointer to new bio on success, NULL on failure.
>> 330 */
178 struct bio *bio_alloc(gfp_t gfp_mask, int nr_i 331 struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
179 { 332 {
180 struct bio *bio = bio_alloc_bioset(gfp 333 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
181 334
182 if (bio) 335 if (bio)
183 bio->bi_destructor = bio_fs_de 336 bio->bi_destructor = bio_fs_destructor;
184 337
185 return bio; 338 return bio;
186 } 339 }
187 340
>> 341 static void bio_kmalloc_destructor(struct bio *bio)
>> 342 {
>> 343 if (bio_integrity(bio))
>> 344 bio_integrity_free(bio, fs_bio_set);
>> 345 kfree(bio);
>> 346 }
>> 347
>> 348 /**
>> 349 * bio_alloc - allocate a bio for I/O
>> 350 * @gfp_mask: the GFP_ mask given to the slab allocator
>> 351 * @nr_iovecs: number of iovecs to pre-allocate
>> 352 *
>> 353 * Description:
>> 354 * bio_alloc will allocate a bio and associated bio_vec array that can hold
>> 355 * at least @nr_iovecs entries. Allocations will be done from the
>> 356 * fs_bio_set. Also see @bio_alloc_bioset.
>> 357 *
>> 358 * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
>> 359 * a bio. This is due to the mempool guarantees. To make this work, callers
>> 360 * must never allocate more than 1 bio at a time from this pool. Callers
>> 361 * that need to allocate more than 1 bio must always submit the previously
>> 362 * allocated bio for IO before attempting to allocate a new one. Failure to
>> 363 * do so can cause livelocks under memory pressure.
>> 364 *
>> 365 **/
>> 366 struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
>> 367 {
>> 368 struct bio *bio;
>> 369
>> 370 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
>> 371 gfp_mask);
>> 372 if (unlikely(!bio))
>> 373 return NULL;
>> 374
>> 375 bio_init(bio);
>> 376 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
>> 377 bio->bi_max_vecs = nr_iovecs;
>> 378 bio->bi_io_vec = bio->bi_inline_vecs;
>> 379 bio->bi_destructor = bio_kmalloc_destructor;
>> 380
>> 381 return bio;
>> 382 }
>> 383
188 void zero_fill_bio(struct bio *bio) 384 void zero_fill_bio(struct bio *bio)
189 { 385 {
190 unsigned long flags; 386 unsigned long flags;
191 struct bio_vec *bv; 387 struct bio_vec *bv;
192 int i; 388 int i;
193 389
194 bio_for_each_segment(bv, bio, i) { 390 bio_for_each_segment(bv, bio, i) {
195 char *data = bvec_kmap_irq(bv, 391 char *data = bvec_kmap_irq(bv, &flags);
196 memset(data, 0, bv->bv_len); 392 memset(data, 0, bv->bv_len);
197 flush_dcache_page(bv->bv_page) 393 flush_dcache_page(bv->bv_page);
198 bvec_kunmap_irq(data, &flags); 394 bvec_kunmap_irq(data, &flags);
199 } 395 }
200 } 396 }
201 EXPORT_SYMBOL(zero_fill_bio); 397 EXPORT_SYMBOL(zero_fill_bio);
202 398
203 /** 399 /**
204 * bio_put - release a reference to a bio 400 * bio_put - release a reference to a bio
205 * @bio: bio to release reference to 401 * @bio: bio to release reference to
206 * 402 *
207 * Description: 403 * Description:
208 * Put a reference to a &struct bio, either 404 * Put a reference to a &struct bio, either one you have gotten with
209 * bio_alloc or bio_get. The last put of a b 405 * bio_alloc or bio_get. The last put of a bio will free it.
210 **/ 406 **/
211 void bio_put(struct bio *bio) 407 void bio_put(struct bio *bio)
212 { 408 {
213 BIO_BUG_ON(!atomic_read(&bio->bi_cnt)) 409 BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
214 410
215 /* 411 /*
216 * last put frees it 412 * last put frees it
217 */ 413 */
218 if (atomic_dec_and_test(&bio->bi_cnt)) 414 if (atomic_dec_and_test(&bio->bi_cnt)) {
219 bio->bi_next = NULL; 415 bio->bi_next = NULL;
220 bio->bi_destructor(bio); 416 bio->bi_destructor(bio);
221 } 417 }
222 } 418 }
223 419
224 inline int bio_phys_segments(struct request_qu 420 inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
225 { 421 {
226 if (unlikely(!bio_flagged(bio, BIO_SEG 422 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
227 blk_recount_segments(q, bio); 423 blk_recount_segments(q, bio);
228 424
229 return bio->bi_phys_segments; 425 return bio->bi_phys_segments;
230 } 426 }
231 427
232 inline int bio_hw_segments(struct request_queu <<
233 { <<
234 if (unlikely(!bio_flagged(bio, BIO_SEG <<
235 blk_recount_segments(q, bio); <<
236 <<
237 return bio->bi_hw_segments; <<
238 } <<
239 <<
240 /** 428 /**
241 * __bio_clone - clone a bio 429 * __bio_clone - clone a bio
242 * @bio: destination bio 430 * @bio: destination bio
243 * @bio_src: bio to clone 431 * @bio_src: bio to clone
244 * 432 *
245 * Clone a &bio. Caller will own the retu 433 * Clone a &bio. Caller will own the returned bio, but not
246 * the actual data it points to. Referenc 434 * the actual data it points to. Reference count of returned
247 * bio will be one. 435 * bio will be one.
248 */ 436 */
249 void __bio_clone(struct bio *bio, struct bio * 437 void __bio_clone(struct bio *bio, struct bio *bio_src)
250 { 438 {
251 memcpy(bio->bi_io_vec, bio_src->bi_io_ 439 memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
252 bio_src->bi_max_vecs * sizeof( 440 bio_src->bi_max_vecs * sizeof(struct bio_vec));
253 441
254 /* 442 /*
255 * most users will be overriding ->bi_ 443 * most users will be overriding ->bi_bdev with a new target,
256 * so we don't set nor calculate new p 444 * so we don't set nor calculate new physical/hw segment counts here
257 */ 445 */
258 bio->bi_sector = bio_src->bi_sector; 446 bio->bi_sector = bio_src->bi_sector;
259 bio->bi_bdev = bio_src->bi_bdev; 447 bio->bi_bdev = bio_src->bi_bdev;
260 bio->bi_flags |= 1 << BIO_CLONED; 448 bio->bi_flags |= 1 << BIO_CLONED;
261 bio->bi_rw = bio_src->bi_rw; 449 bio->bi_rw = bio_src->bi_rw;
262 bio->bi_vcnt = bio_src->bi_vcnt; 450 bio->bi_vcnt = bio_src->bi_vcnt;
263 bio->bi_size = bio_src->bi_size; 451 bio->bi_size = bio_src->bi_size;
264 bio->bi_idx = bio_src->bi_idx; 452 bio->bi_idx = bio_src->bi_idx;
265 } 453 }
266 454
267 /** 455 /**
268 * bio_clone - clone a bio 456 * bio_clone - clone a bio
269 * @bio: bio to clone 457 * @bio: bio to clone
270 * @gfp_mask: allocation priority 458 * @gfp_mask: allocation priority
271 * 459 *
272 * Like __bio_clone, only also allocates 460 * Like __bio_clone, only also allocates the returned bio
273 */ 461 */
274 struct bio *bio_clone(struct bio *bio, gfp_t g 462 struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
275 { 463 {
276 struct bio *b = bio_alloc_bioset(gfp_m 464 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
277 465
278 if (b) { !! 466 if (!b)
279 b->bi_destructor = bio_fs_dest !! 467 return NULL;
280 __bio_clone(b, bio); !! 468
>> 469 b->bi_destructor = bio_fs_destructor;
>> 470 __bio_clone(b, bio);
>> 471
>> 472 if (bio_integrity(bio)) {
>> 473 int ret;
>> 474
>> 475 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
>> 476
>> 477 if (ret < 0) {
>> 478 bio_put(b);
>> 479 return NULL;
>> 480 }
281 } 481 }
282 482
283 return b; 483 return b;
284 } 484 }
285 485
286 /** 486 /**
287 * bio_get_nr_vecs - return appro 487 * bio_get_nr_vecs - return approx number of vecs
288 * @bdev: I/O target 488 * @bdev: I/O target
289 * 489 *
290 * Return the approximate number of pages 490 * Return the approximate number of pages we can send to this target.
291 * There's no guarantee that you will be 491 * There's no guarantee that you will be able to fit this number of pages
292 * into a bio, it does not account for dy 492 * into a bio, it does not account for dynamic restrictions that vary
293 * on offset. 493 * on offset.
294 */ 494 */
295 int bio_get_nr_vecs(struct block_device *bdev) 495 int bio_get_nr_vecs(struct block_device *bdev)
296 { 496 {
297 struct request_queue *q = bdev_get_que 497 struct request_queue *q = bdev_get_queue(bdev);
298 int nr_pages; 498 int nr_pages;
299 499
300 nr_pages = ((q->max_sectors << 9) + PA !! 500 nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
301 if (nr_pages > q->max_phys_segments) !! 501 if (nr_pages > queue_max_phys_segments(q))
302 nr_pages = q->max_phys_segment !! 502 nr_pages = queue_max_phys_segments(q);
303 if (nr_pages > q->max_hw_segments) !! 503 if (nr_pages > queue_max_hw_segments(q))
304 nr_pages = q->max_hw_segments; !! 504 nr_pages = queue_max_hw_segments(q);
305 505
306 return nr_pages; 506 return nr_pages;
307 } 507 }
308 508
309 static int __bio_add_page(struct request_queue 509 static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
310 *page, unsigned int 510 *page, unsigned int len, unsigned int offset,
311 unsigned short max_s 511 unsigned short max_sectors)
312 { 512 {
313 int retried_segments = 0; 513 int retried_segments = 0;
314 struct bio_vec *bvec; 514 struct bio_vec *bvec;
315 515
316 /* 516 /*
317 * cloned bio must not modify vec list 517 * cloned bio must not modify vec list
318 */ 518 */
319 if (unlikely(bio_flagged(bio, BIO_CLON 519 if (unlikely(bio_flagged(bio, BIO_CLONED)))
320 return 0; 520 return 0;
321 521
322 if (((bio->bi_size + len) >> 9) > max_ 522 if (((bio->bi_size + len) >> 9) > max_sectors)
323 return 0; 523 return 0;
324 524
325 /* 525 /*
326 * For filesystems with a blocksize sm 526 * For filesystems with a blocksize smaller than the pagesize
327 * we will often be called with the sa 527 * we will often be called with the same page as last time and
328 * a consecutive offset. Optimize thi 528 * a consecutive offset. Optimize this special case.
329 */ 529 */
330 if (bio->bi_vcnt > 0) { 530 if (bio->bi_vcnt > 0) {
331 struct bio_vec *prev = &bio->b 531 struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
332 532
333 if (page == prev->bv_page && 533 if (page == prev->bv_page &&
334 offset == prev->bv_offset 534 offset == prev->bv_offset + prev->bv_len) {
335 prev->bv_len += len; 535 prev->bv_len += len;
336 if (q->merge_bvec_fn & !! 536
337 q->merge_bvec_fn(q !! 537 if (q->merge_bvec_fn) {
338 prev->bv_len - !! 538 struct bvec_merge_data bvm = {
339 return 0; !! 539 .bi_bdev = bio->bi_bdev,
>> 540 .bi_sector = bio->bi_sector,
>> 541 .bi_size = bio->bi_size,
>> 542 .bi_rw = bio->bi_rw,
>> 543 };
>> 544
>> 545 if (q->merge_bvec_fn(q, &bvm, prev) < len) {
>> 546 prev->bv_len -= len;
>> 547 return 0;
>> 548 }
340 } 549 }
341 550
342 goto done; 551 goto done;
343 } 552 }
344 } 553 }
345 554
346 if (bio->bi_vcnt >= bio->bi_max_vecs) 555 if (bio->bi_vcnt >= bio->bi_max_vecs)
347 return 0; 556 return 0;
348 557
349 /* 558 /*
350 * we might lose a segment or two here 559 * we might lose a segment or two here, but rather that than
351 * make this too complex. 560 * make this too complex.
352 */ 561 */
353 562
354 while (bio->bi_phys_segments >= q->max !! 563 while (bio->bi_phys_segments >= queue_max_phys_segments(q)
355 || bio->bi_hw_segments >= q->ma !! 564 || bio->bi_phys_segments >= queue_max_hw_segments(q)) {
356 || BIOVEC_VIRT_OVERSIZE(bio->bi <<
357 565
358 if (retried_segments) 566 if (retried_segments)
359 return 0; 567 return 0;
360 568
361 retried_segments = 1; 569 retried_segments = 1;
362 blk_recount_segments(q, bio); 570 blk_recount_segments(q, bio);
363 } 571 }
364 572
365 /* 573 /*
366 * setup the new entry, we might clear 574 * setup the new entry, we might clear it again later if we
367 * cannot add the page 575 * cannot add the page
368 */ 576 */
369 bvec = &bio->bi_io_vec[bio->bi_vcnt]; 577 bvec = &bio->bi_io_vec[bio->bi_vcnt];
370 bvec->bv_page = page; 578 bvec->bv_page = page;
371 bvec->bv_len = len; 579 bvec->bv_len = len;
372 bvec->bv_offset = offset; 580 bvec->bv_offset = offset;
373 581
374 /* 582 /*
375 * if queue has other restrictions (eg 583 * if queue has other restrictions (eg varying max sector size
376 * depending on offset), it can specif 584 * depending on offset), it can specify a merge_bvec_fn in the
377 * queue to get further control 585 * queue to get further control
378 */ 586 */
379 if (q->merge_bvec_fn) { 587 if (q->merge_bvec_fn) {
>> 588 struct bvec_merge_data bvm = {
>> 589 .bi_bdev = bio->bi_bdev,
>> 590 .bi_sector = bio->bi_sector,
>> 591 .bi_size = bio->bi_size,
>> 592 .bi_rw = bio->bi_rw,
>> 593 };
>> 594
380 /* 595 /*
381 * merge_bvec_fn() returns num 596 * merge_bvec_fn() returns number of bytes it can accept
382 * at this offset 597 * at this offset
383 */ 598 */
384 if (q->merge_bvec_fn(q, bio, b !! 599 if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
385 bvec->bv_page = NULL; 600 bvec->bv_page = NULL;
386 bvec->bv_len = 0; 601 bvec->bv_len = 0;
387 bvec->bv_offset = 0; 602 bvec->bv_offset = 0;
388 return 0; 603 return 0;
389 } 604 }
390 } 605 }
391 606
392 /* If we may be able to merge these bi 607 /* If we may be able to merge these biovecs, force a recount */
393 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGE !! 608 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
394 BIOVEC_VIRT_MERGEABLE(bvec-1, bvec <<
395 bio->bi_flags &= ~(1 << BIO_SE 609 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
396 610
397 bio->bi_vcnt++; 611 bio->bi_vcnt++;
398 bio->bi_phys_segments++; 612 bio->bi_phys_segments++;
399 bio->bi_hw_segments++; <<
400 done: 613 done:
401 bio->bi_size += len; 614 bio->bi_size += len;
402 return len; 615 return len;
403 } 616 }
404 617
405 /** 618 /**
406 * bio_add_pc_page - attempt to add 619 * bio_add_pc_page - attempt to add page to bio
407 * @q: the target queue 620 * @q: the target queue
408 * @bio: destination bio 621 * @bio: destination bio
409 * @page: page to add 622 * @page: page to add
410 * @len: vec entry length 623 * @len: vec entry length
411 * @offset: vec entry offset 624 * @offset: vec entry offset
412 * 625 *
413 * Attempt to add a page to the bio_vec m 626 * Attempt to add a page to the bio_vec maplist. This can fail for a
414 * number of reasons, such as the bio bei 627 * number of reasons, such as the bio being full or target block
415 * device limitations. The target block d 628 * device limitations. The target block device must allow bio's
416 * smaller than PAGE_SIZE, so it is alway 629 * smaller than PAGE_SIZE, so it is always possible to add a single
417 * page to an empty bio. This should only 630 * page to an empty bio. This should only be used by REQ_PC bios.
418 */ 631 */
419 int bio_add_pc_page(struct request_queue *q, s 632 int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
420 unsigned int len, unsigned 633 unsigned int len, unsigned int offset)
421 { 634 {
422 return __bio_add_page(q, bio, page, le !! 635 return __bio_add_page(q, bio, page, len, offset,
>> 636 queue_max_hw_sectors(q));
423 } 637 }
424 638
425 /** 639 /**
426 * bio_add_page - attempt to add 640 * bio_add_page - attempt to add page to bio
427 * @bio: destination bio 641 * @bio: destination bio
428 * @page: page to add 642 * @page: page to add
429 * @len: vec entry length 643 * @len: vec entry length
430 * @offset: vec entry offset 644 * @offset: vec entry offset
431 * 645 *
432 * Attempt to add a page to the bio_vec m 646 * Attempt to add a page to the bio_vec maplist. This can fail for a
433 * number of reasons, such as the bio bei 647 * number of reasons, such as the bio being full or target block
434 * device limitations. The target block d 648 * device limitations. The target block device must allow bio's
435 * smaller than PAGE_SIZE, so it is alway 649 * smaller than PAGE_SIZE, so it is always possible to add a single
436 * page to an empty bio. 650 * page to an empty bio.
437 */ 651 */
438 int bio_add_page(struct bio *bio, struct page 652 int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
439 unsigned int offset) 653 unsigned int offset)
440 { 654 {
441 struct request_queue *q = bdev_get_que 655 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
442 return __bio_add_page(q, bio, page, le !! 656 return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
443 } 657 }
444 658
445 struct bio_map_data { 659 struct bio_map_data {
446 struct bio_vec *iovecs; 660 struct bio_vec *iovecs;
447 void __user *userptr; !! 661 struct sg_iovec *sgvecs;
>> 662 int nr_sgvecs;
>> 663 int is_our_pages;
448 }; 664 };
449 665
450 static void bio_set_map_data(struct bio_map_da !! 666 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
>> 667 struct sg_iovec *iov, int iov_count,
>> 668 int is_our_pages)
451 { 669 {
452 memcpy(bmd->iovecs, bio->bi_io_vec, si 670 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
>> 671 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
>> 672 bmd->nr_sgvecs = iov_count;
>> 673 bmd->is_our_pages = is_our_pages;
453 bio->bi_private = bmd; 674 bio->bi_private = bmd;
454 } 675 }
455 676
456 static void bio_free_map_data(struct bio_map_d 677 static void bio_free_map_data(struct bio_map_data *bmd)
457 { 678 {
458 kfree(bmd->iovecs); 679 kfree(bmd->iovecs);
>> 680 kfree(bmd->sgvecs);
459 kfree(bmd); 681 kfree(bmd);
460 } 682 }
461 683
462 static struct bio_map_data *bio_alloc_map_data !! 684 static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
>> 685 gfp_t gfp_mask)
463 { 686 {
464 struct bio_map_data *bmd = kmalloc(siz !! 687 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
465 688
466 if (!bmd) 689 if (!bmd)
467 return NULL; 690 return NULL;
468 691
469 bmd->iovecs = kmalloc(sizeof(struct bi !! 692 bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
470 if (bmd->iovecs) !! 693 if (!bmd->iovecs) {
>> 694 kfree(bmd);
>> 695 return NULL;
>> 696 }
>> 697
>> 698 bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
>> 699 if (bmd->sgvecs)
471 return bmd; 700 return bmd;
472 701
>> 702 kfree(bmd->iovecs);
473 kfree(bmd); 703 kfree(bmd);
474 return NULL; 704 return NULL;
475 } 705 }
476 706
>> 707 static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
>> 708 struct sg_iovec *iov, int iov_count,
>> 709 int to_user, int from_user, int do_free_page)
>> 710 {
>> 711 int ret = 0, i;
>> 712 struct bio_vec *bvec;
>> 713 int iov_idx = 0;
>> 714 unsigned int iov_off = 0;
>> 715
>> 716 __bio_for_each_segment(bvec, bio, i, 0) {
>> 717 char *bv_addr = page_address(bvec->bv_page);
>> 718 unsigned int bv_len = iovecs[i].bv_len;
>> 719
>> 720 while (bv_len && iov_idx < iov_count) {
>> 721 unsigned int bytes;
>> 722 char __user *iov_addr;
>> 723
>> 724 bytes = min_t(unsigned int,
>> 725 iov[iov_idx].iov_len - iov_off, bv_len);
>> 726 iov_addr = iov[iov_idx].iov_base + iov_off;
>> 727
>> 728 if (!ret) {
>> 729 if (to_user)
>> 730 ret = copy_to_user(iov_addr, bv_addr,
>> 731 bytes);
>> 732
>> 733 if (from_user)
>> 734 ret = copy_from_user(bv_addr, iov_addr,
>> 735 bytes);
>> 736
>> 737 if (ret)
>> 738 ret = -EFAULT;
>> 739 }
>> 740
>> 741 bv_len -= bytes;
>> 742 bv_addr += bytes;
>> 743 iov_addr += bytes;
>> 744 iov_off += bytes;
>> 745
>> 746 if (iov[iov_idx].iov_len == iov_off) {
>> 747 iov_idx++;
>> 748 iov_off = 0;
>> 749 }
>> 750 }
>> 751
>> 752 if (do_free_page)
>> 753 __free_page(bvec->bv_page);
>> 754 }
>> 755
>> 756 return ret;
>> 757 }
>> 758
477 /** 759 /**
478 * bio_uncopy_user - finish previou 760 * bio_uncopy_user - finish previously mapped bio
479 * @bio: bio being terminated 761 * @bio: bio being terminated
480 * 762 *
481 * Free pages allocated from bio_copy_use 763 * Free pages allocated from bio_copy_user() and write back data
482 * to user space in case of a read. 764 * to user space in case of a read.
483 */ 765 */
484 int bio_uncopy_user(struct bio *bio) 766 int bio_uncopy_user(struct bio *bio)
485 { 767 {
486 struct bio_map_data *bmd = bio->bi_pri 768 struct bio_map_data *bmd = bio->bi_private;
487 const int read = bio_data_dir(bio) == !! 769 int ret = 0;
488 struct bio_vec *bvec; <<
489 int i, ret = 0; <<
490 770
491 __bio_for_each_segment(bvec, bio, i, 0 !! 771 if (!bio_flagged(bio, BIO_NULL_MAPPED))
492 char *addr = page_address(bvec !! 772 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
493 unsigned int len = bmd->iovecs !! 773 bmd->nr_sgvecs, bio_data_dir(bio) == READ,
494 !! 774 0, bmd->is_our_pages);
495 if (read && !ret && copy_to_us <<
496 ret = -EFAULT; <<
497 <<
498 __free_page(bvec->bv_page); <<
499 bmd->userptr += len; <<
500 } <<
501 bio_free_map_data(bmd); 775 bio_free_map_data(bmd);
502 bio_put(bio); 776 bio_put(bio);
503 return ret; 777 return ret;
504 } 778 }
505 779
506 /** 780 /**
507 * bio_copy_user - copy user data !! 781 * bio_copy_user_iov - copy user data to bio
508 * @q: destination block queue 782 * @q: destination block queue
509 * @uaddr: start of user address !! 783 * @map_data: pointer to the rq_map_data holding pages (if necessary)
510 * @len: length in bytes !! 784 * @iov: the iovec.
>> 785 * @iov_count: number of elements in the iovec
511 * @write_to_vm: bool indicating writing 786 * @write_to_vm: bool indicating writing to pages or not
>> 787 * @gfp_mask: memory allocation flags
512 * 788 *
513 * Prepares and returns a bio for indirec 789 * Prepares and returns a bio for indirect user io, bouncing data
514 * to/from kernel pages as necessary. Mus 790 * to/from kernel pages as necessary. Must be paired with
515 * call bio_uncopy_user() on io completio 791 * call bio_uncopy_user() on io completion.
516 */ 792 */
517 struct bio *bio_copy_user(struct request_queue !! 793 struct bio *bio_copy_user_iov(struct request_queue *q,
518 unsigned int len, in !! 794 struct rq_map_data *map_data,
>> 795 struct sg_iovec *iov, int iov_count,
>> 796 int write_to_vm, gfp_t gfp_mask)
519 { 797 {
520 unsigned long end = (uaddr + len + PAG <<
521 unsigned long start = uaddr >> PAGE_SH <<
522 struct bio_map_data *bmd; 798 struct bio_map_data *bmd;
523 struct bio_vec *bvec; 799 struct bio_vec *bvec;
524 struct page *page; 800 struct page *page;
525 struct bio *bio; 801 struct bio *bio;
526 int i, ret; 802 int i, ret;
>> 803 int nr_pages = 0;
>> 804 unsigned int len = 0;
>> 805 unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
>> 806
>> 807 for (i = 0; i < iov_count; i++) {
>> 808 unsigned long uaddr;
>> 809 unsigned long end;
>> 810 unsigned long start;
>> 811
>> 812 uaddr = (unsigned long)iov[i].iov_base;
>> 813 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
>> 814 start = uaddr >> PAGE_SHIFT;
>> 815
>> 816 nr_pages += end - start;
>> 817 len += iov[i].iov_len;
>> 818 }
527 819
528 bmd = bio_alloc_map_data(end - start); !! 820 if (offset)
>> 821 nr_pages++;
>> 822
>> 823 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
529 if (!bmd) 824 if (!bmd)
530 return ERR_PTR(-ENOMEM); 825 return ERR_PTR(-ENOMEM);
531 826
532 bmd->userptr = (void __user *) uaddr; <<
533 <<
534 ret = -ENOMEM; 827 ret = -ENOMEM;
535 bio = bio_alloc(GFP_KERNEL, end - star !! 828 bio = bio_kmalloc(gfp_mask, nr_pages);
536 if (!bio) 829 if (!bio)
537 goto out_bmd; 830 goto out_bmd;
538 831
539 bio->bi_rw |= (!write_to_vm << BIO_RW) 832 bio->bi_rw |= (!write_to_vm << BIO_RW);
540 833
541 ret = 0; 834 ret = 0;
>> 835
>> 836 if (map_data) {
>> 837 nr_pages = 1 << map_data->page_order;
>> 838 i = map_data->offset / PAGE_SIZE;
>> 839 }
542 while (len) { 840 while (len) {
543 unsigned int bytes = PAGE_SIZE 841 unsigned int bytes = PAGE_SIZE;
544 842
>> 843 bytes -= offset;
>> 844
545 if (bytes > len) 845 if (bytes > len)
546 bytes = len; 846 bytes = len;
547 847
548 page = alloc_page(q->bounce_gf !! 848 if (map_data) {
549 if (!page) { !! 849 if (i == map_data->nr_entries * nr_pages) {
550 ret = -ENOMEM; !! 850 ret = -ENOMEM;
551 break; !! 851 break;
>> 852 }
>> 853
>> 854 page = map_data->pages[i / nr_pages];
>> 855 page += (i % nr_pages);
>> 856
>> 857 i++;
>> 858 } else {
>> 859 page = alloc_page(q->bounce_gfp | gfp_mask);
>> 860 if (!page) {
>> 861 ret = -ENOMEM;
>> 862 break;
>> 863 }
552 } 864 }
553 865
554 if (bio_add_pc_page(q, bio, pa !! 866 if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
555 break; 867 break;
556 868
557 len -= bytes; 869 len -= bytes;
>> 870 offset = 0;
558 } 871 }
559 872
560 if (ret) 873 if (ret)
561 goto cleanup; 874 goto cleanup;
562 875
563 /* 876 /*
564 * success 877 * success
565 */ 878 */
566 if (!write_to_vm) { !! 879 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
567 char __user *p = (char __user !! 880 (map_data && map_data->from_user)) {
568 !! 881 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
569 /* !! 882 if (ret)
570 * for a write, copy in data t !! 883 goto cleanup;
571 */ <<
572 ret = -EFAULT; <<
573 bio_for_each_segment(bvec, bio <<
574 char *addr = page_addr <<
575 <<
576 if (copy_from_user(add <<
577 goto cleanup; <<
578 p += bvec->bv_len; <<
579 } <<
580 } 884 }
581 885
582 bio_set_map_data(bmd, bio); !! 886 bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
583 return bio; 887 return bio;
584 cleanup: 888 cleanup:
585 bio_for_each_segment(bvec, bio, i) !! 889 if (!map_data)
586 __free_page(bvec->bv_page); !! 890 bio_for_each_segment(bvec, bio, i)
>> 891 __free_page(bvec->bv_page);
587 892
588 bio_put(bio); 893 bio_put(bio);
589 out_bmd: 894 out_bmd:
590 bio_free_map_data(bmd); 895 bio_free_map_data(bmd);
591 return ERR_PTR(ret); 896 return ERR_PTR(ret);
592 } 897 }
593 898
>> 899 /**
>> 900 * bio_copy_user - copy user data to bio
>> 901 * @q: destination block queue
>> 902 * @map_data: pointer to the rq_map_data holding pages (if necessary)
>> 903 * @uaddr: start of user address
>> 904 * @len: length in bytes
>> 905 * @write_to_vm: bool indicating writing to pages or not
>> 906 * @gfp_mask: memory allocation flags
>> 907 *
>> 908 * Prepares and returns a bio for indirect user io, bouncing data
>> 909 * to/from kernel pages as necessary. Must be paired with
>> 910 * call bio_uncopy_user() on io completion.
>> 911 */
>> 912 struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
>> 913 unsigned long uaddr, unsigned int len,
>> 914 int write_to_vm, gfp_t gfp_mask)
>> 915 {
>> 916 struct sg_iovec iov;
>> 917
>> 918 iov.iov_base = (void __user *)uaddr;
>> 919 iov.iov_len = len;
>> 920
>> 921 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
>> 922 }
>> 923
594 static struct bio *__bio_map_user_iov(struct r 924 static struct bio *__bio_map_user_iov(struct request_queue *q,
595 struct b 925 struct block_device *bdev,
596 struct s 926 struct sg_iovec *iov, int iov_count,
597 int writ !! 927 int write_to_vm, gfp_t gfp_mask)
598 { 928 {
599 int i, j; 929 int i, j;
600 int nr_pages = 0; 930 int nr_pages = 0;
601 struct page **pages; 931 struct page **pages;
602 struct bio *bio; 932 struct bio *bio;
603 int cur_page = 0; 933 int cur_page = 0;
604 int ret, offset; 934 int ret, offset;
605 935
606 for (i = 0; i < iov_count; i++) { 936 for (i = 0; i < iov_count; i++) {
607 unsigned long uaddr = (unsigne 937 unsigned long uaddr = (unsigned long)iov[i].iov_base;
608 unsigned long len = iov[i].iov 938 unsigned long len = iov[i].iov_len;
609 unsigned long end = (uaddr + l 939 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
610 unsigned long start = uaddr >> 940 unsigned long start = uaddr >> PAGE_SHIFT;
611 941
612 nr_pages += end - start; 942 nr_pages += end - start;
613 /* 943 /*
614 * buffer must be aligned to a 944 * buffer must be aligned to at least hardsector size for now
615 */ 945 */
616 if (uaddr & queue_dma_alignmen 946 if (uaddr & queue_dma_alignment(q))
617 return ERR_PTR(-EINVAL 947 return ERR_PTR(-EINVAL);
618 } 948 }
619 949
620 if (!nr_pages) 950 if (!nr_pages)
621 return ERR_PTR(-EINVAL); 951 return ERR_PTR(-EINVAL);
622 952
623 bio = bio_alloc(GFP_KERNEL, nr_pages); !! 953 bio = bio_kmalloc(gfp_mask, nr_pages);
624 if (!bio) 954 if (!bio)
625 return ERR_PTR(-ENOMEM); 955 return ERR_PTR(-ENOMEM);
626 956
627 ret = -ENOMEM; 957 ret = -ENOMEM;
628 pages = kcalloc(nr_pages, sizeof(struc !! 958 pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
629 if (!pages) 959 if (!pages)
630 goto out; 960 goto out;
631 961
632 for (i = 0; i < iov_count; i++) { 962 for (i = 0; i < iov_count; i++) {
633 unsigned long uaddr = (unsigne 963 unsigned long uaddr = (unsigned long)iov[i].iov_base;
634 unsigned long len = iov[i].iov 964 unsigned long len = iov[i].iov_len;
635 unsigned long end = (uaddr + l 965 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
636 unsigned long start = uaddr >> 966 unsigned long start = uaddr >> PAGE_SHIFT;
637 const int local_nr_pages = end 967 const int local_nr_pages = end - start;
638 const int page_limit = cur_pag 968 const int page_limit = cur_page + local_nr_pages;
639 969
640 down_read(¤t->mm->mmap_s !! 970 ret = get_user_pages_fast(uaddr, local_nr_pages,
641 ret = get_user_pages(current, !! 971 write_to_vm, &pages[cur_page]);
642 local_nr_ <<
643 write_to_ <<
644 up_read(¤t->mm->mmap_sem <<
645 <<
646 if (ret < local_nr_pages) { 972 if (ret < local_nr_pages) {
647 ret = -EFAULT; 973 ret = -EFAULT;
648 goto out_unmap; 974 goto out_unmap;
649 } 975 }
650 976
651 offset = uaddr & ~PAGE_MASK; 977 offset = uaddr & ~PAGE_MASK;
652 for (j = cur_page; j < page_li 978 for (j = cur_page; j < page_limit; j++) {
653 unsigned int bytes = P 979 unsigned int bytes = PAGE_SIZE - offset;
654 980
655 if (len <= 0) 981 if (len <= 0)
656 break; 982 break;
657 983
658 if (bytes > len) 984 if (bytes > len)
659 bytes = len; 985 bytes = len;
660 986
661 /* 987 /*
662 * sorry... 988 * sorry...
663 */ 989 */
664 if (bio_add_pc_page(q, 990 if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
665 by 991 bytes)
666 break; 992 break;
667 993
668 len -= bytes; 994 len -= bytes;
669 offset = 0; 995 offset = 0;
670 } 996 }
671 997
672 cur_page = j; 998 cur_page = j;
673 /* 999 /*
674 * release the pages we didn't 1000 * release the pages we didn't map into the bio, if any
675 */ 1001 */
676 while (j < page_limit) 1002 while (j < page_limit)
677 page_cache_release(pag 1003 page_cache_release(pages[j++]);
678 } 1004 }
679 1005
680 kfree(pages); 1006 kfree(pages);
681 1007
682 /* 1008 /*
683 * set data direction, and check if ma 1009 * set data direction, and check if mapped pages need bouncing
684 */ 1010 */
685 if (!write_to_vm) 1011 if (!write_to_vm)
686 bio->bi_rw |= (1 << BIO_RW); 1012 bio->bi_rw |= (1 << BIO_RW);
687 1013
688 bio->bi_bdev = bdev; 1014 bio->bi_bdev = bdev;
689 bio->bi_flags |= (1 << BIO_USER_MAPPED 1015 bio->bi_flags |= (1 << BIO_USER_MAPPED);
690 return bio; 1016 return bio;
691 1017
692 out_unmap: 1018 out_unmap:
693 for (i = 0; i < nr_pages; i++) { 1019 for (i = 0; i < nr_pages; i++) {
694 if(!pages[i]) 1020 if(!pages[i])
695 break; 1021 break;
696 page_cache_release(pages[i]); 1022 page_cache_release(pages[i]);
697 } 1023 }
698 out: 1024 out:
699 kfree(pages); 1025 kfree(pages);
700 bio_put(bio); 1026 bio_put(bio);
701 return ERR_PTR(ret); 1027 return ERR_PTR(ret);
702 } 1028 }
703 1029
704 /** 1030 /**
705 * bio_map_user - map user addre 1031 * bio_map_user - map user address into bio
706 * @q: the struct request_queue for the b 1032 * @q: the struct request_queue for the bio
707 * @bdev: destination block device 1033 * @bdev: destination block device
708 * @uaddr: start of user address 1034 * @uaddr: start of user address
709 * @len: length in bytes 1035 * @len: length in bytes
710 * @write_to_vm: bool indicating writing 1036 * @write_to_vm: bool indicating writing to pages or not
>> 1037 * @gfp_mask: memory allocation flags
711 * 1038 *
712 * Map the user space address into a bio 1039 * Map the user space address into a bio suitable for io to a block
713 * device. Returns an error pointer in ca 1040 * device. Returns an error pointer in case of error.
714 */ 1041 */
715 struct bio *bio_map_user(struct request_queue 1042 struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
716 unsigned long uaddr, !! 1043 unsigned long uaddr, unsigned int len, int write_to_vm,
>> 1044 gfp_t gfp_mask)
717 { 1045 {
718 struct sg_iovec iov; 1046 struct sg_iovec iov;
719 1047
720 iov.iov_base = (void __user *)uaddr; 1048 iov.iov_base = (void __user *)uaddr;
721 iov.iov_len = len; 1049 iov.iov_len = len;
722 1050
723 return bio_map_user_iov(q, bdev, &iov, !! 1051 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
724 } 1052 }
725 1053
726 /** 1054 /**
727 * bio_map_user_iov - map user sg_iovec t 1055 * bio_map_user_iov - map user sg_iovec table into bio
728 * @q: the struct request_queue for the b 1056 * @q: the struct request_queue for the bio
729 * @bdev: destination block device 1057 * @bdev: destination block device
730 * @iov: the iovec. 1058 * @iov: the iovec.
731 * @iov_count: number of elements in the 1059 * @iov_count: number of elements in the iovec
732 * @write_to_vm: bool indicating writing 1060 * @write_to_vm: bool indicating writing to pages or not
>> 1061 * @gfp_mask: memory allocation flags
733 * 1062 *
734 * Map the user space address into a bio 1063 * Map the user space address into a bio suitable for io to a block
735 * device. Returns an error pointer in ca 1064 * device. Returns an error pointer in case of error.
736 */ 1065 */
737 struct bio *bio_map_user_iov(struct request_qu 1066 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
738 struct sg_iovec * 1067 struct sg_iovec *iov, int iov_count,
739 int write_to_vm) !! 1068 int write_to_vm, gfp_t gfp_mask)
740 { 1069 {
741 struct bio *bio; 1070 struct bio *bio;
742 1071
743 bio = __bio_map_user_iov(q, bdev, iov, !! 1072 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
744 !! 1073 gfp_mask);
745 if (IS_ERR(bio)) 1074 if (IS_ERR(bio))
746 return bio; 1075 return bio;
747 1076
748 /* 1077 /*
749 * subtle -- if __bio_map_user() ended 1078 * subtle -- if __bio_map_user() ended up bouncing a bio,
750 * it would normally disappear when it 1079 * it would normally disappear when its bi_end_io is run.
751 * however, we need it for the unmap, 1080 * however, we need it for the unmap, so grab an extra
752 * reference to it 1081 * reference to it
753 */ 1082 */
754 bio_get(bio); 1083 bio_get(bio);
755 1084
756 return bio; 1085 return bio;
757 } 1086 }
758 1087
759 static void __bio_unmap_user(struct bio *bio) 1088 static void __bio_unmap_user(struct bio *bio)
760 { 1089 {
761 struct bio_vec *bvec; 1090 struct bio_vec *bvec;
762 int i; 1091 int i;
763 1092
764 /* 1093 /*
765 * make sure we dirty pages we wrote t 1094 * make sure we dirty pages we wrote to
766 */ 1095 */
767 __bio_for_each_segment(bvec, bio, i, 0 1096 __bio_for_each_segment(bvec, bio, i, 0) {
768 if (bio_data_dir(bio) == READ) 1097 if (bio_data_dir(bio) == READ)
769 set_page_dirty_lock(bv 1098 set_page_dirty_lock(bvec->bv_page);
770 1099
771 page_cache_release(bvec->bv_pa 1100 page_cache_release(bvec->bv_page);
772 } 1101 }
773 1102
774 bio_put(bio); 1103 bio_put(bio);
775 } 1104 }
776 1105
777 /** 1106 /**
778 * bio_unmap_user - unmap a bio 1107 * bio_unmap_user - unmap a bio
779 * @bio: the bio being unmapped 1108 * @bio: the bio being unmapped
780 * 1109 *
781 * Unmap a bio previously mapped by bio_m 1110 * Unmap a bio previously mapped by bio_map_user(). Must be called with
782 * a process context. 1111 * a process context.
783 * 1112 *
784 * bio_unmap_user() may sleep. 1113 * bio_unmap_user() may sleep.
785 */ 1114 */
786 void bio_unmap_user(struct bio *bio) 1115 void bio_unmap_user(struct bio *bio)
787 { 1116 {
788 __bio_unmap_user(bio); 1117 __bio_unmap_user(bio);
789 bio_put(bio); 1118 bio_put(bio);
790 } 1119 }
791 1120
792 static void bio_map_kern_endio(struct bio *bio 1121 static void bio_map_kern_endio(struct bio *bio, int err)
793 { 1122 {
794 bio_put(bio); 1123 bio_put(bio);
795 } 1124 }
796 1125
797 1126
798 static struct bio *__bio_map_kern(struct reque 1127 static struct bio *__bio_map_kern(struct request_queue *q, void *data,
799 unsigned int 1128 unsigned int len, gfp_t gfp_mask)
800 { 1129 {
801 unsigned long kaddr = (unsigned long)d 1130 unsigned long kaddr = (unsigned long)data;
802 unsigned long end = (kaddr + len + PAG 1131 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
803 unsigned long start = kaddr >> PAGE_SH 1132 unsigned long start = kaddr >> PAGE_SHIFT;
804 const int nr_pages = end - start; 1133 const int nr_pages = end - start;
805 int offset, i; 1134 int offset, i;
806 struct bio *bio; 1135 struct bio *bio;
807 1136
808 bio = bio_alloc(gfp_mask, nr_pages); !! 1137 bio = bio_kmalloc(gfp_mask, nr_pages);
809 if (!bio) 1138 if (!bio)
810 return ERR_PTR(-ENOMEM); 1139 return ERR_PTR(-ENOMEM);
811 1140
812 offset = offset_in_page(kaddr); 1141 offset = offset_in_page(kaddr);
813 for (i = 0; i < nr_pages; i++) { 1142 for (i = 0; i < nr_pages; i++) {
814 unsigned int bytes = PAGE_SIZE 1143 unsigned int bytes = PAGE_SIZE - offset;
815 1144
816 if (len <= 0) 1145 if (len <= 0)
817 break; 1146 break;
818 1147
819 if (bytes > len) 1148 if (bytes > len)
820 bytes = len; 1149 bytes = len;
821 1150
822 if (bio_add_pc_page(q, bio, vi 1151 if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
823 offset) < 1152 offset) < bytes)
824 break; 1153 break;
825 1154
826 data += bytes; 1155 data += bytes;
827 len -= bytes; 1156 len -= bytes;
828 offset = 0; 1157 offset = 0;
829 } 1158 }
830 1159
831 bio->bi_end_io = bio_map_kern_endio; 1160 bio->bi_end_io = bio_map_kern_endio;
832 return bio; 1161 return bio;
833 } 1162 }
834 1163
835 /** 1164 /**
836 * bio_map_kern - map kernel add 1165 * bio_map_kern - map kernel address into bio
837 * @q: the struct request_queue for the b 1166 * @q: the struct request_queue for the bio
838 * @data: pointer to buffer to map 1167 * @data: pointer to buffer to map
839 * @len: length in bytes 1168 * @len: length in bytes
840 * @gfp_mask: allocation flags for bio al 1169 * @gfp_mask: allocation flags for bio allocation
841 * 1170 *
842 * Map the kernel address into a bio suit 1171 * Map the kernel address into a bio suitable for io to a block
843 * device. Returns an error pointer in ca 1172 * device. Returns an error pointer in case of error.
844 */ 1173 */
845 struct bio *bio_map_kern(struct request_queue 1174 struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
846 gfp_t gfp_mask) 1175 gfp_t gfp_mask)
847 { 1176 {
848 struct bio *bio; 1177 struct bio *bio;
849 1178
850 bio = __bio_map_kern(q, data, len, gfp 1179 bio = __bio_map_kern(q, data, len, gfp_mask);
851 if (IS_ERR(bio)) 1180 if (IS_ERR(bio))
852 return bio; 1181 return bio;
853 1182
854 if (bio->bi_size == len) 1183 if (bio->bi_size == len)
855 return bio; 1184 return bio;
856 1185
857 /* 1186 /*
858 * Don't support partial mappings. 1187 * Don't support partial mappings.
859 */ 1188 */
860 bio_put(bio); 1189 bio_put(bio);
861 return ERR_PTR(-EINVAL); 1190 return ERR_PTR(-EINVAL);
862 } 1191 }
863 1192
>> 1193 static void bio_copy_kern_endio(struct bio *bio, int err)
>> 1194 {
>> 1195 struct bio_vec *bvec;
>> 1196 const int read = bio_data_dir(bio) == READ;
>> 1197 struct bio_map_data *bmd = bio->bi_private;
>> 1198 int i;
>> 1199 char *p = bmd->sgvecs[0].iov_base;
>> 1200
>> 1201 __bio_for_each_segment(bvec, bio, i, 0) {
>> 1202 char *addr = page_address(bvec->bv_page);
>> 1203 int len = bmd->iovecs[i].bv_len;
>> 1204
>> 1205 if (read)
>> 1206 memcpy(p, addr, len);
>> 1207
>> 1208 __free_page(bvec->bv_page);
>> 1209 p += len;
>> 1210 }
>> 1211
>> 1212 bio_free_map_data(bmd);
>> 1213 bio_put(bio);
>> 1214 }
>> 1215
>> 1216 /**
>> 1217 * bio_copy_kern - copy kernel address into bio
>> 1218 * @q: the struct request_queue for the bio
>> 1219 * @data: pointer to buffer to copy
>> 1220 * @len: length in bytes
>> 1221 * @gfp_mask: allocation flags for bio and page allocation
>> 1222 * @reading: data direction is READ
>> 1223 *
>> 1224 * copy the kernel address into a bio suitable for io to a block
>> 1225 * device. Returns an error pointer in case of error.
>> 1226 */
>> 1227 struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
>> 1228 gfp_t gfp_mask, int reading)
>> 1229 {
>> 1230 struct bio *bio;
>> 1231 struct bio_vec *bvec;
>> 1232 int i;
>> 1233
>> 1234 bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
>> 1235 if (IS_ERR(bio))
>> 1236 return bio;
>> 1237
>> 1238 if (!reading) {
>> 1239 void *p = data;
>> 1240
>> 1241 bio_for_each_segment(bvec, bio, i) {
>> 1242 char *addr = page_address(bvec->bv_page);
>> 1243
>> 1244 memcpy(addr, p, bvec->bv_len);
>> 1245 p += bvec->bv_len;
>> 1246 }
>> 1247 }
>> 1248
>> 1249 bio->bi_end_io = bio_copy_kern_endio;
>> 1250
>> 1251 return bio;
>> 1252 }
>> 1253
864 /* 1254 /*
865 * bio_set_pages_dirty() and bio_check_pages_d 1255 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
866 * for performing direct-IO in BIOs. 1256 * for performing direct-IO in BIOs.
867 * 1257 *
868 * The problem is that we cannot run set_page_ 1258 * The problem is that we cannot run set_page_dirty() from interrupt context
869 * because the required locks are not interrup 1259 * because the required locks are not interrupt-safe. So what we can do is to
870 * mark the pages dirty _before_ performing IO 1260 * mark the pages dirty _before_ performing IO. And in interrupt context,
871 * check that the pages are still dirty. If 1261 * check that the pages are still dirty. If so, fine. If not, redirty them
872 * in process context. 1262 * in process context.
873 * 1263 *
874 * We special-case compound pages here: normal 1264 * We special-case compound pages here: normally this means reads into hugetlb
875 * pages. The logic in here doesn't really wo 1265 * pages. The logic in here doesn't really work right for compound pages
876 * because the VM does not uniformly chase dow 1266 * because the VM does not uniformly chase down the head page in all cases.
877 * But dirtiness of compound pages is pretty m 1267 * But dirtiness of compound pages is pretty meaningless anyway: the VM doesn't
878 * handle them at all. So we skip compound pa 1268 * handle them at all. So we skip compound pages here at an early stage.
879 * 1269 *
880 * Note that this code is very hard to test un 1270 * Note that this code is very hard to test under normal circumstances because
881 * direct-io pins the pages with get_user_page 1271 * direct-io pins the pages with get_user_pages(). This makes
882 * is_page_cache_freeable return false, and th 1272 * is_page_cache_freeable return false, and the VM will not clean the pages.
883 * But other code (eg, pdflush) could clean th 1273 * But other code (eg, pdflush) could clean the pages if they are mapped
884 * pagecache. 1274 * pagecache.
885 * 1275 *
886 * Simply disabling the call to bio_set_pages_ 1276 * Simply disabling the call to bio_set_pages_dirty() is a good way to test the
887 * deferred bio dirtying paths. 1277 * deferred bio dirtying paths.
888 */ 1278 */
889 1279
890 /* 1280 /*
891 * bio_set_pages_dirty() will mark all the bio 1281 * bio_set_pages_dirty() will mark all the bio's pages as dirty.
892 */ 1282 */
893 void bio_set_pages_dirty(struct bio *bio) 1283 void bio_set_pages_dirty(struct bio *bio)
894 { 1284 {
895 struct bio_vec *bvec = bio->bi_io_vec; 1285 struct bio_vec *bvec = bio->bi_io_vec;
896 int i; 1286 int i;
897 1287
898 for (i = 0; i < bio->bi_vcnt; i++) { 1288 for (i = 0; i < bio->bi_vcnt; i++) {
899 struct page *page = bvec[i].bv 1289 struct page *page = bvec[i].bv_page;
900 1290
901 if (page && !PageCompound(page 1291 if (page && !PageCompound(page))
902 set_page_dirty_lock(pa 1292 set_page_dirty_lock(page);
903 } 1293 }
904 } 1294 }
905 1295
906 static void bio_release_pages(struct bio *bio) 1296 static void bio_release_pages(struct bio *bio)
907 { 1297 {
908 struct bio_vec *bvec = bio->bi_io_vec; 1298 struct bio_vec *bvec = bio->bi_io_vec;
909 int i; 1299 int i;
910 1300
911 for (i = 0; i < bio->bi_vcnt; i++) { 1301 for (i = 0; i < bio->bi_vcnt; i++) {
912 struct page *page = bvec[i].bv 1302 struct page *page = bvec[i].bv_page;
913 1303
914 if (page) 1304 if (page)
915 put_page(page); 1305 put_page(page);
916 } 1306 }
917 } 1307 }
918 1308
919 /* 1309 /*
920 * bio_check_pages_dirty() will check that all 1310 * bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
921 * If they are, then fine. If, however, some 1311 * If they are, then fine. If, however, some pages are clean then they must
922 * have been written out during the direct-IO 1312 * have been written out during the direct-IO read. So we take another ref on
923 * the BIO and the offending pages and re-dirt 1313 * the BIO and the offending pages and re-dirty the pages in process context.
924 * 1314 *
925 * It is expected that bio_check_pages_dirty() 1315 * It is expected that bio_check_pages_dirty() will wholly own the BIO from
926 * here on. It will run one page_cache_releas 1316 * here on. It will run one page_cache_release() against each page and will
927 * run one bio_put() against the BIO. 1317 * run one bio_put() against the BIO.
928 */ 1318 */
929 1319
930 static void bio_dirty_fn(struct work_struct *w 1320 static void bio_dirty_fn(struct work_struct *work);
931 1321
932 static DECLARE_WORK(bio_dirty_work, bio_dirty_ 1322 static DECLARE_WORK(bio_dirty_work, bio_dirty_fn);
933 static DEFINE_SPINLOCK(bio_dirty_lock); 1323 static DEFINE_SPINLOCK(bio_dirty_lock);
934 static struct bio *bio_dirty_list; 1324 static struct bio *bio_dirty_list;
935 1325
936 /* 1326 /*
937 * This runs in process context 1327 * This runs in process context
938 */ 1328 */
939 static void bio_dirty_fn(struct work_struct *w 1329 static void bio_dirty_fn(struct work_struct *work)
940 { 1330 {
941 unsigned long flags; 1331 unsigned long flags;
942 struct bio *bio; 1332 struct bio *bio;
943 1333
944 spin_lock_irqsave(&bio_dirty_lock, fla 1334 spin_lock_irqsave(&bio_dirty_lock, flags);
945 bio = bio_dirty_list; 1335 bio = bio_dirty_list;
946 bio_dirty_list = NULL; 1336 bio_dirty_list = NULL;
947 spin_unlock_irqrestore(&bio_dirty_lock 1337 spin_unlock_irqrestore(&bio_dirty_lock, flags);
948 1338
949 while (bio) { 1339 while (bio) {
950 struct bio *next = bio->bi_pri 1340 struct bio *next = bio->bi_private;
951 1341
952 bio_set_pages_dirty(bio); 1342 bio_set_pages_dirty(bio);
953 bio_release_pages(bio); 1343 bio_release_pages(bio);
954 bio_put(bio); 1344 bio_put(bio);
955 bio = next; 1345 bio = next;
956 } 1346 }
957 } 1347 }
958 1348
959 void bio_check_pages_dirty(struct bio *bio) 1349 void bio_check_pages_dirty(struct bio *bio)
960 { 1350 {
961 struct bio_vec *bvec = bio->bi_io_vec; 1351 struct bio_vec *bvec = bio->bi_io_vec;
962 int nr_clean_pages = 0; 1352 int nr_clean_pages = 0;
963 int i; 1353 int i;
964 1354
965 for (i = 0; i < bio->bi_vcnt; i++) { 1355 for (i = 0; i < bio->bi_vcnt; i++) {
966 struct page *page = bvec[i].bv 1356 struct page *page = bvec[i].bv_page;
967 1357
968 if (PageDirty(page) || PageCom 1358 if (PageDirty(page) || PageCompound(page)) {
969 page_cache_release(pag 1359 page_cache_release(page);
970 bvec[i].bv_page = NULL 1360 bvec[i].bv_page = NULL;
971 } else { 1361 } else {
972 nr_clean_pages++; 1362 nr_clean_pages++;
973 } 1363 }
974 } 1364 }
975 1365
976 if (nr_clean_pages) { 1366 if (nr_clean_pages) {
977 unsigned long flags; 1367 unsigned long flags;
978 1368
979 spin_lock_irqsave(&bio_dirty_l 1369 spin_lock_irqsave(&bio_dirty_lock, flags);
980 bio->bi_private = bio_dirty_li 1370 bio->bi_private = bio_dirty_list;
981 bio_dirty_list = bio; 1371 bio_dirty_list = bio;
982 spin_unlock_irqrestore(&bio_di 1372 spin_unlock_irqrestore(&bio_dirty_lock, flags);
983 schedule_work(&bio_dirty_work) 1373 schedule_work(&bio_dirty_work);
984 } else { 1374 } else {
985 bio_put(bio); 1375 bio_put(bio);
986 } 1376 }
987 } 1377 }
988 1378
989 /** 1379 /**
990 * bio_endio - end I/O on a bio 1380 * bio_endio - end I/O on a bio
991 * @bio: bio 1381 * @bio: bio
992 * @error: error, if any 1382 * @error: error, if any
993 * 1383 *
994 * Description: 1384 * Description:
995 * bio_endio() will end I/O on the whole bio 1385 * bio_endio() will end I/O on the whole bio. bio_endio() is the
996 * preferred way to end I/O on a bio, it tak 1386 * preferred way to end I/O on a bio, it takes care of clearing
997 * BIO_UPTODATE on error. @error is 0 on suc 1387 * BIO_UPTODATE on error. @error is 0 on success, and and one of the
998 * established -Exxxx (-EIO, for instance) e 1388 * established -Exxxx (-EIO, for instance) error values in case
999 * something went wrong. Noone should call b 1389 * something went wrong. Noone should call bi_end_io() directly on a
1000 * bio unless they own it and thus know tha 1390 * bio unless they own it and thus know that it has an end_io
1001 * function. 1391 * function.
1002 **/ 1392 **/
1003 void bio_endio(struct bio *bio, int error) 1393 void bio_endio(struct bio *bio, int error)
1004 { 1394 {
1005 if (error) 1395 if (error)
1006 clear_bit(BIO_UPTODATE, &bio- 1396 clear_bit(BIO_UPTODATE, &bio->bi_flags);
1007 else if (!test_bit(BIO_UPTODATE, &bio 1397 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1008 error = -EIO; 1398 error = -EIO;
1009 1399
1010 if (bio->bi_end_io) 1400 if (bio->bi_end_io)
1011 bio->bi_end_io(bio, error); 1401 bio->bi_end_io(bio, error);
1012 } 1402 }
1013 1403
1014 void bio_pair_release(struct bio_pair *bp) 1404 void bio_pair_release(struct bio_pair *bp)
1015 { 1405 {
1016 if (atomic_dec_and_test(&bp->cnt)) { 1406 if (atomic_dec_and_test(&bp->cnt)) {
1017 struct bio *master = bp->bio1 1407 struct bio *master = bp->bio1.bi_private;
1018 1408
1019 bio_endio(master, bp->error); 1409 bio_endio(master, bp->error);
1020 mempool_free(bp, bp->bio2.bi_ 1410 mempool_free(bp, bp->bio2.bi_private);
1021 } 1411 }
1022 } 1412 }
1023 1413
1024 static void bio_pair_end_1(struct bio *bi, in 1414 static void bio_pair_end_1(struct bio *bi, int err)
1025 { 1415 {
1026 struct bio_pair *bp = container_of(bi 1416 struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
1027 1417
1028 if (err) 1418 if (err)
1029 bp->error = err; 1419 bp->error = err;
1030 1420
1031 bio_pair_release(bp); 1421 bio_pair_release(bp);
1032 } 1422 }
1033 1423
1034 static void bio_pair_end_2(struct bio *bi, in 1424 static void bio_pair_end_2(struct bio *bi, int err)
1035 { 1425 {
1036 struct bio_pair *bp = container_of(bi 1426 struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
1037 1427
1038 if (err) 1428 if (err)
1039 bp->error = err; 1429 bp->error = err;
1040 1430
1041 bio_pair_release(bp); 1431 bio_pair_release(bp);
1042 } 1432 }
1043 1433
1044 /* 1434 /*
1045 * split a bio - only worry about a bio with !! 1435 * split a bio - only worry about a bio with a single page in its iovec
1046 * in it's iovec <<
1047 */ 1436 */
1048 struct bio_pair *bio_split(struct bio *bi, me !! 1437 struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1049 { 1438 {
1050 struct bio_pair *bp = mempool_alloc(p !! 1439 struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
1051 1440
1052 if (!bp) 1441 if (!bp)
1053 return bp; 1442 return bp;
1054 1443
1055 blk_add_trace_pdu_int(bdev_get_queue( !! 1444 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1056 bi->bi_sector 1445 bi->bi_sector + first_sectors);
1057 1446
1058 BUG_ON(bi->bi_vcnt != 1); 1447 BUG_ON(bi->bi_vcnt != 1);
1059 BUG_ON(bi->bi_idx != 0); 1448 BUG_ON(bi->bi_idx != 0);
1060 atomic_set(&bp->cnt, 3); 1449 atomic_set(&bp->cnt, 3);
1061 bp->error = 0; 1450 bp->error = 0;
1062 bp->bio1 = *bi; 1451 bp->bio1 = *bi;
1063 bp->bio2 = *bi; 1452 bp->bio2 = *bi;
1064 bp->bio2.bi_sector += first_sectors; 1453 bp->bio2.bi_sector += first_sectors;
1065 bp->bio2.bi_size -= first_sectors << 1454 bp->bio2.bi_size -= first_sectors << 9;
1066 bp->bio1.bi_size = first_sectors << 9 1455 bp->bio1.bi_size = first_sectors << 9;
1067 1456
1068 bp->bv1 = bi->bi_io_vec[0]; 1457 bp->bv1 = bi->bi_io_vec[0];
1069 bp->bv2 = bi->bi_io_vec[0]; 1458 bp->bv2 = bi->bi_io_vec[0];
1070 bp->bv2.bv_offset += first_sectors << 1459 bp->bv2.bv_offset += first_sectors << 9;
1071 bp->bv2.bv_len -= first_sectors << 9; 1460 bp->bv2.bv_len -= first_sectors << 9;
1072 bp->bv1.bv_len = first_sectors << 9; 1461 bp->bv1.bv_len = first_sectors << 9;
1073 1462
1074 bp->bio1.bi_io_vec = &bp->bv1; 1463 bp->bio1.bi_io_vec = &bp->bv1;
1075 bp->bio2.bi_io_vec = &bp->bv2; 1464 bp->bio2.bi_io_vec = &bp->bv2;
1076 1465
1077 bp->bio1.bi_max_vecs = 1; 1466 bp->bio1.bi_max_vecs = 1;
1078 bp->bio2.bi_max_vecs = 1; 1467 bp->bio2.bi_max_vecs = 1;
1079 1468
1080 bp->bio1.bi_end_io = bio_pair_end_1; 1469 bp->bio1.bi_end_io = bio_pair_end_1;
1081 bp->bio2.bi_end_io = bio_pair_end_2; 1470 bp->bio2.bi_end_io = bio_pair_end_2;
1082 1471
1083 bp->bio1.bi_private = bi; 1472 bp->bio1.bi_private = bi;
1084 bp->bio2.bi_private = pool; !! 1473 bp->bio2.bi_private = bio_split_pool;
>> 1474
>> 1475 if (bio_integrity(bi))
>> 1476 bio_integrity_split(bi, bp, first_sectors);
1085 1477
1086 return bp; 1478 return bp;
1087 } 1479 }
1088 1480
>> 1481 /**
>> 1482 * bio_sector_offset - Find hardware sector offset in bio
>> 1483 * @bio: bio to inspect
>> 1484 * @index: bio_vec index
>> 1485 * @offset: offset in bv_page
>> 1486 *
>> 1487 * Return the number of hardware sectors between beginning of bio
>> 1488 * and an end point indicated by a bio_vec index and an offset
>> 1489 * within that vector's page.
>> 1490 */
>> 1491 sector_t bio_sector_offset(struct bio *bio, unsigned short index,
>> 1492 unsigned int offset)
>> 1493 {
>> 1494 unsigned int sector_sz;
>> 1495 struct bio_vec *bv;
>> 1496 sector_t sectors;
>> 1497 int i;
>> 1498
>> 1499 sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
>> 1500 sectors = 0;
>> 1501
>> 1502 if (index >= bio->bi_idx)
>> 1503 index = bio->bi_vcnt - 1;
>> 1504
>> 1505 __bio_for_each_segment(bv, bio, i, 0) {
>> 1506 if (i == index) {
>> 1507 if (offset > bv->bv_offset)
>> 1508 sectors += (offset - bv->bv_offset) / sector_sz;
>> 1509 break;
>> 1510 }
>> 1511
>> 1512 sectors += bv->bv_len / sector_sz;
>> 1513 }
>> 1514
>> 1515 return sectors;
>> 1516 }
>> 1517 EXPORT_SYMBOL(bio_sector_offset);
1089 1518
1090 /* 1519 /*
1091 * create memory pools for biovec's in a bio_ 1520 * create memory pools for biovec's in a bio_set.
1092 * use the global biovec slabs created for ge 1521 * use the global biovec slabs created for general use.
1093 */ 1522 */
1094 static int biovec_create_pools(struct bio_set 1523 static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1095 { 1524 {
1096 int i; !! 1525 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1097 1526
1098 for (i = 0; i < BIOVEC_NR_POOLS; i++) !! 1527 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
1099 struct biovec_slab *bp = bvec !! 1528 if (!bs->bvec_pool)
1100 mempool_t **bvp = bs->bvec_po !! 1529 return -ENOMEM;
1101 1530
1102 *bvp = mempool_create_slab_po <<
1103 if (!*bvp) <<
1104 return -ENOMEM; <<
1105 } <<
1106 return 0; 1531 return 0;
1107 } 1532 }
1108 1533
1109 static void biovec_free_pools(struct bio_set 1534 static void biovec_free_pools(struct bio_set *bs)
1110 { 1535 {
1111 int i; !! 1536 mempool_destroy(bs->bvec_pool);
1112 <<
1113 for (i = 0; i < BIOVEC_NR_POOLS; i++) <<
1114 mempool_t *bvp = bs->bvec_poo <<
1115 <<
1116 if (bvp) <<
1117 mempool_destroy(bvp); <<
1118 } <<
1119 <<
1120 } 1537 }
1121 1538
1122 void bioset_free(struct bio_set *bs) 1539 void bioset_free(struct bio_set *bs)
1123 { 1540 {
1124 if (bs->bio_pool) 1541 if (bs->bio_pool)
1125 mempool_destroy(bs->bio_pool) 1542 mempool_destroy(bs->bio_pool);
1126 1543
>> 1544 bioset_integrity_free(bs);
1127 biovec_free_pools(bs); 1545 biovec_free_pools(bs);
>> 1546 bio_put_slab(bs);
1128 1547
1129 kfree(bs); 1548 kfree(bs);
1130 } 1549 }
1131 1550
1132 struct bio_set *bioset_create(int bio_pool_si !! 1551 /**
>> 1552 * bioset_create - Create a bio_set
>> 1553 * @pool_size: Number of bio and bio_vecs to cache in the mempool
>> 1554 * @front_pad: Number of bytes to allocate in front of the returned bio
>> 1555 *
>> 1556 * Description:
>> 1557 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
>> 1558 * to ask for a number of bytes to be allocated in front of the bio.
>> 1559 * Front pad allocation is useful for embedding the bio inside
>> 1560 * another structure, to avoid allocating extra data to go with the bio.
>> 1561 * Note that the bio must be embedded at the END of that structure always,
>> 1562 * or things will break badly.
>> 1563 */
>> 1564 struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1133 { 1565 {
1134 struct bio_set *bs = kzalloc(sizeof(* !! 1566 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
>> 1567 struct bio_set *bs;
1135 1568
>> 1569 bs = kzalloc(sizeof(*bs), GFP_KERNEL);
1136 if (!bs) 1570 if (!bs)
1137 return NULL; 1571 return NULL;
1138 1572
1139 bs->bio_pool = mempool_create_slab_po !! 1573 bs->front_pad = front_pad;
>> 1574
>> 1575 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
>> 1576 if (!bs->bio_slab) {
>> 1577 kfree(bs);
>> 1578 return NULL;
>> 1579 }
>> 1580
>> 1581 bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
1140 if (!bs->bio_pool) 1582 if (!bs->bio_pool)
1141 goto bad; 1583 goto bad;
1142 1584
1143 if (!biovec_create_pools(bs, bvec_poo !! 1585 if (bioset_integrity_create(bs, pool_size))
>> 1586 goto bad;
>> 1587
>> 1588 if (!biovec_create_pools(bs, pool_size))
1144 return bs; 1589 return bs;
1145 1590
1146 bad: 1591 bad:
1147 bioset_free(bs); 1592 bioset_free(bs);
1148 return NULL; 1593 return NULL;
1149 } 1594 }
1150 1595
1151 static void __init biovec_init_slabs(void) 1596 static void __init biovec_init_slabs(void)
1152 { 1597 {
1153 int i; 1598 int i;
1154 1599
1155 for (i = 0; i < BIOVEC_NR_POOLS; i++) 1600 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
1156 int size; 1601 int size;
1157 struct biovec_slab *bvs = bve 1602 struct biovec_slab *bvs = bvec_slabs + i;
1158 1603
>> 1604 #ifndef CONFIG_BLK_DEV_INTEGRITY
>> 1605 if (bvs->nr_vecs <= BIO_INLINE_VECS) {
>> 1606 bvs->slab = NULL;
>> 1607 continue;
>> 1608 }
>> 1609 #endif
>> 1610
1159 size = bvs->nr_vecs * sizeof( 1611 size = bvs->nr_vecs * sizeof(struct bio_vec);
1160 bvs->slab = kmem_cache_create 1612 bvs->slab = kmem_cache_create(bvs->name, size, 0,
1161 SLAB_HWCACHE_ 1613 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1162 } 1614 }
1163 } 1615 }
1164 1616
1165 static int __init init_bio(void) 1617 static int __init init_bio(void)
1166 { 1618 {
1167 bio_slab = KMEM_CACHE(bio, SLAB_HWCAC !! 1619 bio_slab_max = 2;
>> 1620 bio_slab_nr = 0;
>> 1621 bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
>> 1622 if (!bio_slabs)
>> 1623 panic("bio: can't allocate bios\n");
1168 1624
>> 1625 bio_integrity_init();
1169 biovec_init_slabs(); 1626 biovec_init_slabs();
1170 1627
1171 fs_bio_set = bioset_create(BIO_POOL_S !! 1628 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
1172 if (!fs_bio_set) 1629 if (!fs_bio_set)
1173 panic("bio: can't allocate bi 1630 panic("bio: can't allocate bios\n");
1174 1631
1175 bio_split_pool = mempool_create_kmall 1632 bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
1176 1633 sizeof(struct bio_pair));
1177 if (!bio_split_pool) 1634 if (!bio_split_pool)
1178 panic("bio: can't create spli 1635 panic("bio: can't create split pool\n");
1179 1636
1180 return 0; 1637 return 0;
1181 } 1638 }
1182 1639
1183 subsys_initcall(init_bio); 1640 subsys_initcall(init_bio);
1184 1641
1185 EXPORT_SYMBOL(bio_alloc); 1642 EXPORT_SYMBOL(bio_alloc);
>> 1643 EXPORT_SYMBOL(bio_kmalloc);
1186 EXPORT_SYMBOL(bio_put); 1644 EXPORT_SYMBOL(bio_put);
1187 EXPORT_SYMBOL(bio_free); 1645 EXPORT_SYMBOL(bio_free);
1188 EXPORT_SYMBOL(bio_endio); 1646 EXPORT_SYMBOL(bio_endio);
1189 EXPORT_SYMBOL(bio_init); 1647 EXPORT_SYMBOL(bio_init);
1190 EXPORT_SYMBOL(__bio_clone); 1648 EXPORT_SYMBOL(__bio_clone);
1191 EXPORT_SYMBOL(bio_clone); 1649 EXPORT_SYMBOL(bio_clone);
1192 EXPORT_SYMBOL(bio_phys_segments); 1650 EXPORT_SYMBOL(bio_phys_segments);
1193 EXPORT_SYMBOL(bio_hw_segments); <<
1194 EXPORT_SYMBOL(bio_add_page); 1651 EXPORT_SYMBOL(bio_add_page);
1195 EXPORT_SYMBOL(bio_add_pc_page); 1652 EXPORT_SYMBOL(bio_add_pc_page);
1196 EXPORT_SYMBOL(bio_get_nr_vecs); 1653 EXPORT_SYMBOL(bio_get_nr_vecs);
1197 EXPORT_SYMBOL(bio_map_user); 1654 EXPORT_SYMBOL(bio_map_user);
1198 EXPORT_SYMBOL(bio_unmap_user); 1655 EXPORT_SYMBOL(bio_unmap_user);
1199 EXPORT_SYMBOL(bio_map_kern); 1656 EXPORT_SYMBOL(bio_map_kern);
>> 1657 EXPORT_SYMBOL(bio_copy_kern);
1200 EXPORT_SYMBOL(bio_pair_release); 1658 EXPORT_SYMBOL(bio_pair_release);
1201 EXPORT_SYMBOL(bio_split); 1659 EXPORT_SYMBOL(bio_split);
1202 EXPORT_SYMBOL(bio_split_pool); <<
1203 EXPORT_SYMBOL(bio_copy_user); 1660 EXPORT_SYMBOL(bio_copy_user);
1204 EXPORT_SYMBOL(bio_uncopy_user); 1661 EXPORT_SYMBOL(bio_uncopy_user);
1205 EXPORT_SYMBOL(bioset_create); 1662 EXPORT_SYMBOL(bioset_create);
1206 EXPORT_SYMBOL(bioset_free); 1663 EXPORT_SYMBOL(bioset_free);
1207 EXPORT_SYMBOL(bio_alloc_bioset); 1664 EXPORT_SYMBOL(bio_alloc_bioset);
1208 1665
| This page was automatically generated by the LXR engine. |