Diff markup
1 /* -*- mode: c; c-basic-offset: 8; -*- 1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0: 2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 * 3 *
4 * dlmlock.c 4 * dlmlock.c
5 * 5 *
6 * underlying calls for lock creation 6 * underlying calls for lock creation
7 * 7 *
8 * Copyright (C) 2004 Oracle. All rights rese 8 * Copyright (C) 2004 Oracle. All rights reserved.
9 * 9 *
10 * This program is free software; you can redi 10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Genera 11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software F 12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your optio 13 * version 2 of the License, or (at your option) any later version.
14 * 14 *
15 * This program is distributed in the hope tha 15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 18 * General Public License for more details.
19 * 19 *
20 * You should have received a copy of the GNU 20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, wr 21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple P 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA. 23 * Boston, MA 021110-1307, USA.
24 * 24 *
25 */ 25 */
26 26
27 27
28 #include <linux/module.h> 28 #include <linux/module.h>
29 #include <linux/fs.h> 29 #include <linux/fs.h>
30 #include <linux/types.h> 30 #include <linux/types.h>
31 #include <linux/slab.h> 31 #include <linux/slab.h>
32 #include <linux/highmem.h> 32 #include <linux/highmem.h>
33 #include <linux/utsname.h> 33 #include <linux/utsname.h>
34 #include <linux/init.h> 34 #include <linux/init.h>
35 #include <linux/sysctl.h> 35 #include <linux/sysctl.h>
36 #include <linux/random.h> 36 #include <linux/random.h>
37 #include <linux/blkdev.h> 37 #include <linux/blkdev.h>
38 #include <linux/socket.h> 38 #include <linux/socket.h>
39 #include <linux/inet.h> 39 #include <linux/inet.h>
40 #include <linux/spinlock.h> 40 #include <linux/spinlock.h>
41 #include <linux/delay.h> 41 #include <linux/delay.h>
42 42
43 43
44 #include "cluster/heartbeat.h" 44 #include "cluster/heartbeat.h"
45 #include "cluster/nodemanager.h" 45 #include "cluster/nodemanager.h"
46 #include "cluster/tcp.h" 46 #include "cluster/tcp.h"
47 47
48 #include "dlmapi.h" 48 #include "dlmapi.h"
49 #include "dlmcommon.h" 49 #include "dlmcommon.h"
50 50
51 #include "dlmconvert.h" 51 #include "dlmconvert.h"
52 52
53 #define MLOG_MASK_PREFIX ML_DLM 53 #define MLOG_MASK_PREFIX ML_DLM
54 #include "cluster/masklog.h" 54 #include "cluster/masklog.h"
55 55
56 static struct kmem_cache *dlm_lock_cache = NUL <<
57 <<
58 static DEFINE_SPINLOCK(dlm_cookie_lock); 56 static DEFINE_SPINLOCK(dlm_cookie_lock);
59 static u64 dlm_next_cookie = 1; 57 static u64 dlm_next_cookie = 1;
60 58
61 static enum dlm_status dlm_send_remote_lock_re 59 static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
62 60 struct dlm_lock_resource *res,
63 61 struct dlm_lock *lock, int flags);
64 static void dlm_init_lock(struct dlm_lock *new 62 static void dlm_init_lock(struct dlm_lock *newlock, int type,
65 u8 node, u64 cookie) 63 u8 node, u64 cookie);
66 static void dlm_lock_release(struct kref *kref 64 static void dlm_lock_release(struct kref *kref);
67 static void dlm_lock_detach_lockres(struct dlm 65 static void dlm_lock_detach_lockres(struct dlm_lock *lock);
68 66
69 int dlm_init_lock_cache(void) <<
70 { <<
71 dlm_lock_cache = kmem_cache_create("o2 <<
72 siz <<
73 0, <<
74 if (dlm_lock_cache == NULL) <<
75 return -ENOMEM; <<
76 return 0; <<
77 } <<
78 <<
79 void dlm_destroy_lock_cache(void) <<
80 { <<
81 if (dlm_lock_cache) <<
82 kmem_cache_destroy(dlm_lock_ca <<
83 } <<
84 <<
85 /* Tell us whether we can grant a new lock req 67 /* Tell us whether we can grant a new lock request.
86 * locking: 68 * locking:
87 * caller needs: res->spinlock 69 * caller needs: res->spinlock
88 * taken: none 70 * taken: none
89 * held on exit: none 71 * held on exit: none
90 * returns: 1 if the lock can be granted, 0 ot 72 * returns: 1 if the lock can be granted, 0 otherwise.
91 */ 73 */
92 static int dlm_can_grant_new_lock(struct dlm_l 74 static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
93 struct dlm_l 75 struct dlm_lock *lock)
94 { 76 {
95 struct list_head *iter; 77 struct list_head *iter;
96 struct dlm_lock *tmplock; 78 struct dlm_lock *tmplock;
97 79
98 list_for_each(iter, &res->granted) { 80 list_for_each(iter, &res->granted) {
99 tmplock = list_entry(iter, str 81 tmplock = list_entry(iter, struct dlm_lock, list);
100 82
101 if (!dlm_lock_compatible(tmplo 83 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
102 return 0; 84 return 0;
103 } 85 }
104 86
105 list_for_each(iter, &res->converting) 87 list_for_each(iter, &res->converting) {
106 tmplock = list_entry(iter, str 88 tmplock = list_entry(iter, struct dlm_lock, list);
107 89
108 if (!dlm_lock_compatible(tmplo 90 if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
109 return 0; 91 return 0;
110 } 92 }
111 93
112 return 1; 94 return 1;
113 } 95 }
114 96
115 /* performs lock creation at the lockres maste 97 /* performs lock creation at the lockres master site
116 * locking: 98 * locking:
117 * caller needs: none 99 * caller needs: none
118 * taken: takes and drops res->spinl 100 * taken: takes and drops res->spinlock
119 * held on exit: none 101 * held on exit: none
120 * returns: DLM_NORMAL, DLM_NOTQUEUED 102 * returns: DLM_NORMAL, DLM_NOTQUEUED
121 */ 103 */
122 static enum dlm_status dlmlock_master(struct d 104 static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
123 struct d 105 struct dlm_lock_resource *res,
124 struct d 106 struct dlm_lock *lock, int flags)
125 { 107 {
126 int call_ast = 0, kick_thread = 0; 108 int call_ast = 0, kick_thread = 0;
127 enum dlm_status status = DLM_NORMAL; 109 enum dlm_status status = DLM_NORMAL;
128 110
129 mlog_entry("type=%d\n", lock->ml.type) 111 mlog_entry("type=%d\n", lock->ml.type);
130 112
131 spin_lock(&res->spinlock); 113 spin_lock(&res->spinlock);
132 /* if called from dlm_create_lock_hand 114 /* if called from dlm_create_lock_handler, need to
133 * ensure it will not sleep in dlm_wai 115 * ensure it will not sleep in dlm_wait_on_lockres */
134 status = __dlm_lockres_state_to_status 116 status = __dlm_lockres_state_to_status(res);
135 if (status != DLM_NORMAL && 117 if (status != DLM_NORMAL &&
136 lock->ml.node != dlm->node_num) { 118 lock->ml.node != dlm->node_num) {
137 /* erf. state changed after l 119 /* erf. state changed after lock was dropped. */
138 spin_unlock(&res->spinlock); 120 spin_unlock(&res->spinlock);
139 dlm_error(status); 121 dlm_error(status);
140 return status; 122 return status;
141 } 123 }
142 __dlm_wait_on_lockres(res); 124 __dlm_wait_on_lockres(res);
143 __dlm_lockres_reserve_ast(res); 125 __dlm_lockres_reserve_ast(res);
144 126
145 if (dlm_can_grant_new_lock(res, lock)) 127 if (dlm_can_grant_new_lock(res, lock)) {
146 mlog(0, "I can grant this lock 128 mlog(0, "I can grant this lock right away\n");
147 /* got it right away */ 129 /* got it right away */
148 lock->lksb->status = DLM_NORMA 130 lock->lksb->status = DLM_NORMAL;
149 status = DLM_NORMAL; 131 status = DLM_NORMAL;
150 dlm_lock_get(lock); 132 dlm_lock_get(lock);
151 list_add_tail(&lock->list, &re 133 list_add_tail(&lock->list, &res->granted);
152 134
153 /* for the recovery lock, we c 135 /* for the recovery lock, we can't allow the ast
154 * to be queued since the dlmt 136 * to be queued since the dlmthread is already
155 * frozen. but the recovery l 137 * frozen. but the recovery lock is always locked
156 * with LKM_NOQUEUE so we do n 138 * with LKM_NOQUEUE so we do not need the ast in
157 * this special case */ 139 * this special case */
158 if (!dlm_is_recovery_lock(res- 140 if (!dlm_is_recovery_lock(res->lockname.name,
159 res- 141 res->lockname.len)) {
160 kick_thread = 1; 142 kick_thread = 1;
161 call_ast = 1; 143 call_ast = 1;
162 } else { 144 } else {
163 mlog(0, "%s: returning 145 mlog(0, "%s: returning DLM_NORMAL to "
164 "node %u for reco 146 "node %u for reco lock\n", dlm->name,
165 lock->ml.node); 147 lock->ml.node);
166 } 148 }
167 } else { 149 } else {
168 /* for NOQUEUE request, unless 150 /* for NOQUEUE request, unless we get the
169 * lock right away, return DLM 151 * lock right away, return DLM_NOTQUEUED */
170 if (flags & LKM_NOQUEUE) { 152 if (flags & LKM_NOQUEUE) {
171 status = DLM_NOTQUEUED 153 status = DLM_NOTQUEUED;
172 if (dlm_is_recovery_lo 154 if (dlm_is_recovery_lock(res->lockname.name,
173 155 res->lockname.len)) {
174 mlog(0, "%s: r 156 mlog(0, "%s: returning NOTQUEUED to "
175 "node %u 157 "node %u for reco lock\n", dlm->name,
176 lock->ml. 158 lock->ml.node);
177 } 159 }
178 } else { 160 } else {
179 dlm_lock_get(lock); 161 dlm_lock_get(lock);
180 list_add_tail(&lock->l 162 list_add_tail(&lock->list, &res->blocked);
181 kick_thread = 1; 163 kick_thread = 1;
182 } 164 }
183 } 165 }
184 /* reduce the inflight count, this may 166 /* reduce the inflight count, this may result in the lockres
185 * being purged below during calc_usag 167 * being purged below during calc_usage */
186 if (lock->ml.node == dlm->node_num) 168 if (lock->ml.node == dlm->node_num)
187 dlm_lockres_drop_inflight_ref( 169 dlm_lockres_drop_inflight_ref(dlm, res);
188 170
189 spin_unlock(&res->spinlock); 171 spin_unlock(&res->spinlock);
190 wake_up(&res->wq); 172 wake_up(&res->wq);
191 173
192 /* either queue the ast or release it 174 /* either queue the ast or release it */
193 if (call_ast) 175 if (call_ast)
194 dlm_queue_ast(dlm, lock); 176 dlm_queue_ast(dlm, lock);
195 else 177 else
196 dlm_lockres_release_ast(dlm, r 178 dlm_lockres_release_ast(dlm, res);
197 179
198 dlm_lockres_calc_usage(dlm, res); 180 dlm_lockres_calc_usage(dlm, res);
199 if (kick_thread) 181 if (kick_thread)
200 dlm_kick_thread(dlm, res); 182 dlm_kick_thread(dlm, res);
201 183
202 return status; 184 return status;
203 } 185 }
204 186
205 void dlm_revert_pending_lock(struct dlm_lock_r 187 void dlm_revert_pending_lock(struct dlm_lock_resource *res,
206 struct dlm_lock * 188 struct dlm_lock *lock)
207 { 189 {
208 /* remove from local queue if it faile 190 /* remove from local queue if it failed */
209 list_del_init(&lock->list); 191 list_del_init(&lock->list);
210 lock->lksb->flags &= ~DLM_LKSB_GET_LVB 192 lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
211 } 193 }
212 194
213 195
214 /* 196 /*
215 * locking: 197 * locking:
216 * caller needs: none 198 * caller needs: none
217 * taken: takes and drops res->spinl 199 * taken: takes and drops res->spinlock
218 * held on exit: none 200 * held on exit: none
219 * returns: DLM_DENIED, DLM_RECOVERING, or net 201 * returns: DLM_DENIED, DLM_RECOVERING, or net status
220 */ 202 */
221 static enum dlm_status dlmlock_remote(struct d 203 static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
222 struct d 204 struct dlm_lock_resource *res,
223 struct d 205 struct dlm_lock *lock, int flags)
224 { 206 {
225 enum dlm_status status = DLM_DENIED; 207 enum dlm_status status = DLM_DENIED;
226 int lockres_changed = 1; 208 int lockres_changed = 1;
227 209
228 mlog_entry("type=%d\n", lock->ml.type) 210 mlog_entry("type=%d\n", lock->ml.type);
229 mlog(0, "lockres %.*s, flags = 0x%x\n" 211 mlog(0, "lockres %.*s, flags = 0x%x\n", res->lockname.len,
230 res->lockname.name, flags); 212 res->lockname.name, flags);
231 213
232 spin_lock(&res->spinlock); 214 spin_lock(&res->spinlock);
233 215
234 /* will exit this call with spinlock h 216 /* will exit this call with spinlock held */
235 __dlm_wait_on_lockres(res); 217 __dlm_wait_on_lockres(res);
236 res->state |= DLM_LOCK_RES_IN_PROGRESS 218 res->state |= DLM_LOCK_RES_IN_PROGRESS;
237 219
238 /* add lock to local (secondary) queue 220 /* add lock to local (secondary) queue */
239 dlm_lock_get(lock); 221 dlm_lock_get(lock);
240 list_add_tail(&lock->list, &res->block 222 list_add_tail(&lock->list, &res->blocked);
241 lock->lock_pending = 1; 223 lock->lock_pending = 1;
242 spin_unlock(&res->spinlock); 224 spin_unlock(&res->spinlock);
243 225
244 /* spec seems to say that you will get 226 /* spec seems to say that you will get DLM_NORMAL when the lock
245 * has been queued, meaning we need to 227 * has been queued, meaning we need to wait for a reply here. */
246 status = dlm_send_remote_lock_request( 228 status = dlm_send_remote_lock_request(dlm, res, lock, flags);
247 229
248 spin_lock(&res->spinlock); 230 spin_lock(&res->spinlock);
249 res->state &= ~DLM_LOCK_RES_IN_PROGRES 231 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
250 lock->lock_pending = 0; 232 lock->lock_pending = 0;
251 if (status != DLM_NORMAL) { 233 if (status != DLM_NORMAL) {
252 if (status == DLM_RECOVERING & 234 if (status == DLM_RECOVERING &&
253 dlm_is_recovery_lock(res-> 235 dlm_is_recovery_lock(res->lockname.name,
254 res-> 236 res->lockname.len)) {
255 /* recovery lock was m 237 /* recovery lock was mastered by dead node.
256 * we need to have cal 238 * we need to have calc_usage shoot down this
257 * lockres and complet 239 * lockres and completely remaster it. */
258 mlog(0, "%s: recovery 240 mlog(0, "%s: recovery lock was owned by "
259 "dead node %u, re 241 "dead node %u, remaster it now.\n",
260 dlm->name, res->o 242 dlm->name, res->owner);
261 } else if (status != DLM_NOTQU 243 } else if (status != DLM_NOTQUEUED) {
262 /* 244 /*
263 * DO NOT call calc_us 245 * DO NOT call calc_usage, as this would unhash
264 * the remote lockres 246 * the remote lockres before we ever get to use
265 * it. treat as if we 247 * it. treat as if we never made any change to
266 * the lockres. 248 * the lockres.
267 */ 249 */
268 lockres_changed = 0; 250 lockres_changed = 0;
269 dlm_error(status); 251 dlm_error(status);
270 } 252 }
271 dlm_revert_pending_lock(res, l 253 dlm_revert_pending_lock(res, lock);
272 dlm_lock_put(lock); 254 dlm_lock_put(lock);
273 } else if (dlm_is_recovery_lock(res->l 255 } else if (dlm_is_recovery_lock(res->lockname.name,
274 res->l 256 res->lockname.len)) {
275 /* special case for the $RECOV 257 /* special case for the $RECOVERY lock.
276 * there will never be an AST 258 * there will never be an AST delivered to put
277 * this lock on the proper sec 259 * this lock on the proper secondary queue
278 * (granted), so do it manuall 260 * (granted), so do it manually. */
279 mlog(0, "%s: $RECOVERY lock fo 261 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
280 "mastered by %u; got lock 262 "mastered by %u; got lock, manually granting (no ast)\n",
281 dlm->name, dlm->node_num, 263 dlm->name, dlm->node_num, res->owner);
282 list_move_tail(&lock->list, &r 264 list_move_tail(&lock->list, &res->granted);
283 } 265 }
284 spin_unlock(&res->spinlock); 266 spin_unlock(&res->spinlock);
285 267
286 if (lockres_changed) 268 if (lockres_changed)
287 dlm_lockres_calc_usage(dlm, re 269 dlm_lockres_calc_usage(dlm, res);
288 270
289 wake_up(&res->wq); 271 wake_up(&res->wq);
290 return status; 272 return status;
291 } 273 }
292 274
293 275
294 /* for remote lock creation. 276 /* for remote lock creation.
295 * locking: 277 * locking:
296 * caller needs: none, but need res->state 278 * caller needs: none, but need res->state & DLM_LOCK_RES_IN_PROGRESS
297 * taken: none 279 * taken: none
298 * held on exit: none 280 * held on exit: none
299 * returns: DLM_NOLOCKMGR, or net status 281 * returns: DLM_NOLOCKMGR, or net status
300 */ 282 */
301 static enum dlm_status dlm_send_remote_lock_re 283 static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
302 284 struct dlm_lock_resource *res,
303 285 struct dlm_lock *lock, int flags)
304 { 286 {
305 struct dlm_create_lock create; 287 struct dlm_create_lock create;
306 int tmpret, status = 0; 288 int tmpret, status = 0;
307 enum dlm_status ret; 289 enum dlm_status ret;
308 290
309 mlog_entry_void(); 291 mlog_entry_void();
310 292
311 memset(&create, 0, sizeof(create)); 293 memset(&create, 0, sizeof(create));
312 create.node_idx = dlm->node_num; 294 create.node_idx = dlm->node_num;
313 create.requested_type = lock->ml.type; 295 create.requested_type = lock->ml.type;
314 create.cookie = lock->ml.cookie; 296 create.cookie = lock->ml.cookie;
315 create.namelen = res->lockname.len; 297 create.namelen = res->lockname.len;
316 create.flags = cpu_to_be32(flags); 298 create.flags = cpu_to_be32(flags);
317 memcpy(create.name, res->lockname.name 299 memcpy(create.name, res->lockname.name, create.namelen);
318 300
319 tmpret = o2net_send_message(DLM_CREATE 301 tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create,
320 sizeof(cre 302 sizeof(create), res->owner, &status);
321 if (tmpret >= 0) { 303 if (tmpret >= 0) {
322 // successfully sent and recei 304 // successfully sent and received
323 ret = status; // this is alre 305 ret = status; // this is already a dlm_status
324 if (ret == DLM_REJECTED) { 306 if (ret == DLM_REJECTED) {
325 mlog(ML_ERROR, "%s:%.* 307 mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres "
326 "no longer owned 308 "no longer owned by %u. that node is coming back "
327 "up currently.\n" 309 "up currently.\n", dlm->name, create.namelen,
328 create.name, res- 310 create.name, res->owner);
329 dlm_print_one_lock_res 311 dlm_print_one_lock_resource(res);
330 BUG(); 312 BUG();
331 } 313 }
332 } else { 314 } else {
333 mlog_errno(tmpret); 315 mlog_errno(tmpret);
334 if (dlm_is_host_down(tmpret)) 316 if (dlm_is_host_down(tmpret)) {
335 ret = DLM_RECOVERING; 317 ret = DLM_RECOVERING;
336 mlog(0, "node %u died 318 mlog(0, "node %u died so returning DLM_RECOVERING "
337 "from lock messag 319 "from lock message!\n", res->owner);
338 } else { 320 } else {
339 ret = dlm_err_to_dlm_s 321 ret = dlm_err_to_dlm_status(tmpret);
340 } 322 }
341 } 323 }
342 324
343 return ret; 325 return ret;
344 } 326 }
345 327
346 void dlm_lock_get(struct dlm_lock *lock) 328 void dlm_lock_get(struct dlm_lock *lock)
347 { 329 {
348 kref_get(&lock->lock_refs); 330 kref_get(&lock->lock_refs);
349 } 331 }
350 332
351 void dlm_lock_put(struct dlm_lock *lock) 333 void dlm_lock_put(struct dlm_lock *lock)
352 { 334 {
353 kref_put(&lock->lock_refs, dlm_lock_re 335 kref_put(&lock->lock_refs, dlm_lock_release);
354 } 336 }
355 337
356 static void dlm_lock_release(struct kref *kref 338 static void dlm_lock_release(struct kref *kref)
357 { 339 {
358 struct dlm_lock *lock; 340 struct dlm_lock *lock;
359 341
360 lock = container_of(kref, struct dlm_l 342 lock = container_of(kref, struct dlm_lock, lock_refs);
361 343
362 BUG_ON(!list_empty(&lock->list)); 344 BUG_ON(!list_empty(&lock->list));
363 BUG_ON(!list_empty(&lock->ast_list)); 345 BUG_ON(!list_empty(&lock->ast_list));
364 BUG_ON(!list_empty(&lock->bast_list)); 346 BUG_ON(!list_empty(&lock->bast_list));
365 BUG_ON(lock->ast_pending); 347 BUG_ON(lock->ast_pending);
366 BUG_ON(lock->bast_pending); 348 BUG_ON(lock->bast_pending);
367 349
368 dlm_lock_detach_lockres(lock); 350 dlm_lock_detach_lockres(lock);
369 351
370 if (lock->lksb_kernel_allocated) { 352 if (lock->lksb_kernel_allocated) {
371 mlog(0, "freeing kernel-alloca 353 mlog(0, "freeing kernel-allocated lksb\n");
372 kfree(lock->lksb); 354 kfree(lock->lksb);
373 } 355 }
374 kmem_cache_free(dlm_lock_cache, lock); !! 356 kfree(lock);
375 } 357 }
376 358
377 /* associate a lock with it's lockres, getting 359 /* associate a lock with it's lockres, getting a ref on the lockres */
378 void dlm_lock_attach_lockres(struct dlm_lock * 360 void dlm_lock_attach_lockres(struct dlm_lock *lock,
379 struct dlm_lock_r 361 struct dlm_lock_resource *res)
380 { 362 {
381 dlm_lockres_get(res); 363 dlm_lockres_get(res);
382 lock->lockres = res; 364 lock->lockres = res;
383 } 365 }
384 366
385 /* drop ref on lockres, if there is still one 367 /* drop ref on lockres, if there is still one associated with lock */
386 static void dlm_lock_detach_lockres(struct dlm 368 static void dlm_lock_detach_lockres(struct dlm_lock *lock)
387 { 369 {
388 struct dlm_lock_resource *res; 370 struct dlm_lock_resource *res;
389 371
390 res = lock->lockres; 372 res = lock->lockres;
391 if (res) { 373 if (res) {
392 lock->lockres = NULL; 374 lock->lockres = NULL;
393 mlog(0, "removing lock's lockr 375 mlog(0, "removing lock's lockres reference\n");
394 dlm_lockres_put(res); 376 dlm_lockres_put(res);
395 } 377 }
396 } 378 }
397 379
398 static void dlm_init_lock(struct dlm_lock *new 380 static void dlm_init_lock(struct dlm_lock *newlock, int type,
399 u8 node, u64 cookie) 381 u8 node, u64 cookie)
400 { 382 {
401 INIT_LIST_HEAD(&newlock->list); 383 INIT_LIST_HEAD(&newlock->list);
402 INIT_LIST_HEAD(&newlock->ast_list); 384 INIT_LIST_HEAD(&newlock->ast_list);
403 INIT_LIST_HEAD(&newlock->bast_list); 385 INIT_LIST_HEAD(&newlock->bast_list);
404 spin_lock_init(&newlock->spinlock); 386 spin_lock_init(&newlock->spinlock);
405 newlock->ml.type = type; 387 newlock->ml.type = type;
406 newlock->ml.convert_type = LKM_IVMODE; 388 newlock->ml.convert_type = LKM_IVMODE;
407 newlock->ml.highest_blocked = LKM_IVMO 389 newlock->ml.highest_blocked = LKM_IVMODE;
408 newlock->ml.node = node; 390 newlock->ml.node = node;
409 newlock->ml.pad1 = 0; 391 newlock->ml.pad1 = 0;
410 newlock->ml.list = 0; 392 newlock->ml.list = 0;
411 newlock->ml.flags = 0; 393 newlock->ml.flags = 0;
412 newlock->ast = NULL; 394 newlock->ast = NULL;
413 newlock->bast = NULL; 395 newlock->bast = NULL;
414 newlock->astdata = NULL; 396 newlock->astdata = NULL;
415 newlock->ml.cookie = cpu_to_be64(cooki 397 newlock->ml.cookie = cpu_to_be64(cookie);
416 newlock->ast_pending = 0; 398 newlock->ast_pending = 0;
417 newlock->bast_pending = 0; 399 newlock->bast_pending = 0;
418 newlock->convert_pending = 0; 400 newlock->convert_pending = 0;
419 newlock->lock_pending = 0; 401 newlock->lock_pending = 0;
420 newlock->unlock_pending = 0; 402 newlock->unlock_pending = 0;
421 newlock->cancel_pending = 0; 403 newlock->cancel_pending = 0;
422 newlock->lksb_kernel_allocated = 0; 404 newlock->lksb_kernel_allocated = 0;
423 405
424 kref_init(&newlock->lock_refs); 406 kref_init(&newlock->lock_refs);
425 } 407 }
426 408
427 struct dlm_lock * dlm_new_lock(int type, u8 no 409 struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
428 struct dlm_lock 410 struct dlm_lockstatus *lksb)
429 { 411 {
430 struct dlm_lock *lock; 412 struct dlm_lock *lock;
431 int kernel_allocated = 0; 413 int kernel_allocated = 0;
432 414
433 lock = (struct dlm_lock *) kmem_cache_ !! 415 lock = kzalloc(sizeof(*lock), GFP_NOFS);
434 if (!lock) 416 if (!lock)
435 return NULL; 417 return NULL;
436 418
437 if (!lksb) { 419 if (!lksb) {
438 /* zero memory only if kernel- 420 /* zero memory only if kernel-allocated */
439 lksb = kzalloc(sizeof(*lksb), 421 lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
440 if (!lksb) { 422 if (!lksb) {
441 kfree(lock); 423 kfree(lock);
442 return NULL; 424 return NULL;
443 } 425 }
444 kernel_allocated = 1; 426 kernel_allocated = 1;
445 } 427 }
446 428
447 dlm_init_lock(lock, type, node, cookie 429 dlm_init_lock(lock, type, node, cookie);
448 if (kernel_allocated) 430 if (kernel_allocated)
449 lock->lksb_kernel_allocated = 431 lock->lksb_kernel_allocated = 1;
450 lock->lksb = lksb; 432 lock->lksb = lksb;
451 lksb->lockid = lock; 433 lksb->lockid = lock;
452 return lock; 434 return lock;
453 } 435 }
454 436
455 /* handler for lock creation net message 437 /* handler for lock creation net message
456 * locking: 438 * locking:
457 * caller needs: none 439 * caller needs: none
458 * taken: takes and drops res->spinl 440 * taken: takes and drops res->spinlock
459 * held on exit: none 441 * held on exit: none
460 * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCK 442 * returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED
461 */ 443 */
462 int dlm_create_lock_handler(struct o2net_msg * 444 int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
463 void **ret_data) 445 void **ret_data)
464 { 446 {
465 struct dlm_ctxt *dlm = data; 447 struct dlm_ctxt *dlm = data;
466 struct dlm_create_lock *create = (stru 448 struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf;
467 struct dlm_lock_resource *res = NULL; 449 struct dlm_lock_resource *res = NULL;
468 struct dlm_lock *newlock = NULL; 450 struct dlm_lock *newlock = NULL;
469 struct dlm_lockstatus *lksb = NULL; 451 struct dlm_lockstatus *lksb = NULL;
470 enum dlm_status status = DLM_NORMAL; 452 enum dlm_status status = DLM_NORMAL;
471 char *name; 453 char *name;
472 unsigned int namelen; 454 unsigned int namelen;
473 455
474 BUG_ON(!dlm); 456 BUG_ON(!dlm);
475 457
476 mlog_entry_void(); 458 mlog_entry_void();
477 459
478 if (!dlm_grab(dlm)) 460 if (!dlm_grab(dlm))
479 return DLM_REJECTED; 461 return DLM_REJECTED;
480 462
481 name = create->name; 463 name = create->name;
482 namelen = create->namelen; 464 namelen = create->namelen;
483 status = DLM_REJECTED; 465 status = DLM_REJECTED;
484 if (!dlm_domain_fully_joined(dlm)) { 466 if (!dlm_domain_fully_joined(dlm)) {
485 mlog(ML_ERROR, "Domain %s not 467 mlog(ML_ERROR, "Domain %s not fully joined, but node %u is "
486 "sending a create_lock me 468 "sending a create_lock message for lock %.*s!\n",
487 dlm->name, create->node_i 469 dlm->name, create->node_idx, namelen, name);
488 dlm_error(status); 470 dlm_error(status);
489 goto leave; 471 goto leave;
490 } 472 }
491 473
492 status = DLM_IVBUFLEN; 474 status = DLM_IVBUFLEN;
493 if (namelen > DLM_LOCKID_NAME_MAX) { 475 if (namelen > DLM_LOCKID_NAME_MAX) {
494 dlm_error(status); 476 dlm_error(status);
495 goto leave; 477 goto leave;
496 } 478 }
497 479
498 status = DLM_SYSERR; 480 status = DLM_SYSERR;
499 newlock = dlm_new_lock(create->request 481 newlock = dlm_new_lock(create->requested_type,
500 create->node_id 482 create->node_idx,
501 be64_to_cpu(cre 483 be64_to_cpu(create->cookie), NULL);
502 if (!newlock) { 484 if (!newlock) {
503 dlm_error(status); 485 dlm_error(status);
504 goto leave; 486 goto leave;
505 } 487 }
506 488
507 lksb = newlock->lksb; 489 lksb = newlock->lksb;
508 490
509 if (be32_to_cpu(create->flags) & LKM_G 491 if (be32_to_cpu(create->flags) & LKM_GET_LVB) {
510 lksb->flags |= DLM_LKSB_GET_LV 492 lksb->flags |= DLM_LKSB_GET_LVB;
511 mlog(0, "set DLM_LKSB_GET_LVB 493 mlog(0, "set DLM_LKSB_GET_LVB flag\n");
512 } 494 }
513 495
514 status = DLM_IVLOCKID; 496 status = DLM_IVLOCKID;
515 res = dlm_lookup_lockres(dlm, name, na 497 res = dlm_lookup_lockres(dlm, name, namelen);
516 if (!res) { 498 if (!res) {
517 dlm_error(status); 499 dlm_error(status);
518 goto leave; 500 goto leave;
519 } 501 }
520 502
521 spin_lock(&res->spinlock); 503 spin_lock(&res->spinlock);
522 status = __dlm_lockres_state_to_status 504 status = __dlm_lockres_state_to_status(res);
523 spin_unlock(&res->spinlock); 505 spin_unlock(&res->spinlock);
524 506
525 if (status != DLM_NORMAL) { 507 if (status != DLM_NORMAL) {
526 mlog(0, "lockres recovering/mi 508 mlog(0, "lockres recovering/migrating/in-progress\n");
527 goto leave; 509 goto leave;
528 } 510 }
529 511
530 dlm_lock_attach_lockres(newlock, res); 512 dlm_lock_attach_lockres(newlock, res);
531 513
532 status = dlmlock_master(dlm, res, newl 514 status = dlmlock_master(dlm, res, newlock, be32_to_cpu(create->flags));
533 leave: 515 leave:
534 if (status != DLM_NORMAL) 516 if (status != DLM_NORMAL)
535 if (newlock) 517 if (newlock)
536 dlm_lock_put(newlock); 518 dlm_lock_put(newlock);
537 519
538 if (res) 520 if (res)
539 dlm_lockres_put(res); 521 dlm_lockres_put(res);
540 522
541 dlm_put(dlm); 523 dlm_put(dlm);
542 524
543 return status; 525 return status;
544 } 526 }
545 527
546 528
547 /* fetch next node-local (u8 nodenum + u56 coo 529 /* fetch next node-local (u8 nodenum + u56 cookie) into u64 */
548 static inline void dlm_get_next_cookie(u8 node 530 static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
549 { 531 {
550 u64 tmpnode = node_num; 532 u64 tmpnode = node_num;
551 533
552 /* shift single byte of node num into 534 /* shift single byte of node num into top 8 bits */
553 tmpnode <<= 56; 535 tmpnode <<= 56;
554 536
555 spin_lock(&dlm_cookie_lock); 537 spin_lock(&dlm_cookie_lock);
556 *cookie = (dlm_next_cookie | tmpnode); 538 *cookie = (dlm_next_cookie | tmpnode);
557 if (++dlm_next_cookie & 0xff0000000000 539 if (++dlm_next_cookie & 0xff00000000000000ull) {
558 mlog(0, "This node's cookie wi 540 mlog(0, "This node's cookie will now wrap!\n");
559 dlm_next_cookie = 1; 541 dlm_next_cookie = 1;
560 } 542 }
561 spin_unlock(&dlm_cookie_lock); 543 spin_unlock(&dlm_cookie_lock);
562 } 544 }
563 545
564 enum dlm_status dlmlock(struct dlm_ctxt *dlm, 546 enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
565 struct dlm_lockstatus 547 struct dlm_lockstatus *lksb, int flags,
566 const char *name, int 548 const char *name, int namelen, dlm_astlockfunc_t *ast,
567 void *data, dlm_bastlo 549 void *data, dlm_bastlockfunc_t *bast)
568 { 550 {
569 enum dlm_status status; 551 enum dlm_status status;
570 struct dlm_lock_resource *res = NULL; 552 struct dlm_lock_resource *res = NULL;
571 struct dlm_lock *lock = NULL; 553 struct dlm_lock *lock = NULL;
572 int convert = 0, recovery = 0; 554 int convert = 0, recovery = 0;
573 555
574 /* yes this function is a mess. 556 /* yes this function is a mess.
575 * TODO: clean this up. lots of commo 557 * TODO: clean this up. lots of common code in the
576 * lock and convert paths, espec 558 * lock and convert paths, especially in the retry blocks */
577 if (!lksb) { 559 if (!lksb) {
578 dlm_error(DLM_BADARGS); 560 dlm_error(DLM_BADARGS);
579 return DLM_BADARGS; 561 return DLM_BADARGS;
580 } 562 }
581 563
582 status = DLM_BADPARAM; 564 status = DLM_BADPARAM;
583 if (mode != LKM_EXMODE && mode != LKM_ 565 if (mode != LKM_EXMODE && mode != LKM_PRMODE && mode != LKM_NLMODE) {
584 dlm_error(status); 566 dlm_error(status);
585 goto error; 567 goto error;
586 } 568 }
587 569
588 if (flags & ~LKM_VALID_FLAGS) { 570 if (flags & ~LKM_VALID_FLAGS) {
589 dlm_error(status); 571 dlm_error(status);
590 goto error; 572 goto error;
591 } 573 }
592 574
593 convert = (flags & LKM_CONVERT); 575 convert = (flags & LKM_CONVERT);
594 recovery = (flags & LKM_RECOVERY); 576 recovery = (flags & LKM_RECOVERY);
595 577
596 if (recovery && 578 if (recovery &&
597 (!dlm_is_recovery_lock(name, namel 579 (!dlm_is_recovery_lock(name, namelen) || convert) ) {
598 dlm_error(status); 580 dlm_error(status);
599 goto error; 581 goto error;
600 } 582 }
601 if (convert && (flags & LKM_LOCAL)) { 583 if (convert && (flags & LKM_LOCAL)) {
602 mlog(ML_ERROR, "strange LOCAL 584 mlog(ML_ERROR, "strange LOCAL convert request!\n");
603 goto error; 585 goto error;
604 } 586 }
605 587
606 if (convert) { 588 if (convert) {
607 /* CONVERT request */ 589 /* CONVERT request */
608 590
609 /* if converting, must pass in 591 /* if converting, must pass in a valid dlm_lock */
610 lock = lksb->lockid; 592 lock = lksb->lockid;
611 if (!lock) { 593 if (!lock) {
612 mlog(ML_ERROR, "NULL l 594 mlog(ML_ERROR, "NULL lock pointer in convert "
613 "request\n"); 595 "request\n");
614 goto error; 596 goto error;
615 } 597 }
616 598
617 res = lock->lockres; 599 res = lock->lockres;
618 if (!res) { 600 if (!res) {
619 mlog(ML_ERROR, "NULL l 601 mlog(ML_ERROR, "NULL lockres pointer in convert "
620 "request\n"); 602 "request\n");
621 goto error; 603 goto error;
622 } 604 }
623 dlm_lockres_get(res); 605 dlm_lockres_get(res);
624 606
625 /* XXX: for ocfs2 purposes, th 607 /* XXX: for ocfs2 purposes, the ast/bast/astdata/lksb are
626 * static after the original l 608 * static after the original lock call. convert requests will
627 * ensure that everything is t 609 * ensure that everything is the same, or return DLM_BADARGS.
628 * this means that DLM_DENIED_ 610 * this means that DLM_DENIED_NOASTS will never be returned.
629 */ 611 */
630 if (lock->lksb != lksb || lock 612 if (lock->lksb != lksb || lock->ast != ast ||
631 lock->bast != bast || lock 613 lock->bast != bast || lock->astdata != data) {
632 status = DLM_BADARGS; 614 status = DLM_BADARGS;
633 mlog(ML_ERROR, "new ar 615 mlog(ML_ERROR, "new args: lksb=%p, ast=%p, bast=%p, "
634 "astdata=%p\n", l 616 "astdata=%p\n", lksb, ast, bast, data);
635 mlog(ML_ERROR, "orig a 617 mlog(ML_ERROR, "orig args: lksb=%p, ast=%p, bast=%p, "
636 "astdata=%p\n", l 618 "astdata=%p\n", lock->lksb, lock->ast,
637 lock->bast, lock- 619 lock->bast, lock->astdata);
638 goto error; 620 goto error;
639 } 621 }
640 retry_convert: 622 retry_convert:
641 dlm_wait_for_recovery(dlm); 623 dlm_wait_for_recovery(dlm);
642 624
643 if (res->owner == dlm->node_nu 625 if (res->owner == dlm->node_num)
644 status = dlmconvert_ma 626 status = dlmconvert_master(dlm, res, lock, flags, mode);
645 else 627 else
646 status = dlmconvert_re 628 status = dlmconvert_remote(dlm, res, lock, flags, mode);
647 if (status == DLM_RECOVERING | 629 if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
648 status == DLM_FORWARD) { 630 status == DLM_FORWARD) {
649 /* for now, see how th 631 /* for now, see how this works without sleeping
650 * and just retry righ 632 * and just retry right away. I suspect the reco
651 * or migration will c 633 * or migration will complete fast enough that
652 * no waiting will be 634 * no waiting will be necessary */
653 mlog(0, "retrying conv 635 mlog(0, "retrying convert with migration/recovery/"
654 "in-progress\n"); 636 "in-progress\n");
655 msleep(100); 637 msleep(100);
656 goto retry_convert; 638 goto retry_convert;
657 } 639 }
658 } else { 640 } else {
659 u64 tmpcookie; 641 u64 tmpcookie;
660 642
661 /* LOCK request */ 643 /* LOCK request */
662 status = DLM_BADARGS; 644 status = DLM_BADARGS;
663 if (!name) { 645 if (!name) {
664 dlm_error(status); 646 dlm_error(status);
665 goto error; 647 goto error;
666 } 648 }
667 649
668 status = DLM_IVBUFLEN; 650 status = DLM_IVBUFLEN;
669 if (namelen > DLM_LOCKID_NAME_ 651 if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
670 dlm_error(status); 652 dlm_error(status);
671 goto error; 653 goto error;
672 } 654 }
673 655
674 dlm_get_next_cookie(dlm->node_ 656 dlm_get_next_cookie(dlm->node_num, &tmpcookie);
675 lock = dlm_new_lock(mode, dlm- 657 lock = dlm_new_lock(mode, dlm->node_num, tmpcookie, lksb);
676 if (!lock) { 658 if (!lock) {
677 dlm_error(status); 659 dlm_error(status);
678 goto error; 660 goto error;
679 } 661 }
680 662
681 if (!recovery) 663 if (!recovery)
682 dlm_wait_for_recovery( 664 dlm_wait_for_recovery(dlm);
683 665
684 /* find or create the lock res 666 /* find or create the lock resource */
685 res = dlm_get_lock_resource(dl 667 res = dlm_get_lock_resource(dlm, name, namelen, flags);
686 if (!res) { 668 if (!res) {
687 status = DLM_IVLOCKID; 669 status = DLM_IVLOCKID;
688 dlm_error(status); 670 dlm_error(status);
689 goto error; 671 goto error;
690 } 672 }
691 673
692 mlog(0, "type=%d, flags = 0x%x 674 mlog(0, "type=%d, flags = 0x%x\n", mode, flags);
693 mlog(0, "creating lock: lock=% 675 mlog(0, "creating lock: lock=%p res=%p\n", lock, res);
694 676
695 dlm_lock_attach_lockres(lock, 677 dlm_lock_attach_lockres(lock, res);
696 lock->ast = ast; 678 lock->ast = ast;
697 lock->bast = bast; 679 lock->bast = bast;
698 lock->astdata = data; 680 lock->astdata = data;
699 681
700 retry_lock: 682 retry_lock:
701 if (flags & LKM_VALBLK) { 683 if (flags & LKM_VALBLK) {
702 mlog(0, "LKM_VALBLK pa 684 mlog(0, "LKM_VALBLK passed by caller\n");
703 685
704 /* LVB requests for no 686 /* LVB requests for non PR, PW or EX locks are
705 * ignored. */ 687 * ignored. */
706 if (mode < LKM_PRMODE) 688 if (mode < LKM_PRMODE)
707 flags &= ~LKM_ 689 flags &= ~LKM_VALBLK;
708 else { 690 else {
709 flags |= LKM_G 691 flags |= LKM_GET_LVB;
710 lock->lksb->fl 692 lock->lksb->flags |= DLM_LKSB_GET_LVB;
711 } 693 }
712 } 694 }
713 695
714 if (res->owner == dlm->node_nu 696 if (res->owner == dlm->node_num)
715 status = dlmlock_maste 697 status = dlmlock_master(dlm, res, lock, flags);
716 else 698 else
717 status = dlmlock_remot 699 status = dlmlock_remote(dlm, res, lock, flags);
718 700
719 if (status == DLM_RECOVERING | 701 if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
720 status == DLM_FORWARD) { 702 status == DLM_FORWARD) {
721 mlog(0, "retrying lock 703 mlog(0, "retrying lock with migration/"
722 "recovery/in prog 704 "recovery/in progress\n");
723 msleep(100); 705 msleep(100);
724 /* no waiting for dlm_ 706 /* no waiting for dlm_reco_thread */
725 if (recovery) { 707 if (recovery) {
726 if (status != 708 if (status != DLM_RECOVERING)
727 goto r 709 goto retry_lock;
728 710
729 mlog(0, "%s: g 711 mlog(0, "%s: got RECOVERING "
730 "for $REC 712 "for $RECOVERY lock, master "
731 "was %u\n 713 "was %u\n", dlm->name,
732 res->owne 714 res->owner);
733 /* wait to see 715 /* wait to see the node go down, then
734 * drop down a 716 * drop down and allow the lockres to
735 * get cleaned 717 * get cleaned up. need to remaster. */
736 dlm_wait_for_n 718 dlm_wait_for_node_death(dlm, res->owner,
737 719 DLM_NODE_DEATH_WAIT_MAX);
738 } else { 720 } else {
739 dlm_wait_for_r 721 dlm_wait_for_recovery(dlm);
740 goto retry_loc 722 goto retry_lock;
741 } 723 }
742 } 724 }
743 725
744 if (status != DLM_NORMAL) { 726 if (status != DLM_NORMAL) {
745 lock->lksb->flags &= ~ 727 lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
746 if (status != DLM_NOTQ 728 if (status != DLM_NOTQUEUED)
747 dlm_error(stat 729 dlm_error(status);
748 goto error; 730 goto error;
749 } 731 }
750 } 732 }
751 733
752 error: 734 error:
753 if (status != DLM_NORMAL) { 735 if (status != DLM_NORMAL) {
754 if (lock && !convert) 736 if (lock && !convert)
755 dlm_lock_put(lock); 737 dlm_lock_put(lock);
756 // this is kind of unnecessary 738 // this is kind of unnecessary
757 lksb->status = status; 739 lksb->status = status;
758 } 740 }
759 741
760 /* put lockres ref from the convert pa 742 /* put lockres ref from the convert path
761 * or from dlm_get_lock_resource */ 743 * or from dlm_get_lock_resource */
762 if (res) 744 if (res)
763 dlm_lockres_put(res); 745 dlm_lockres_put(res);
764 746
765 return status; 747 return status;
766 } 748 }
767 EXPORT_SYMBOL_GPL(dlmlock); 749 EXPORT_SYMBOL_GPL(dlmlock);
768 750
|
This page was automatically generated by the
LXR engine.
|