Linux kernel & device driver programming

Cross-Referenced Linux and Device Driver Code

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ]
Version: [ 2.6.11.8 ] [ 2.6.25 ] [ 2.6.25.8 ] [ 2.6.31.13 ] Architecture: [ i386 ]
  1 /*
  2  * Copyright (c) 2006, Intel Corporation.
  3  *
  4  * This program is free software; you can redistribute it and/or modify it
  5  * under the terms and conditions of the GNU General Public License,
  6  * version 2, as published by the Free Software Foundation.
  7  *
  8  * This program is distributed in the hope it will be useful, but WITHOUT
  9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 11  * more details.
 12  *
 13  * You should have received a copy of the GNU General Public License along with
 14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 15  * Place - Suite 330, Boston, MA 02111-1307 USA.
 16  *
 17  * Copyright (C) 2006-2008 Intel Corporation
 18  * Author: Ashok Raj <ashok.raj@intel.com>
 19  * Author: Shaohua Li <shaohua.li@intel.com>
 20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 21  */
 22 
 23 #include <linux/init.h>
 24 #include <linux/bitmap.h>
 25 #include <linux/slab.h>
 26 #include <linux/irq.h>
 27 #include <linux/interrupt.h>
 28 #include <linux/sysdev.h>
 29 #include <linux/spinlock.h>
 30 #include <linux/pci.h>
 31 #include <linux/dmar.h>
 32 #include <linux/dma-mapping.h>
 33 #include <linux/mempool.h>
 34 #include "iova.h"
 35 #include "intel-iommu.h"
 36 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 37 #include <asm/cacheflush.h>
 38 #include <asm/gart.h>
 39 #include "pci.h"
 40 
 41 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
 42 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 43 
 44 #define IOAPIC_RANGE_START      (0xfee00000)
 45 #define IOAPIC_RANGE_END        (0xfeefffff)
 46 #define IOVA_START_ADDR         (0x1000)
 47 
 48 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 49 
 50 #define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
 51 
 52 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 53 
 54 static void domain_remove_dev_info(struct dmar_domain *domain);
 55 
 56 static int dmar_disabled;
 57 static int __initdata dmar_map_gfx = 1;
 58 static int dmar_forcedac;
 59 
 60 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 61 static DEFINE_SPINLOCK(device_domain_lock);
 62 static LIST_HEAD(device_domain_list);
 63 
 64 static int __init intel_iommu_setup(char *str)
 65 {
 66         if (!str)
 67                 return -EINVAL;
 68         while (*str) {
 69                 if (!strncmp(str, "off", 3)) {
 70                         dmar_disabled = 1;
 71                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
 72                 } else if (!strncmp(str, "igfx_off", 8)) {
 73                         dmar_map_gfx = 0;
 74                         printk(KERN_INFO
 75                                 "Intel-IOMMU: disable GFX device mapping\n");
 76                 } else if (!strncmp(str, "forcedac", 8)) {
 77                         printk (KERN_INFO
 78                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
 79                         dmar_forcedac = 1;
 80                 }
 81 
 82                 str += strcspn(str, ",");
 83                 while (*str == ',')
 84                         str++;
 85         }
 86         return 0;
 87 }
 88 __setup("intel_iommu=", intel_iommu_setup);
 89 
 90 static struct kmem_cache *iommu_domain_cache;
 91 static struct kmem_cache *iommu_devinfo_cache;
 92 static struct kmem_cache *iommu_iova_cache;
 93 
 94 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
 95 {
 96         unsigned int flags;
 97         void *vaddr;
 98 
 99         /* trying to avoid low memory issues */
100         flags = current->flags & PF_MEMALLOC;
101         current->flags |= PF_MEMALLOC;
102         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
103         current->flags &= (~PF_MEMALLOC | flags);
104         return vaddr;
105 }
106 
107 
108 static inline void *alloc_pgtable_page(void)
109 {
110         unsigned int flags;
111         void *vaddr;
112 
113         /* trying to avoid low memory issues */
114         flags = current->flags & PF_MEMALLOC;
115         current->flags |= PF_MEMALLOC;
116         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
117         current->flags &= (~PF_MEMALLOC | flags);
118         return vaddr;
119 }
120 
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page_addr = (unsigned long)vaddr;

	free_page(page_addr);
}
125 
126 static inline void *alloc_domain_mem(void)
127 {
128         return iommu_kmem_cache_alloc(iommu_domain_cache);
129 }
130 
131 static inline void free_domain_mem(void *vaddr)
132 {
133         kmem_cache_free(iommu_domain_cache, vaddr);
134 }
135 
136 static inline void * alloc_devinfo_mem(void)
137 {
138         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
139 }
140 
141 static inline void free_devinfo_mem(void *vaddr)
142 {
143         kmem_cache_free(iommu_devinfo_cache, vaddr);
144 }
145 
146 struct iova *alloc_iova_mem(void)
147 {
148         return iommu_kmem_cache_alloc(iommu_iova_cache);
149 }
150 
151 void free_iova_mem(struct iova *iova)
152 {
153         kmem_cache_free(iommu_iova_cache, iova);
154 }
155 
156 static inline void __iommu_flush_cache(
157         struct intel_iommu *iommu, void *addr, int size)
158 {
159         if (!ecap_coherent(iommu->ecap))
160                 clflush_cache_range(addr, size);
161 }
162 
163 /* Gets context entry for a given bus and devfn */
164 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
165                 u8 bus, u8 devfn)
166 {
167         struct root_entry *root;
168         struct context_entry *context;
169         unsigned long phy_addr;
170         unsigned long flags;
171 
172         spin_lock_irqsave(&iommu->lock, flags);
173         root = &iommu->root_entry[bus];
174         context = get_context_addr_from_root(root);
175         if (!context) {
176                 context = (struct context_entry *)alloc_pgtable_page();
177                 if (!context) {
178                         spin_unlock_irqrestore(&iommu->lock, flags);
179                         return NULL;
180                 }
181                 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
182                 phy_addr = virt_to_phys((void *)context);
183                 set_root_value(root, phy_addr);
184                 set_root_present(root);
185                 __iommu_flush_cache(iommu, root, sizeof(*root));
186         }
187         spin_unlock_irqrestore(&iommu->lock, flags);
188         return &context[devfn];
189 }
190 
191 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
192 {
193         struct root_entry *root;
194         struct context_entry *context;
195         int ret;
196         unsigned long flags;
197 
198         spin_lock_irqsave(&iommu->lock, flags);
199         root = &iommu->root_entry[bus];
200         context = get_context_addr_from_root(root);
201         if (!context) {
202                 ret = 0;
203                 goto out;
204         }
205         ret = context_present(context[devfn]);
206 out:
207         spin_unlock_irqrestore(&iommu->lock, flags);
208         return ret;
209 }
210 
211 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
212 {
213         struct root_entry *root;
214         struct context_entry *context;
215         unsigned long flags;
216 
217         spin_lock_irqsave(&iommu->lock, flags);
218         root = &iommu->root_entry[bus];
219         context = get_context_addr_from_root(root);
220         if (context) {
221                 context_clear_entry(context[devfn]);
222                 __iommu_flush_cache(iommu, &context[devfn], \
223                         sizeof(*context));
224         }
225         spin_unlock_irqrestore(&iommu->lock, flags);
226 }
227 
228 static void free_context_table(struct intel_iommu *iommu)
229 {
230         struct root_entry *root;
231         int i;
232         unsigned long flags;
233         struct context_entry *context;
234 
235         spin_lock_irqsave(&iommu->lock, flags);
236         if (!iommu->root_entry) {
237                 goto out;
238         }
239         for (i = 0; i < ROOT_ENTRY_NR; i++) {
240                 root = &iommu->root_entry[i];
241                 context = get_context_addr_from_root(root);
242                 if (context)
243                         free_pgtable_page(context);
244         }
245         free_pgtable_page(iommu->root_entry);
246         iommu->root_entry = NULL;
247 out:
248         spin_unlock_irqrestore(&iommu->lock, flags);
249 }
250 
/*
 * Page table handling.
 *
 * LEVEL_STRIDE is the number of address bits indexed by one page-table
 * level; LEVEL_MASK selects one level's index after shifting.
 */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		((((u64)1) << LEVEL_STRIDE) - 1)
254 
/* Number of page-table levels for a given agaw value: agaw + 2. */
static inline int agaw_to_level(int agaw)
{
	int levels = agaw + 2;

	return levels;
}
259 
260 static inline int agaw_to_width(int agaw)
261 {
262         return 30 + agaw * LEVEL_STRIDE;
263 
264 }
265 
266 static inline int width_to_agaw(int width)
267 {
268         return (width - 30) / LEVEL_STRIDE;
269 }
270 
271 static inline unsigned int level_to_offset_bits(int level)
272 {
273         return (12 + (level - 1) * LEVEL_STRIDE);
274 }
275 
276 static inline int address_level_offset(u64 addr, int level)
277 {
278         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
279 }
280 
281 static inline u64 level_mask(int level)
282 {
283         return ((u64)-1 << level_to_offset_bits(level));
284 }
285 
286 static inline u64 level_size(int level)
287 {
288         return ((u64)1 << level_to_offset_bits(level));
289 }
290 
291 static inline u64 align_to_level(u64 addr, int level)
292 {
293         return ((addr + level_size(level) - 1) & level_mask(level));
294 }
295 
296 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
297 {
298         int addr_width = agaw_to_width(domain->agaw);
299         struct dma_pte *parent, *pte = NULL;
300         int level = agaw_to_level(domain->agaw);
301         int offset;
302         unsigned long flags;
303 
304         BUG_ON(!domain->pgd);
305 
306         addr &= (((u64)1) << addr_width) - 1;
307         parent = domain->pgd;
308 
309         spin_lock_irqsave(&domain->mapping_lock, flags);
310         while (level > 0) {
311                 void *tmp_page;
312 
313                 offset = address_level_offset(addr, level);
314                 pte = &parent[offset];
315                 if (level == 1)
316                         break;
317 
318                 if (!dma_pte_present(*pte)) {
319                         tmp_page = alloc_pgtable_page();
320 
321                         if (!tmp_page) {
322                                 spin_unlock_irqrestore(&domain->mapping_lock,
323                                         flags);
324                                 return NULL;
325                         }
326                         __iommu_flush_cache(domain->iommu, tmp_page,
327                                         PAGE_SIZE_4K);
328                         dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
329                         /*
330                          * high level table always sets r/w, last level page
331                          * table control read/write
332                          */
333                         dma_set_pte_readable(*pte);
334                         dma_set_pte_writable(*pte);
335                         __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
336                 }
337                 parent = phys_to_virt(dma_pte_addr(*pte));
338                 level--;
339         }
340 
341         spin_unlock_irqrestore(&domain->mapping_lock, flags);
342         return pte;
343 }
344 
345 /* return address's pte at specific level */
346 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
347                 int level)
348 {
349         struct dma_pte *parent, *pte = NULL;
350         int total = agaw_to_level(domain->agaw);
351         int offset;
352 
353         parent = domain->pgd;
354         while (level <= total) {
355                 offset = address_level_offset(addr, total);
356                 pte = &parent[offset];
357                 if (level == total)
358                         return pte;
359 
360                 if (!dma_pte_present(*pte))
361                         break;
362                 parent = phys_to_virt(dma_pte_addr(*pte));
363                 total--;
364         }
365         return NULL;
366 }
367 
368 /* clear one page's page table */
369 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
370 {
371         struct dma_pte *pte = NULL;
372 
373         /* get last level pte */
374         pte = dma_addr_level_pte(domain, addr, 1);
375 
376         if (pte) {
377                 dma_clear_pte(*pte);
378                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
379         }
380 }
381 
382 /* clear last level pte, a tlb flush should be followed */
383 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
384 {
385         int addr_width = agaw_to_width(domain->agaw);
386 
387         start &= (((u64)1) << addr_width) - 1;
388         end &= (((u64)1) << addr_width) - 1;
389         /* in case it's partial page */
390         start = PAGE_ALIGN_4K(start);
391         end &= PAGE_MASK_4K;
392 
393         /* we don't need lock here, nobody else touches the iova range */
394         while (start < end) {
395                 dma_pte_clear_one(domain, start);
396                 start += PAGE_SIZE_4K;
397         }
398 }
399 
400 /* free page table pages. last level pte should already be cleared */
401 static void dma_pte_free_pagetable(struct dmar_domain *domain,
402         u64 start, u64 end)
403 {
404         int addr_width = agaw_to_width(domain->agaw);
405         struct dma_pte *pte;
406         int total = agaw_to_level(domain->agaw);
407         int level;
408         u64 tmp;
409 
410         start &= (((u64)1) << addr_width) - 1;
411         end &= (((u64)1) << addr_width) - 1;
412 
413         /* we don't need lock here, nobody else touches the iova range */
414         level = 2;
415         while (level <= total) {
416                 tmp = align_to_level(start, level);
417                 if (tmp >= end || (tmp + level_size(level) > end))
418                         return;
419 
420                 while (tmp < end) {
421                         pte = dma_addr_level_pte(domain, tmp, level);
422                         if (pte) {
423                                 free_pgtable_page(
424                                         phys_to_virt(dma_pte_addr(*pte)));
425                                 dma_clear_pte(*pte);
426                                 __iommu_flush_cache(domain->iommu,
427                                                 pte, sizeof(*pte));
428                         }
429                         tmp += level_size(level);
430                 }
431                 level++;
432         }
433         /* free pgd */
434         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
435                 free_pgtable_page(domain->pgd);
436                 domain->pgd = NULL;
437         }
438 }
439 
440 /* iommu handling */
441 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
442 {
443         struct root_entry *root;
444         unsigned long flags;
445 
446         root = (struct root_entry *)alloc_pgtable_page();
447         if (!root)
448                 return -ENOMEM;
449 
450         __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
451 
452         spin_lock_irqsave(&iommu->lock, flags);
453         iommu->root_entry = root;
454         spin_unlock_irqrestore(&iommu->lock, flags);
455 
456         return 0;
457 }
458 
/*
 * Poll an IOMMU register until a condition holds.
 *
 * @iommu:  object whose ->reg member is the register base
 * @offset: register offset added to ->reg
 * @op:     accessor called as op(address); its result is stored in @sts
 * @cond:   expression (normally involving @sts) that ends the wait when true
 * @sts:    lvalue that receives each value read
 *
 * Panics if @cond is still false more than DMAR_OPERATION_TIMEOUT jiffies
 * after the wait started.
 *
 * The body is wrapped in do { } while (0) so that "IOMMU_WAIT_OP(...);"
 * is a single statement and remains valid as the unbraced body of an
 * if/else.  The inner "break" only leaves the polling loop.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do {\
	unsigned long start_time = jiffies;\
	while (1) {\
		(sts) = op((iommu)->reg + (offset));\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
} while (0)
471 
472 static void iommu_set_root_entry(struct intel_iommu *iommu)
473 {
474         void *addr;
475         u32 cmd, sts;
476         unsigned long flag;
477 
478         addr = iommu->root_entry;
479 
480         spin_lock_irqsave(&iommu->register_lock, flag);
481         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
482 
483         cmd = iommu->gcmd | DMA_GCMD_SRTP;
484         writel(cmd, iommu->reg + DMAR_GCMD_REG);
485 
486         /* Make sure hardware complete it */
487         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
488                 readl, (sts & DMA_GSTS_RTPS), sts);
489 
490         spin_unlock_irqrestore(&iommu->register_lock, flag);
491 }
492 
493 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
494 {
495         u32 val;
496         unsigned long flag;
497 
498         if (!cap_rwbf(iommu->cap))
499                 return;
500         val = iommu->gcmd | DMA_GCMD_WBF;
501 
502         spin_lock_irqsave(&iommu->register_lock, flag);
503         writel(val, iommu->reg + DMAR_GCMD_REG);
504 
505         /* Make sure hardware complete it */
506         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
507                         readl, (!(val & DMA_GSTS_WBFS)), val);
508 
509         spin_unlock_irqrestore(&iommu->register_lock, flag);
510 }
511 
512 /* return value determine if we need a write buffer flush */
513 static int __iommu_flush_context(struct intel_iommu *iommu,
514         u16 did, u16 source_id, u8 function_mask, u64 type,
515         int non_present_entry_flush)
516 {
517         u64 val = 0;
518         unsigned long flag;
519 
520         /*
521          * In the non-present entry flush case, if hardware doesn't cache
522          * non-present entry we do nothing and if hardware cache non-present
523          * entry, we flush entries of domain 0 (the domain id is used to cache
524          * any non-present entries)
525          */
526         if (non_present_entry_flush) {
527                 if (!cap_caching_mode(iommu->cap))
528                         return 1;
529                 else
530                         did = 0;
531         }
532 
533         switch (type) {
534         case DMA_CCMD_GLOBAL_INVL:
535                 val = DMA_CCMD_GLOBAL_INVL;
536                 break;
537         case DMA_CCMD_DOMAIN_INVL:
538                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
539                 break;
540         case DMA_CCMD_DEVICE_INVL:
541                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
542                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
543                 break;
544         default:
545                 BUG();
546         }
547         val |= DMA_CCMD_ICC;
548 
549         spin_lock_irqsave(&iommu->register_lock, flag);
550         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
551 
552         /* Make sure hardware complete it */
553         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
554                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
555 
556         spin_unlock_irqrestore(&iommu->register_lock, flag);
557 
558         /* flush context entry will implictly flush write buffer */
559         return 0;
560 }
561 
562 static int inline iommu_flush_context_global(struct intel_iommu *iommu,
563         int non_present_entry_flush)
564 {
565         return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
566                 non_present_entry_flush);
567 }
568 
569 static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
570         int non_present_entry_flush)
571 {
572         return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
573                 non_present_entry_flush);
574 }
575 
576 static int inline iommu_flush_context_device(struct intel_iommu *iommu,
577         u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
578 {
579         return __iommu_flush_context(iommu, did, source_id, function_mask,
580                 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
581 }
582 
583 /* return value determine if we need a write buffer flush */
584 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
585         u64 addr, unsigned int size_order, u64 type,
586         int non_present_entry_flush)
587 {
588         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
589         u64 val = 0, val_iva = 0;
590         unsigned long flag;
591 
592         /*
593          * In the non-present entry flush case, if hardware doesn't cache
594          * non-present entry we do nothing and if hardware cache non-present
595          * entry, we flush entries of domain 0 (the domain id is used to cache
596          * any non-present entries)
597          */
598         if (non_present_entry_flush) {
599                 if (!cap_caching_mode(iommu->cap))
600                         return 1;
601                 else
602                         did = 0;
603         }
604 
605         switch (type) {
606         case DMA_TLB_GLOBAL_FLUSH:
607                 /* global flush doesn't need set IVA_REG */
608                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
609                 break;
610         case DMA_TLB_DSI_FLUSH:
611                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
612                 break;
613         case DMA_TLB_PSI_FLUSH:
614                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
615                 /* Note: always flush non-leaf currently */
616                 val_iva = size_order | addr;
617                 break;
618         default:
619                 BUG();
620         }
621         /* Note: set drain read/write */
622 #if 0
623         /*
624          * This is probably to be super secure.. Looks like we can
625          * ignore it without any impact.
626          */
627         if (cap_read_drain(iommu->cap))
628                 val |= DMA_TLB_READ_DRAIN;
629 #endif
630         if (cap_write_drain(iommu->cap))
631                 val |= DMA_TLB_WRITE_DRAIN;
632 
633         spin_lock_irqsave(&iommu->register_lock, flag);
634         /* Note: Only uses first TLB reg currently */
635         if (val_iva)
636                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
637         dmar_writeq(iommu->reg + tlb_offset + 8, val);
638 
639         /* Make sure hardware complete it */
640         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
641                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
642 
643         spin_unlock_irqrestore(&iommu->register_lock, flag);
644 
645         /* check IOTLB invalidation granularity */
646         if (DMA_TLB_IAIG(val) == 0)
647                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
648         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
649                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
650                         DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
651         /* flush context entry will implictly flush write buffer */
652         return 0;
653 }
654 
655 static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
656         int non_present_entry_flush)
657 {
658         return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
659                 non_present_entry_flush);
660 }
661 
662 static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
663         int non_present_entry_flush)
664 {
665         return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
666                 non_present_entry_flush);
667 }
668 
669 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
670         u64 addr, unsigned int pages, int non_present_entry_flush)
671 {
672         unsigned int mask;
673 
674         BUG_ON(addr & (~PAGE_MASK_4K));
675         BUG_ON(pages == 0);
676 
677         /* Fallback to domain selective flush if no PSI support */
678         if (!cap_pgsel_inv(iommu->cap))
679                 return iommu_flush_iotlb_dsi(iommu, did,
680                         non_present_entry_flush);
681 
682         /*
683          * PSI requires page size to be 2 ^ x, and the base address is naturally
684          * aligned to the size
685          */
686         mask = ilog2(__roundup_pow_of_two(pages));
687         /* Fallback to domain selective flush if size is too big */
688         if (mask > cap_max_amask_val(iommu->cap))
689                 return iommu_flush_iotlb_dsi(iommu, did,
690                         non_present_entry_flush);
691 
692         return __iommu_flush_iotlb(iommu, did, addr, mask,
693                 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
694 }
695 
696 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
697 {
698         u32 pmen;
699         unsigned long flags;
700 
701         spin_lock_irqsave(&iommu->register_lock, flags);
702         pmen = readl(iommu->reg + DMAR_PMEN_REG);
703         pmen &= ~DMA_PMEN_EPM;
704         writel(pmen, iommu->reg + DMAR_PMEN_REG);
705 
706         /* wait for the protected region status bit to clear */
707         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
708                 readl, !(pmen & DMA_PMEN_PRS), pmen);
709 
710         spin_unlock_irqrestore(&iommu->register_lock, flags);
711 }
712 
713 static int iommu_enable_translation(struct intel_iommu *iommu)
714 {
715         u32 sts;
716         unsigned long flags;
717 
718         spin_lock_irqsave(&iommu->register_lock, flags);
719         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
720 
721         /* Make sure hardware complete it */
722         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723                 readl, (sts & DMA_GSTS_TES), sts);
724 
725         iommu->gcmd |= DMA_GCMD_TE;
726         spin_unlock_irqrestore(&iommu->register_lock, flags);
727         return 0;
728 }
729 
730 static int iommu_disable_translation(struct intel_iommu *iommu)
731 {
732         u32 sts;
733         unsigned long flag;
734 
735         spin_lock_irqsave(&iommu->register_lock, flag);
736         iommu->gcmd &= ~DMA_GCMD_TE;
737         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
738 
739         /* Make sure hardware complete it */
740         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
741                 readl, (!(sts & DMA_GSTS_TES)), sts);
742 
743         spin_unlock_irqrestore(&iommu->register_lock, flag);
744         return 0;
745 }
746 
/* iommu interrupt handling. Most stuff are MSI-like. */

/*
 * Text for each fault reason code, indexed by the code itself.
 * dmar_get_fault_reason() reports codes beyond the table as "Unknown".
 */
static const char *fault_reason_strings[] =
{
	[0]  = "Software",
	[1]  = "Present bit in root entry is clear",
	[2]  = "Present bit in context entry is clear",
	[3]  = "Invalid context entry",
	[4]  = "Access beyond MGAW",
	[5]  = "PTE Write access is not set",
	[6]  = "PTE Read access is not set",
	[7]  = "Next page table ptr is invalid",
	[8]  = "Root table address invalid",
	[9]  = "Context table ptr is invalid",
	[10] = "non-zero reserved fields in RTP",
	[11] = "non-zero reserved fields in CTP",
	[12] = "non-zero reserved fields in PTE",
};
/* Highest valid index into fault_reason_strings[]. */
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)
766 
767 const char *dmar_get_fault_reason(u8 fault_reason)
768 {
769         if (fault_reason > MAX_FAULT_REASON_IDX)
770                 return "Unknown";
771         else
772                 return fault_reason_strings[fault_reason];
773 }
774 
775 void dmar_msi_unmask(unsigned int irq)
776 {
777         struct intel_iommu *iommu = get_irq_data(irq);
778         unsigned long flag;
779 
780         /* unmask it */
781         spin_lock_irqsave(&iommu->register_lock, flag);
782         writel(0, iommu->reg + DMAR_FECTL_REG);
783         /* Read a reg to force flush the post write */
784         readl(iommu->reg + DMAR_FECTL_REG);
785         spin_unlock_irqrestore(&iommu->register_lock, flag);
786 }
787 
788 void dmar_msi_mask(unsigned int irq)
789 {
790         unsigned long flag;
791         struct intel_iommu *iommu = get_irq_data(irq);
792 
793         /* mask it */
794         spin_lock_irqsave(&iommu->register_lock, flag);
795         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
796         /* Read a reg to force flush the post write */
797         readl(iommu->reg + DMAR_FECTL_REG);
798         spin_unlock_irqrestore(&iommu->register_lock, flag);
799 }
800 
801 void dmar_msi_write(int irq, struct msi_msg *msg)
802 {
803         struct intel_iommu *iommu = get_irq_data(irq);
804         unsigned long flag;
805 
806         spin_lock_irqsave(&iommu->register_lock, flag);
807         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
808         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
809         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
810         spin_unlock_irqrestore(&iommu->register_lock, flag);
811 }
812 
813 void dmar_msi_read(int irq, struct msi_msg *msg)
814 {
815         struct intel_iommu *iommu = get_irq_data(irq);
816         unsigned long flag;
817 
818         spin_lock_irqsave(&iommu->register_lock, flag);
819         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
820         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
821         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
822         spin_unlock_irqrestore(&iommu->register_lock, flag);
823 }
824 
825 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
826                 u8 fault_reason, u16 source_id, u64 addr)
827 {
828         const char *reason;
829 
830         reason = dmar_get_fault_reason(fault_reason);
831 
832         printk(KERN_ERR
833                 "DMAR:[%s] Request device [%02x:%02x.%d] "
834                 "fault addr %llx \n"
835                 "DMAR:[fault reason %02d] %s\n",
836                 (type ? "DMA Read" : "DMA Write"),
837                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
838                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
839         return 0;
840 }
841 
842 #define PRIMARY_FAULT_REG_LEN (16)
843 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
844 {
845         struct intel_iommu *iommu = dev_id;
846         int reg, fault_index;
847         u32 fault_status;
848         unsigned long flag;
849 
850         spin_lock_irqsave(&iommu->register_lock, flag);
851         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
852 
853         /* TBD: ignore advanced fault log currently */
854         if (!(fault_status & DMA_FSTS_PPF))
855                 goto clear_overflow;
856 
857         fault_index = dma_fsts_fault_record_index(fault_status);
858         reg = cap_fault_reg_offset(iommu->cap);
859         while (1) {
860                 u8 fault_reason;
861                 u16 source_id;
862                 u64 guest_addr;
863                 int type;
864                 u32 data;
865 
866                 /* highest 32 bits */
867                 data = readl(iommu->reg + reg +
868                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
869                 if (!(data & DMA_FRCD_F))
870                         break;
871 
872                 fault_reason = dma_frcd_fault_reason(data);
873                 type = dma_frcd_type(data);
874 
875                 data = readl(iommu->reg + reg +
876                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
877                 source_id = dma_frcd_source_id(data);
878 
879                 guest_addr = dmar_readq(iommu->reg + reg +
880                                 fault_index * PRIMARY_FAULT_REG_LEN);
881                 guest_addr = dma_frcd_page_addr(guest_addr);
882                 /* clear the fault */
883                 writel(DMA_FRCD_F, iommu->reg + reg +
884                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
885 
886                 spin_unlock_irqrestore(&iommu->register_lock, flag);
887 
888                 iommu_page_fault_do_one(iommu, type, fault_reason,
889                                 source_id, guest_addr);
890 
891                 fault_index++;
892                 if (fault_index > cap_num_fault_regs(iommu->cap))
893                         fault_index = 0;
894                 spin_lock_irqsave(&iommu->register_lock, flag);
895         }
896 clear_overflow:
897         /* clear primary fault overflow */
898         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
899         if (fault_status & DMA_FSTS_PFO)
900                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
901 
902         spin_unlock_irqrestore(&iommu->register_lock, flag);
903         return IRQ_HANDLED;
904 }
905 
906 int dmar_set_interrupt(struct intel_iommu *iommu)
907 {
908         int irq, ret;
909 
910         irq = create_irq();
911         if (!irq) {
912                 printk(KERN_ERR "IOMMU: no free vectors\n");
913                 return -EINVAL;
914         }
915 
916         set_irq_data(irq, iommu);
917         iommu->irq = irq;
918 
919         ret = arch_setup_dmar_msi(irq);
920         if (ret) {
921                 set_irq_data(irq, NULL);
922                 iommu->irq = 0;
923                 destroy_irq(irq);
924                 return 0;
925         }
926 
927         /* Force fault register is cleared */
928         iommu_page_fault(irq, iommu);
929 
930         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
931         if (ret)
932                 printk(KERN_ERR "IOMMU: can't request irq\n");
933         return ret;
934 }
935 
936 static int iommu_init_domains(struct intel_iommu *iommu)
937 {
938         unsigned long ndomains;
939         unsigned long nlongs;
940 
941         ndomains = cap_ndoms(iommu->cap);
942         pr_debug("Number of Domains supportd <%ld>\n", ndomains);
943         nlongs = BITS_TO_LONGS(ndomains);
944 
945         /* TBD: there might be 64K domains,
946          * consider other allocation for future chip
947          */
948         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
949         if (!iommu->domain_ids) {
950                 printk(KERN_ERR "Allocating domain id array failed\n");
951                 return -ENOMEM;
952         }
953         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
954                         GFP_KERNEL);
955         if (!iommu->domains) {
956                 printk(KERN_ERR "Allocating domain array failed\n");
957                 kfree(iommu->domain_ids);
958                 return -ENOMEM;
959         }
960 
961         /*
962          * if Caching mode is set, then invalid translations are tagged
963          * with domainid 0. Hence we need to pre-allocate it.
964          */
965         if (cap_caching_mode(iommu->cap))
966                 set_bit(0, iommu->domain_ids);
967         return 0;
968 }
969 
970 static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
971 {
972         struct intel_iommu *iommu;
973         int ret;
974         int map_size;
975         u32 ver;
976 
977         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
978         if (!iommu)
979                 return NULL;
980         iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
981         if (!iommu->reg) {
982                 printk(KERN_ERR "IOMMU: can't map the region\n");
983                 goto error;
984         }
985         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
986         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
987 
988         /* the registers might be more than one page */
989         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
990                 cap_max_fault_reg_offset(iommu->cap));
991         map_size = PAGE_ALIGN_4K(map_size);
992         if (map_size > PAGE_SIZE_4K) {
993                 iounmap(iommu->reg);
994                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
995                 if (!iommu->reg) {
996                         printk(KERN_ERR "IOMMU: can't map the region\n");
997                         goto error;
998                 }
999         }
1000 
1001         ver = readl(iommu->reg + DMAR_VER_REG);
1002         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1003                 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1004                 iommu->cap, iommu->ecap);
1005         ret = iommu_init_domains(iommu);
1006         if (ret)
1007                 goto error_unmap;
1008         spin_lock_init(&iommu->lock);
1009         spin_lock_init(&iommu->register_lock);
1010 
1011         drhd->iommu = iommu;
1012         return iommu;
1013 error_unmap:
1014         iounmap(iommu->reg);
1015 error:
1016         kfree(iommu);
1017         return NULL;
1018 }
1019 
1020 static void domain_exit(struct dmar_domain *domain);
1021 static void free_iommu(struct intel_iommu *iommu)
1022 {
1023         struct dmar_domain *domain;
1024         int i;
1025 
1026         if (!iommu)
1027                 return;
1028 
1029         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1030         for (; i < cap_ndoms(iommu->cap); ) {
1031                 domain = iommu->domains[i];
1032                 clear_bit(i, iommu->domain_ids);
1033                 domain_exit(domain);
1034                 i = find_next_bit(iommu->domain_ids,
1035                         cap_ndoms(iommu->cap), i+1);
1036         }
1037 
1038         if (iommu->gcmd & DMA_GCMD_TE)
1039                 iommu_disable_translation(iommu);
1040 
1041         if (iommu->irq) {
1042                 set_irq_data(iommu->irq, NULL);
1043                 /* This will mask the irq */
1044                 free_irq(iommu->irq, iommu);
1045                 destroy_irq(iommu->irq);
1046         }
1047 
1048         kfree(iommu->domains);
1049         kfree(iommu->domain_ids);
1050 
1051         /* free context mapping */
1052         free_context_table(iommu);
1053 
1054         if (iommu->reg)
1055                 iounmap(iommu->reg);
1056         kfree(iommu);
1057 }
1058 
1059 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1060 {
1061         unsigned long num;
1062         unsigned long ndomains;
1063         struct dmar_domain *domain;
1064         unsigned long flags;
1065 
1066         domain = alloc_domain_mem();
1067         if (!domain)
1068                 return NULL;
1069 
1070         ndomains = cap_ndoms(iommu->cap);
1071 
1072         spin_lock_irqsave(&iommu->lock, flags);
1073         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1074         if (num >= ndomains) {
1075                 spin_unlock_irqrestore(&iommu->lock, flags);
1076                 free_domain_mem(domain);
1077                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1078                 return NULL;
1079         }
1080 
1081         set_bit(num, iommu->domain_ids);
1082         domain->id = num;
1083         domain->iommu = iommu;
1084         iommu->domains[num] = domain;
1085         spin_unlock_irqrestore(&iommu->lock, flags);
1086 
1087         return domain;
1088 }
1089 
1090 static void iommu_free_domain(struct dmar_domain *domain)
1091 {
1092         unsigned long flags;
1093 
1094         spin_lock_irqsave(&domain->iommu->lock, flags);
1095         clear_bit(domain->id, domain->iommu->domain_ids);
1096         spin_unlock_irqrestore(&domain->iommu->lock, flags);
1097 }
1098 
1099 static struct iova_domain reserved_iova_list;
1100 static struct lock_class_key reserved_alloc_key;
1101 static struct lock_class_key reserved_rbtree_key;
1102 
1103 static void dmar_init_reserved_ranges(void)
1104 {
1105         struct pci_dev *pdev = NULL;
1106         struct iova *iova;
1107         int i;
1108         u64 addr, size;
1109 
1110         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1111 
1112         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1113                 &reserved_alloc_key);
1114         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1115                 &reserved_rbtree_key);
1116 
1117         /* IOAPIC ranges shouldn't be accessed by DMA */
1118         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1119                 IOVA_PFN(IOAPIC_RANGE_END));
1120         if (!iova)
1121                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1122 
1123         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1124         for_each_pci_dev(pdev) {
1125                 struct resource *r;
1126 
1127                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1128                         r = &pdev->resource[i];
1129                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1130                                 continue;
1131                         addr = r->start;
1132                         addr &= PAGE_MASK_4K;
1133                         size = r->end - addr;
1134                         size = PAGE_ALIGN_4K(size);
1135                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1136                                 IOVA_PFN(size + addr) - 1);
1137                         if (!iova)
1138                                 printk(KERN_ERR "Reserve iova failed\n");
1139                 }
1140         }
1141 
1142 }
1143 
1144 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1145 {
1146         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1147 }
1148 
/*
 * guestwidth_to_adjustwidth - round a guest address width up to the next
 * value of the form 12 + 9 * k, capped at 64
 * @gaw: guest address width in bits
 *
 * Widths that already have that form are returned unchanged.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int width = rem ? gaw + 9 - rem : gaw;

	return width > 64 ? 64 : width;
}
1162 
1163 static int domain_init(struct dmar_domain *domain, int guest_width)
1164 {
1165         struct intel_iommu *iommu;
1166         int adjust_width, agaw;
1167         unsigned long sagaw;
1168 
1169         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1170         spin_lock_init(&domain->mapping_lock);
1171 
1172         domain_reserve_special_ranges(domain);
1173 
1174         /* calculate AGAW */
1175         iommu = domain->iommu;
1176         if (guest_width > cap_mgaw(iommu->cap))
1177                 guest_width = cap_mgaw(iommu->cap);
1178         domain->gaw = guest_width;
1179         adjust_width = guestwidth_to_adjustwidth(guest_width);
1180         agaw = width_to_agaw(adjust_width);
1181         sagaw = cap_sagaw(iommu->cap);
1182         if (!test_bit(agaw, &sagaw)) {
1183                 /* hardware doesn't support it, choose a bigger one */
1184                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1185                 agaw = find_next_bit(&sagaw, 5, agaw);
1186                 if (agaw >= 5)
1187                         return -ENODEV;
1188         }
1189         domain->agaw = agaw;
1190         INIT_LIST_HEAD(&domain->devices);
1191 
1192         /* always allocate the top pgd */
1193         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1194         if (!domain->pgd)
1195                 return -ENOMEM;
1196         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1197         return 0;
1198 }
1199 
1200 static void domain_exit(struct dmar_domain *domain)
1201 {
1202         u64 end;
1203 
1204         /* Domain 0 is reserved, so dont process it */
1205         if (!domain)
1206                 return;
1207 
1208         domain_remove_dev_info(domain);
1209         /* destroy iovas */
1210         put_iova_domain(&domain->iovad);
1211         end = DOMAIN_MAX_ADDR(domain->gaw);
1212         end = end & (~PAGE_MASK_4K);
1213 
1214         /* clear ptes */
1215         dma_pte_clear_range(domain, 0, end);
1216 
1217         /* free page tables */
1218         dma_pte_free_pagetable(domain, 0, end);
1219 
1220         iommu_free_domain(domain);
1221         free_domain_mem(domain);
1222 }
1223 
1224 static int domain_context_mapping_one(struct dmar_domain *domain,
1225                 u8 bus, u8 devfn)
1226 {
1227         struct context_entry *context;
1228         struct intel_iommu *iommu = domain->iommu;
1229         unsigned long flags;
1230 
1231         pr_debug("Set context mapping for %02x:%02x.%d\n",
1232                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1233         BUG_ON(!domain->pgd);
1234         context = device_to_context_entry(iommu, bus, devfn);
1235         if (!context)
1236                 return -ENOMEM;
1237         spin_lock_irqsave(&iommu->lock, flags);
1238         if (context_present(*context)) {
1239                 spin_unlock_irqrestore(&iommu->lock, flags);
1240                 return 0;
1241         }
1242 
1243         context_set_domain_id(*context, domain->id);
1244         context_set_address_width(*context, domain->agaw);
1245         context_set_address_root(*context, virt_to_phys(domain->pgd));
1246         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1247         context_set_fault_enable(*context);
1248         context_set_present(*context);
1249         __iommu_flush_cache(iommu, context, sizeof(*context));
1250 
1251         /* it's a non-present to present mapping */
1252         if (iommu_flush_context_device(iommu, domain->id,
1253                         (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1254                 iommu_flush_write_buffer(iommu);
1255         else
1256                 iommu_flush_iotlb_dsi(iommu, 0, 0);
1257         spin_unlock_irqrestore(&iommu->lock, flags);
1258         return 0;
1259 }
1260 
1261 static int
1262 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1263 {
1264         int ret;
1265         struct pci_dev *tmp, *parent;
1266 
1267         ret = domain_context_mapping_one(domain, pdev->bus->number,
1268                 pdev->devfn);
1269         if (ret)
1270                 return ret;
1271 
1272         /* dependent device mapping */
1273         tmp = pci_find_upstream_pcie_bridge(pdev);
1274         if (!tmp)
1275                 return 0;
1276         /* Secondary interface's bus number and devfn 0 */
1277         parent = pdev->bus->self;
1278         while (parent != tmp) {
1279                 ret = domain_context_mapping_one(domain, parent->bus->number,
1280                         parent->devfn);
1281                 if (ret)
1282                         return ret;
1283                 parent = parent->bus->self;
1284         }
1285         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1286                 return domain_context_mapping_one(domain,
1287                         tmp->subordinate->number, 0);
1288         else /* this is a legacy PCI bridge */
1289                 return domain_context_mapping_one(domain,
1290                         tmp->bus->number, tmp->devfn);
1291 }
1292 
1293 static int domain_context_mapped(struct dmar_domain *domain,
1294         struct pci_dev *pdev)
1295 {
1296         int ret;
1297         struct pci_dev *tmp, *parent;
1298 
1299         ret = device_context_mapped(domain->iommu,
1300                 pdev->bus->number, pdev->devfn);
1301         if (!ret)
1302                 return ret;
1303         /* dependent device mapping */
1304         tmp = pci_find_upstream_pcie_bridge(pdev);
1305         if (!tmp)
1306                 return ret;
1307         /* Secondary interface's bus number and devfn 0 */
1308         parent = pdev->bus->self;
1309         while (parent != tmp) {
1310                 ret = device_context_mapped(domain->iommu, parent->bus->number,
1311                         parent->devfn);
1312                 if (!ret)
1313                         return ret;
1314                 parent = parent->bus->self;
1315         }
1316         if (tmp->is_pcie)
1317                 return device_context_mapped(domain->iommu,
1318                         tmp->subordinate->number, 0);
1319         else
1320                 return device_context_mapped(domain->iommu,
1321                         tmp->bus->number, tmp->devfn);
1322 }
1323 
1324 static int
1325 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1326                         u64 hpa, size_t size, int prot)
1327 {
1328         u64 start_pfn, end_pfn;
1329         struct dma_pte *pte;
1330         int index;
1331 
1332         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1333                 return -EINVAL;
1334         iova &= PAGE_MASK_4K;
1335         start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1336         end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1337         index = 0;
1338         while (start_pfn < end_pfn) {
1339                 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1340                 if (!pte)
1341                         return -ENOMEM;
1342                 /* We don't need lock here, nobody else
1343                  * touches the iova range
1344                  */
1345                 BUG_ON(dma_pte_addr(*pte));
1346                 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1347                 dma_set_pte_prot(*pte, prot);
1348                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1349                 start_pfn++;
1350                 index++;
1351         }
1352         return 0;
1353 }
1354 
1355 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1356 {
1357         clear_context_table(domain->iommu, bus, devfn);
1358         iommu_flush_context_global(domain->iommu, 0);
1359         iommu_flush_iotlb_global(domain->iommu, 0);
1360 }
1361 
1362 static void domain_remove_dev_info(struct dmar_domain *domain)
1363 {
1364         struct device_domain_info *info;
1365         unsigned long flags;
1366 
1367         spin_lock_irqsave(&device_domain_lock, flags);
1368         while (!list_empty(&domain->devices)) {
1369                 info = list_entry(domain->devices.next,
1370                         struct device_domain_info, link);
1371                 list_del(&info->link);
1372                 list_del(&info->global);
1373                 if (info->dev)
1374                         info->dev->dev.archdata.iommu = NULL;
1375                 spin_unlock_irqrestore(&device_domain_lock, flags);
1376 
1377                 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1378                 free_devinfo_mem(info);
1379 
1380                 spin_lock_irqsave(&device_domain_lock, flags);
1381         }
1382         spin_unlock_irqrestore(&device_domain_lock, flags);
1383 }
1384 
1385 /*
1386  * find_domain
1387  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
1388  */
1389 struct dmar_domain *
1390 find_domain(struct pci_dev *pdev)
1391 {
1392         struct device_domain_info *info;
1393 
1394         /* No lock here, assumes no domain exit in normal case */
1395         info = pdev->dev.archdata.iommu;
1396         if (info)
1397                 return info->domain;
1398         return NULL;
1399 }
1400 
1401 static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1402      struct pci_dev *dev)
1403 {
1404         int index;
1405 
1406         while (dev) {
1407                 for (index = 0; index < cnt; index ++)
1408                         if (dev == devices[index])
1409                                 return 1;
1410 
1411                 /* Check our parent */
1412                 dev = dev->bus->self;
1413         }
1414 
1415         return 0;
1416 }
1417 
1418 static struct dmar_drhd_unit *
1419 dmar_find_matched_drhd_unit(struct pci_dev *dev)
1420 {
1421         struct dmar_drhd_unit *drhd = NULL;
1422 
1423         list_for_each_entry(drhd, &dmar_drhd_units, list) {
1424                 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1425                                                 drhd->devices_cnt, dev))
1426                         return drhd;
1427         }
1428 
1429         return NULL;
1430 }
1431 
1432 /* domain is initialized */
1433 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1434 {
1435         struct dmar_domain *domain, *found = NULL;
1436         struct intel_iommu *iommu;
1437         struct dmar_drhd_unit *drhd;
1438         struct device_domain_info *info, *tmp;
1439         struct pci_dev *dev_tmp;
1440         unsigned long flags;
1441         int bus = 0, devfn = 0;
1442 
1443         domain = find_domain(pdev);
1444         if (domain)
1445                 return domain;
1446 
1447         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1448         if (dev_tmp) {
1449                 if (dev_tmp->is_pcie) {
1450                         bus = dev_tmp->subordinate->number;
1451                         devfn = 0;
1452                 } else {
1453                         bus = dev_tmp->bus->number;
1454                         devfn = dev_tmp->devfn;
1455                 }
1456                 spin_lock_irqsave(&device_domain_lock, flags);
1457                 list_for_each_entry(info, &device_domain_list, global) {
1458                         if (info->bus == bus && info->devfn == devfn) {
1459                                 found = info->domain;
1460                                 break;
1461                         }
1462                 }
1463                 spin_unlock_irqrestore(&device_domain_lock, flags);
1464                 /* pcie-pci bridge already has a domain, uses it */
1465                 if (found) {
1466                         domain = found;
1467                         goto found_domain;
1468                 }
1469         }
1470 
1471         /* Allocate new domain for the device */
1472         drhd = dmar_find_matched_drhd_unit(pdev);
1473         if (!drhd) {
1474                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1475                         pci_name(pdev));
1476                 return NULL;
1477         }
1478         iommu = drhd->iommu;
1479 
1480         domain = iommu_alloc_domain(iommu);
1481         if (!domain)
1482                 goto error;
1483 
1484         if (domain_init(domain, gaw)) {
1485                 domain_exit(domain);
1486                 goto error;
1487         }
1488 
1489         /* register pcie-to-pci device */
1490         if (dev_tmp) {
1491                 info = alloc_devinfo_mem();
1492                 if (!info) {
1493                         domain_exit(domain);
1494                         goto error;
1495                 }
1496                 info->bus = bus;
1497                 info->devfn = devfn;
1498                 info->dev = NULL;
1499                 info->domain = domain;
1500                 /* This domain is shared by devices under p2p bridge */
1501                 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1502 
1503                 /* pcie-to-pci bridge already has a domain, uses it */
1504                 found = NULL;
1505                 spin_lock_irqsave(&device_domain_lock, flags);
1506                 list_for_each_entry(tmp, &device_domain_list, global) {
1507                         if (tmp->bus == bus && tmp->devfn == devfn) {
1508                                 found = tmp->domain;
1509                                 break;
1510                         }
1511                 }
1512                 if (found) {
1513                         free_devinfo_mem(info);
1514                         domain_exit(domain);
1515                         domain = found;
1516                 } else {
1517                         list_add(&info->link, &domain->devices);
1518                         list_add(&info->global, &device_domain_list);
1519                 }
1520                 spin_unlock_irqrestore(&device_domain_lock, flags);
1521         }
1522 
1523 found_domain:
1524         info = alloc_devinfo_mem();
1525         if (!info)
1526                 goto error;
1527         info->bus = pdev->bus->number;
1528         info->devfn = pdev->devfn;
1529         info->dev = pdev;
1530         info->domain = domain;
1531         spin_lock_irqsave(&device_domain_lock, flags);
1532         /* somebody is fast */
1533         found = find_domain(pdev);
1534         if (found != NULL) {
1535                 spin_unlock_irqrestore(&device_domain_lock, flags);
1536                 if (found != domain) {
1537                         domain_exit(domain);
1538                         domain = found;
1539                 }
1540                 free_devinfo_mem(info);
1541                 return domain;
1542         }
1543         list_add(&info->link, &domain->devices);
1544         list_add(&info->global, &device_domain_list);
1545         pdev->dev.archdata.iommu = info;
1546         spin_unlock_irqrestore(&device_domain_lock, flags);
1547         return domain;
1548 error:
1549         /* recheck it here, maybe others set it */
1550         return find_domain(pdev);
1551 }
1552 
1553 static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1554 {
1555         struct dmar_domain *domain;
1556         unsigned long size;
1557         u64 base;
1558         int ret;
1559 
1560         printk(KERN_INFO
1561                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1562                 pci_name(pdev), start, end);
1563         /* page table init */
1564         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1565         if (!domain)
1566                 return -ENOMEM;
1567 
1568         /* The address might not be aligned */
1569         base = start & PAGE_MASK_4K;
1570         size = end - base;
1571         size = PAGE_ALIGN_4K(size);
1572         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1573                         IOVA_PFN(base + size) - 1)) {
1574                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1575                 ret = -ENOMEM;
1576                 goto error;
1577         }
1578 
1579         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1580                 size, base, pci_name(pdev));
1581         /*
1582          * RMRR range might have overlap with physical memory range,
1583          * clear it first
1584          */
1585         dma_pte_clear_range(domain, base, base + size);
1586 
1587         ret = domain_page_mapping(domain, base, base, size,
1588                 DMA_PTE_READ|DMA_PTE_WRITE);
1589         if (ret)
1590                 goto error;
1591 
1592         /* context entry init */
1593         ret = domain_context_mapping(domain, pdev);
1594         if (!ret)
1595                 return 0;
1596 error:
1597         domain_exit(domain);
1598         return ret;
1599 
1600 }
1601 
1602 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1603         struct pci_dev *pdev)
1604 {
1605         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1606                 return 0;
1607         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1608                 rmrr->end_address + 1);
1609 }
1610 
#ifdef CONFIG_DMAR_GFX_WA
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);

/*
 * iommu_prepare_gfx_mapping - give every graphics device a 1:1 mapping of
 * all RAM ranges
 *
 * For each PCI display-class device that is not tagged with
 * DUMMY_DEVICE_DOMAIN_INFO, walks the RAM ranges reported by
 * arch_get_ram_range() and identity-maps each of them. The first failure for
 * a device is logged and the remaining ranges of that device are skipped;
 * processing then continues with the next device.
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	u64 base, size;
	int slot;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		slot = arch_get_ram_range(0, &base, &size);
		while (slot >= 0) {
			ret = iommu_prepare_identity_map(pdev,
					base, base + size);
			if (ret) {
				printk(KERN_ERR
				 "IOMMU: mapping reserved region failed\n");
				break;
			}
			slot = arch_get_ram_range(slot, &base, &size);
		}
	}
}
#else /* !CONFIG_DMAR_GFX_WA */
/*
 * init_dmars() calls this unconditionally, so a no-op stub is required when
 * the graphics workaround is not configured.
 */
static inline void iommu_prepare_gfx_mapping(void)
{
	return;
}
#endif
1640 
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * iommu_prepare_isa - identity-map the first 16MB for the ISA bridge
 *
 * Looks up the first ISA bridge class device and, if one exists, sets up a
 * 0-16M identity mapping for it. A failure is only logged.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	/* the range attempted above is 0-16M; report exactly that */
	if (ret)
		printk(KERN_ERR
			"IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1665 
1666 int __init init_dmars(void)
1667 {
1668         struct dmar_drhd_unit *drhd;
1669         struct dmar_rmrr_unit *rmrr;
1670         struct pci_dev *pdev;
1671         struct intel_iommu *iommu;
1672         int ret, unit = 0;
1673 
1674         /*
1675          * for each drhd
1676          *    allocate root
1677          *    initialize and program root entry to not present
1678          * endfor
1679          */
1680         for_each_drhd_unit(drhd) {
1681                 if (drhd->ignored)
1682                         continue;
1683                 iommu = alloc_iommu(drhd);
1684                 if (!iommu) {
1685                         ret = -ENOMEM;
1686                         goto error;
1687                 }
1688 
1689                 /*
1690                  * TBD:
1691                  * we could share the same root & context tables
1692                  * amoung all IOMMU's. Need to Split it later.
1693                  */
1694                 ret = iommu_alloc_root_entry(iommu);
1695                 if (ret) {
1696                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1697                         goto error;
1698                 }
1699         }
1700 
1701         /*
1702          * For each rmrr
1703          *   for each dev attached to rmrr
1704          *   do
1705          *     locate drhd for dev, alloc domain for dev
1706          *     allocate free domain
1707          *     allocate page table entries for rmrr
1708          *     if context not allocated for bus
1709          *           allocate and init context
1710          *           set present in root table for this bus
1711          *     init context with domain, translation etc
1712          *    endfor
1713          * endfor
1714          */
1715         for_each_rmrr_units(rmrr) {
1716                 int i;
1717                 for (i = 0; i < rmrr->devices_cnt; i++) {
1718                         pdev = rmrr->devices[i];
1719                         /* some BIOS lists non-exist devices in DMAR table */
1720                         if (!pdev)
1721                                 continue;
1722                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1723                         if (ret)
1724                                 printk(KERN_ERR
1725                                  "IOMMU: mapping reserved region failed\n");
1726                 }
1727         }
1728 
1729         iommu_prepare_gfx_mapping();
1730 
1731         iommu_prepare_isa();
1732 
1733         /*
1734          * for each drhd
1735          *   enable fault log
1736          *   global invalidate context cache
1737          *   global invalidate iotlb
1738          *   enable translation
1739          */
1740         for_each_drhd_unit(drhd) {
1741                 if (drhd->ignored)
1742                         continue;
1743                 iommu = drhd->iommu;
1744                 sprintf (iommu->name, "dmar%d", unit++);
1745 
1746                 iommu_flush_write_buffer(iommu);
1747 
1748                 ret = dmar_set_interrupt(iommu);
1749                 if (ret)
1750                         goto error;
1751 
1752                 iommu_set_root_entry(iommu);
1753 
1754                 iommu_flush_context_global(iommu, 0);
1755                 iommu_flush_iotlb_global(iommu, 0);
1756 
1757                 iommu_disable_protect_mem_regions(iommu);
1758 
1759                 ret = iommu_enable_translation(iommu);
1760                 if (ret)
1761                         goto error;
1762         }
1763 
1764         return 0;
1765 error:
1766         for_each_drhd_unit(drhd) {
1767                 if (drhd->ignored)
1768                         continue;
1769                 iommu = drhd->iommu;
1770                 free_iommu(iommu);
1771         }
1772         return ret;
1773 }
1774 
1775 static inline u64 aligned_size(u64 host_addr, size_t size)
1776 {
1777         u64 addr;
1778         addr = (host_addr & (~PAGE_MASK_4K)) + size;
1779         return PAGE_ALIGN_4K(addr);
1780 }
1781 
1782 struct iova *
1783 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1784 {
1785         struct iova *piova;
1786 
1787         /* Make sure it's in range */
1788         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1789         if (!size || (IOVA_START_ADDR + size > end))
1790                 return NULL;
1791 
1792         piova = alloc_iova(&domain->iovad,
1793                         size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1794         return piova;
1795 }
1796 
1797 static struct iova *
1798 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1799                 size_t size)
1800 {
1801         struct pci_dev *pdev = to_pci_dev(dev);
1802         struct iova *iova = NULL;
1803 
1804         if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1805                 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1806         } else  {
1807                 /*
1808                  * First try to allocate an io virtual address in
1809                  * DMA_32BIT_MASK and if that fails then try allocating
1810                  * from higher range
1811                  */
1812                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1813                 if (!iova)
1814                         iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1815         }
1816 
1817         if (!iova) {
1818                 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1819                 return NULL;
1820         }
1821 
1822         return iova;
1823 }
1824 
1825 static struct dmar_domain *
1826 get_valid_domain_for_dev(struct pci_dev *pdev)
1827 {
1828         struct dmar_domain *domain;
1829         int ret;
1830 
1831         domain = get_domain_for_dev(pdev,
1832                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
1833         if (!domain) {
1834                 printk(KERN_ERR
1835                         "Allocating domain for %s failed", pci_name(pdev));
1836                 return NULL;
1837         }
1838 
1839         /* make sure context mapping is ok */
1840         if (unlikely(!domain_context_mapped(domain, pdev))) {
1841                 ret = domain_context_mapping(domain, pdev);
1842                 if (ret) {
1843                         printk(KERN_ERR
1844                                 "Domain context map for %s failed",
1845                                 pci_name(pdev));
1846                         return NULL;
1847                 }
1848         }
1849 
1850         return domain;
1851 }
1852 
1853 static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1854         size_t size, int dir)
1855 {
1856         struct pci_dev *pdev = to_pci_dev(hwdev);
1857         int ret;
1858         struct dmar_domain *domain;
1859         unsigned long start_addr;
1860         struct iova *iova;
1861         int prot = 0;
1862 
1863         BUG_ON(dir == DMA_NONE);
1864         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1865                 return virt_to_bus(addr);
1866 
1867         domain = get_valid_domain_for_dev(pdev);
1868         if (!domain)
1869                 return 0;
1870 
1871         addr = (void *)virt_to_phys(addr);
1872         size = aligned_size((u64)addr, size);
1873 
1874         iova = __intel_alloc_iova(hwdev, domain, size);
1875         if (!iova)
1876                 goto error;
1877 
1878         start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1879 
1880         /*
1881          * Check if DMAR supports zero-length reads on write only
1882          * mappings..
1883          */
1884         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1885                         !cap_zlr(domain->iommu->cap))
1886                 prot |= DMA_PTE_READ;
1887         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1888                 prot |= DMA_PTE_WRITE;
1889         /*
1890          * addr - (addr + size) might be partial page, we should map the whole
1891          * page.  Note: if two part of one page are separately mapped, we
1892          * might have two guest_addr mapping to the same host addr, but this
1893          * is not a big problem
1894          */
1895         ret = domain_page_mapping(domain, start_addr,
1896                 ((u64)addr) & PAGE_MASK_4K, size, prot);
1897         if (ret)
1898                 goto error;
1899 
1900         pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1901                 pci_name(pdev), size, (u64)addr,
1902                 size, (u64)start_addr, dir);
1903 
1904         /* it's a non-present to present mapping */
1905         ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1906                         start_addr, size >> PAGE_SHIFT_4K, 1);
1907         if (ret)
1908                 iommu_flush_write_buffer(domain->iommu);
1909 
1910         return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1911 
1912 error:
1913         if (iova)
1914                 __free_iova(&domain->iovad, iova);
1915         printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1916                 pci_name(pdev), size, (u64)addr, dir);
1917         return 0;
1918 }
1919 
1920 static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1921         size_t size, int dir)
1922 {
1923         struct pci_dev *pdev = to_pci_dev(dev);
1924         struct dmar_domain *domain;
1925         unsigned long start_addr;
1926         struct iova *iova;
1927 
1928         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1929                 return;
1930         domain = find_domain(pdev);
1931         BUG_ON(!domain);
1932 
1933         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1934         if (!iova)
1935                 return;
1936 
1937         start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1938         size = aligned_size((u64)dev_addr, size);
1939 
1940         pr_debug("Device %s unmapping: %lx@%llx\n",
1941                 pci_name(pdev), size, (u64)start_addr);
1942 
1943         /*  clear the whole page */
1944         dma_pte_clear_range(domain, start_addr, start_addr + size);
1945         /* free page tables */
1946         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1947 
1948         if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1949                         size >> PAGE_SHIFT_4K, 0))
1950                 iommu_flush_write_buffer(domain->iommu);
1951 
1952         /* free iova */
1953         __free_iova(&domain->iovad, iova);
1954 }
1955 
1956 static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1957                        dma_addr_t *dma_handle, gfp_t flags)
1958 {
1959         void *vaddr;
1960         int order;
1961 
1962         size = PAGE_ALIGN_4K(size);
1963         order = get_order(size);
1964         flags &= ~(GFP_DMA | GFP_DMA32);
1965 
1966         vaddr = (void *)__get_free_pages(flags, order);
1967         if (!vaddr)
1968                 return NULL;
1969         memset(vaddr, 0, size);
1970 
1971         *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1972         if (*dma_handle)
1973                 return vaddr;
1974         free_pages((unsigned long)vaddr, order);
1975         return NULL;
1976 }
1977 
1978 static void intel_free_coherent(struct device *hwdev, size_t size,
1979         void *vaddr, dma_addr_t dma_handle)
1980 {
1981         int order;
1982 
1983         size = PAGE_ALIGN_4K(size);
1984         order = get_order(size);
1985 
1986         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1987         free_pages((unsigned long)vaddr, order);
1988 }
1989 
1990 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1991 static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1992         int nelems, int dir)
1993 {
1994         int i;
1995         struct pci_dev *pdev = to_pci_dev(hwdev);
1996         struct dmar_domain *domain;
1997         unsigned long start_addr;
1998         struct iova *iova;
1999         size_t size = 0;
2000         void *addr;
2001         struct scatterlist *sg;
2002 
2003         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2004                 return;
2005 
2006         domain = find_domain(pdev);
2007 
2008         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2009         if (!iova)
2010                 return;
2011         for_each_sg(sglist, sg, nelems, i) {
2012                 addr = SG_ENT_VIRT_ADDRESS(sg);
2013                 size += aligned_size((u64)addr, sg->length);
2014         }
2015 
2016         start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2017 
2018         /*  clear the whole page */
2019         dma_pte_clear_range(domain, start_addr, start_addr + size);
2020         /* free page tables */
2021         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2022 
2023         if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2024                         size >> PAGE_SHIFT_4K, 0))
2025                 iommu_flush_write_buffer(domain->iommu);
2026 
2027         /* free iova */
2028         __free_iova(&domain->iovad, iova);
2029 }
2030 
2031 static int intel_nontranslate_map_sg(struct device *hddev,
2032         struct scatterlist *sglist, int nelems, int dir)
2033 {
2034         int i;
2035         struct scatterlist *sg;
2036 
2037         for_each_sg(sglist, sg, nelems, i) {
2038                 BUG_ON(!sg_page(sg));
2039                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2040                 sg->dma_length = sg->length;
2041         }
2042         return nelems;
2043 }
2044 
2045 static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2046                                 int nelems, int dir)
2047 {
2048         void *addr;
2049         int i;
2050         struct pci_dev *pdev = to_pci_dev(hwdev);
2051         struct dmar_domain *domain;
2052         size_t size = 0;
2053         int prot = 0;
2054         size_t offset = 0;
2055         struct iova *iova = NULL;
2056         int ret;
2057         struct scatterlist *sg;
2058         unsigned long start_addr;
2059 
2060         BUG_ON(dir == DMA_NONE);
2061         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2062                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2063 
2064         domain = get_valid_domain_for_dev(pdev);
2065         if (!domain)
2066                 return 0;
2067 
2068         for_each_sg(sglist, sg, nelems, i) {
2069                 addr = SG_ENT_VIRT_ADDRESS(sg);
2070                 addr = (void *)virt_to_phys(addr);
2071                 size += aligned_size((u64)addr, sg->length);
2072         }
2073 
2074         iova = __intel_alloc_iova(hwdev, domain, size);
2075         if (!iova) {
2076                 sglist->dma_length = 0;
2077                 return 0;
2078         }
2079 
2080         /*
2081          * Check if DMAR supports zero-length reads on write only
2082          * mappings..
2083          */
2084         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2085                         !cap_zlr(domain->iommu->cap))
2086                 prot |= DMA_PTE_READ;
2087         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2088                 prot |= DMA_PTE_WRITE;
2089 
2090         start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2091         offset = 0;
2092         for_each_sg(sglist, sg, nelems, i) {
2093                 addr = SG_ENT_VIRT_ADDRESS(sg);
2094                 addr = (void *)virt_to_phys(addr);
2095                 size = aligned_size((u64)addr, sg->length);
2096                 ret = domain_page_mapping(domain, start_addr + offset,
2097                         ((u64)addr) & PAGE_MASK_4K,
2098                         size, prot);
2099                 if (ret) {
2100                         /*  clear the page */
2101                         dma_pte_clear_range(domain, start_addr,
2102                                   start_addr + offset);
2103                         /* free page tables */
2104                         dma_pte_free_pagetable(domain, start_addr,
2105                                   start_addr + offset);
2106                         /* free iova */
2107                         __free_iova(&domain->iovad, iova);
2108                         return 0;
2109                 }
2110                 sg->dma_address = start_addr + offset +
2111                                 ((u64)addr & (~PAGE_MASK_4K));
2112                 sg->dma_length = sg->length;
2113                 offset += size;
2114         }
2115 
2116         /* it's a non-present to present mapping */
2117         if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2118                         start_addr, offset >> PAGE_SHIFT_4K, 1))
2119                 iommu_flush_write_buffer(domain->iommu);
2120         return nelems;
2121 }
2122 
2123 static struct dma_mapping_ops intel_dma_ops = {
2124         .alloc_coherent = intel_alloc_coherent,
2125         .free_coherent = intel_free_coherent,
2126         .map_single = intel_map_single,
2127         .unmap_single = intel_unmap_single,
2128         .map_sg = intel_map_sg,
2129         .unmap_sg = intel_unmap_sg,
2130 };
2131 
2132 static inline int iommu_domain_cache_init(void)
2133 {
2134         int ret = 0;
2135 
2136         iommu_domain_cache = kmem_cache_create("iommu_domain",
2137                                          sizeof(struct dmar_domain),
2138                                          0,
2139                                          SLAB_HWCACHE_ALIGN,
2140 
2141                                          NULL);
2142         if (!iommu_domain_cache) {
2143                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2144                 ret = -ENOMEM;
2145         }
2146 
2147         return ret;
2148 }
2149 
2150 static inline int iommu_devinfo_cache_init(void)
2151 {
2152         int ret = 0;
2153 
2154         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2155                                          sizeof(struct device_domain_info),
2156                                          0,
2157                                          SLAB_HWCACHE_ALIGN,
2158 
2159                                          NULL);
2160         if (!iommu_devinfo_cache) {
2161                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2162                 ret = -ENOMEM;
2163         }
2164 
2165         return ret;
2166 }
2167 
2168 static inline int iommu_iova_cache_init(void)
2169 {
2170         int ret = 0;
2171 
2172         iommu_iova_cache = kmem_cache_create("iommu_iova",
2173                                          sizeof(struct iova),
2174                                          0,
2175                                          SLAB_HWCACHE_ALIGN,
2176 
2177                                          NULL);
2178         if (!iommu_iova_cache) {
2179                 printk(KERN_ERR "Couldn't create iova cache\n");
2180                 ret = -ENOMEM;
2181         }
2182 
2183         return ret;
2184 }
2185 
2186 static int __init iommu_init_mempool(void)
2187 {
2188         int ret;
2189         ret = iommu_iova_cache_init();
2190         if (ret)
2191                 return ret;
2192 
2193         ret = iommu_domain_cache_init();
2194         if (ret)
2195                 goto domain_error;
2196 
2197         ret = iommu_devinfo_cache_init();
2198         if (!ret)
2199                 return ret;
2200 
2201         kmem_cache_destroy(iommu_domain_cache);
2202 domain_error:
2203         kmem_cache_destroy(iommu_iova_cache);
2204 
2205         return -ENOMEM;
2206 }
2207 
2208 static void __init iommu_exit_mempool(void)
2209 {
2210         kmem_cache_destroy(iommu_devinfo_cache);
2211         kmem_cache_destroy(iommu_domain_cache);
2212         kmem_cache_destroy(iommu_iova_cache);
2213 
2214 }
2215 
2216 void __init detect_intel_iommu(void)
2217 {
2218         if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2219                 return;
2220         if (early_dmar_detect()) {
2221                 iommu_detected = 1;
2222         }
2223 }
2224 
2225 static void __init init_no_remapping_devices(void)
2226 {
2227         struct dmar_drhd_unit *drhd;
2228 
2229         for_each_drhd_unit(drhd) {
2230                 if (!drhd->include_all) {
2231                         int i;
2232                         for (i = 0; i < drhd->devices_cnt; i++)
2233                                 if (drhd->devices[i] != NULL)
2234                                         break;
2235                         /* ignore DMAR unit if no pci devices exist */
2236                         if (i == drhd->devices_cnt)
2237                                 drhd->ignored = 1;
2238                 }
2239         }
2240 
2241         if (dmar_map_gfx)
2242                 return;
2243 
2244         for_each_drhd_unit(drhd) {
2245                 int i;
2246                 if (drhd->ignored || drhd->include_all)
2247                         continue;
2248 
2249                 for (i = 0; i < drhd->devices_cnt; i++)
2250                         if (drhd->devices[i] &&
2251                                 !IS_GFX_DEVICE(drhd->devices[i]))
2252                                 break;
2253 
2254                 if (i < drhd->devices_cnt)
2255                         continue;
2256 
2257                 /* bypass IOMMU if it is just for gfx devices */
2258                 drhd->ignored = 1;
2259                 for (i = 0; i < drhd->devices_cnt; i++) {
2260                         if (!drhd->devices[i])
2261                                 continue;
2262                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2263                 }
2264         }
2265 }
2266 
2267 int __init intel_iommu_init(void)
2268 {
2269         int ret = 0;
2270 
2271         if (no_iommu || swiotlb || dmar_disabled)
2272                 return -ENODEV;
2273 
2274         if (dmar_table_init())
2275                 return  -ENODEV;
2276 
2277         iommu_init_mempool();
2278         dmar_init_reserved_ranges();
2279 
2280         init_no_remapping_devices();
2281 
2282         ret = init_dmars();
2283         if (ret) {
2284                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2285                 put_iova_domain(&reserved_iova_list);
2286                 iommu_exit_mempool();
2287                 return ret;
2288         }
2289         printk(KERN_INFO
2290         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2291 
2292         force_iommu = 1;
2293         dma_ops = &intel_dma_ops;
2294         return 0;
2295 }
2296 
2297 
  This page was automatically generated by the LXR engine.