long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv,
        unsigned int cmd, void *data)
{
    struct kgsl_device *device = dev_priv->device;
    // ioctl parameters
    struct kgsl_gpumem_alloc *param = data;
    // kgsl_mem_entry describes a userspace memory allocation [see Section 2.1]
    struct kgsl_mem_entry *entry;
    // flags specified by userspace
    uint64_t flags = param->flags;

    /*
     * On 64 bit kernel, secure memory region is expanded and
     * moved to 64 bit address, 32 bit apps can not access it from
     * this IOCTL.
     */
    if ((param->flags & KGSL_MEMFLAGS_SECURE) && is_compat_task() &&
            test_bit(KGSL_MMU_64BIT, &device->mmu.features))
        return -EOPNOTSUPP;

    /* Legacy functions don't support these advanced features */
    flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

    if (is_compat_task())
        flags |= KGSL_MEMFLAGS_FORCE_32BIT;

    // create the kgsl_mem_entry [see Section 2.2]
    entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags);
    if (IS_ERR(entry))
        return PTR_ERR(entry);

    // update the output parameters
    param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr;
    param->size = (size_t) entry->memdesc.size;
    param->flags = (unsigned int) entry->memdesc.flags;

    /* Put the extra ref from kgsl_mem_entry_create() */
    // drop the reference; if the refcount hits zero the entry is freed via kgsl_mem_entry_destroy
    kgsl_mem_entry_put(entry);

    return 0;
}
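For context, this is roughly how the ioctl is driven from userspace. A minimal sketch, assuming the msm_kgsl.h UAPI header (which provides struct kgsl_gpumem_alloc and IOCTL_KGSL_GPUMEM_ALLOC) and the usual /dev/kgsl-3d0 device node; error handling is trimmed.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/msm_kgsl.h>   /* UAPI header with IOCTL_KGSL_GPUMEM_ALLOC */

int main(void)
{
    /* /dev/kgsl-3d0 is the usual node for the 3D GPU */
    int fd = open("/dev/kgsl-3d0", O_RDWR);
    struct kgsl_gpumem_alloc param = {
        .size  = 4096,                      /* one page */
        .flags = KGSL_MEMFLAGS_GPUREADONLY, /* GPU may only read this buffer */
    };

    if (fd < 0)
        return 1;

    /* on success the kernel fills in param.gpuaddr, param.size and param.flags */
    if (ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC, &param) == 0)
        printf("gpuaddr=0x%lx size=%zu\n", param.gpuaddr, param.size);

    close(fd);
    return 0;
}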
2.1 kgsl_mem_entry
/**
 * struct kgsl_mem_entry - a userspace memory allocation
 */
struct kgsl_mem_entry {
    // Currently userspace can only hold a single reference count but the kernel may hold more
    struct kref refcount;
    // description of the memory [see Section 2.1.1]
    struct kgsl_memdesc memdesc;
    // type-specific data, such as the dma-buf attachment pointer
    void *priv_data;
    // rb_node for the gpu address lookup rb tree
    struct rb_node node;
    // idr index for this entry, can be used to find memory that does not have a valid GPU address
    unsigned int id;
    // the process that owns this memory
    struct kgsl_process_private *priv;
    // if !0, userspace requested that this memory be freed, but there are still references to it
    int pending_free;
    // String containing user specified metadata for the entry
    char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1];
    // used to schedule a kgsl_mem_entry_put in atomic contexts
    struct work_struct work;
    /*
     * @map_count: Count how many vmas this object is mapped in - used for
     * debugfs accounting
     */
    atomic_t map_count;
};
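The refcount field is managed with the standard kref API. The put helper referenced in the ioctl above is essentially a kref_put wrapper; a minimal sketch, assuming kgsl_mem_entry_destroy follows the usual kref release-callback signature:

// drop one reference; when the count reaches zero the kref core
// invokes kgsl_mem_entry_destroy to tear the entry down
static inline void kgsl_mem_entry_put(struct kgsl_mem_entry *entry)
{
    if (entry)
        kref_put(&entry->refcount, kgsl_mem_entry_destroy);
}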
2.1.1 kgsl_memdesc
/**
 * struct kgsl_memdesc - GPU memory object descriptor
 */
struct kgsl_memdesc {
    // pagetable this object is mapped in
    struct kgsl_pagetable *pagetable;
    // CPU (process) virtual address
    void *hostptr;
    // number of threads using the CPU virtual address
    unsigned int hostptr_count;
    // GPU virtual address
    uint64_t gpuaddr;
    // physical address of the memory object
    phys_addr_t physaddr;
    // size of the backing physical memory
    uint64_t size;
    // Internal flags and settings
    unsigned int priv;
    struct sg_table *sgt;
    // operations on this memory object [see Section 2.1.2]
    const struct kgsl_memdesc_ops *ops;
    // Flags set from userspace at allocation time
    uint64_t flags;
    struct device *dev;
    // dma attributes for this memory
    unsigned long attrs;
    // An array of pointers to allocated pages
    struct page **pages;
    // Total number of pages allocated
    unsigned int page_count;
    /*
     * @lock: Spinlock to protect the gpuaddr from being accessed by
     * multiple entities trying to map the same SVM region at once
     */
    spinlock_t lock;
};
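Several per-allocation attributes (alignment, cache mode, memory type) are packed into bit fields of flags. The driver extracts them with a helper macro along these lines; a sketch, with the mask/shift constants coming from the msm_kgsl.h UAPI:

// extract a bit field from the 64-bit flags word
#define MEMFLAGS(_flags, _mask, _shift) \
    ((unsigned int) (((_flags) & (_mask)) >> (_shift)))

// e.g. the alignment field stores ilog2 of the requested alignment:
// MEMFLAGS(flags, KGSL_MEMALIGN_MASK, KGSL_MEMALIGN_SHIFT) == 16
// means the buffer wants 64KB (1 << 16) alignment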
2.1.2 kgsl_memdesc_ops
// for the concrete implementation see kgsl_page_ops in Section 2.2.5
struct kgsl_memdesc_ops {
    unsigned int vmflags;
    vm_fault_t (*vmfault)(struct kgsl_memdesc *memdesc,
        struct vm_area_struct *vma, struct vm_fault *vmf);
    // free the memory
    void (*free)(struct kgsl_memdesc *memdesc);
    // map into the kernel virtual address space
    int (*map_kernel)(struct kgsl_memdesc *memdesc);
    // unmap from the kernel virtual address space
    void (*unmap_kernel)(struct kgsl_memdesc *memdesc);
    /*
     * @put_gpuaddr: Put away the GPU address and unmap the memory
     * descriptor
     */
    void (*put_gpuaddr)(struct kgsl_memdesc *memdesc);
};
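For reference, the kgsl_page_ops instance mentioned above (Section 2.2.5) populates this table roughly as follows. A sketch from kgsl_sharedmem.c; the exact vmflags may differ between kernel versions:

static const struct kgsl_memdesc_ops kgsl_page_ops = {
    .free = kgsl_free_pages,
    // do not dump, expand or copy-on-fork these mappings
    .vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP,
    .vmfault = kgsl_paged_vmfault,
    .map_kernel = kgsl_paged_map_kernel,
    .unmap_kernel = kgsl_paged_unmap_kernel,
    .put_gpuaddr = kgsl_unmap_and_put_gpuaddr,
};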
2.2 gpumem_alloc_entry
struct kgsl_mem_entry *gpumem_alloc_entry(
        struct kgsl_device_private *dev_priv,
        uint64_t size, uint64_t flags)
{
    int ret;
    struct kgsl_process_private *private = dev_priv->process_priv;
    struct kgsl_mem_entry *entry;
    struct kgsl_mmu *mmu = &dev_priv->device->mmu;
    unsigned int align;

    flags &= KGSL_MEMFLAGS_GPUREADONLY
        | KGSL_CACHEMODE_MASK
        | KGSL_MEMTYPE_MASK
        | KGSL_MEMALIGN_MASK
        | KGSL_MEMFLAGS_USE_CPU_MAP
        | KGSL_MEMFLAGS_SECURE
        | KGSL_MEMFLAGS_FORCE_32BIT
        | KGSL_MEMFLAGS_IOCOHERENT
        | KGSL_MEMFLAGS_GUARD_PAGE;

    /* Return not supported error if secure memory isn't enabled */
    if (!kgsl_mmu_is_secured(mmu) && (flags & KGSL_MEMFLAGS_SECURE)) {
        dev_WARN_ONCE(dev_priv->device->dev, 1,
            "Secure memory not supported");
        return ERR_PTR(-EOPNOTSUPP);
    }

    /* Cap the alignment bits to the highest number we can handle */
    align = MEMFLAGS(flags, KGSL_MEMALIGN_MASK, KGSL_MEMALIGN_SHIFT);
    if (align >= ilog2(KGSL_MAX_ALIGN)) {
        dev_err(dev_priv->device->dev,
            "Alignment too large; restricting to %dK\n",
            KGSL_MAX_ALIGN >> 10);

        flags &= ~((uint64_t) KGSL_MEMALIGN_MASK);
        flags |= (uint64_t)((ilog2(KGSL_MAX_ALIGN) <<
            KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK);
    }

    /* For now only allow allocations up to 4G */
    if (size == 0 || size > UINT_MAX)
        return ERR_PTR(-EINVAL);

    // normalize the cache mode flags
    flags = kgsl_filter_cachemode(flags);

    // the code above validates and fixes up the flags;
    // now create the kgsl_mem_entry [see Section 2.2.1]
    entry = kgsl_mem_entry_create();
    if (entry == NULL)
        return ERR_PTR(-ENOMEM);

    // force I/O coherency for cached buffers when it is the configured default
    if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT) &&
            kgsl_cachemode_is_cached(flags))
        flags |= KGSL_MEMFLAGS_IOCOHERENT;

    // allocate the backing memory [see Section 2.2.2]
    ret = kgsl_allocate_user(dev_priv->device, &entry->memdesc,
        size, flags, 0);
    if (ret != 0)
        goto err;

    // attach the memory to the kgsl process [see Section 2.2.7]
    ret = kgsl_mem_entry_attach_process(dev_priv->device, private, entry);
    if (ret != 0) {
        kgsl_sharedmem_free(&entry->memdesc);
        goto err;
    }

    kgsl_process_add_stats(private,
            kgsl_memdesc_usermem_type(&entry->memdesc),
            entry->memdesc.size);
    trace_kgsl_mem_alloc(entry);

    // publish the kgsl_mem_entry in kgsl_process_private so other paths can look it up
    kgsl_mem_entry_commit_process(entry);
    return entry;
err:
    kfree(entry);
    return ERR_PTR(ret);
}
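The cache-mode fixup ("normalize the cache mode flags" above) rewrites one bit field: arm64 has no write-through mapping type, so a WRITETHROUGH request is silently downgraded to WRITEBACK, the default caching policy. A sketch of kgsl_filter_cachemode under that assumption:

static uint64_t kgsl_filter_cachemode(uint64_t flags)
{
    /*
     * WRITETHROUGH is not supported on arm64, so quietly substitute
     * WRITEBACK, which is the default caching policy
     */
    if (MEMFLAGS(flags, KGSL_CACHEMODE_MASK, KGSL_CACHEMODE_SHIFT) ==
            KGSL_CACHEMODE_WRITETHROUGH) {
        flags &= ~((uint64_t) KGSL_CACHEMODE_MASK);
        flags |= (uint64_t)((KGSL_CACHEMODE_WRITEBACK <<
            KGSL_CACHEMODE_SHIFT) & KGSL_CACHEMODE_MASK);
    }
    return flags;
}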
2.2.1 kgsl_mem_entry_create
static struct kgsl_mem_entry *kgsl_mem_entry_create(void)
{
    // allocate the kgsl_mem_entry
    struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

    if (entry != NULL) {
        // initialize the kgsl_mem_entry refcount to 1
        kref_init(&entry->refcount);
        /* put this ref in userspace memory alloc and map ioctls */
        // take an extra reference; the ioctl drops it when it is done
        kref_get(&entry->refcount);
        // no VMAs map this entry yet
        atomic_set(&entry->map_count, 0);
    }

    return entry;
}
void kgsl_memdesc_init(struct kgsl_device *device,
        struct kgsl_memdesc *memdesc, uint64_t flags)
{
    struct kgsl_mmu *mmu = &device->mmu;
    unsigned int align;

    // zero-initialize the kgsl_memdesc
    memset(memdesc, 0, sizeof(*memdesc));

    /* Turn off SVM if the system doesn't support it */
    // per-process pagetables (KGSL_MMU_IOPGTABLE) are required for SVM
    if (!kgsl_mmu_is_perprocess(mmu))
        flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

    /* Secure memory disables advanced addressing modes */
    if (flags & KGSL_MEMFLAGS_SECURE)
        flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);

    /* Disable IO coherence if it is not supported on the chip */
    if (!kgsl_mmu_has_feature(device, KGSL_MMU_IO_COHERENT)) {
        flags &= ~((uint64_t) KGSL_MEMFLAGS_IOCOHERENT);

        WARN_ONCE(IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT),
            "I/O coherency is not supported on this target\n");
    } else if (IS_ENABLED(CONFIG_QCOM_KGSL_IOCOHERENCY_DEFAULT))
        flags |= KGSL_MEMFLAGS_IOCOHERENT;

    /*
     * We can't enable I/O coherency on uncached surfaces because of
     * situations where hardware might snoop the cpu caches which can
     * have stale data. This happens primarily due to the limitations
     * of dma caching APIs available on arm64
     */
    if (!kgsl_cachemode_is_cached(flags))
        flags &= ~((u64) KGSL_MEMFLAGS_IOCOHERENT);

    if (kgsl_mmu_has_feature(device, KGSL_MMU_NEED_GUARD_PAGE) ||
            (flags & KGSL_MEMFLAGS_GUARD_PAGE))
        memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE;

    if (flags & KGSL_MEMFLAGS_SECURE)
        memdesc->priv |= KGSL_MEMDESC_SECURE;

    // record the flags
    memdesc->flags = flags;
    // record the device that owns this memory
    memdesc->dev = &device->pdev->dev;

    // the alignment is at least one page
    align = max_t(unsigned int,
        kgsl_memdesc_get_align(memdesc), ilog2(PAGE_SIZE));
    // write the alignment back into the flags field
    kgsl_memdesc_set_align(memdesc, align);

    spin_lock_init(&memdesc->lock);
}
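The two alignment helpers called at the end of kgsl_memdesc_init simply read and write the KGSL_MEMALIGN bit field introduced earlier; sketches, not verbatim source:

// read the ilog2 alignment value out of the flags field
static inline unsigned int kgsl_memdesc_get_align(
        const struct kgsl_memdesc *memdesc)
{
    return MEMFLAGS(memdesc->flags, KGSL_MEMALIGN_MASK,
        KGSL_MEMALIGN_SHIFT);
}

// store a new ilog2 alignment value into the flags field
static inline int kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc,
        unsigned int align)
{
    if (align > 32)
        align = 32;

    memdesc->flags &= ~((uint64_t) KGSL_MEMALIGN_MASK);
    memdesc->flags |= (uint64_t)((align << KGSL_MEMALIGN_SHIFT) &
        KGSL_MEMALIGN_MASK);
    return 0;
}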
static int _kgsl_alloc_pages(struct kgsl_memdesc *memdesc,
        u64 size, struct page ***pages, struct device *dev)
{
    int count = 0;
    // convert the size into a page count
    int npages = size >> PAGE_SHIFT;
    // kvcalloc tries a physically contiguous (kmalloc) allocation for the
    // page-pointer array first, falling back to vmalloc on failure
    struct page **local = kvcalloc(npages, sizeof(*local), GFP_KERNEL);
    u32 page_size, align;
    u64 len = size;

    if (!local)
        return -ENOMEM;

    // returns 0 if shmem setup succeeds or CONFIG_QCOM_KGSL_USE_SHMEM is not set [see Section 2.2.6.1]
    count = kgsl_memdesc_file_setup(memdesc, size);
    if (count) {
        kvfree(local);
        return count;
    }

    /* Start with 1MB alignment to get the biggest page we can */
    align = ilog2(SZ_1M);

    // pick the page size based on the remaining length and alignment
    page_size = kgsl_get_page_size(len, align);

    while (len) {
        // calls kgsl_pool_alloc_page underneath and returns the
        // allocated pages through the local array
        int ret = kgsl_alloc_page(&page_size, &local[count],
            npages, &align, count, memdesc->shmem_filp, dev);

        if (ret == -EAGAIN)
            continue;
        else if (ret <= 0) {
            int i;

            for (i = 0; i < count; ) {
                int n = 1 << compound_order(local[i]);

                kgsl_free_page(local[i]);
                i += n;
            }
            kvfree(local);

            if (!kgsl_sharedmem_noretry_flag)
                pr_err_ratelimited("kgsl: out of memory: only allocated %lldKb of %lldKb requested\n",
                    (size - len) >> 10, size >> 10);

            if (memdesc->shmem_filp)
                fput(memdesc->shmem_filp);

            return -ENOMEM;
        }

        count += ret;
        npages -= ret;
        len -= page_size;

        page_size = kgsl_get_page_size(len, align);
    }

    // hand the page array back to the caller
    *pages = local;

    return count;
}
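kgsl_get_page_size is what makes this loop prefer large pages: it returns the biggest supported chunk that the remaining length and alignment still allow, and the loop shrinks the chunk size as len runs down. Roughly, as a sketch (the set of supported sizes varies with the page-pool configuration):

static unsigned int kgsl_get_page_size(u64 size, unsigned int align)
{
    // pick the largest chunk that both the alignment and the
    // remaining length allow; otherwise fall back to 4KB pages
    if (align >= ilog2(SZ_1M) && size >= SZ_1M)
        return SZ_1M;
    else if (align >= ilog2(SZ_64K) && size >= SZ_64K)
        return SZ_64K;
    else if (align >= ilog2(SZ_8K) && size >= SZ_8K)
        return SZ_8K;

    return PAGE_SIZE;
}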
/*
 * Attach the memory object to a process by (possibly) getting a GPU address and
 * (possibly) mapping it
 */
static int kgsl_mem_entry_attach_process(struct kgsl_device *device,
        struct kgsl_process_private *process,
        struct kgsl_mem_entry *entry)
{
    int id, ret;

    // take a reference on the kgsl_process_private
    ret = kgsl_process_private_get(process);
    if (!ret)
        return -EBADF;

    // reserve a GPU virtual address [see Section 2.2.7.1]
    ret = kgsl_mem_entry_track_gpuaddr(device, process, entry);
    if (ret) {
        kgsl_process_private_put(process);
        return ret;
    }

    idr_preload(GFP_KERNEL);
    spin_lock(&process->mem_lock);
    /* Allocate the ID but don't attach the pointer just yet */
    // allocate an id for the kgsl_mem_entry
    id = idr_alloc(&process->mem_idr, NULL, 1, 0, GFP_NOWAIT);
    spin_unlock(&process->mem_lock);
    idr_preload_end();

    if (id < 0) {
        if (!kgsl_memdesc_use_cpu_map(&entry->memdesc))
            kgsl_mmu_put_gpuaddr(&entry->memdesc);
        kgsl_process_private_put(process);
        return id;
    }

    entry->id = id;
    entry->priv = process;

    /*
     * Map the memory if a GPU address is already assigned, either through
     * kgsl_mem_entry_track_gpuaddr() or via some other SVM process
     */
    // a GPU virtual address was assigned successfully
    if (entry->memdesc.gpuaddr) {
        // [see Section 2.2.7.6]
        ret = kgsl_mmu_map(entry->memdesc.pagetable, &entry->memdesc);
        if (ret)
            kgsl_mem_entry_detach_process(entry);
    }

    kgsl_memfree_purge(entry->memdesc.pagetable, entry->memdesc.gpuaddr,
        entry->memdesc.size);

    return ret;
}
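Note the two-phase idr publication: idr_alloc() above installs NULL, so the id is reserved but lookups still fail while the entry is half-built. Only after allocation and mapping succeed does gpumem_alloc_entry (Section 2.2) call kgsl_mem_entry_commit_process to swap in the real pointer. A sketch of that helper:

static void kgsl_mem_entry_commit_process(struct kgsl_mem_entry *entry)
{
    if (!entry)
        return;

    spin_lock(&entry->priv->mem_lock);
    // replace the NULL placeholder installed by idr_alloc() with the
    // real pointer, making the entry visible to lookups by id
    idr_replace(&entry->priv->mem_idr, entry, entry->id);
    spin_unlock(&entry->priv->mem_lock);
}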
2.2.7.1 kgsl_mem_entry_track_gpuaddr
/* Allocate an IOVA for memory objects that don't use SVM */
static int kgsl_mem_entry_track_gpuaddr(struct kgsl_device *device,
        struct kgsl_process_private *process,
        struct kgsl_mem_entry *entry)
{
    struct kgsl_pagetable *pagetable;

    /*
     * If SVM is enabled for this object then the address needs to be
     * assigned elsewhere
     * Also do not proceed further in case of NoMMU.
     */
    // nothing to do for SVM objects or when there is no IOMMU
    if (kgsl_memdesc_use_cpu_map(&entry->memdesc) ||
        (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE))
        return 0;

    // secure buffers use the secure pagetable, everything else the kgsl process pagetable
    pagetable = kgsl_memdesc_is_secured(&entry->memdesc) ?
        device->mmu.securepagetable : process->pagetable;

    // reserve the GPU virtual address [see Section 2.2.7.2]
    return kgsl_mmu_get_gpuaddr(pagetable, &entry->memdesc);
}
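The two predicates used here each test a single bit; sketches:

// the entry carves its GPU address out of the CPU mapping (SVM)
static inline bool kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc)
{
    return memdesc && (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP);
}

// the entry lives in the secure (content-protected) region
static inline bool kgsl_memdesc_is_secured(const struct kgsl_memdesc *memdesc)
{
    return memdesc && (memdesc->priv & KGSL_MEMDESC_SECURE);
}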
2.2.7.2 kgsl_mmu_get_gpuaddr
#define PT_OP_VALID(_pt, _field) \
    (((_pt) != NULL) && \
     ((_pt)->pt_ops != NULL) && \
     ((_pt)->pt_ops->_field != NULL))

/**
 * kgsl_mmu_get_gpuaddr - Assign a GPU address to the memdesc
 * @pagetable: GPU pagetable to assign the address in
 * @memdesc: mem descriptor to assign the memory to
 *
 * Return: 0 on success or negative on failure
 */
static inline int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable,
        struct kgsl_memdesc *memdesc)
{
    // dispatch to kgsl_iommu_get_gpuaddr from iommu_pt_ops [see Section 2.2.7.3]
    if (PT_OP_VALID(pagetable, get_gpuaddr))
        return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc);

    return -ENOMEM;
}
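pt_ops is wired up once per pagetable. For the IOMMU case, the ops table looks roughly like this (a partial sketch of iommu_pt_ops from kgsl_iommu.c; only the ops relevant to this walkthrough are shown, and the struct name may vary between kernel versions):

static const struct kgsl_mmu_pt_ops iommu_pt_ops = {
    .mmu_map = kgsl_iommu_map,             // Section 2.2.7.7
    .mmu_unmap = kgsl_iommu_unmap,
    .get_gpuaddr = kgsl_iommu_get_gpuaddr, // Section 2.2.7.3
    .put_gpuaddr = kgsl_iommu_put_gpuaddr,
    /* ... remaining ops elided ... */
};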
2.2.7.3 kgsl_iommu_get_gpuaddr
static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable,
        struct kgsl_memdesc *memdesc)
{
    struct kgsl_iommu_pt *pt = pagetable->priv;
    int ret = 0;
    uint64_t addr, start, end, size;
    unsigned int align;

    if (WARN_ON(kgsl_memdesc_use_cpu_map(memdesc)))
        return -EINVAL;

    if (memdesc->flags & KGSL_MEMFLAGS_SECURE &&
            pagetable->name != KGSL_MMU_SECURE_PT)
        return -EINVAL;

    // size of the mapping (the kgsl_memdesc, including an optional guard page)
    size = kgsl_memdesc_footprint(memdesc);

    align = max_t(uint64_t, 1 << kgsl_memdesc_get_align(memdesc),
            PAGE_SIZE);

    if (memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT) {
        start = pagetable->compat_va_start;
        end = pagetable->compat_va_end;
    } else {
        // start of the pagetable's virtual address range
        start = pt->va_start;
        // end of the pagetable's virtual address range
        end = pt->va_end;
    }

    spin_lock(&pagetable->lock);

    // find an unmapped virtual address range [see Section 2.2.7.4]
    addr = _get_unmapped_area(pagetable, start, end, size, align);

    if (addr == (uint64_t) -ENOMEM) {
        ret = -ENOMEM;
        goto out;
    }

    /*
     * This path is only called in a non-SVM path with locks so we can be
     * sure we aren't racing with anybody so we don't need to worry about
     * taking the lock
     */
    // record the range in the pagetable's address rbtree [see Section 2.2.7.5]
    ret = _insert_gpuaddr(pagetable, addr, size);
    if (ret == 0) {
        // record the GPU virtual address
        memdesc->gpuaddr = addr;
        // record the pagetable
        memdesc->pagetable = pagetable;
    }

out:
    spin_unlock(&pagetable->lock);
    return ret;
}
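The footprint is the mapped size rather than the raw buffer size: when a guard page is enabled, one extra page is appended to the range so that out-of-bounds GPU accesses fault instead of silently hitting a neighbouring buffer. A sketch of kgsl_memdesc_footprint:

static inline uint64_t kgsl_memdesc_footprint(
        const struct kgsl_memdesc *memdesc)
{
    if (!(memdesc->priv & KGSL_MEMDESC_GUARD_PAGE))
        return memdesc->size;

    // reserve one extra page after the buffer for the guard page
    return PAGE_ALIGN(memdesc->size + PAGE_SIZE);
}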
2.2.7.4 _get_unmapped_area
/*
 * struct kgsl_iommu_addr_entry - entry in the kgsl_pagetable rbtree.
 * @base: starting virtual address of the entry
 * @size: size of the entry
 * @node: the rbtree node
 */
struct kgsl_iommu_addr_entry {
    // starting virtual address
    uint64_t base;
    uint64_t size;
    struct rb_node node;
};

static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable,
        uint64_t bottom, uint64_t top, uint64_t size, uint64_t align)
{
    // leftmost (lowest) node of the pagetable's red-black tree
    struct rb_node *node = rb_first(&pagetable->rbtree);
    uint64_t start;

    bottom = ALIGN(bottom, align);
    start = bottom;

    while (node != NULL) {
        uint64_t gap;
        // get the containing kgsl_iommu_addr_entry from the rb_node
        struct kgsl_iommu_addr_entry *entry = rb_entry(node,
            struct kgsl_iommu_addr_entry, node);

        /*
         * Skip any entries that are outside of the range, but make sure
         * to account for some that might straddle the lower bound
         */
        if (entry->base < bottom) {
            if (entry->base + entry->size > bottom)
                start = ALIGN(entry->base + entry->size, align);
            node = rb_next(node);
            continue;
        }

        /* Stop if we went over the top */
        if (entry->base >= top)
            break;

        /* Make sure there is a gap to consider */
        if (start < entry->base) {
            gap = entry->base - start;

            if (gap >= size)
                return start;
        }

        /* Stop if there is no more room in the region */
        if (entry->base + entry->size >= top)
            return (uint64_t) -ENOMEM;

        /* Start the next cycle at the end of the current entry */
        start = ALIGN(entry->base + entry->size, align);
        node = rb_next(node);
    }

    // the gap after the last entry (or the whole range if the tree is empty)
    if (start + size <= top)
        return start;

    return (uint64_t) -ENOMEM;
}
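To see the first-fit scan in action, suppose the range is [0x1000, 0x10000) and the tree already holds allocations [0x1000, 0x3000) and [0x6000, 0x8000); a request for 0x2000 bytes with 0x1000 alignment lands in the first sufficient gap, at 0x3000. The following standalone program is a hypothetical, simplified rework of the same logic over a sorted array (it drops the straddle and top-bound checks), purely for illustration:

#include <stdint.h>
#include <stdio.h>

// simplified flattened version of _get_unmapped_area: the same
// first-fit scan, but over a sorted array instead of an rbtree
struct range { uint64_t base, size; };

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static uint64_t first_fit(const struct range *r, int n,
        uint64_t bottom, uint64_t top, uint64_t size, uint64_t align)
{
    uint64_t start = ALIGN_UP(bottom, align);

    for (int i = 0; i < n; i++) {
        // is there a big enough gap before this entry?
        if (start < r[i].base && r[i].base - start >= size)
            return start;
        // otherwise continue searching after this entry
        start = ALIGN_UP(r[i].base + r[i].size, align);
    }
    // room left at the end of the region?
    return (start + size <= top) ? start : (uint64_t) -1;
}

int main(void)
{
    struct range used[] = { { 0x1000, 0x2000 }, { 0x6000, 0x2000 } };

    // finds the 0x3000..0x6000 gap and prints 0x3000
    printf("0x%llx\n", (unsigned long long)
        first_fit(used, 2, 0x1000, 0x10000, 0x2000, 0x1000));
    return 0;
}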
2.2.7.6 kgsl_mmu_map

int kgsl_mmu_map(struct kgsl_pagetable *pagetable,
        struct kgsl_memdesc *memdesc)
{
    int size;
    struct kgsl_device *device = KGSL_MMU_DEVICE(pagetable->mmu);

    if (!memdesc->gpuaddr)
        return -EINVAL;

    /* Only global mappings should be mapped multiple times */
    // KGSL_MEMDESC_MAPPED marks a kgsl_memdesc as mapped: only globally
    // shared memory may be mapped more than once
    if (!kgsl_memdesc_is_global(memdesc) &&
            (KGSL_MEMDESC_MAPPED & memdesc->priv))
        return -EINVAL;

    size = kgsl_memdesc_footprint(memdesc);

    if (PT_OP_VALID(pagetable, mmu_map)) {
        int ret;

        // dispatch to kgsl_iommu_map from iommu_pt_ops [see Section 2.2.7.7]
        ret = pagetable->pt_ops->mmu_map(pagetable, memdesc);
        if (ret)
            return ret;

        atomic_inc(&pagetable->stats.entries);
        // account the mapped size in the pagetable statistics
        KGSL_STATS_ADD(size, &pagetable->stats.mapped,
                &pagetable->stats.max_mapped);

        kgsl_mmu_trace_gpu_mem_pagetable(pagetable);

        if (!kgsl_memdesc_is_global(memdesc) &&
            !(memdesc->flags & KGSL_MEMFLAGS_USERMEM_ION)) {
            kgsl_trace_gpu_mem_total(device, size);
        }

        // mark this memory as mapped
        memdesc->priv |= KGSL_MEMDESC_MAPPED;
    }

    return 0;
}
2.2.7.7 kgsl_iommu_map
static int kgsl_iommu_map(struct kgsl_pagetable *pt,
        struct kgsl_memdesc *memdesc)
{
    int ret;
    uint64_t addr = memdesc->gpuaddr;
    uint64_t size = memdesc->size;
    unsigned int flags = _get_protection_flags(pt, memdesc);
    struct sg_table *sgt = NULL;

    /*
     * For paged memory allocated through kgsl, memdesc->pages is not NULL.
     * Allocate sgt here just for its map operation. Contiguous memory
     * already has its sgt, so no need to allocate it here.
     */
    if (memdesc->pages != NULL)
        sgt = kgsl_alloc_sgt_from_pages(memdesc);
    else
        sgt = memdesc->sgt;

    if (IS_ERR(sgt))
        return PTR_ERR(sgt);

    ret = _iommu_map_sg(pt, addr, sgt->sgl, sgt->nents, flags);
    if (ret)
        goto done;

    ret = _iommu_map_guard_page(pt, memdesc, addr + size, flags);
    if (ret)
        _iommu_unmap(pt, addr, size);

done:
    if (memdesc->pages != NULL)
        kgsl_free_sgt(sgt);

    return ret;
}
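The IOMMU protection flags passed to _iommu_map_sg are derived from the memdesc flags. A sketch of _get_protection_flags, under the assumption that it maps KGSL flags onto the generic IOMMU_* protection bits:

static unsigned int _get_protection_flags(struct kgsl_pagetable *pt,
        struct kgsl_memdesc *memdesc)
{
    // default: readable and writable, never executable through the GPU SMMU
    unsigned int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC;

    if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)
        flags &= ~IOMMU_WRITE;

    if (memdesc->priv & KGSL_MEMDESC_PRIVILEGED)
        flags |= IOMMU_PRIV;

    // request cacheable/coherent transactions for io-coherent buffers
    if (memdesc->flags & KGSL_MEMFLAGS_IOCOHERENT)
        flags |= IOMMU_CACHE;

    return flags;
}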