/*
 * NOTE: the <linux/...> header names below were reconstructed from the
 * symbols this file uses (the originals were lost); the deadline helpers
 * (task_has_dl_policy(), dl_bandwidth_enabled(), task_rq()) additionally
 * come from the scheduler's private sched.h.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/security.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include "mt_sched_drv.h"

#define SCHED_DEV_NAME "sched"

struct mt_task {
	pid_t pid;
	struct task_struct *p;
	struct cpumask mask;
	struct list_head list;
};

static struct mt_task mt_task_head;
static DEFINE_SPINLOCK(mt_sched_spinlock);

static int get_user_cpu_mask(unsigned long __user *user_mask_ptr,
			     unsigned int len, struct cpumask *new_mask)
{
	if (len < cpumask_size())
		cpumask_clear(new_mask);
	else if (len > cpumask_size())
		len = cpumask_size();

	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
}

/**
 * find_process_by_pid - find a process with a matching PID value.
 * @pid: the pid in question.
 */
static struct task_struct *find_process_by_pid(pid_t pid)
{
	return pid ? find_task_by_vpid(pid) : current;
}

/*
 * Check whether the target process has a UID that matches the current
 * process's.
 */
static bool check_same_owner(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred;
	bool match;

	rcu_read_lock();
	pcred = __task_cred(p);
	match = (uid_eq(cred->euid, pcred->euid) ||
		 uid_eq(cred->euid, pcred->uid));
	rcu_read_unlock();
	return match;
}

/*
 * Walk the task list and delete any entry whose task has exited (its
 * recorded pid no longer matches the task_struct's pid).
 */
static void mt_sched_check_tasks(void)
{
	struct mt_task *tmp, *tmp2;
	unsigned long irq_flags;

	spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
	list_for_each_entry_safe(tmp, tmp2, &mt_task_head.list, list) {
		if (tmp->pid != tmp->p->pid) {
			list_del(&(tmp->list));
			kfree(tmp);
		}
	}
	spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);
}

static long __mt_sched_addaffinity(struct task_struct *p,
				   const struct cpumask *new_mask)
{
	struct mt_task *new;
	struct mt_task *tmp, *tmp2;
	unsigned long irq_flags;
	int find = 0;

#if MT_SCHED_AFFININTY_DEBUG
	pr_debug("MT_SCHED: %s pid[%d]\n", __func__, p->pid);
#endif

	new = kmalloc(sizeof(struct mt_task), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	INIT_LIST_HEAD(&(new->list));
	new->pid = p->pid;
	new->p = p;
	cpumask_copy(&new->mask, new_mask);

	/*
	 * One pass over the list: prune entries for exited tasks, and if
	 * the task is already tracked just refresh its mask in place.
	 */
	spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
	list_for_each_entry_safe(tmp, tmp2, &mt_task_head.list, list) {
		if (tmp->pid != tmp->p->pid) {
			list_del(&(tmp->list));
			kfree(tmp);
			continue;
		}
		if (!find && (tmp->p == p)) {
			find = 1;
			cpumask_copy(&tmp->mask, new_mask);
		}
	}
	if (!find)
		list_add(&(new->list), &(mt_task_head.list));
	spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);

	if (find)
		kfree(new);

	return 0;
}
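/**
 * __mt_sched_setaffinity - bind the task identified by @pid to @in_mask.
 *
 * Follows the same sequence as the core sched_setaffinity(): look the
 * task up under RCU and pin it, reject PF_NO_SETAFFINITY tasks, check
 * credentials (same owner or CAP_SYS_NICE in the task's user namespace)
 * and the LSM hook, restrict the request to the task's cpuset, and apply
 * it with set_cpus_allowed_ptr(), retrying if a concurrent cpuset update
 * races with us. The requested mask is then recorded via
 * __mt_sched_addaffinity() so sched_cpu_notify() can restore it later.
 */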
static long __mt_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
{
	cpumask_var_t cpus_allowed, new_mask;
	struct task_struct *p;
	int retval;

#if MT_SCHED_AFFININTY_DEBUG
	pr_debug("MT_SCHED: %s pid[%d]\n", __func__, pid);
#endif

	rcu_read_lock();
	p = find_process_by_pid(pid);
	if (!p) {
		rcu_read_unlock();
		pr_debug("MT_SCHED: setaffinity find process %d fail\n", pid);
		return -ESRCH;
	}

	/* Prevent p from going away */
	get_task_struct(p);
	rcu_read_unlock();

	if (p->flags & PF_NO_SETAFFINITY) {
		retval = -EINVAL;
		pr_debug("MT_SCHED: setaffinity flags PF_NO_SETAFFINITY fail\n");
		goto out_put_task;
	}
	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
		retval = -ENOMEM;
		pr_debug("MT_SCHED: setaffinity alloc_cpumask_var for cpus_allowed fail\n");
		goto out_put_task;
	}
	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
		retval = -ENOMEM;
		pr_debug("MT_SCHED: setaffinity alloc_cpumask_var for new_mask fail\n");
		goto out_free_cpus_allowed;
	}

	retval = -EPERM;
	if (!check_same_owner(p)) {
		rcu_read_lock();
		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
			rcu_read_unlock();
			pr_debug("MT_SCHED: setaffinity check_same_owner and task_ns_capable fail\n");
			goto out_free_new_mask;
		}
		rcu_read_unlock();
	}

	retval = security_task_setscheduler(p);
	if (retval) {
		pr_debug("MT_SCHED: setaffinity security_task_setscheduler fail, status: %d\n",
			 retval);
		goto out_free_new_mask;
	}

	cpuset_cpus_allowed(p, cpus_allowed);
	cpumask_and(new_mask, in_mask, cpus_allowed);

	/*
	 * Since bandwidth control happens on root_domain basis,
	 * if admission test is enabled, we only admit -deadline
	 * tasks allowed to run on all the CPUs in the task's
	 * root_domain.
	 */
#ifdef CONFIG_SMP
	if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
		rcu_read_lock();
		if (!cpumask_subset(task_rq(p)->rd->span, new_mask)) {
			retval = -EBUSY;
			rcu_read_unlock();
			pr_debug("MT_SCHED: setaffinity deadline tasks fail, status: %d\n",
				 retval);
			goto out_free_new_mask;
		}
		rcu_read_unlock();
	}
#endif

again:
	retval = set_cpus_allowed_ptr(p, new_mask);
	if (retval)
		pr_debug("MT_SCHED: set_cpus_allowed_ptr status %d\n", retval);

	if (!retval) {
		cpuset_cpus_allowed(p, cpus_allowed);
		if (!cpumask_subset(new_mask, cpus_allowed)) {
			/*
			 * We must have raced with a concurrent cpuset
			 * update. Just reset the cpus_allowed to the
			 * cpuset's cpus_allowed.
			 */
			cpumask_copy(new_mask, cpus_allowed);
			goto again;
		}
	}

	/*
	 * Record the requested mask so sched_cpu_notify() can restore it
	 * later; also record it when none of the requested CPUs is
	 * currently active, so the request can be applied once one is.
	 */
	if ((!retval) || (!cpumask_intersects(new_mask, cpu_active_mask)))
		retval = __mt_sched_addaffinity(p, new_mask);

out_free_new_mask:
	free_cpumask_var(new_mask);
out_free_cpus_allowed:
	free_cpumask_var(cpus_allowed);
out_put_task:
	put_task_struct(p);
	if (retval)
		pr_debug("MT_SCHED: setaffinity status %d\n", retval);
	return retval;
}

static long __mt_sched_getaffinity(pid_t pid, struct cpumask *mask,
				   struct cpumask *mt_mask)
{
	struct task_struct *p;
	struct mt_task *tmp;
	unsigned long irq_flags;
	unsigned long flags;
	int retval;

	rcu_read_lock();

#if MT_SCHED_AFFININTY_DEBUG
	pr_debug("MT_SCHED: %s pid[%d]\n", __func__, pid);
#endif

	retval = -ESRCH;
	p = find_process_by_pid(pid);
	if (!p) {
		pr_debug("MT_SCHED: getaffinity find process %d fail\n", pid);
		goto out_unlock;
	}

	retval = security_task_getscheduler(p);
	if (retval) {
		pr_debug("MT_SCHED: getaffinity security_task_getscheduler fail, status: %d\n",
			 retval);
		goto out_unlock;
	}

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	cpumask_and(mask, &p->cpus_allowed, cpu_active_mask);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	/* Also report the backup mask recorded by __mt_sched_addaffinity(). */
	cpumask_clear(mt_mask);
	spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
	list_for_each_entry(tmp, &mt_task_head.list, list) {
		if ((p == tmp->p) && (p->pid == tmp->pid)) {
			cpumask_copy(mt_mask, &tmp->mask);
			break;
		}
	}
	spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);

out_unlock:
	rcu_read_unlock();
	if (retval)
		pr_debug("MT_SCHED: getaffinity status %d\n", retval);
	return retval;
}

static long __mt_sched_exitaffinity(pid_t pid)
{
	struct mt_task *tmp, *tmp2;
	unsigned long irq_flags;
	int find = 0;

	if (pid == 0)
		pid = current->pid;

#if MT_SCHED_AFFININTY_DEBUG
	pr_debug("MT_SCHED: %s pid[%d]\n", __func__, pid);
#endif

	spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
	list_for_each_entry_safe(tmp, tmp2, &mt_task_head.list, list) {
		if (pid == tmp->pid) {
			list_del(&(tmp->list));
			kfree(tmp);
			find = 1;
			break;
		}
	}
	spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);

	if (!find) {
		pr_debug("MT_SCHED: exit affinity find process %d fail.\n", pid);
		return -ESRCH;
	}
	return 0;
}
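/*
 * ioctl entry point, three commands:
 *
 *   IOCTL_SETAFFINITY  - arg points to a struct ioctl_arg; copies in the
 *                        user cpumask and applies it to ioctl_arg.pid.
 *   IOCTL_GETAFFINITY  - copies out both the effective mask and the
 *                        backup mask; returns the number of bytes written.
 *   IOCTL_EXITAFFINITY - arg points to a pid_t; drops that pid's backup
 *                        entry (0 means the calling task).
 *
 * A minimal userspace sketch. This is an assumption-laden illustration:
 * the exact struct ioctl_arg layout and the IOCTL_* numbers live in
 * mt_sched_drv.h (not shown), and "/dev/mtk_sched" assumes udev created
 * a node for the "mtk_sched" device registered in sched_ioctl_init():
 *
 *	unsigned long cpus = 0x3;            // CPUs 0 and 1
 *	struct ioctl_arg a = {
 *		.pid  = getpid(),
 *		.len  = sizeof(cpus),
 *		.mask = &cpus,
 *	};
 *	int fd = open("/dev/mtk_sched", O_RDWR);
 *
 *	if (fd >= 0 && ioctl(fd, IOCTL_SETAFFINITY, &a) != 0)
 *		perror("IOCTL_SETAFFINITY");
 */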
static long sched_ioctl_ioctl(struct file *filp, unsigned int cmd,
			      unsigned long arg)
{
	int retval;
	pid_t pid;
	struct ioctl_arg data;
	cpumask_var_t new_mask, mask, mt_mask;
	int len;

	memset(&data, 0, sizeof(data));

	switch (cmd) {
	case IOCTL_SETAFFINITY:
		if (copy_from_user(&data, (int __user *)arg, sizeof(data))) {
			retval = -EFAULT;
			goto done;
		}
#if MT_SCHED_AFFININTY_DEBUG
		pr_debug("MT_SCHED: %s [IOCTL_SETAFFINITY]\n", __func__);
#endif
		if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
			return -ENOMEM;

		retval = get_user_cpu_mask(data.mask, data.len, new_mask);
		if (retval == 0)
			retval = __mt_sched_setaffinity(data.pid, new_mask);
		if (retval)
			pr_debug("MT_SCHED: setaffinity status %d\n", retval);
		free_cpumask_var(new_mask);
		break;

	case IOCTL_GETAFFINITY:
		if (copy_from_user(&data, (int __user *)arg, sizeof(data))) {
			retval = -EFAULT;
			goto done;
		}
#if MT_SCHED_AFFININTY_DEBUG
		pr_debug("MT_SCHED: %s [IOCTL_GETAFFINITY]\n", __func__);
#endif
		len = data.len;
		if ((len * BITS_PER_BYTE) < nr_cpu_ids)
			return -EINVAL;
		if (len & (sizeof(unsigned int) - 1))
			return -EINVAL;

		if (!alloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;
		if (!alloc_cpumask_var(&mt_mask, GFP_KERNEL)) {
			retval = -ENOMEM;
			goto getaffinity_free1;
		}

		retval = __mt_sched_getaffinity(data.pid, mask, mt_mask);
		if (retval == 0) {
			size_t retlen = min_t(size_t, len, cpumask_size());

			if (copy_to_user((int __user *)data.mask, mask, retlen)) {
				retval = -EFAULT;
				goto getaffinity_free;
			}
			if (copy_to_user((int __user *)data.mt_mask, mt_mask, retlen))
				retval = -EFAULT;
			else
				retval = retlen;
		}
getaffinity_free:
		free_cpumask_var(mt_mask);
getaffinity_free1:
		free_cpumask_var(mask);
		break;

	case IOCTL_EXITAFFINITY:
#if MT_SCHED_AFFININTY_DEBUG
		pr_debug("MT_SCHED: %s [IOCTL_EXITAFFINITY]\n", __func__);
#endif
		if (copy_from_user(&pid, (int __user *)arg, sizeof(pid))) {
			retval = -EFAULT;
			goto done;
		}
		retval = __mt_sched_exitaffinity(pid);
		break;

	default:
		retval = -ENOTTY;
	}

done:
	return retval;
}

const struct file_operations sched_ioctl_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = sched_ioctl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = sched_ioctl_compat,
#endif
};

static struct cdev *sched_ioctl_cdev;
static dev_t sched_dev_num;
struct class *sched_class;

static int __init sched_ioctl_init(void)
{
	int ret;
	struct device *class_dev = NULL;

	if (alloc_chrdev_region(&sched_dev_num, 0, 1, SCHED_DEV_NAME)) {
		pr_debug("MT_SCHED: Device major number allocation failed\n");
		return -EAGAIN;
	}

	sched_ioctl_cdev = cdev_alloc();
	if (!sched_ioctl_cdev) {
		pr_debug("MT_SCHED: cdev_alloc failed\n");
		ret = -ENOMEM;
		goto out_err2;
	}
	cdev_init(sched_ioctl_cdev, &sched_ioctl_fops);
	sched_ioctl_cdev->owner = THIS_MODULE;

	ret = cdev_add(sched_ioctl_cdev, sched_dev_num, 1);
	if (ret) {
		pr_debug("MT_SCHED: Char device add failed\n");
		goto out_err2;
	}

	sched_class = class_create(THIS_MODULE, "scheddrv");
	if (IS_ERR(sched_class)) {
		pr_debug("Unable to create class, err = %d\n",
			 (int)PTR_ERR(sched_class));
		ret = PTR_ERR(sched_class);
		goto out_err1;
	}

	class_dev = device_create(sched_class, NULL, sched_dev_num, NULL,
				  "mtk_sched");
	if (IS_ERR(class_dev)) {
		ret = PTR_ERR(class_dev);
		goto out_err0;
	}

	pr_alert("MT_SCHED: Init complete, device major number = %d\n",
		 MAJOR(sched_dev_num));
	return 0;

out_err0:
	class_destroy(sched_class);
out_err1:
	cdev_del(sched_ioctl_cdev);
out_err2:
	unregister_chrdev_region(sched_dev_num, 1);
	return ret;
}

/**
 * /proc/mtk_sched/affinity_status
 */
static int sched_status_show(struct seq_file *seq, void *v);
static int sched_status_open(struct inode *inode, struct file *file);
static unsigned int sched_status_poll(struct file *file, poll_table *wait);

static const struct file_operations sched_status_fops = {
	.open = sched_status_open,
	.read = seq_read,
	.poll = sched_status_poll,
	.llseek = seq_lseek,
	.release = single_release,
};
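/*
 * Every open of /proc/mtk_sched/affinity_status renders a fresh snapshot:
 * the list of online CPUs, then one line per tracked task with its
 * effective ("REAL") and requested backup ("BACKUP") affinity words.
 * Stale entries are pruned first via mt_sched_check_tasks().
 */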
static int sched_status_open(struct inode *inode, struct file *file)
{
	return single_open(file, sched_status_show, inode->i_private);
}

static int sched_status_show(struct seq_file *seq, void *v)
{
	struct mt_task *tmp;
	struct cpumask mask;
	unsigned long irq_flags;
	int i;

	for_each_online_cpu(i)
		seq_printf(seq, "CPU%d:\t\tonline\n", i);

	mt_sched_check_tasks();

	spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
	seq_puts(seq, "\n PID REAL BACKUP CMD\n");
	list_for_each_entry(tmp, &mt_task_head.list, list) {
		cpumask_and(&mask, &tmp->p->cpus_allowed, cpu_online_mask);
		seq_printf(seq, "%5d %4lu %4lu %s\n", tmp->pid,
			   *mask.bits, *tmp->mask.bits, tmp->p->comm);
	}
	spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);

	return 0;
}

static unsigned int sched_status_poll(struct file *file, poll_table *wait)
{
	/* No asynchronous events are generated; always report not ready. */
	return 0;
}

static int __init sched_proc_init(void)
{
	struct proc_dir_entry *pe;

	if (!proc_mkdir("mtk_sched", NULL))
		return -ENOMEM;

	pe = proc_create("mtk_sched/affinity_status", 0444, NULL,
			 &sched_status_fops);
	if (!pe)
		return -ENOMEM;
	return 0;
}

/**
 * sched_cpu_notify - sched cpu notifier callback function.
 */
static int sched_cpu_notify(struct notifier_block *self, unsigned long action,
			    void *hcpu)
{
	long cpu = (long)hcpu;
	struct mt_task *tmp, *tmp2;
	unsigned long irq_flags;
	struct task_struct *p;

	switch (action) {
	case CPU_ONLINE:
		/*
		 * A CPU in a tracked task's backup mask came online:
		 * restore the saved mask, or drop entries whose task
		 * has exited.
		 */
		spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
		list_for_each_entry_safe(tmp, tmp2, &mt_task_head.list, list) {
			if (cpumask_test_cpu(cpu, &(tmp->mask))) {
				p = find_process_by_pid(tmp->pid);
				if (p && p == tmp->p) {
					cpumask_copy(&tmp->p->cpus_allowed,
						     &(tmp->mask));
				} else {
					list_del(&(tmp->list));
					kfree(tmp);
				}
			}
		}
		spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_PREPARE:
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block sched_cpu_nb = {
	.notifier_call = sched_cpu_notify,
};

static int __init sched_module_init(void)
{
	int ret;
	unsigned long irq_flags;

	/* Initialize the list head before the device becomes visible. */
	spin_lock_irqsave(&mt_sched_spinlock, irq_flags);
	INIT_LIST_HEAD(&(mt_task_head.list));
	spin_unlock_irqrestore(&mt_sched_spinlock, irq_flags);

	ret = sched_ioctl_init();
	if (ret)
		return ret;

	ret = sched_proc_init();
	if (ret)
		return ret;

	ret = register_cpu_notifier(&sched_cpu_nb);
	return ret;
}

static void __exit sched_module_exit(void)
{
	unregister_cpu_notifier(&sched_cpu_nb);
	remove_proc_entry("mtk_sched/affinity_status", NULL);
	remove_proc_entry("mtk_sched", NULL);
	device_destroy(sched_class, sched_dev_num);
	class_destroy(sched_class);
	cdev_del(sched_ioctl_cdev);
	unregister_chrdev_region(sched_dev_num, 1);
	pr_alert("MT_SCHED: driver removed.\n");
}

module_init(sched_module_init);
module_exit(sched_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("YaTing Chang ");
MODULE_DESCRIPTION("MediaTek sched affinity driver");
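/*
 * Reading the masks back from userspace, under the same assumptions as
 * the IOCTL_SETAFFINITY sketch above sched_ioctl_ioctl() (real/backup/me
 * are hypothetical local variables, fd is the open device node):
 *
 *	unsigned long real = 0, backup = 0;
 *	struct ioctl_arg a = {
 *		.pid     = getpid(),
 *		.len     = sizeof(real),
 *		.mask    = &real,
 *		.mt_mask = &backup,
 *	};
 *	int n = ioctl(fd, IOCTL_GETAFFINITY, &a);  // > 0: bytes copied out
 *
 *	pid_t me = 0;                              // 0 selects the caller
 *	ioctl(fd, IOCTL_EXITAFFINITY, &me);        // drop the backup entry
 *	close(fd);
 */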