/*
 * Copyright (C) 2005-2006 Junjiro Okajima
 *
 * This program, aufs is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/* $Id: wh.c,v 1.11 2006/08/07 14:50:22 sfjro Exp $ */

#include <linux/fs.h>
#include <linux/kthread.h>
#include <linux/namei.h>
#include <linux/random.h>
#include <linux/security.h>
#include "aufs.h"

/* Mode bits passed to vfs_create() when a whiteout file is created. */
#define WH_MASK			S_IRUGO
/* Base name used by init_wh(); lookup_wh() adds the whiteout prefix again. */
#define WH_BASENAME		WHPFX "aufs"

/*
 * If a directory contains this file, then it is opaque.  The name starts with
 * the .wh. flag so that it is blocked by lookup.
 */
#define DIROPQ		WHPFX DIROPQ_NAME
/* Ready-made qstr for the opaque marker; .len excludes the trailing NUL. */
static struct qstr diropq_name = {
	.name = DIROPQ,
	.len = sizeof(DIROPQ)-1
};

/*
 * Locking: taken by superio_store() and superio_revert() around the
 * capability get/set calls.
 */
DEFINE_RWLOCK(aufs_lock);

/*
 * Build a whiteout name: WHPFX followed by the original name.
 * The result is NOT terminated by NUL.
 * @name: original d_name.name
 * @len: original d_name.len
 * @wh: whiteout qstr, receives the new name and its length
 * Returns zero on success, -ENAMETOOLONG when @len is too large, or -ENOMEM
 * when the allocation fails.
 * On success, wh->name should be freed by free_whname().
 */
int alloc_whname(const char *name, int len, struct qstr *wh)
{
	char *p;

	DEBUG_ON(!name || !len || !wh);

	if (len > PAGE_SIZE-sizeof(WHPFX))
		return -ENAMETOOLONG;

	wh->len = len+WHLEN;
	wh->name = p = kmalloc(wh->len, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	/* prefix first, then the original name right behind it */
	memcpy(p, WHPFX, WHLEN);
	memcpy(p+WHLEN, name, len);
	return 0;
}

/*
 * Release a whiteout name obtained from alloc_whname().
 * When CONFIG_AUFS_DEBUG is set, the stale pointer is cleared as well.
 */
void free_whname(struct qstr *wh)
{
	DEBUG_ON(!wh || !wh->name);

	kfree(wh->name);
#ifdef CONFIG_AUFS_DEBUG
	wh->name = NULL;
#endif
}

/* ---------------------------------------------------------------------- */

/*
 * Temporarily raise the privileges of the current task and save the previous
 * state in @sio:
 * - the effective capability set gains CAP_FS_MASK and CAP_MKNOD, and loses
 *   CAP_SYS_RESOURCE, CAP_SETPCAP and CAP_SETUID,
 * - the soft RLIMIT_CORE limit is set to zero,
 * - fsuid is set to zero.
 * Returns zero on success, or the error from security_capget(), in which case
 * nothing has been changed.
 * You should call superio_revert() asap.
 */
int superio_store(struct superio *sio)
{
	int err;
	struct rlimit *rl;
	kernel_cap_t super;
	/* capabilities which are removed from the temporary effective set */
	static const kernel_cap_t drop
		= to_cap_t(CAP_TO_MASK(CAP_SYS_RESOURCE)
			   | CAP_TO_MASK(CAP_SETPCAP)
			   | CAP_TO_MASK(CAP_SETUID));

	TraceEnter();

	/* task_capability_lock is not exported */
	write_lock_irq(&aufs_lock);
	//sio->flags = current->flags;
	/* save the current capability sets */
	err = security_capget(current, &sio->cap.effective,
			      &sio->cap.inheritable, &sio->cap.permitted);
	if (err) {
		write_unlock_irq(&aufs_lock);
		goto out;
	}
	/* only the effective set changes, the other two are passed back as is */
	super = cap_combine(sio->cap.effective,
			    to_cap_t(CAP_FS_MASK | CAP_TO_MASK(CAP_MKNOD)));
	super = cap_drop(super, drop);
	security_capset_set(current, &super, &sio->cap.inheritable,
			    &sio->cap.permitted);
	write_unlock_irq(&aufs_lock);

	/* save and zero the soft core-file limit */
	rl = current->signal->rlim + RLIMIT_CORE;
	task_lock(current->group_leader); //??
	sio->rlim_core = rl->rlim_cur;
	rl->rlim_cur = 0;
	task_unlock(current->group_leader);

	/* save the fsuid and switch to zero */
	sio->fsuid = current->fsuid;
	current->fsuid = 0;
 out:
	TraceErr(err);
	return err;
}

/*
 * Undo superio_store(): restore fsuid, the soft RLIMIT_CORE limit and the
 * capability sets from @sio, in the reverse order of superio_store().
 */
void superio_revert(struct superio *sio)
{
	TraceEnter();
	current->fsuid = sio->fsuid;
	task_lock(current->group_leader); //??
	current->signal->rlim[RLIMIT_CORE].rlim_cur = sio->rlim_core;
	task_unlock(current->group_leader);

	/* task_capability_lock is not exported */
	write_lock_irq(&aufs_lock);
	/* put back the capability sets saved by superio_store() */
	security_capset_set(current, &sio->cap.effective,
			    &sio->cap.inheritable, &sio->cap.permitted);
	//current->flags = sio->flags;
	write_unlock_irq(&aufs_lock);
}

/*
 * Check @mask against @hidden_inode with permission().
 * When write access to a directory on an NFS branch is tested, MAY_READ is
 * added to the mask in order to force a permission check.
 * Returns the value of permission().
 */
int superio_test(struct inode *hidden_inode, int mask)
{
	int nfs_dir_write;

	nfs_dir_write = (mask & MAY_WRITE)
		&& S_ISDIR(hidden_inode->i_mode)
		&& SB_NFS(hidden_inode->i_sb);
	if (nfs_dir_write)
		mask |= MAY_READ; /* force permission check */

	return permission(hidden_inode, mask, NULL);
}

/* ---------------------------------------------------------------------- */

/*
 * Test whether the entry @wh_name exists under @hidden_parent.
 * @hidden_parent: directory dentry whose inode must be locked
 * @wh_name: complete whiteout name to look up
 * Returns 0 when there is no such entry, 1 when it is a regular file,
 * -EIO when it exists but is not a regular file, or the lookup error.
 */
int is_wh(struct dentry *hidden_parent, struct qstr *wh_name)
{
	int err;
	struct dentry *wh_dentry;
	struct inode *hidden_dir, *wh_inode;

	LKTRTrace("%.*s/%.*s\n", DLNPair(hidden_parent),
		  wh_name->len, wh_name->name);
	hidden_dir = hidden_parent->d_inode;
	DEBUG_ON(!S_ISDIR(hidden_dir->i_mode));
	IMustLock(hidden_dir);

	wh_dentry = lookup_one_len(wh_name->name, hidden_parent, wh_name->len);
	if (IS_ERR(wh_dentry)) {
		err = PTR_ERR(wh_dentry);
		goto out;
	}

	wh_inode = wh_dentry->d_inode;
	if (!wh_inode)
		err = 0; /* success, no whiteout */
	else if (S_ISREG(wh_inode->i_mode))
		err = 1; /* success, whiteout found */
	else {
		err = -EIO;
		IOErr("%.*s Invalid whiteout entry type 0%o.\n",
		      DLNPair(wh_dentry), wh_dentry->d_inode->i_mode);
	}
	dput(wh_dentry);

 out:
	TraceErr(err);
	return err;
}

/*
 * Test whether the directory @hidden_dentry contains the opaque marker.
 * When superio_test() reports a non-zero value for MAY_EXEC, the lookup is
 * bracketed by superio_store()/superio_revert().
 * Returns the value of is_wh(), or the error from superio_store().
 */
int is_diropq(struct dentry *hidden_dentry)
{
	int err, do_superio;
	struct inode *hidden_dir;
	struct superio sio;

	LKTRTrace("dentry %.*s\n", DLNPair(hidden_dentry));
	hidden_dir = hidden_dentry->d_inode;
	DEBUG_ON(!S_ISDIR(hidden_dir->i_mode));
	IMustLock(hidden_dir);

	do_superio = superio_test(hidden_dir, MAY_EXEC);
	if (do_superio) {
		err = superio_store(&sio);
		if (err)
			goto out;
	}

	err = is_wh(hidden_dentry, &diropq_name);
	if (do_superio)
		superio_revert(&sio);

 out:
	TraceErr(err);
	return err;
}

/*
 * Look up an unused temporary whiteout name under @hidden_parent.
 * @hidden_parent: parent dentry, its inode must exist and be locked
 * @prefix: optional original name; may be NULL
 * The name is WHPFX followed by HEX_LEN random hex digits, or, when @prefix
 * is given, WHPFX, the prefix, a dot and the hex digits.
 * Up to three random names are tried.
 * Returns a negative dentry for the unused name, the error from
 * lookup_one_len(), -ENOMEM, or -EAGAIN when all tried names exist.
 */
struct dentry *lookup_whtmp(struct dentry *hidden_parent, struct qstr *prefix)
{
#define HEX_LEN		8
#define TMPNAM_LEN	(HEX_LEN+WHLEN)
	struct dentry *dentry;
	unsigned int random;
	char defname[TMPNAM_LEN+1], *name, *p;
	int i, n, len;

	LKTRTrace("hp %.*s\n", DLNPair(hidden_parent));
	if (prefix)
		LKTRTrace("prefix %.*s\n", prefix->len, prefix->name);
	DEBUG_ON(!hidden_parent->d_inode);
	IMustLock(hidden_parent->d_inode);

	// rewrite this function if it fails.
	DEBUG_ON(sizeof(random) < (HEX_LEN+1)/2);

	name = defname;
	p = defname+WHLEN;
	len = TMPNAM_LEN;
	if (prefix) {
		dentry = ERR_PTR(-ENOMEM);
		len = prefix->len+TMPNAM_LEN+1;
		/*
		 * one extra byte: snprintf() below stores a terminating NUL
		 * behind the hex digits, i.e. at name[len].
		 */
		name = kmalloc(len+1, GFP_KERNEL);
		if (!name)
			goto out;
		memcpy(name, WHPFX, WHLEN);
		memcpy(name+WHLEN, prefix->name, prefix->len);
		p = name+WHLEN+prefix->len;
		*p++ = '.';
	} else
		memcpy(name, WHPFX, WHLEN);

	for (i = 0; i < 3; i++) {
		get_random_bytes(&random, sizeof(random));
		n = snprintf(p, HEX_LEN+1, "%.*x", HEX_LEN, random);
		DEBUG_ON(n != HEX_LEN);
		dentry = lookup_one_len(name, hidden_parent, len);
		/* stop on an error or on a name which does not exist yet */
		if (IS_ERR(dentry) || !dentry->d_inode)
			goto out_name;
		dput(dentry);
	}
	//Warn("could not get random name\n");
	dentry = ERR_PTR(-EAGAIN);

 out_name:
	if (prefix)
		kfree(name);
 out:
	TraceErrPtr(dentry);
	return dentry;
#undef HEX_LEN
#undef TMPNAM_LEN
}

/*
 * Rename @hidden_dentry to a temporary whiteout name within the same
 * directory @hidden_parent.
 * The target name comes from lookup_whtmp() with the current name as prefix.
 * Returns zero on success, the error from lookup_whtmp(), or the error from
 * vfs_rename().
 */
int rename_whtmp(struct dentry *hidden_parent, struct dentry *hidden_dentry)
{
	int err;
	struct inode *hidden_dir;
	struct dentry *tmp_dentry;

	LKTRTrace("%.*s/%.*s\n", DLNPair(hidden_parent),
		  DLNPair(hidden_dentry));
	DEBUG_ON(!hidden_dentry->d_inode);
	hidden_dir = hidden_parent->d_inode;
	IMustLock(hidden_dir);

	tmp_dentry = lookup_whtmp(hidden_parent, &hidden_dentry->d_name);
	if (IS_ERR(tmp_dentry)) {
		err = PTR_ERR(tmp_dentry);
		goto out;
	}

	/* under the same dir, no need to lock_rename() */
	err = vfs_rename(hidden_dir, hidden_dentry, hidden_dir, tmp_dentry);
	TraceErr(err);
	dput(tmp_dentry);

 out:
	TraceErr(err);
	return err;
}

/* ---------------------------------------------------------------------- */

/*
 * Remove the whiteout @wh_dentry from the locked directory @hidden_dir.
 * @dentry: optional aufs dentry; when given and the unlink succeeds, its
 *	whiteout branch index is reset to -1
 * Returns the value of safe_unlink().
 */
int unlink_wh_dentry(struct inode *hidden_dir, struct dentry *wh_dentry,
		     struct dentry *dentry)
{
	int err;

	LKTRTrace("hi%lu, wh %.*s, d %p\n", hidden_dir->i_ino,
		  DLNPair(wh_dentry), dentry);
	DEBUG_ON((dentry && dbwh(dentry) == -1)
		 || !wh_dentry->d_inode
		 || !S_ISREG(wh_dentry->d_inode->i_mode));
	IMustLock(hidden_dir);

	err = safe_unlink(hidden_dir, wh_dentry, NULL);
	if (err)
		goto out;
	if (dentry)
		set_dbwh(dentry, -1);

 out:
	TraceErr(err);
	return err;
}

/*
 * Look up the entry @wh under @hidden_parent and unlink it if it exists.
 * A name which does not exist is not an error.
 * Returns zero, the lookup error, or the error from safe_unlink().
 */
static int unlink_wh_name(struct dentry *hidden_parent, struct qstr *wh)
{
	int err;
	struct inode *hidden_dir;
	struct dentry *hidden_dentry;

	LKTRTrace("%.*s/%.*s\n", DLNPair(hidden_parent), LNPair(wh));
	hidden_dir = hidden_parent->d_inode;
	IMustLock(hidden_dir);

	hidden_dentry = lookup_one_len(wh->name, hidden_parent, wh->len);
	if (IS_ERR(hidden_dentry)) {
		err = PTR_ERR(hidden_dentry);
		goto out;
	}

	if (hidden_dentry->d_inode)
		err = safe_unlink(hidden_dir, hidden_dentry, NULL);
	else
		err = 0; /* nothing to remove */
	dput(hidden_dentry);

 out:
	TraceErr(err);
	return err;
}

/* ---------------------------------------------------------------------- */

/*
 * Prepare the base whiteout of the branch @br under @hidden_root.
 * This is not enabled yet: br->br_wh is cleared and zero is returned right
 * away, so everything behind the early return is currently unreachable.
 * The disabled part looks up the whiteout of WH_BASENAME.  An existing
 * regular file is accepted (zero), any other existing entry gives -EEXIST,
 * and otherwise the file is created with WH_MASK and kept in br->br_wh.
 * The reference taken by the lookup is dropped on every path except the one
 * which stores the dentry in br->br_wh.
 */
int init_wh(struct dentry *hidden_root, struct aufs_branch *br)
{
	int err;
	struct dentry *wh;
	struct inode *hidden_dir;
	struct superio sio;
	static struct qstr base_name = {.name = WH_BASENAME,
					.len = sizeof(WH_BASENAME)-1};

	// not yet
	br->br_wh = NULL;
	return 0;

	TraceEnter();
	hidden_dir = hidden_root->d_inode;
	IMustLock(hidden_dir);
	RwMustWriteLock(&br->br_wh_rwsem);

#if 0
	if (!hidden_dir->i_op || !hidden_dir->i_op->link) {
		err = -ENOSYS;
		//Err("required link(2) in %s\n", path);
	}
	DEBUG_ON(!hidden_dir->i_op || !hidden_dir->i_op->link
		|| br->br_wh);
#endif

	// base_name is already whiteout-ed, doubly wh prefix-ed
	wh = lookup_wh(hidden_root, &base_name);
	err = PTR_ERR(wh);
	if (IS_ERR(wh))
		goto out;

	err = -EEXIST;
	if (wh->d_inode) {
		if (S_ISREG(wh->d_inode->i_mode))
			err = 0;
		goto out_dput;
	}

	err = superio_store(&sio);
	if (err)
		goto out_dput;
	err = vfs_create(hidden_dir, wh, WH_MASK, NULL);
	superio_revert(&sio);
	if (!err) {
		/* the lookup reference is handed over to the branch */
		br->br_wh = wh;
		goto out;
	}

 out_dput:
	dput(wh);
 out:
	TraceErr(err);
	return err;
}

/*
 * Argument for reinit_br_wh_thread(), which is compiled out at present.
 */
struct reinit_br_wh_arg {
	struct super_block *sb;		/* read-locked while the thread runs */
	struct aufs_branch *br;		/* branch whose br_wh is re-created */
	struct dentry *hidden_root;	/* handed to init_wh() */
};

/*
 * Disabled: thread body which would unlink the base whiteout of a branch and
 * create it again via init_wh().  The whole function is compiled out, and
 * the variant which would be compiled if it were enabled only calls BUG().
 */
#if 0
static int reinit_br_wh_thread(void *arg)
{
#if 1
	// not yet
	BUG();
#else
	int err;
	struct reinit_br_wh_arg *a = arg;
	struct inode *hidden_dir, *wh_inode;
	struct superio sio;

	TraceEnter();

	si_read_lock(a->sb);
	/* NOTE(review): err is not set yet when this branch is taken */
	if (!(a->br->br_perm & MAY_WRITE))
		goto out;

	hidden_dir = a->hidden_root->d_inode;
	err = superio_store(&sio);
	if (err)
		goto out;
	i_lock(hidden_dir);
	rw_write_lock(&a->br->br_wh_rwsem);
	if (a->br->br_wh) {
		wh_inode = a->br->br_wh->d_inode;
		/* NOTE(review): this test has an empty body, it does nothing */
		if (wh_inode && wh_inode->i_nlink);
	}
	err = safe_unlink(hidden_dir, a->br->br_wh, NULL);
	/* an already missing base whiteout is fine, create it again */
	if (!err || err == -ENOENT) {
		dput(a->br->br_wh);
		a->br->br_wh = NULL;
		err = init_wh(a->hidden_root, a->br);
	}
	rw_write_unlock(&a->br->br_wh_rwsem);
	i_unlock(hidden_dir);
	superio_revert(&sio);

 out:
	/* drop what the creator of this thread took for it */
	dput(a->hidden_root);
	br_put(a->br);
	si_read_unlock(a->sb);
	kfree(arg);
	if (err)
		IOErr("err %d\n", err);
	do_exit(err);
#endif
	return 0;
}
#endif

/*
 * Make the negative dentry @wh a whiteout in its (locked) parent directory.
 * When the branch @br has a base whiteout, a hard link to it is tried first
 * and only -EMLINK falls back to creating a new file; that part is not
 * enabled yet and hits BUG().  Otherwise an empty file is created with
 * WH_MASK.
 * Returns the value of vfs_link() or vfs_create().
 */
static int link_wh_or_create(struct dentry *wh, struct aufs_branch *br)
{
	int err;
	struct dentry *hidden_parent;
	struct inode *hidden_dir;

	LKTRTrace("%.*s\n", DLNPair(wh));
	hidden_parent = wh->d_parent;
	hidden_dir = hidden_parent->d_inode;
	IMustLock(hidden_dir);

	rw_read_lock(&br->br_wh_rwsem);
	if (br->br_wh) {
		BUG(); // not yet
		err = vfs_link(br->br_wh, hidden_dir, wh);
		/* success and every error but -EMLINK end here */
		if (err != -EMLINK)
			goto out;
	}

	// keep this error value in this context
	err = vfs_create(hidden_dir, wh, WH_MASK, NULL);

#if 0
	struct reinit_br_wh_arg *arg;
	// ignore ENOMEM
	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
	if (arg) {
		struct task_struct *tsk;
		DECLARE_WAIT_QUEUE_HEAD(wq);

		// br_put() and dput() in reinit_thread
		br_get(br);
		arg->hidden_parent = dget(hidden_parent);
		arg->br = br;
		wait_event(wq,
			   !IS_ERR(tsk = kthread_create
				   (reinit_br_wh_thread, arg,
				    AUFS_NAME "_reinit_br_wh", NULL)));
		wake_up_process(tsk);
	}
#endif

 out:
	rw_read_unlock(&br->br_wh_rwsem);
	TraceErr(err);
	return err;
}

/* ---------------------------------------------------------------------- */

/*
 * Create or remove the opaque marker of a directory on one branch.
 * @dentry: aufs directory dentry
 * @bindex: branch index; the hidden directory inode must be locked
 * @do_create: non-zero to create the marker, zero to remove it
 * When superio_test() reports a non-zero value for MAY_EXEC|MAY_WRITE, the
 * operation is bracketed by superio_store()/superio_revert(), and a newly
 * created marker is given the fsuid which the caller had before
 * superio_store().  If that ownership change fails, the new marker is
 * unlinked again and the error is returned.
 * Returns the marker dentry (with a reference) after a successful create,
 * ERR_PTR(0), i.e. NULL, after a successful remove, or ERR_PTR(error).
 */
struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
			 int do_create)
{
	struct dentry *opq_dentry, *hidden_dentry;
	struct inode *hidden_dir;
	int err, do_superio, rerr;
	struct superio sio;
	struct iattr ia;

	LKTRTrace("%.*s, bindex %d, do_create %d\n", DLNPair(dentry),
		  bindex, do_create);
	hidden_dentry = dtohd_index(dentry, bindex);
	DEBUG_ON(!hidden_dentry);
	hidden_dir = hidden_dentry->d_inode;
	DEBUG_ON(!hidden_dir
		 || !S_ISDIR(hidden_dir->i_mode));
	IMustLock(hidden_dir);

	err = 0;
	do_superio = superio_test(hidden_dir, MAY_EXEC|MAY_WRITE);
	if (do_superio) {
		/* remember the fsuid before superio_store() sets it to zero */
		ia.ia_valid = ATTR_UID;
		ia.ia_uid = current->fsuid;
		err = superio_store(&sio);
	}
	opq_dentry = ERR_PTR(err);
	if (err)
		goto out;
	opq_dentry = lookup_one_len(diropq_name.name, hidden_dentry,
				    diropq_name.len);
	if (IS_ERR(opq_dentry))
		goto out_sio;

	if (do_create) {
		DEBUG_ON(opq_dentry->d_inode);
		err = link_wh_or_create(opq_dentry,
					stobr(dentry->d_sb, bindex));
		if (!err && do_superio) {
			i_lock(opq_dentry->d_inode);
			err = hidden_notify_change(opq_dentry, &ia);
			i_unlock(opq_dentry->d_inode);
			if (err) {
				/* revert: remove what was just created */
				rerr = safe_unlink(hidden_dir, opq_dentry,
						   NULL);
				if (rerr) {
					IOErr("unlink dir_opq(%d, %d)\n",
					      err, rerr);
					err = -EIO;
				}
			}
		}
		if (!err) {
			set_dbdiropq(dentry, bindex);
			goto out_sio; /* success */
		}
	} else {
		DEBUG_ON(!opq_dentry->d_inode);
		err = safe_unlink(hidden_dir, opq_dentry, NULL);
		if (!err)
			set_dbdiropq(dentry, -1);
	}
	/* every path but a successful create gives the reference back */
	dput(opq_dentry);
	opq_dentry = ERR_PTR(err);

 out_sio:
	if (do_superio)
		superio_revert(&sio);
 out:
	TraceErrPtr(opq_dentry);
	return opq_dentry;
}

/* ---------------------------------------------------------------------- */

/*
 * look up the whiteout dentry.
 * @hidden_parent: hidden parent dentry which must exist and be locked
 * @base_name: name of the dentry which will be whiteouted
 * returns the dentry for the whiteout, or ERR_PTR() with the error from
 * alloc_whname() or lookup_one_len().
 */
struct dentry *lookup_wh(struct dentry *hidden_parent, struct qstr *base_name)
{
	int err;
	struct qstr wh_name;
	struct dentry *wh_dentry;

	LKTRTrace("%.*s/%.*s\n", DLNPair(hidden_parent), LNPair(base_name));
	IMustLock(hidden_parent->d_inode);

	err = alloc_whname(base_name->name, base_name->len, &wh_name);
	if (err) {
		wh_dentry = ERR_PTR(err);
		goto out;
	}

	wh_dentry = lookup_one_len(wh_name.name, hidden_parent, wh_name.len);
	/* the temporary name is released right after the lookup */
	free_whname(&wh_name);

 out:
	TraceErrPtr(wh_dentry);
	return wh_dentry;
}

/*
 * Look up the whiteout for @dentry under @hidden_parent on branch @bindex
 * and create it when it does not exist yet.
 * A lookup error or an already existing whiteout is returned as is.
 * After a successful create, the whiteout branch index of @dentry is set to
 * @bindex.  Returns the whiteout dentry or ERR_PTR(error).
 */
struct dentry *simple_create_wh(struct dentry *dentry, aufs_bindex_t bindex,
				struct dentry *hidden_parent)
{
	struct dentry *wh_dentry;
	int err;

	LKTRTrace("%.*s/%.*s on b%d\n", DLNPair(hidden_parent),
		  DLNPair(dentry), bindex);

	wh_dentry = lookup_wh(hidden_parent, &dentry->d_name);
	if (IS_ERR(wh_dentry) || wh_dentry->d_inode)
		goto out;

	IMustLock(hidden_parent->d_inode);
	err = link_wh_or_create(wh_dentry, stobr(dentry->d_sb, bindex));
	if (err) {
		dput(wh_dentry);
		wh_dentry = ERR_PTR(err);
		goto out;
	}
	set_dbwh(dentry, bindex);

 out:
	TraceErrPtr(wh_dentry);
	return wh_dentry;
}

/* ---------------------------------------------------------------------- */

/*
 * Delete all whiteouts in this directory in branch bindex.
 * @whlist: hash of whiteout names; only entries of @bindex are handled
 * @hidden_parent: directory dentry whose inode must be locked
 * The whiteout names are built in a buffer from __getname().  When
 * superio_test() reports a non-zero value for MAY_EXEC|MAY_WRITE, the loop is
 * bracketed by superio_store()/superio_revert().
 * Stops at the first failure.  Returns zero, -ENOMEM, -EIO for a name which
 * is too long, or the error from superio_store() or unlink_wh_name().
 */
int do_delete_whiteouts(struct aufs_nhash *whlist, struct dentry *hidden_parent,
			aufs_bindex_t bindex)
{
	int err, do_superio, i;
	struct qstr wh_name;
	char *p;
	struct superio sio;
	struct inode *hidden_dir;
	struct hlist_head *head;
	struct aufs_wh *tpos;
	struct hlist_node *pos;
	struct aufs_destr *str;

	LKTRTrace("%.*s\n", DLNPair(hidden_parent));
	hidden_dir = hidden_parent->d_inode;
	IMustLock(hidden_dir);
	DEBUG_ON(IS_RDONLY(hidden_dir));
	//SiMustReadLock(??);

	err = -ENOMEM;
	wh_name.name = p = __getname();
	if (!wh_name.name)
		goto out;
	/* the prefix is written once, each name is copied behind it */
	memcpy(p, WHPFX, WHLEN);
	p += WHLEN;

	do_superio = superio_test(hidden_dir, MAY_EXEC|MAY_WRITE);
	if (do_superio) {
		err = superio_store(&sio);
		if (err)
			goto out_name;
	}

	err = 0;
	for (i = 0; !err && i < AUFS_NHASH_SIZE; i++) {
		head = whlist->heads+i;
		hlist_for_each_entry(tpos, pos, head, wh_hash) {
			if (tpos->wh_bindex != bindex)
				continue;

			str = &tpos->wh_str;
			if (str->len + WHLEN > PATH_MAX) {
				IOErr("whiteout name too long %.*s\n",
				      str->len, str->name);
				err = -EIO;
				break;
			}

			memcpy(p, str->name, str->len);
			wh_name.len = WHLEN+str->len;
			err = unlink_wh_name(hidden_parent, &wh_name);
			if (err)
				break;
		}
	}
	if (do_superio)
		superio_revert(&sio);

 out_name:
	__putname(wh_name.name);
 out:
	TraceErr(err);
	return err;
}

/* ---------------------------------------------------------------------- */

/*
 * Remove the directory @hidden_dentry after deleting the whiteouts in it.
 * @whlist: whiteout names to delete, see do_delete_whiteouts()
 * @bindex: branch index of @hidden_dentry
 * @dir: aufs parent directory inode; its attributes are refreshed after a
 *	successful removal when its start branch is @bindex
 * @do_lock: non-zero when this function has to lock the hidden parent
 *	directory and @dir by itself
 * Returns zero on success.  On failure a warning is printed and the error is
 * returned.
 */
int rmdir_whtmp(struct dentry *hidden_dentry, struct aufs_nhash *whlist,
		aufs_bindex_t bindex, struct inode *dir, int do_lock)
{
	int err;
	struct inode *hidden_inode, *hidden_dir;

	LKTRTrace("hd %.*s, b%d, i%lu\n",
		  DLNPair(hidden_dentry), bindex, dir->i_ino);

	/* step 1: empty the directory */
	hidden_inode = hidden_dentry->d_inode;
	i_lock(hidden_inode);
	err = do_delete_whiteouts(whlist, hidden_dentry, bindex);
	i_unlock(hidden_inode);
	if (err)
		goto out_warn;

	/* step 2: remove the directory itself */
	hidden_dir = hidden_dentry->d_parent->d_inode;
	if (do_lock)
		i_lock(hidden_dir);
	err = vfs_rmdir(hidden_dir, hidden_dentry);
	if (do_lock)
		i_unlock(hidden_dir);
	if (err)
		goto out_warn;

	/* step 3: refresh the attributes of the aufs parent */
	if (do_lock) {
		i_lock(dir);
		ii_write_lock(dir);
	}
	if (ibstart(dir) == bindex) {
		cpup_attr_timesizes(dir);
		cpup_attr_nlink(dir);
	}
	if (do_lock) {
		ii_write_unlock(dir);
		i_unlock(dir);
	}
	return 0; /* success */

 out_warn:
	Warn("failed removing %.*s(%d), ignored\n",
	     DLNPair(hidden_dentry), err);
	return err;
}

/*
 * Thread body started by rmdir_whtmp_start().
 * Runs rmdir_whtmp() under the superblock read lock unless test_ro() reports
 * an error for the branch, then frees the whiteout list and @arg and leaves
 * through do_exit() with the result.
 */
static int rmdir_whtmp_thread(void *arg)
{
	int err;
	struct rmdir_whtmp_arg *a = arg; // fix it
	struct super_block *sb;

	LKTRTrace("%.*s, b%d, dir i%lu, lock %d\n",
		  DLNPair(a->hidden_dentry), a->bindex, a->dir->i_ino,
		  a->do_lock);

	sb = a->dir->i_sb;
	dget(a->hidden_dentry);
	si_read_lock(sb);
	err = test_ro(sb, a->bindex, NULL);
	if (!err)
		err = rmdir_whtmp(a->hidden_dentry, &a->whlist, a->bindex,
				  a->dir, a->do_lock);
	si_read_unlock(sb);
	dput(a->hidden_dentry);

	/* the argument belongs to this thread, release it here */
	free_whlist(&a->whlist);
	kfree(arg);
	if (err)
		IOErr("err %d\n", err);
	do_exit(err);

	/* behind do_exit() */
	TraceErr(err);
	return err;
}

/*
 * Start a kernel thread which runs rmdir_whtmp_thread() with @arg.
 * The thread name is AUFS_NAME "_rmdir" followed by the name of
 * arg->hidden_dentry.  @arg is freed by the thread.
 * Always returns zero.
 */
int rmdir_whtmp_start(struct rmdir_whtmp_arg *arg)
{
	struct task_struct *tsk;
	DECLARE_WAIT_QUEUE_HEAD(wq);

	TraceEnter();
	/*
	 * kthread_create() is called from inside the wait condition, so it is
	 * called again whenever the condition is re-evaluated after a failure.
	 * NOTE(review): nothing in this file wakes up wq, so a repeated
	 * failure looks like it would sleep here forever -- confirm.
	 */
	wait_event(wq,
		   !IS_ERR(tsk = kthread_create
			   (rmdir_whtmp_thread, arg, AUFS_NAME "_rmdir%.*s",
			    DLNPair(arg->hidden_dentry))));
	wake_up_process(tsk);
	return 0;
}
