/*
 * raid_io.c : Utility for the Linux Multiple Devices driver
 *             Copyright (C) 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *	       Copyright (C) 1998 Erik Troan
 *
 * RAID IO operations.
 *
 * This source is covered by the GNU GPL, the same as all Linux kernel
 * sources.
 *
 * 97-09-23: >4GB limit fix, Rob Hagopian <hagopiar@vuser.vu.union.edu>
 */

#include <sys/mount.h>		/* for BLKGETSIZE */
#ifndef BLKGETSIZE
#include <linux/fs.h>          /* for BLKGETSIZE */
#endif
#include <sys/sysmacros.h>

#ifndef BLOCK_SIZE
#define BLOCK_SIZE      1024
#endif

#ifndef BLKGETSIZE
#define BLKGETSIZE _IO(0x12, 96) /* Return device size.  */
#endif


#include "common.h"
#include "config.h"
#include "parser.h"
#include "raidlib.h"

/*
 * File-scope pointers with external linkage.  None of the functions in
 * this file reads or writes them: upgrade_sb(), print_sb() and
 * check_active() take parameters of the same names, which shadow these.
 * NOTE(review): presumably referenced from another translation unit --
 * confirm before removing or making them static.
 */
md_cfg_entry_t *p;
mdp_super_t *sb;


/* Integer type progress() uses for its microsecond and second counters. */
#define TIME long long

/*
 * T: current wall-clock time expressed in microseconds.
 *
 * The timeval is seeded with 1s/1us before gettimeofday() is called, so
 * if that call leaves the structure untouched the result is still a
 * fixed non-zero value.  The return value of gettimeofday() itself is
 * not examined.
 */
static unsigned long T (void)
{
	struct timeval now;
	unsigned long stamp;

	now.tv_sec = 1;
	now.tv_usec = 1;
	gettimeofday(&now, 0);

	/* seconds scaled to microseconds, plus the sub-second part */
	stamp = now.tv_sec * 1000000 + now.tv_usec;

	return stamp;
}


/*
 * progress: emit a one-line status report, at most once per second.
 *
 * blocks  - total amount of work
 * current - amount of work completed so far
 *
 * The first call records the start time.  A report is printed when at
 * least one second (1e6 us, as measured by T()) has passed since the last
 * report, or unconditionally when current == blocks.  The line is written
 * through OUT() (defined outside this file), then stderr is flushed and a
 * carriage return is written to stderr so that the next report starts at
 * the beginning of the same line.
 *
 * The percentage, the remaining-time estimate and the throughput are each
 * guarded against a zero divisor (blocks == 0, current == 0, or less than
 * one full second elapsed); in those cases 100%, 0:00 left and 0.0 KB/sec
 * are reported respectively.
 */
void progress (unsigned long blocks, unsigned long current)
{
	static TIME begin_t = 0, last_status_t = 0;
	TIME now_t, elapsed_t;
	TIME left_t = 0;
	unsigned long percent = 100;
	double rate = 0.0;

	if (!begin_t)
		begin_t = last_status_t = T();

	now_t = T();
	/*
	 * do max one status update per second
	 */
	if ((current != blocks) && (now_t - last_status_t < (TIME)1e6))
		return;

	/* whole seconds since the first call */
	elapsed_t = (now_t - begin_t) / (TIME)1e6;

	/* widen before multiplying so current * 100 cannot wrap */
	if (blocks)
		percent = (unsigned long)
			(((unsigned long long) current * 100ULL) / blocks);

	/* linear extrapolation: elapsed * (remaining / done) */
	if (current)
		left_t = (TIME) ((double) elapsed_t *
				((double) blocks / (double) current - 1.0));

	if (elapsed_t > 0)
		rate = (double) current / (double) elapsed_t;

	OUT("(%2lu%% done; ~%d:%02d left %d:%02d elapsed [%.1f KB/sec])",
		percent,
		(int) (left_t / 60), (int) (left_t % 60),
		(int) (elapsed_t / 60), (int) (elapsed_t % 60),
		rate
	);
	fflush(stderr);
	putc('\r', stderr);

	last_status_t = now_t;
}

/*
 * Private _llseek() system-call stub.
 *
 * Compiled on every target except alpha, sparc v9 and ia64; on those three
 * raidseek() calls lseek() directly and this stub is not needed.
 *
 * If the headers did not supply __NR__llseek, fall back to a hard-coded
 * syscall number: 236 on sparc, 140 everywhere else.
 *
 * The stub takes the target position as two unsigned long halves
 * (offset_high, offset_low), stores the resulting position through
 * 'result', and interprets 'origin' like lseek()'s whence argument
 * (raidseek() passes SEEK_SET).
 * NOTE(review): _syscall5 is not defined in this file -- presumably it
 * comes from a kernel/libc header pulled in through the project headers;
 * confirm that it is still available with the toolchain in use.
 */
#if !(defined(__alpha__) || defined(__sparc_v9__) || defined(__ia64__))
# ifndef __NR__llseek
#  ifdef __sparc__
#   define __NR__llseek		236
#  else
#   define __NR__llseek		140
#  endif
# endif

/* forward declaration so the macro-generated definition has a prototype */
static int _llseek (unsigned int, unsigned long,
		unsigned long, long long *, unsigned int);

/* expands to the definition of _llseek() itself */
static _syscall5( int, _llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, long long *, result,
		unsigned int, origin)
#endif

/*
 * raidseek: position 'fd' at the start of block number 'blk'.
 *
 * The byte position is blk * MD_BLK_SIZ, computed in 64 bits so that it
 * cannot wrap in a 32-bit unsigned long.
 *
 * Returns the resulting file position, or -1 if the seek failed.
 */
long long raidseek (unsigned int fd, unsigned long blk)
{
	unsigned long long byte_pos = (unsigned long long) MD_BLK_SIZ *
					(unsigned long long) blk;

#if defined(__alpha__) || defined(__sparc_v9__) || defined(__ia64__)
	/* on these targets plain lseek() is handed the full position */
	return lseek(fd, byte_pos, SEEK_SET);
#else
	long long where;
	unsigned long upper = byte_pos >> 32;
	unsigned long lower = byte_pos & 0xffffffff;

	/* _llseek() wants the position split into two halves */
	if (_llseek (fd, upper, lower, &where, SEEK_SET) == -1)
		return -1;

	return where;
#endif
}

/*
 * upgrade_sb: rewrite the in-memory superblock image 'sb' so that it
 * carries the MKRAID_* version and the device numbers from 'cfg'.
 *
 * fd      - open descriptor of the component device the image was read from
 * sb      - superblock image to modify in place
 * cfg     - configuration entry describing the array
 * verbose - when non-zero, print the version change and per-disk details
 *
 * Nothing is written to disk here; only *sb and cfg->array.param.md_minor
 * are modified.
 *
 * Returns 0 on success.  Returns 1 (after a message on stderr) when the
 * superblock is already at the target version, is newer than the target
 * version, the md device or the component cannot be stat'ed, this disk's
 * number is not found in the configuration, or the device numbers of 'fd'
 * do not match the configuration entry for this disk.
 */
int upgrade_sb (int fd, mdp_super_t *sb, md_cfg_entry_t * cfg, int verbose)
{
	struct stat st;
	int same_major, same_minor;
	int idx;

	same_major = (sb->major_version == MKRAID_MAJOR_VERSION);
	same_minor = (sb->minor_version == MKRAID_MINOR_VERSION);

	/* identical version triple: nothing to upgrade */
	if (same_major && same_minor &&
	    (sb->patch_version == MKRAID_PATCHLEVEL_VERSION)) {
		fprintf(stderr, "array needs no upgrade\n");
		return 1;
	}

	/* superblock version is lexicographically above ours */
	if ((sb->major_version > MKRAID_MAJOR_VERSION) ||
	    (same_major && (sb->minor_version > MKRAID_MINOR_VERSION)) ||
	    (same_major && same_minor &&
	     (sb->patch_version > MKRAID_PATCHLEVEL_VERSION))) {
		fprintf(stderr, "cannot downgrade array ... \n");
		return 1;
	}

	if (verbose) {
		printf("MD ID:                   %x\n", sb->md_magic);
		printf("Changing MD version from %d.%d.%d to %d.%d.%d.\n",
			sb->major_version, sb->minor_version,
			sb->patch_version,
			MKRAID_MAJOR_VERSION, MKRAID_MINOR_VERSION,
			MKRAID_PATCHLEVEL_VERSION);
	}

	sb->major_version = MKRAID_MAJOR_VERSION;
	sb->minor_version = MKRAID_MINOR_VERSION;
	sb->patch_version = MKRAID_PATCHLEVEL_VERSION;

	/* evaluated against the version that was just stored */
	if (verbose &&
	    ((sb->major_version > 0) || (sb->minor_version >= 50)))
		printf("preferred minor %d (md%d)\n", sb->md_minor, sb->md_minor);

	/* the preferred minor becomes the minor number of the md node */
	if (stat(cfg->md_name, &st) != 0) {
		fprintf(stderr, "%s: file doesn't exist!\n", cfg->md_name);
		return 1;
	}
	sb->md_minor = minor(st.st_rdev);
	cfg->array.param.md_minor = sb->md_minor;

	printf("changed preferred minor to %d (md%d)\n", sb->md_minor, sb->md_minor);

	if (fstat(fd, &st) == -1) {
		fprintf(stderr, "couldn't call stat() disk\n");
		return 1;
	}

	/*
	 * check whether the major/minor number of devices has
	 * changed since the array has been created ...
	 *
	 * First locate the configuration slot whose number equals the
	 * number this superblock records for its own disk.
	 */
	idx = 0;
	while (idx < sb->nr_disks &&
	       sb->this_disk.number != cfg->array.disks[idx].number)
		idx++;

	if (idx == sb->nr_disks) {
		fprintf(stderr, "weird superblock ...\n");
		return 1;
	}

	/* the open device must be the one the configuration names there */
	if (!((cfg->array.disks[idx].major == major(st.st_rdev)) &&
	      (cfg->array.disks[idx].minor == minor(st.st_rdev)))) {
		fprintf(stderr, "strange superblock, order of disks mixed up?...\n");
		return 1;
	}
	sb->this_disk.major = cfg->array.disks[idx].major;
	sb->this_disk.minor = cfg->array.disks[idx].minor;

	/* copy the configured device numbers into every disk descriptor */
	for (idx = 0; idx < sb->nr_disks; idx++) {
		mdp_disk_t *slot = &sb->disks[idx];

		if (verbose)
			fprintf(stderr, "upgrading RAID disk %d: major %d, minor %d, raid_disk %d\n", slot->number, slot->major, slot->minor, slot->raid_disk);

		slot->major = cfg->array.disks[idx].major;
		slot->minor = cfg->array.disks[idx].minor;

		if (verbose)
			fprintf(stderr, "    => to major %d, minor %d, raid_disk %d\n", slot->major, slot->minor, slot->raid_disk);
	}

	/* a zero chunk size in the superblock is filled in from the config */
	if (sb->chunk_size == 0) {
		sb->chunk_size = cfg->array.param.chunk_size;
		printf("changed chunksize to %d\n", sb->chunk_size);
	}

	return 0;
}

/*
 * print_sb: dump the fields of a superblock image to stdout.
 *
 * sb - superblock to print; it is only read.
 *
 * Prints the header fields first, then one line per entry of sb->disks
 * for the first sb->nr_disks entries.  The "preferred minor" line is
 * printed only for version >= 0.50, the chunk size only for levels 4 and
 * 5, and the parity algorithm only for level 5.
 */
void print_sb (mdp_super_t *sb)
{
	time_t t;
	mdp_disk_t *disk;
	int i;

	printf("MD ID:                   %x\n", sb->md_magic);
	printf("Conforms to MD version:  %d.%d.%d\n", sb->major_version,
					 sb->minor_version, sb->patch_version);
	if ((sb->major_version > 0) || (sb->minor_version >= 50))
		printf("preferred minor           %d (md%d)\n", sb->md_minor,
						 sb->md_minor);

	printf("gvalid_words:            %d\n", sb->gvalid_words);
	printf("Raid set ID:             %x\n", sb->set_uuid0);
	t = (time_t) sb->ctime;
	printf("Creation time:           %s", ctime(&t));
	t = (time_t) sb->utime;
	printf("Update time:             %s", ctime(&t));
	printf("State:                   %d%s\n", sb->state, sb->state &
					 (1 << MD_SB_CLEAN) ? " (clean)" : "");
	printf("Raid level:              %d\n", sb->level);
	printf("Individual disk size:    %uMB (%ukB)\n", sb->size /
					 MD_BLK_SIZ, sb->size);
	if (sb->level == 4 || sb->level == 5)
		printf("Chunk size:              %dkB\n", sb->chunk_size / MD_BLK_SIZ);
	i = sb->layout;
	/*
	 * The layout value comes straight from the superblock image and
	 * may be garbage; reject negative values as well as values >= 4 so
	 * that the table is never indexed out of range.
	 */
	if (sb->level == 5)
		printf("Parity algorithm:        %d (%s)\n", i,
			(i >= 0 && i < 4) ? parity_algorithm_table[i] : "unknown");
	printf("Total number of disks:   %d\n", sb->nr_disks);
	printf("Number of raid disks:    %d\n", sb->raid_disks);
	printf("Number of active disks:  %d\n", sb->active_disks);
	printf("Number of working disks: %d\n", sb->working_disks);
	printf("Number of failed disks:  %d\n", sb->failed_disks);
	printf("Number of spare disks:   %d\n", sb->spare_disks);
	printf("\n");

	for (i = 0; i < sb->nr_disks; i++) {
		disk = sb->disks + i;
		printf("Disk %d: major %d, minor %d, raid_disk %d, ", disk->number
			, disk->major, disk->minor, disk->raid_disk);
		/* state is a bit mask; each flag is reported separately */
		printf("state: %d (%s, %s, %s)\n", disk->state,
		disk->state & (1 << MD_DISK_FAULTY) ? "faulty" : "operational",
		disk->state & (1 << MD_DISK_ACTIVE) ? "active" : "not active",
		disk->state & (1 << MD_DISK_SYNC) ? "sync" : "not in sync");
	}
}

/*
 * Seek 'fd' to block 'blk' (via raidseek) and read exactly 'len' bytes
 * into 'buf'.  Returns 0 on success, 1 if the seek fails or the read is
 * short.
 */
static int read_at_block (int fd, int blk, void *buf, size_t len)
{
	if (raidseek(fd, blk) == -1)
		return 1;
	if (read(fd, buf, len) != (ssize_t) len)
		return 1;
	return 0;
}

/*
 * sanity_checks: inspect one component device before it is used, or
 * upgrade the superblock stored on it.
 *
 * name         - device name, used in messages only
 * fd           - open descriptor for the device
 * sb_offset    - block number of the superblock on the device
 * forceSanity  - when creating (not upgrading): skip all checks
 * upgradeArray - non-zero selects the upgrade path
 * cfg          - configuration entry of the array
 * dowrite      - upgrade path only: print old/new superblock and write the
 *                upgraded image back; when zero the upgrade is a dry run
 *
 * Create path (upgradeArray == 0): returns 0 immediately if forceSanity is
 * set or the array is not persistent.  Otherwise the device is rejected if
 * block 1 carries the byte pattern tested below (reported as an ext2
 * filesystem) or if the superblock location already holds MD_SB_MAGIC.
 *
 * Upgrade path: the superblock must carry MD_SB_MAGIC; it is passed to
 * upgrade_sb() and, when dowrite is set, written back and synced.
 *
 * A check for the device being mounted (a scan of /etc/mtab) used to run
 * first; it was disabled by vkp 091503.
 *
 * Returns 0 if the device is acceptable / was upgraded, 1 otherwise.
 */
static int sanity_checks (char *name, int fd, int sb_offset,
	 int forceSanity, int upgradeArray, md_cfg_entry_t * cfg, int dowrite)
{
	/*
	 * The union gives the raw byte buffer the alignment mdp_super_t
	 * needs, so the image can be accessed as a structure without
	 * casting an arbitrary char array.
	 */
	union {
		unsigned char bytes[MD_SB_BYTES];
		mdp_super_t sb;
	} buffer;
	unsigned char *raw = buffer.bytes;
	mdp_super_t *phys_sb = &buffer.sb;

	if (!upgradeArray) {
		if (forceSanity)
			return 0;
		if (cfg->array.param.not_persistent)
			/*
			 * We have no business analyzing the contents
			 * of a superblock-less array.
			 */
			return 0;
		/*
		 * Check if the device contains an ext2 filesystem
		 */
		if (read_at_block(fd, 1, raw, MD_BLK_SIZ))
			return 1;
		if (raw[56] == 0x53 && raw[57] == 0xef && raw[33] == 0x20 && raw[37] == 0x20) {
			fprintf(stderr, "%s appears to contain an ext2 filesystem -- use -f to override\n", name);
			return 1;
		}
		if (read_at_block(fd, sb_offset, raw, MD_SB_BYTES))
			return 1;
		if (phys_sb->md_magic == MD_SB_MAGIC) {
			fprintf(stderr, "%s appears to be already part of a raid array -- use -f to\nforce the destruction of the old superblock\n", name);
			return 1;
		}
		return 0;
	}

	/*
	 * Check if we can convert the array to a new version
	 */
	if (read_at_block(fd, sb_offset, raw, MD_SB_BYTES))
		return 1;

	if (phys_sb->md_magic != MD_SB_MAGIC) {
		fprintf(stderr, "cannot upgrade magic-less superblock on %s ...\n",
									 name);
		return 1;
	}

	if (dowrite) {
		fprintf(stderr, "upgrading superblock on %s ...\n",
				 name);
		fprintf(stderr, "old superblock:\n");
		print_sb(phys_sb);
	}
	/* dowrite doubles as upgrade_sb()'s verbose flag */
	if (upgrade_sb(fd, phys_sb, cfg, dowrite))
		return 1;
	if (dowrite) {
		fprintf(stderr, "new superblock:\n");
		print_sb(phys_sb);
		if (raidseek(fd, sb_offset) == -1)
			return 1;
		if ((write(fd, raw, MD_SB_BYTES)) != MD_SB_BYTES) {
			fprintf(stderr, "could not write new superblock!\n");
			return 1;
		}
		printf("sb->minor after write: %d\n", phys_sb->minor_version);
		/* the upgrade only counts once the image has reached the disk */
		if (fsync(fd) == -1) {
			fprintf(stderr, "could not sync new superblock on %s -- %s\n",
					 name, strerror(errno));
			return 1;
		}
	}
	return 0;
}

int analyze_sb (mdu_version_t * ver, enum mkraidFunc func,
		md_cfg_entry_t * cfg, int forceSanity,
		int upgradeArray, int Resync)
{
	int fd, i, j;
	md_raid_info_t *array;
	mdu_disk_info_t *disk;
	struct stat stat_buf;
	__u32 nr_blocks;

	if (!cfg)
		return 1;

	if (stat(cfg->md_name,&stat_buf)) {
		fprintf(stderr, "%s: file doesn't exist!\n", cfg->md_name);
		return 1;
	}

	if (major(stat_buf.st_rdev) != MD_MAJOR) {
		fprintf(stderr, "%s: not an MD device!\n", cfg->md_name);
		return 1;
	}

	printf("handling MD device %s\n", cfg->md_name);
	printf("analyzing super-block\n");
	array = &cfg->array;

	array->param.major_version = MKRAID_MAJOR_VERSION;
	array->param.minor_version = MKRAID_MINOR_VERSION;
	array->param.patch_version = MKRAID_PATCHLEVEL_VERSION;

	cfg->array.param.md_minor = minor(stat_buf.st_rdev);

	array->param.ctime = array->param.utime = (__u32) time(NULL);
	if (Resync)
		array->param.state = 0;
	else
		array->param.state = 1 << MD_SB_CLEAN;

	array->param.working_disks = array->param.nr_disks-array->param.failed_disks;
	array->param.active_disks = array->param.raid_disks-array->param.failed_disks;
	if (array->param.raid_disks + array->param.spare_disks
						!= array->param.nr_disks) {
		fprintf(stderr, "raid_disks + spare_disks != nr_disks\n");
		return 1;
	}
	if ((array->param.level==4 || array->param.level==5) && array->param.failed_disks > 1) {
		fprintf(stderr, "raid level %d can't have more than 1 failed disk\n",array->param.level);
		return 1;
	}
	for (i = 0; i < array->param.nr_disks; i++) {
		disk = array->disks + i;
		disk->number = i;

		if ((disk->state & (1 << MD_DISK_FAULTY))==0) {
			if (stat(cfg->device_name[i], &stat_buf) == -1) {
				fprintf(stderr, "couldn't call stat() on device %s -- %s\n", cfg->device_name[i], strerror(errno));
				return 1;
			}
			disk->major = major(stat_buf.st_rdev);
			disk->minor = minor(stat_buf.st_rdev);
		} else {
			disk->major = 0;
			disk->minor = 0;
		}
		if (disk->raid_disk >= array->param.nr_disks) {
			fprintf(stderr, "raid_disk for %s (%d) > nr_disks (%d)\n", cfg->device_name[i], disk->raid_disk, array->param.nr_disks);
			return 1;
		}
		for (j = 0; j < i; j++)
			if (array->disks[j].raid_disk == disk->raid_disk) {
				fprintf(stderr, "raid_disk conflict on %s and %s (%d)\n", cfg->device_name[j], cfg->device_name[i], disk->raid_disk);
				return 1;
			}
		if ((disk->raid_disk < array->param.raid_disks) && !(disk->state & (1<<MD_DISK_FAULTY)))
			disk->state = (1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC);
		if ((array->param.level == 0 || array->param.level == 1 || array->param.level == 4 || array->param.level == 5) && (!array->param.chunk_size || array->param.chunk_size % 4)) {
			fprintf(stderr, "invalid chunk-size (%dkB)\n", array->param.chunk_size);
			return 1;
		}
	}
	/*
	 * first pass, check them all
	 */
	for (i = 0; i < array->param.nr_disks; i++) {
		disk = array->disks + i;
		if ((disk->state & (1 << MD_DISK_FAULTY))==0) {
			fd = open(cfg->device_name[i], O_RDWR);
			if (fd == -1) {
				fprintf(stderr, "couldn't open device %s -- %s\n", cfg->device_name[i], strerror(errno));
				return 1;
			}
			if (ioctl(fd, BLKGETSIZE, (unsigned long)&nr_blocks) == -1) {
				fprintf(stderr, "couldn't get device size for %s -- %s\n", cfg->device_name[i], strerror(errno));
				close(fd);
				return 1;
			}
			nr_blocks >>= 1;
			if (nr_blocks < MD_RESERVED_BLOCKS * 2) {
				fprintf(stderr, "%s: device too small (%dkB)\n", cfg->device_name[i], nr_blocks);
				close(fd);
				return 1;
			}

                	cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
			if (!cfg->array.param.not_persistent) {
                		printf("disk %d: %s, %ukB, raid superblock at %dkB\n", i, cfg->device_name[i], nr_blocks, cfg->sb_block_offset[i]);
	                	if (sanity_checks(cfg->device_name[i], fd,
					 	cfg->sb_block_offset[i], forceSanity,
					 	upgradeArray, cfg, 0)) {
					close(fd);
					return 1;
				}
			}
			close(fd);
		} else {
			printf("disk %d: %s, failed\n", i, cfg->device_name[i]);
		}
	}
	/*	
	 * second pass, write stuff out ...
	 */
	for (i = 0; i < array->param.nr_disks; i++) {
		disk = array->disks + i;
		if ((disk->state & (1 << MD_DISK_FAULTY))==0) {
			if (upgradeArray)
				fd = open(cfg->device_name[i], O_RDWR);
			else
				fd = open(cfg->device_name[i], O_RDONLY);
			if (fd == -1) {
				fprintf(stderr, "couldn't open device %s -- %s\n", cfg->device_name[i], strerror(errno));
				return 1;
			}
			if (ioctl(fd, BLKGETSIZE, (unsigned long)&nr_blocks) == -1) {
				fprintf(stderr, "couldn't get device size for %s -- %s\n", cfg->device_name[i], strerror(errno));
				close(fd);
				return 1;
			}
			nr_blocks >>= 1;
			if (nr_blocks < MD_RESERVED_BLOCKS * 2) {
				fprintf(stderr, "%s: device too small (%dkB)\n", cfg->device_name[i], nr_blocks);
				close(fd);
				return 1;
			}

                	cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
                	if (sanity_checks(cfg->device_name[i], fd,
				 	cfg->sb_block_offset[i], forceSanity,
				 	upgradeArray, cfg, 1)) {
				close(fd);
				return 1;
			}
			close(fd);
		}
	}

	return 0;
}


/*
 * check_active: report whether the array named by p->md_name is listed as
 * running in /proc/mdstat.
 *
 * The name searched for is the part of p->md_name after the '/' of the
 * first "/md" (for example "md0" for "/dev/md0").  A line of /proc/mdstat
 * counts as a hit when it contains that name, the name is not directly
 * followed by another digit (so "md1" does not match the "md10" line), and
 * the line does not contain the word "inactive".
 *
 * Returns 1 (after a message on stderr) if the array is active.  Returns 0
 * if it is not, if p->md_name contains no "/md", or if /proc/mdstat cannot
 * be opened.
 */
int check_active (md_cfg_entry_t *p)
{
	char line[MAX_LINE_LENGTH];
	const char *name, *hit;
	size_t name_len;
	FILE *fp;

	if ((name = strstr(p->md_name, "/md")) == NULL)
		return 0;
	/*
	 * Step over the '/'.  The name is used in place; copying it into a
	 * fixed-size buffer could overflow on an over-long md_name.
	 */
	name++;
	name_len = strlen(name);

	if ((fp = fopen("/proc/mdstat", "r")) == NULL)
		return 0;

	while (fgets(line, sizeof line, fp) != NULL) {
		if (strstr(line, "inactive"))
			continue;

		/* look at every occurrence of the name in this line */
		for (hit = strstr(line, name); hit != NULL;
		     hit = strstr(hit + 1, name)) {
			char next = hit[name_len];

			/* a trailing digit means a different, longer name */
			if (next >= '0' && next <= '9')
				continue;

			fprintf(stderr, "%s: array is active -- run raidstop first.\n", p->md_name);
			fclose(fp);
			return 1;
		}
	}
	fclose(fp);
	return 0;
}

