#!/bin/bash
#
# smart_spindown
#
# Copyright (C) 2003 by Bart Samwel
#
# You may do with this file (and parts thereof) whatever you want, as long
# as my copyright notice is retained.
#
# - Extended by joerk at gentoo-wiki.com
# - Heavily modified to fit WD MyBook World external hard drives by 
#   kyyhkynen at gmail.com
# - Adapted by LaCie to handle specific spindown purposes
#
#
# Last updated 2008/12/17
#
##########################################################################

# Directory holding the configuration files and the pre/post hook directories.
CONFDIR=/etc/smart_spindown

# Load the main configuration, unless the script is sourced by a unit test
# (UNITTEST=yes).
[ x"${UNITTEST}" != "xyes" ] && . ${CONFDIR}/smart_spindown.conf

# Override default configuration with dynamic unicorn configuration.
[ -f "${CONFDIR}/smart_spindown_unicorn.conf" ] && . ${CONFDIR}/smart_spindown_unicorn.conf

# External commands, referenced by absolute path.
CAT=/bin/cat
CUT=/usr/bin/cut
GREP=/bin/grep
HDPARM=/sbin/hdparm
LS=/bin/ls
MOUNT=/bin/mount
SED=/bin/sed
SLEEP=/bin/sleep

# Mount table read by for_each_mountpoint.
MOUNTPOINTS_FILE=/etc/mtab

# Hook directories; the matching program lists are filled in by
# check_and_cache.
PRE_SPINDOWN_DIR=${CONFDIR}/pre
POST_SPINDOWN_DIR=${CONFDIR}/post
PRE_SPINDOWN_PROGS=
POST_SPINDOWN_PROGS=

# Adjust dirty_ratio and dirty_background_ratio once, when the daemon starts.
# NOTE(review): these writes are not guarded by UNITTEST, so they also happen
# when the file is only sourced by a test.
echo 50 > /proc/sys/vm/dirty_ratio
echo 20 > /proc/sys/vm/dirty_background_ratio


#
# Statistics information
#

# Number of times the disks have been put in spindown mode.
spindown_count=0

# Total time the disks have been seen up (increased by poll_time per poll).
uptime=0

# Total time the disks have been seen down (increased by poll_time per poll).
downtime=0

# Time spent down during the current/last spindown period; reset to 0 when
# the disks restart.
downtime_part=0


#
# Pre-setup some variables
#

# Mount option suffix appended by do_smart_remount when noatime is requested.
if [ ${do_remount_noatime} -eq 1 ]; then
  noatime_opt=",noatime"
fi

# max_age converted to centiseconds; written to the dirty_*_centisecs
# settings by set_spindown_mode.
max_age_cent=$((100*${max_age}))

# Set to 1 by build_disk_list when a RAID 1/5/10 device is found; it makes
# set_spindown_mode pause before spinning the disks down.
wait_raid_sb_sync=0

#
# Log functions
#

# Write an informational message, prefixed with ">> ", to stderr.
# Backslash escapes in the message are interpreted (echo -e).
info()
{
  local message="$*"

  echo -e ">> ${message}" 1>&2
}

# Write an error message, prefixed with "!! ", to stderr.
# Backslash escapes in the message are interpreted (echo -e).
error()
{
  local message="$*"

  echo -e "!! ${message}" 1>&2
}

# Write a debug message to stderr, but only when verbose mode is on
# (verbose=1). Always returns 0.
log()
{
  [ "${verbose}" -eq 1 ] || return 0

  echo -e "$*" 1>&2
}

# Print a framed error report on stderr, then terminate the script.
#
# Arguments: $1   - exit status to terminate with
#            $2.. - error message (joined into a single line)
# Does not return: always calls exit.
log_and_die()
{
  local status=$1
  shift
  error "------------------------------------------------------------"
  error "                                                            "
  # Quoted so the message is neither re-split nor glob-expanded.
  error "$*"
  error "                                                            "
  error "------------------------------------------------------------"
  error "An error occured while executing smart_spindown."
  error "Please, report this error to the LaCie support team."
  error ""
  error "Thanks."
  error ""

  exit ${status}
}

# Log (verbose mode only) the spindown counter, the accumulated up/down
# times and the current backoff factor.
log_stats()
{
  local summary="Spindowns counter: ${spindown_count}, time up/down: ${uptime}/${downtime}, backoff ${backoff_factor}."

  log "${summary}"
}

# Report the spindown counter and the accumulated up/down times through
# info (always printed).
info_stats()
{
  local summary="Spindowns counter: ${spindown_count}, time up/down: ${uptime}/${downtime}."

  info "${summary}"
}

#
# Misc functions
#
# Set the APM level of a device to 120, unless "hdparm -B" reports that
# APM is not supported.
#
# Arguments: $1 - device node
tune_apm_profile()
{
  test=$($HDPARM -B "$1" | grep "not supported")

  # Any "not supported" line means there is nothing to tune.
  [ -z "$test" ] || return 0

  $HDPARM -B 120 "$1" >/dev/null 2>&1
}


# List the owner-executable regular files found under a directory.
#
# Arguments: $1 - directory to scan (recursively)
# Outputs:   one path per line on stdout; nothing if $1 is not a directory
get_progs_in_dir()
{
  local dir=$1

  if [ -d "${dir}" ]; then
    # "-perm -100" (all listed bits set) selects the same files as the
    # obsolete "-perm +100" spelling, which recent GNU find releases reject.
    find "${dir}" -type f -perm -100
  fi
}

# Run every external command used during a spindown stage once, so that a
# missing tool is detected at start-up and the binaries are already cached
# (loading "sleep" from disk later would spin the disk up again).
# Also builds the pre/post spindown program lists.
check_and_cache()
{
  local rc

  ${CAT} < /dev/null ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${CAT}"

  echo " " | ${CUT} -b1 > /dev/null 2>&1 ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${CUT}"

  ${HDPARM} -V > /dev/null 2>&1 ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${HDPARM}"

  ${LS} /sys/block > /dev/null 2>&1 ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${LS}"

  ${MOUNT} --version > /dev/null 2>&1 ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${MOUNT}"

  ${SED} --version > /dev/null 2>&1 ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${SED}"

  ${SLEEP} 1 ; rc=$?
  [ ${rc} -eq 0 ] || log_and_die ${rc} "Fail execute command: ${SLEEP}"

  # Hook programs executed before and after a spindown.
  PRE_SPINDOWN_PROGS=$(get_progs_in_dir ${PRE_SPINDOWN_DIR})
  POST_SPINDOWN_PROGS=$(get_progs_in_dir ${POST_SPINDOWN_DIR})
}

# Execute every program of a list, ignoring their output and exit status.
#
# Arguments: $@ - programs to run, either as separate arguments (callers
#                 pass the list unquoted) or as one space-separated string
exec_prog_list()
{
  local prog

  # Intentionally unquoted: walks all arguments and also splits a single
  # space-separated list argument. Reading only $1 ran just the first hook.
  for prog in $*; do
    ${prog} >/dev/null 2>&1
  done
}

# Send the default kill signal to every background job of this shell.
# Returns 1 when there is no background job, else the status of kill.
killall_background()
{
  local pids
  pids="$(jobs -p)"

  if [ -z "$pids" ]; then
    return 1
  fi
  kill $pids
}

#
# Mount functions
#

# Remove every occurrence of an option of the form option=<alpha number>
# from a ${MOUNT} options string.
#
# Arguments: $1   - option name (without "=value")
#            $2.. - comma-separated mount options
# Outputs:   the cleaned option string on stdout
remove_mount_opts()
{
  local opt="$1"
  shift
  # The substitution is repeated through a sed loop: with a single "s///g"
  # pass, two adjacent occurrences share a comma and the second one is
  # left behind.
  printf '%s\n' ",$*," | ${SED}        \
    -e ':again'                        \
    -e 's/,'"${opt}"'=[a-z0-9]*,/,/'   \
    -e 't again'                       \
    -e 's/,,*/,/g'                     \
    -e 's/^,//'                        \
    -e 's/,$//'
}

# Remove every occurrence of an argument-less option from a ${MOUNT}
# options string.
#
# Arguments: $1   - option name
#            $2.. - comma-separated mount options
# Outputs:   the cleaned option string on stdout
remove_mount_yesno_opts()
{
  local opt="$1"
  shift
  # The substitution is repeated through a sed loop: with a single "s///g"
  # pass, two adjacent occurrences share a comma and the second one is
  # left behind.
  printf '%s\n' ",$*," | ${SED}  \
    -e ':again'                  \
    -e 's/,'"${opt}"',/,/'       \
    -e 't again'                 \
    -e 's/,,*/,/g'               \
    -e 's/^,//'                  \
    -e 's/,$//'
}

# Execute the given command for each entry of the mountpoints file.
#
# Arguments: $@ - command (and leading arguments); it is called with the
#                 device, mount point, filesystem type and mount options
#                 of each entry appended
# Returns:   0 on success; non-zero if an entry cannot be parsed or the
#            command fails (the loop runs in a pipeline subshell, so
#            log_and_die only ends that subshell and its status is
#            forwarded to the caller)
for_each_mountpoint()
{
  ${CAT} ${MOUNTPOINTS_FILE} | while read -r DEV MP FST OPTS DUMP PASS ; do

    # Use an explicit failure status: "$?" would be 0 here (the test that
    # detected the malformed entry succeeded) and hide the error.
    if [ -z "${DEV}" ] || [ -z "${MP}" ] || [ -z "${FST}" ] || [ -z "${OPTS}" ]; then
      log_and_die 1 "Fail to parse mountpoints file ${MOUNTPOINTS_FILE}"
    fi

    # Replace octal escapes with the associated character
    # Example: "\040" => " "
    # The mount point is passed as data (%b), never as the format string,
    # so a "%" in a path is preserved.
    MP=$(printf '%b' "${MP}")

    "$@" "${DEV}" "${MP}" "${FST}" "${OPTS}" ||
      log_and_die $? "Fail to execute: $* ${DEV} ${MP} ${FST} ${OPTS}"

  done || return $? # Forward bad exit status returned by the shell pipe
}

#
# Disk functions
#

# Execute the given command for each device node listed in DEVNAMES; the
# node is appended as last argument. Dies on the first failing call.
for_each_device()
{
  for disk in ${DEVNAMES}; do
    "$@" "${disk}" && continue
    log_and_die $? "Fail to execute: $* ${disk}"
  done
}
# Execute the given command for each disk name listed in DISKS; the name
# is appended as last argument. Dies on the first failing call.
for_each_disk()
{
  for disk in ${DISKS}; do
    "$@" "${disk}" && continue
    log_and_die $? "Fail to execute: $* ${disk}"
  done
}

# Store the first numeric field of /sys/block/<disk>/stat (the disk read
# counter) in the global variable read_block_count_<disk>.
#
# Arguments: $1 - disk name as found in /sys/block
get_disk_read_block_count()
{
  local count

  count=$(${CAT} /sys/block/${1}/stat | ${SED} -e "s/^[ ]*\([0-9]*\).*/\1/")
  eval "read_block_count_${1}=\${count}"
}

# Copy read_block_count_<disk> into read_block_count_<disk>_bk, the
# reference value used by cmp_read_block_count.
#
# Arguments: $1 - disk name
backup_read_block_count()
{
  local counter="read_block_count_${1}"

  eval "${counter}_bk=\"\${${counter}}\""
}

# Set read_block_occur to 1 when the current read counter of a disk
# differs from its backup. read_block_occur is never reset here, and the
# function always returns 0.
#
# Arguments: $1 - disk name
cmp_read_block_count()
{
  local backup_name="read_block_count_${1}_bk"
  local current_name="read_block_count_${1}"

  backup=${!backup_name}
  current=${!current_name}
  if [ "${backup}" -ne "${current}" ]; then
    read_block_occur=1
  fi
}

#
# Check if the given device is a hard disk by looking at its sysfs entry.
#
# See: include/scsi/scsi.h / * DEVICE TYPES
# define TYPE_DISK 0x00
#
# Arguments: $1 - block device name ("sda") or device path ("/dev/sda")
# return 0 if hard disk, 1 otherwise
is_hard_disk()
{
  local disk
  local sysfs_dev

  # Only the last path component names the /sys/block entry.
  disk=$(basename "$1")
  sysfs_dev="/sys/block/${disk}/device"

  # Is the device a SCSI drive?
  if [ -f "${sysfs_dev}/type" ] && [ "$(cat "${sysfs_dev}/type")" = '0' ]; then
    return 0
  fi

  # Is the device an IDE drive?
  if [ -f "${sysfs_dev}/media" ] && [ "$(cat "${sysfs_dev}/media")" = 'disk' ]; then
    return 0
  fi

  return 1
}

# Tell whether a volume answers the hdparm power-mode query.
#
# Arguments: $1 - volume path
# Returns:   the exit status of "hdparm -C" (0 means disk volume)
is_disk_volume()
{
  ${HDPARM} -C "$1" 1>/dev/null 2>&1
}

# Tell whether a device node has major number 9 or 259 in its "ls -l"
# listing (presumably the md RAID and extended block device majors).
#
# Arguments: $1 - device path
# Returns:   0 if the major number matches, non-zero otherwise
is_raid_volume()
{
  # The major must be a whole field: without the leading whitespace the
  # pattern also matched majors such as 19 or 179.
  ${LS} -l "$1" 2>/dev/null | grep -q -E '[[:space:]](9|259),'
}

#
# Check if the given disk is external (USB or ESATA). The scsi disk
# module use the "se" notation to name external disks, so any device whose
# base name starts with "se" is reported as external.
#
# Arguments: $1 - device name or path
# Returns:   0 if external, 1 otherwise
is_external_disk()
{
  disk=$(basename "$1")

  case "${disk}" in
    se*)
      return 0
      ;;
  esac

  return 1
}

#
# Check if the given block device is a raid device that needs to update
# its superblock while processing write requests.
# This is reported for raid 1, 5 and 10.
#
# Arguments: $1 - block device name as found in /sys/block
# Returns:   1 if the md level matches raid1/raid5/raid10, 0 otherwise
#            (note the inverted convention)
check_raid_sb_sync()
{
  bdev=$1
  local level_file="/sys/block/${bdev}/md/level"

  [ -f "${level_file}" ] || return 0

  if grep -q -E 'raid(1|5|10)' "${level_file}"; then
    return 1
  fi
  return 0
}

# Rebuild the global disk lists:
#   DISKS          - names of the internal hard disks to monitor
#   DEVNAMES       - matching /dev/<name> nodes
#   DISKNUM        - number of entries in DISKS
#   EXTERNAL_DISKS - names of the external hard disks
# Candidates come from STATIC_DISKS_LIST when it is set, otherwise from
# the content of /sys/block.
build_disk_list()
{
  DISKS=""
  DISKNUM=0
  DEVNAMES=""
  EXTERNAL_DISKS=""

  # A static list from the configuration file disables autodetection.
  block_devices=${STATIC_DISKS_LIST}
  [ -n "${block_devices}" ] || block_devices=$(${LS} /sys/block)

  for bdev in ${block_devices}; do
    #
    # A RAID device that performs superblock synchronization forces
    # laptop_mode off, otherwise an endless write loop happens:
    # - a superblock write triggers a cache page flush (laptop mode)
    # - and the page write triggers a superblock update.
    #
    check_raid_sb_sync "${bdev}"
    case $? in
      1)
        laptop_mode=0
        wait_raid_sb_sync=1
        ;;
    esac

    is_hard_disk "${bdev}" || continue

    if is_external_disk "${bdev}"; then
      # External disks stay out of the main list: they can be removed and
      # that case is deliberately not handled.
      EXTERNAL_DISKS="${EXTERNAL_DISKS} ${bdev}"
    else
      DISKS="${DISKS} ${bdev}"
      DEVNAMES="${DEVNAMES} /dev/${bdev}"
      DISKNUM=$((DISKNUM + 1))
    fi
  done
}

# Refresh the disk lists and take a fresh reference of every disk read
# counter. Dies with status 255 when no disk is left to monitor.
get_disk_context()
{
  local step

  build_disk_list
  for step in get_disk_read_block_count backup_read_block_count; do
    for_each_disk ${step}
  done
  # for_each_device tune_apm_profile

  if [ -z "${DISKS}" ]; then
    log_and_die 255 "No disks to monitor !!!"
  fi
  log "Monitoring spindown opportunities for disk(s) ${DISKS}"
}

#
# Power mode related functions
#

#
# Set the standby (spindown) timeout of the external disks with
# "hdparm -S". Does nothing when handle_external_disk is 0. A failure on
# one disk is reported with error but does not stop the loop.
#
external_set_spindown_mode()
{
  local disk

  [ ${handle_external_disk} -eq 0 ] && return

  for disk in ${EXTERNAL_DISKS}; do
    if ${HDPARM} -S ${external_standby_timeout} /dev/${disk} >/dev/null 2>&1; then
      log "Set spindown timeout to ${external_standby_timeout} for external disk ${disk}"
    else
      # Report the command exactly as it was run (device node included).
      error "Fail to execute: ${HDPARM} -S ${external_standby_timeout} /dev/${disk}"
    fi
  done
}

# Remount one volume with spindown-friendly options. Meant to be used as
# a for_each_mountpoint callback.
#
# Arguments: $1 - device, $2 - mount point, $3 - filesystem type,
#            $4 - current mount options
# Only internal hard disk or raid volumes are touched, and only for
# ext3/reiserfs (commit=${max_age}) and xfs; ${noatime_opt} is appended
# in both cases. Any other volume is left alone and 0 is returned.
do_smart_remount()
{
  local DEV="$1"
  local MP="$2"
  local FST="$3"
  local OPTS="$4"
  local PARSEDOPTS

  # Only handle hard disk or raid volumes
  is_disk_volume "${DEV}" || is_raid_volume "${DEV}" || return 0

  # Don't handle external disk volumes
  is_external_disk "${DEV}" && return 0

  case "${FST}" in
    "ext3"|"reiserfs")
      PARSEDOPTS="$(remove_mount_opts commit "${OPTS}")"
      PARSEDOPTS="$(remove_mount_opts data "${PARSEDOPTS}")"
      ${MOUNT} ${DEV} -t ${FST} "${MP}" -o remount,${PARSEDOPTS},commit=${max_age}${noatime_opt} >/dev/null 2>&1 \
        || log_and_die $? "Fail to execute ${MOUNT} ${DEV} -t ${FST} ${MP} -o remount,${PARSEDOPTS},commit=${max_age}${noatime_opt}"
      ;;
    "xfs")
      # Remount with the filtered options, as the ext3 branch and
      # do_restore_remount do; the raw OPTS were used here by mistake.
      PARSEDOPTS="$(remove_mount_opts data "${OPTS}")"
      ${MOUNT} ${DEV} -t ${FST} "${MP}" -o remount,${PARSEDOPTS}${noatime_opt} >/dev/null 2>&1 \
        || log_and_die $? "Fail to execute ${MOUNT} ${DEV} -t ${FST} ${MP} -o remount,${PARSEDOPTS}${noatime_opt}"
      ;;
  esac
}

# Remount one volume without the spindown options. Meant to be used as a
# for_each_mountpoint callback.
#
# Arguments: $1 - device, $2 - mount point, $3 - filesystem type,
#            $4 - current mount options
# ext3/reiserfs: commit, data and noatime are dropped and commit=0 added.
# xfs:           noatime and data are dropped.
# Any other volume is left alone and 0 is returned.
do_restore_remount()
{
  local extra_opts

  DEV=$1
  MP=$2
  FST=$3
  OPTS=$4

  # Only handle hard disk or raid volumes
  is_disk_volume "${DEV}" || is_raid_volume "${DEV}" || return 0

  # Don't handle external disk volumes
  is_external_disk "${DEV}" && return 0

  case "${FST}" in
    ext3|reiserfs)
      PARSEDOPTS=$(remove_mount_opts commit "${OPTS}")
      PARSEDOPTS=$(remove_mount_opts data "${PARSEDOPTS}")
      PARSEDOPTS=$(remove_mount_yesno_opts noatime "${PARSEDOPTS}")
      extra_opts=",commit=0"
      ;;
    xfs)
      PARSEDOPTS=$(remove_mount_yesno_opts noatime "${OPTS}")
      PARSEDOPTS=$(remove_mount_opts data "${PARSEDOPTS}")
      extra_opts=""
      ;;
    *)
      # Other filesystems are not remounted.
      return 0
      ;;
  esac

  ${MOUNT} ${DEV} -t ${FST} "${MP}" -o remount,${PARSEDOPTS}${extra_opts} >/dev/null 2>&1 \
    || log_and_die $? "Fail to execute: ${MOUNT} ${DEV} -t ${FST} ${MP} -o remount,${PARSEDOPTS}"
}

# Put the monitored disks in standby:
#   1. run the pre-spindown programs,
#   2. remount the volumes with spindown options (if do_remount is 1),
#   3. save the current /proc/sys/vm settings in the *_orig globals and
#      replace them with the configured values,
#   4. sync when laptop_mode is 0, wait for a raid superblock write if
#      needed, then issue "hdparm -y" on every device of DEVNAMES.
# Dies through log_and_die when a remount or hdparm fails.
set_spindown_mode()
{
  local vm_dir=/proc/sys/vm

  # Execute pre-spindown programs
  exec_prog_list ${PRE_SPINDOWN_PROGS}

  if [ "${do_remount}" -eq 1 ]; then
    for_each_mountpoint do_smart_remount ||
      log_and_die $? "Fail to parse mountpoints file ${MOUNTPOINTS_FILE}"
  fi

  #
  # FIXME: the backup of the original configuration is quite thin. If
  # another application modifies these parameters during a spindown stage,
  # obsolete values get restored. The same kind of problem exists with the
  # ${MOUNT} point options.
  #
  laptop_mode_orig=$(${CAT} ${vm_dir}/laptop_mode)
  dirty_writeback_centisecs_orig=$(${CAT} ${vm_dir}/dirty_writeback_centisecs)
  dirty_expire_centisecs_orig=$(${CAT} ${vm_dir}/dirty_expire_centisecs)
  dirty_ratio_orig=$(${CAT} ${vm_dir}/dirty_ratio)
  dirty_background_ratio_orig=$(${CAT} ${vm_dir}/dirty_background_ratio)

  echo ${laptop_mode}            > ${vm_dir}/laptop_mode
  echo ${max_age_cent}           > ${vm_dir}/dirty_writeback_centisecs
  echo ${max_age_cent}           > ${vm_dir}/dirty_expire_centisecs
  echo ${dirty_ratio}            > ${vm_dir}/dirty_ratio
  echo ${dirty_background_ratio} > ${vm_dir}/dirty_background_ratio

  # An explicit sync is unneeded with laptop_mode active, so it is only
  # done when laptop_mode is off.
  # POTENTIALLY CHANGES THE BEHAVIOR OF SOME DISKS
  if [ "${laptop_mode}" -eq 0 ]; then
    sync
  fi

  # Wait end of raid superblock write if needed (5 seconds).
  if [ "${wait_raid_sb_sync}" -eq 1 ]; then
    sleep 5
  fi

  ${HDPARM} -q -y ${DEVNAMES} || log_and_die $? "Fail to execute: ${HDPARM} -q -y ${DEVNAMES}"

  return $?
}

# Leave spindown mode: restore the mount options (if do_remount is 1),
# write back every /proc/sys/vm setting saved by set_spindown_mode, sync
# when laptop_mode is 0 and run the post-spindown programs.
# A saved value is cleared once restored, so calling this function twice
# restores nothing the second time. Always returns 0.
unset_spindown_mode()
{
  local knob
  local saved_name

  if [ "${do_remount}" -eq 1 ]; then
    for_each_mountpoint do_restore_remount ||
      log_and_die $? "Fail to parse mountpoints file ${MOUNTPOINTS_FILE}"
  fi

  # Restore original cache page settings
  for knob in laptop_mode                \
              dirty_writeback_centisecs  \
              dirty_expire_centisecs     \
              dirty_ratio                \
              dirty_background_ratio; do
    saved_name="${knob}_orig"
    if [ -n "${!saved_name}" ]; then
      echo ${!saved_name} > /proc/sys/vm/${knob}
      eval "${saved_name}=\"\""
    fi
  done

  # Unneeded with laptop_mode active.
  if [ "${laptop_mode}" -eq 0 ]; then
    sync
  fi

  # Execute post-spindown programs
  exec_prog_list ${POST_SPINDOWN_PROGS}

  return 0
}

#
# Print the power mode of the monitored disks on stdout. Even if several
# disks are checked, a single power mode is printed:
#
# - "unplug"  : "hdparm -C" failed (disk removed ?)
# - "active"  : at least one disk reports an active state
# - "standby" : all disks are down
# - nothing   : the hdparm output matches none of the above; the output
#               is then reported through error
#
check_power_mode()
{
  if ! pm_out=$(${HDPARM} -C ${DEVNAMES}); then
    echo "unplug"
    return
  fi

  if printf '%s\n' "${pm_out}" | ${GREP} -q "active"; then
    echo "active"
    return
  fi

  # "standby" only if every monitored disk reports it
  disk_down_num=$(printf '%s\n' "${pm_out}" | ${GREP} -c "standby")
  if [ "${disk_down_num}" -eq "${DISKNUM}" ]; then
    echo "standby"
    return
  fi

  # Log this very unexpected state
  error "Unable to parse \"hdparm -C ${DEVNAMES}\" output: ${pm_out}"
}

#
# Backoff functions
#

# Tell whether no disk was read since the last counter backup.
# Refreshes every read counter, then compares each one with its backup.
#
# Returns: 0 if all counters are unchanged (quiet), 1 otherwise
is_quiet()
{
  local step

  read_block_occur=0
  for step in get_disk_read_block_count cmp_read_block_count; do
    for_each_disk ${step}
  done
  return ${read_block_occur}
}

# Scale backoff_factor by backoff_increase_factor (a percentage), then
# cap it so that the resulting wait time never exceeds max_wait_time.
increase_backoff_factor()
{
  local projected_wait

  backoff_factor=$((${backoff_factor}*${backoff_increase_factor}/100))

  # Wait time this factor would produce (see compute_wait_time).
  projected_wait=$((${base_wait_time}*${backoff_factor}/100))
  if [ ${projected_wait} -gt ${max_wait_time} ]; then
    backoff_factor=$((${max_wait_time}*100/${base_wait_time}))
  fi
  log "Increase backoff factor: ${backoff_factor}"
}

# Scale backoff_factor by backoff_decrease_factor (a percentage), never
# letting it drop below 100.
decrease_backoff_factor()
{
  backoff_factor=$((${backoff_factor}*${backoff_decrease_factor}/100))
  [ ${backoff_factor} -ge 100 ] || backoff_factor=100

  log "Decrease backoff factor: ${backoff_factor}"
}

# Set wait_time, the quiet time required before a spindown, to
# base_wait_time scaled by backoff_factor (a percentage).
compute_wait_time()
{
  local scaled=$((${base_wait_time}*${backoff_factor}))

  wait_time=$((${scaled}/100))
}

#
# Let a unit test script only source the above functions
#
# Everything below is skipped when UNITTEST=yes, so that a test script can
# source this file and only get the function definitions above.
if [ x"${UNITTEST}" != "xyes" ]; then

  #
  # Signal functions
  #

  # Trap SIGUSR1 to toggle verbose mode.
  # The trap string is double-quoted, so the new value is computed when the
  # trap is installed, not when the signal arrives; the main loop therefore
  # calls this function on every iteration to re-arm the trap against the
  # current value of verbose.
  trap_verbose_signal()
  {
    trap "verbose=$((1-verbose))" SIGUSR1
  }

  # On script exit: restore the normal settings, print the statistics and
  # kill the background jobs.
  trap "unset_spindown_mode; info_stats; info 'smart_spindown stop'; killall_background; exit" 0
  #trap "exit 0" 15
  # Signals 2, 3 and 6 abort with status 255 (the exit trap above still runs).
  trap "error 'Abort'; exit 255" 2 3 6
  #
  # Trap SIGHUP signal to update the disk context. It can be used to
  # handle a new external disk.
  #
  trap "echo 'Update disk context'; get_disk_context; external_set_spindown_mode" SIGHUP

  #
  # Main loop
  #

  # One-time initialisation: check the tools, build the disk lists and
  # configure the external disks.
  check_and_cache
  get_disk_context
  external_set_spindown_mode

  info "smart_spindown start - disk(s): ${DISKS}"

  power_mode='unknown'
  quiet_time=0
  downtime_part=0
  # Spindown periods no longer than this value are penalized with a backoff
  # increase when the disks restart (see below).
  max_age_inf=$((${max_age}-3*${poll_time}))
  while [ true ]; do

    trap_verbose_signal
    compute_wait_time
    previous_power_mode=${power_mode}

    ${SLEEP} ${poll_time}

    #
    # Check disk power mode.
    #
    power_mode=`check_power_mode`
    case ${power_mode} in
      #
      # TODO: handle the idle state
      #
      'active'|'idle')
        uptime=$((${uptime}+${poll_time}))

        if [ ${previous_power_mode} = 'standby' ]; then
          #
          # Disk restart
          #
          unset_spindown_mode || exit $?
          # Increase the backoff only if the last downtime_part was at most
          # max_age_inf, to avoid penalizing the spin-up caused by the
          # max_age commit time we asked for ourselves.
	      if [ ${downtime_part} -le ${max_age_inf} ]; then
          	increase_backoff_factor
	      fi
          # reset now downtime_part
	      downtime_part=0
          log "Disk(s) restart"
          log_stats
          #
          # Smartly, refresh the disk list (may be a new device is available)
          # and go to the next iteration.
          #
          get_disk_context
          external_set_spindown_mode
          quiet_time=0
          continue
        fi

        # Accumulate quiet time while no read happens; any read resets it
        # and takes a new reference of the read counters.
        is_quiet
        if [ $? -eq 0 ]; then
          quiet_time=$((${quiet_time}+${poll_time}))
          log "Seconds of quiet: ${quiet_time}/${wait_time}"
        else
          log "Disk read"
          for_each_disk backup_read_block_count
          quiet_time=0
        fi
        # Quiet for long enough: spin the disks down. power_mode is forced
        # to 'standby' so that the next iteration sees it as the previous
        # state.
        if [ ${quiet_time} -ge ${wait_time} ]; then
          set_spindown_mode || exit $?
          log "Disk(s) spin-down"
          spindown_count=$((${spindown_count}+1))
          power_mode='standby'
        fi
        ;;
      'standby')
        # Disks still down: account the time and relax the backoff.
        downtime=$((${downtime}+${poll_time}))
        downtime_part=$((${downtime_part}+${poll_time}))
        decrease_backoff_factor
        ;;
      'unplug')
        # Refresh the disk list and remove unplugged disks
        get_disk_context
        ;;
      *)
        # Unexpected power mode
        log_and_die 255 "Unexpected power mode"
        ;;
    esac

  done

fi

