IT干货网

监控redis服务是否存活

developer 2022年03月04日 操作系统 242 0
#!/bin/bash
# Watchdog for a local Redis instance: every 5 seconds check whether a process
# whose lsof command name is "redis-ser" holds port 6379, and start
# redis-server again (logging the time) when none is found.
while true
do
  # First column (command name) of the last matching lsof row; empty when
  # nothing matches.
  holder=$(
    lsof -i:6379 \
      | grep 'redis-ser' \
      | grep -v grep \
      | grep -v agent \
      | sort \
      | tail -1 \
      | cut -f 1 -d' '
  )

  if [ "$holder" != 'redis-ser' ]
  then
    # Relaunch in the background so the loop keeps polling.
    /usr/local/redis/bin/redis-server /usr/local/redis/conf/redis.conf &
    # $(date) is left unquoted, exactly as the original wrote the log line.
    echo redis restart at $(date) >> /data/redis/data/redisLive.log
  fi

  sleep 5
done
 
###Redis监控和告警 
 
###健康状态 
 
redis_stat.sh 
 
#!/bin/bash
# redis_stat.sh - print one metric taken from "redis-cli info".
#
# Usage: bash redis_stat.sh -p [6379] -s section [-k] keys [-d] db [-o] opt
#   -p PORT     Redis port (default 6379); also selects the password below
#   -s SECTION  INFO section to query
#   -k KEY      field name inside the section; with "-s Keyspace -k dbs" the
#               number of dbN lines in the Keyspace section is printed instead
#   -d DB       Keyspace entry name (e.g. db0); used together with -o
#   -o OPT      field of that Keyspace entry (e.g. keys, expires)
#
# Every option takes exactly one value. Unknown options are skipped together
# with their value, as before.
#
# NOTE(review): passwords are hard-coded and passed with -a, so they are
# visible in the process list.
host="127.0.0.1"
passwd1="123456"
passwd2="123456"
passwd3="123456"
redis_cli="/usr/local/redis/bin/redis-cli"

# Print the usage line on stdout and exit 1.
usage() {
        echo "Usage: bash $0 -p [6379] -s section [-k] keys [-d] db [-o] opt"
        exit 1
}

if [ $# -lt 4 ]; then
        usage
fi

port=6379
# Initialise explicitly so values cannot leak in from the environment.
section=''
key=''
db=''
opt=''
while [ $# -gt 0 ]; do
        # A lone trailing argument would make "shift 2" fail without shifting
        # anything, which used to spin this loop forever.
        if [ $# -lt 2 ]; then
                usage
        fi
        case "$1" in
                -p) port=$2 ;;
                -s) section=$2 ;;
                -k) key=$2 ;;
                -d) db=$2 ;;
                -o) opt=$2 ;;
        esac
        shift 2
done

case "$port" in
        6379)
                passwd=$passwd1
                ;;
        6380)
                passwd=$passwd2
                ;;
        6381)
                passwd=$passwd3
                ;;
        *)
                passwd=""
                ;;
esac

# Build the client command as an array instead of an eval'ed string, so
# arguments containing spaces or shell metacharacters stay plain data.
# 127.0.0.1 is also the redis-cli default host, so passing -h changes nothing.
cli=("$redis_cli" -h "$host" -p "$port")
if [[ -n "$passwd" ]]; then
        # Only pass -a when there is a password; a bare "-a" would swallow the
        # following "info" word as the password.
        cli+=(-a "$passwd")
fi

if [[ "$section" == "Keyspace" && "$key" == "dbs" ]]; then
        "${cli[@]}" info Keyspace | grep '^db[0-9]\{1,2\}:*' | wc -l
elif [[ -n "$db" && -n "$opt" ]]; then
        # Compare the entry name exactly so that db1 no longer also selects
        # db10..db15; a trailing ":" on -d is tolerated. Carriage returns are
        # dropped so the printed value is a clean token.
        "${cli[@]}" info Keyspace \
                | tr -d '\r' \
                | awk -F: -v db="${db%:}" '$1 == db' \
                | tr -s ',' '\n' \
                | grep -- "$opt" \
                | cut -d '=' -f 2
else
        info_args=(info)
        if [[ -n "$section" ]]; then
                info_args+=("$section")
        fi
        "${cli[@]}" "${info_args[@]}" \
                | tr -d '\r' \
                | grep -- "${key}:" \
                | cut -d ':' -f 2
fi
 
### 
redis-cli -h 127.0.0.1 -p 6379 -a zvIZ31Yn3G info all | awk -F ':'  
redis-cli用法: 
 
# redis-cli -h 127.0.0.1 -p 6379 -a password     //连接指定的redis
# redis-cli info        //没有密码的情况下,打印本地redis-srv的状态信息 
# redis-cli info Keyspace     //查看指定状态域的信息 
 
###redis3.0集群监控脚本 
http://blog.itpub.net/20625855/viewspace-1847453/ 
 
 
#!/bin/bash
###### Settings ####################################
# Authentication password.
auth='ASF6tp73yF5VPGVhHJuF'

# Directory holding the per-port configuration files.
configdir="/data/redis_cluster"

# Redis installation (binary) directory and the client command built from it.
bindir="/usr/local/redis-3.0.3/bin/"
client="${bindir}/redis-cli "

# Initial value for the maximum memory.
max_memory=0

# Marker for a wrong input argument; 0 is the initial value.
flag=0

# Cluster node list, one "ip:port" per entry.
list=(172.16.106.76:6379 172.16.106.78:6379 172.16.106.77:6379)

# Whether to send SMS alerts: off by default, "1" turns them on.
is_sendms=$2
################################################### 
 
 
 
 
### Alert function ####################################
# sendms MESSAGE ENABLED
#   Emit MESSAGE as an alert, but only when ENABLED is exactly "1".
#   The delivery step is a placeholder that echoes to stdout; replace it with
#   the real alerting channel.
sendms() {
  case "$2" in
    1)
      echo "报警:$1"
      ;;
  esac
}

# Export the function so bash child processes inherit it.
export -f sendms
################################################### 
 
 
 
 
 
 
### Redis per-instance statistics ###############################
#######################################
# Print a health report for one Redis instance and raise alerts.
# Globals:
#   client      (read) redis-cli command line; expanded unquoted on purpose
#               because it is a command string, not a single word
#   max_memory  (read) maxmemory in bytes; 0 or empty skips the memory-usage
#               alert (there is no limit to compare against)
# Arguments:
#   $1 - instance address as "ip:port"
#   $2 - forwarded to sendms as its on/off switch ("1" = deliver alerts)
# Outputs:
#   Coloured report on stdout. Alerts are raised by running "sendms" through
#   awk system(), so sendms must be visible to the shell that system() starts.
# Returns:
#   Exit status of the reporting awk process.
#######################################
statistics_redis(){
local ip port rest
local is_slowlog=0
local last_slowlog_time current_time

IFS=: read -r ip port rest <<<"$1"
# Newer CLUSTER NODES output appends "@cluster-bus-port" to the address; drop it.
port=${port%%@*}

# Second line of "slowlog get 1" is read as the newest entry's unix timestamp.
last_slowlog_time=$($client -h "$ip" -p "$port" slowlog get 1 | awk 'NR==2 {print $1}')
current_time=$(date +%s)

# Flag a slow query when the newest entry is less than ten minutes old.
if [[ "$last_slowlog_time" =~ ^[0-9]+$ ]]; then
  if (( last_slowlog_time + 60*10 > current_time )); then
    is_slowlog=1
  fi
fi

$client -h "$ip" -p "$port" info all | awk -F ':' \
  -v max_memory="$max_memory" \
  -v addr="$1" \
  -v is_sendms="$2" \
  -v is_slowlog="$is_slowlog" '
# Hand one alert to the sendms shell function.
function alert(msg) {
  system("sendms " addr "_" msg " " is_sendms);
}

{
  # Drop a trailing carriage return so values stay clean numbers and strings.
  sub(/\r$/, "");

  if ($1 == "uptime_in_seconds") uptime = $2;
  else if ($1 == "connected_clients") cnt_clients = $2;
  else if ($1 == "used_memory") used_memory = $2;
  else if ($1 == "used_memory_rss") used_memory_rss = $2;
  else if ($1 == "used_memory_peak") used_memory_peak = $2;
  else if ($1 == "mem_fragmentation_ratio") mem_ratio = $2;
  else if ($1 == "rdb_last_bgsave_status") bgsave_status = $2;
  else if ($1 == "aof_last_write_status") aof_write_status = $2;
  else if ($1 == "aof_last_bgrewrite_status") aof_bgrewrite_status = $2;
  else if ($1 == "instantaneous_ops_per_sec") ops_per_sec = $2;
  else if ($1 == "keyspace_hits") keyspace_hits = $2;
  else if ($1 == "keyspace_misses") keyspace_misses = $2;
  else if ($0 ~ /:keys=/) keys = keys "\n" $2;
}

END {
  # The 0.1 keeps the ratio defined when there were no lookups at all.
  hit_pct = keyspace_hits / (keyspace_hits + keyspace_misses + 0.1) * 100;

  printf("\033[1;33;1m####概况:\033[0m\n");
  printf("    启动时间:%d\n", uptime);
  printf("    当前连接数:%d\n", cnt_clients);
  printf("    当前OPS:%d\n", ops_per_sec);
  printf("    当前key分布情况:%s\n", keys);

  printf("\033[1;33;1m####命中情况:\033[0m\n");
  printf("    命中次数: %d\n", keyspace_hits);
  printf("    miss次数: %d\n", keyspace_misses);
  printf("    命中率:%d%%\n", hit_pct);

  printf("\033[1;33;1m####内存使用情况:\033[0m\n");
  printf("    1)分配总内存:%dMb\n", used_memory_rss / 1024 / 1024);
  printf("    2)使用内存:%dMb\n", used_memory / 1024 / 1024);
  # Peak line reports used_memory_peak (it used to repeat used_memory).
  printf("    3)峰值:%dMb\n", used_memory_peak / 1024 / 1024);
  printf("    4)最大内存:%dMb\n", max_memory / 1024 / 1024);
  printf("    5)内存碎片率:%s\n", mem_ratio);

  printf("\033[1;33;1m####持久化: \033[0m\n");
  printf("    上次bgsave状态:%s\n", bgsave_status);
  printf("    上次aof状态:%s\n", aof_write_status);
  printf("    上次rewrite状态:%s\n", aof_bgrewrite_status);

  printf("\033[1;33;1m####报警信息: \033[0m\n");
  if (cnt_clients + 0 >= 1000) {
    printf("\033[1;31;1mwarning: 当前连接数:%d,超标\033[0m\n", cnt_clients);
    alert("当前连接数:" cnt_clients);
  }
  if (ops_per_sec + 0 >= 50000) {
    printf("\033[1;31;1mwarning: 当前OPS:%d,超标\033[0m\n", ops_per_sec);
    alert("当前OPS:" ops_per_sec);
  }

  # A low hit ratio is only printed, never sent.
  if (hit_pct <= 50) {
    printf("\033[1;31;1mwarning:当前命中率:%d%%,过低\033[0m\n", hit_pct);
  }

  # Without a positive limit the usage percentage is undefined; dividing by
  # zero here used to abort awk before the slow-log alert could run.
  if (max_memory + 0 > 0) {
    mem_pct = used_memory / max_memory * 100;
    if (mem_pct >= 80) {
      printf("\033[1;31;1mwarning:当前内存使用:%d%%,过高\033[0m\n", mem_pct);
      alert("内存使用率:" mem_pct "%");
    }
  }

  if (is_slowlog > 0) {
    printf("\033[1;31;1mwarning:存在慢查询,请确认!\033[0m\n");
    alert("存在慢查询");
  }

  # Alerts on the bgsave / aof / rewrite status were disabled in the original
  # script and remain disabled.
}'
}
############################################### 
 
 
 
 
 
 
#### BEGIN: walk the node list ##################
# Dispatch on $1 for every "ip:port" entry of ${list[@]}:
#   stop   - send the shutdown command to each node
#   start  - launch redis-server on each node over ssh
#   status - report on the cluster through the first node that answers PING
#            (overview, master/slave tree, per-instance statistics), then exit 0
# $client is expanded unquoted on purpose: it holds a command line, not a word.
for config in "${list[@]}"
do
   IFS=: read -r ip port _rest <<<"$config"

#### stop ######################
   if [ "$1" == "stop" ]; then
      flag=1
      echo -n "$config "
      # NOTE(review): "shutdown5588" is presumably SHUTDOWN renamed on these
      # servers via rename-command - confirm against the node configuration.
      $client -h "$ip" -p "$port" shutdown5588 2>>/dev/null
      # The exit status is deliberately not checked (the original had the
      # check commented out), so success is always reported.
      echo "shutdown success!"
   fi

#### start ###################
   if [ "$1" == "start" ]; then
      flag=1
      echo -n "$config "
      if ssh "$ip" "${bindir}/redis-server  ${configdir}/${port}.cnf"; then
         echo "started"
      else
        echo "starting meet error"
      fi
   fi

#### status ##################
   if [ "$1" == "status" ]; then
      flag=1
      # Find one live node and describe the whole cluster from its point of view.
      isalive=$($client -h "$ip" -p "$port" ping 2>>/dev/null)
      if [ "$isalive" == "PONG" ]; then
         echo -e  "\e[1;32;1m#----------------------------------#\e[0m"
         echo  -e "\e[1;32;1m#集群基本信息:                     #\e[0m"
         echo -e  "\e[1;32;1m#----------------------------------#\e[0m"

         # Keep the full state value; cutting two bytes used to turn "fail"
         # into "fa" in the alert text.
         state_line=$($client -h "$ip" -p "$port" cluster info | grep cluster_state | tr -d '\r')
         cluster_is_ok=${state_line#cluster_state:}
         if [ "$cluster_is_ok" == "ok" ]; then
             echo -e "cluster_state:\e[1;32;1mok\e[0m"
         else
             echo -e "\033[1;31;1m${state_line}\e[0m"
             sendms "${config}_cluster_state:$cluster_is_ok" "$is_sendms"
         fi

         # One snapshot of the node table, minus failed/disconnected entries,
         # is reused for every step below instead of re-querying per master.
         live_nodes=$($client -h "$ip" -p "$port" cluster nodes | grep -vE 'fail|disconnected')
         nodes_alive=$(grep -c . <<<"$live_nodes")

         if [ "${#list[@]}" -ne "$nodes_alive" ]; then
            echo -e "total nodes:${#list[*]}, \033[1;31;1malive nodes:${nodes_alive}!!\033[0m"
            echo -e "\033[1;31;1mWarning: some nodes have down!!\033[0m"
            sendms "${config}_cluster_state:some_nodes_is_down" "$is_sendms"
         else
            echo "total nodes:${#list[*]}, alive nodes:${nodes_alive}"
         fi

         # Read by statistics_redis for the memory-usage check.
         # NOTE(review): "config5588" is presumably CONFIG renamed via
         # rename-command - confirm against the node configuration.
         max_memory=$($client -h "$ip" -p "$port" config5588 get maxmemory | awk 'NR>1 {print $1}')

         ### Build the master -> slave tree of the cluster.
         echo -e  "\e[1;32;4m#####主从结构树:\e[0m"

         # CLUSTER NODES columns used here: $1 id, $2 address, $3 flags,
         # $4 master id (for slaves).
         masters=$(awk '$3 ~ /master/ {print $1","$2}' <<<"$live_nodes" | sort -k 2,2 -t ',')

         shards=()   # one "master_addr slave_addr..." string per shard
         cnt=1
         # Intentionally unquoted: each "id,addr" entry is one whitespace-free word.
         for master in $masters
         do
           mid=${master%%,*}
           maddr=${master#*,}
           mip=${maddr%%:*}
           echo -e  "\033[1;36;1mmaster${cnt}:${maddr}\033[0m"

           # Slaves are matched on the exact master id rather than a substring grep.
           slaves=()
           while IFS= read -r saddr; do
             if [ -n "$saddr" ]; then
               slaves+=("$saddr")
             fi
           done < <(awk -v mid="$mid" '$3 ~ /slave/ && $4 == mid {print $2}' <<<"$live_nodes")

           same_host=0
           idx=0
           for saddr in "${slaves[@]}"
           do
             idx=$((idx + 1))
             echo "                      |-->slave${idx}:${saddr}"
             # Exact IP comparison; a regex grep let 172.16.106.7 match 172.16.106.76.
             if [ "${saddr%%:*}" == "$mip" ]; then
               same_host=1
             fi
           done

           if [ "$same_host" -ne 0 ]; then
              echo -e "\033[1;31;1mWarning: master's slave node is on the master's server!!\033[0m"
              sendms "${maddr}_cluster_state:M-S_is_on_same_server" "$is_sendms"
           fi

           shards+=("$maddr ${slaves[*]}")
           cnt=$((cnt + 1))
         done

         ### Collect the statistics of every live instance, shard by shard.
         for ((i = 0; i < ${#shards[@]}; i++))
         do
           echo ""
           echo -e  "\e[1;32;1m#----------------------------------#\e[0m"
           echo -e  "\e[1;32;1m#分片$((i+1)):                            #\e[0m"
           echo -e  "\e[1;32;1m#----------------------------------#\e[0m"
           # Intentionally unquoted: split the shard string back into addresses.
           for member in ${shards[$i]}
           do
             echo -e "\e[1;35;1m+++++${member}+++++\e[0m"
             statistics_redis "$member" "$is_sendms"
           done
         done

         exit 0
      fi
   fi

done

# No recognised action was run.
if [ "$flag" == "0" ]; then
  echo -e  "\e[1;31;1musage: sh cluster_control [start|stop|status] [1]\e[0m"
fi

# Reached in status mode only when no node answered PING (a live node exits
# above). The test now reads the variable that is actually set ("isalive").
if [[ "$isalive" != "PONG" && "$1" == "status" ]]; then
echo -e "\e[1;31;1mAll nodes is stopped.\e[0m"
fi

评论关闭
IT干货网

微信公众号:IT虾米 (左侧二维码扫一扫)欢迎添加!