`
cocos
  • 浏览: 394364 次
  • 性别: Icon_minigender_1
  • 来自: 福州
社区版块
存档分类
最新评论

腾讯服务器监控nginx进程,自动拉起

阅读更多
其实就是用 ps 探测进程数量、用 nc 探测端口,发现异常就自动重启进程。
后面再写一篇分析,
先把文件贴出来。
#! /bin/sh

###file_ver=2.0.3

# NOTE(review): "." is appended to PATH, so commands can resolve from the
# current directory. Kept unchanged for compatibility; confirm it is still
# needed, since the package config declares user="root".
PATH=$PATH:.

#monitor the application
#create by leonlaili,2006-12-6

####### Custom variables begin  ########
##todo: add custom variables here
#get script path
# Resolve the directory that holds this script into dir_pre.
#  - $0 with a slash is already a usable path, so it is used directly.
#  - $0 without a slash is looked up on PATH; if that finds nothing (e.g. the
#    script was started as "sh monitor.sh" and is not executable), fall back
#    to the current directory instead of leaving dir_pre empty.
case "$0" in
    */*) script_path=$0 ;;
    *)   script_path=$(command -v "$0" 2>/dev/null) ;;
esac
case "$script_path" in
    */*) dir_pre=$(dirname "$script_path") ;;
    *)   dir_pre=. ;;
esac
####### Custom variables end    ########

#load common functions
# Sources "$dir_pre/common.sh" into the current shell when that file exists.
# Globals: dir_pre (read), common_file (written).
# Returns: 0 when the file is absent, otherwise the status of sourcing it.
# The path is quoted so an install directory containing whitespace still works.
load_lib()
{
    common_file="$dir_pre/common.sh"
    if [ -f "$common_file" ];then
        . "$common_file"
    fi
}

# Abort unless the script is run by the account named in $user.
# Globals: user (read).
# Outputs: a one-line message on stdout when the account does not match.
# Exits the shell with status 1 on mismatch; returns 0 otherwise.
check_user()
{
    [ "$user" = "$(whoami)" ] && return 0

    echo "Only $user can execute this script"
    exit 1
}

# Placeholder for usage output: prints nothing and hands back the status
# that was current when it was called.
print_help()
{
    ##todo: output help information here
    # echo ....
    return $?
}

# Validate script parameters. The template sets ok="true" and has no
# additional checks yet, so the failure path is only reached once custom
# checks flip $ok.
# Globals: ok (written).
# On failure: prints a message, calls print_help and exits with status 1.
check_params()
{
    ok="true"
    ##todo: add addition parameters checking statement here...

    [ "$ok" = "true" ] && return 0

    echo "Some of the parameters are invalid. "
    print_help
    exit 1
}

# Look up the allowed process-count range for one application.
#
# $app_name is a whitespace-separated list of entries of the form "name" or
# "name:min,max". The first entry whose name equals the requested one
# supplies the limits.
#
# Arguments: $1 - application name; a trailing ":..." suffix is ignored so a
#                 raw "name:min,max" entry can be passed as well.
# Globals:   app_name (read); numbers, num1, num2 (written).
# Result:    num1 = minimum count (default 1),
#            num2 = maximum count (default 999999999).
#
# The name is compared as a literal string, not as a regular expression, so
# names containing characters such as "." cannot match other entries.
get_app_num()
{
    local wanted entry

    wanted=${1%%:*}
    numbers=""

    # Unquoted on purpose: app_name is a space-separated list.
    for entry in $app_name
    do
        case "$entry" in
            "$wanted":*)
                numbers=${entry#*:}
                # Keep only the field right after the name.
                numbers=${numbers%%:*}
                break
                ;;
        esac
    done

    num1=${numbers%%,*}
    num2=""
    case "$numbers" in
        *,*)
            num2=${numbers#*,}
            num2=${num2%%,*}
            ;;
    esac

    [ -n "$num1" ] || num1=1
    [ -n "$num2" ] || num2=999999999
}

#check port
# Probe a TCP port with nc/netcat, retrying before declaring it down.
#
# Arguments: $1 - IP address to probe
#            $2 - TCP port
# Globals:   vip (read) - when non-empty, probed as a fallback address
#            nc_cmd (written), err_port (appended on failure)
# Returns:   0 if any probe succeeds; 1 after all probes fail, in which case
#            the port ($2) is appended to err_port.
#
# Sequence: one probe, then up to 5 retries one second apart, then (if vip is
# set) up to 5 more probes against the VIP.
check_port()
{
    local attempt

    nc_cmd="/usr/bin/nc"
    if [ ! -f "$nc_cmd" ];then
        nc_cmd="/usr/bin/netcat"
    fi

    if "$nc_cmd" -zn -w4 "$1" "$2";then
        return 0
    fi

    # POSIX counter loop: the script runs under /bin/sh, where the bash-only
    # "for (( ... ))" form is not guaranteed to exist.
    attempt=0
    while [ "$attempt" -lt 5 ]
    do
        if "$nc_cmd" -zn -w4 "$1" "$2";then return 0;fi
        sleep 1
        attempt=$((attempt + 1))
    done

    #check VIP again
    if [ "$vip" != "" ];then
        attempt=0
        while [ "$attempt" -lt 5 ]
        do
            if "$nc_cmd" -zn -w4 "$vip" "$2";then return 0;fi
            sleep 1
            attempt=$((attempt + 1))
        done
    fi

    # Record the port that was actually probed, not the caller's loop variable.
    err_port="$err_port $2"
    return 1
}

# Verify that the number of running processes for one application lies
# within the range produced by get_app_num.
# Arguments: $1 - application entry; text before the first ":" is the name.
# Globals:   num1, num2 (read, set by get_app_num); app, num (written);
#            err_app (appended when the count is out of range).
# Returns:   0 when the count is acceptable, 1 otherwise.
check_process()
{
    get_app_num $1
    app=$(echo $1 | cut -d: -f1)
    # Drop the ps header line, then count what remains.
    num=$(ps -C $app | tail -n +2 | wc -l)

    [ $num -lt $num1 -o $num -gt $num2 ] || return 0

    err_app="$err_app $app"
    return 1
}

# Run the application health checks.
# Does nothing (returns 0) when $runing_file is not a regular file.
# Otherwise maps ip_type to bind_ip, clears err_app/err_port and delegates to
# run_config "monitor" (defined outside this function).
# Globals: runing_file, ip_type, ip_inner, ip_outer, vip (read);
#          bind_ip, err_app, err_port (written).
check_app()
{
    [ ! -f $runing_file ] && return 0

    # ip_type: 0 inner, 1 outer, 2 any address, 3 VIP, 4 loopback.
    case "$ip_type" in
        0) bind_ip=$ip_inner ;;
        1) bind_ip=$ip_outer ;;
        2) bind_ip="0.0.0.0" ;;
        3) bind_ip=$vip ;;
        4) bind_ip=127.0.0.1 ;;
    esac

    ##todo: add application checking statement here
    err_app=""
    err_port=""

    run_config "monitor"
}

# React to a failed health check: send an alert naming the failing
# processes/ports, then run the "resolve" section through run_config.
# Globals: err_app, err_port (read).
# Returns: the status of run_config "resolve".
# report and run_config are defined outside this function.
resolve_app()
{
    # Send the alert message.
    local alert_msg="Monitor: restart [process:${err_app}][port:${err_port}]"
    report "$alert_msg"

    ##todo: add custom statement here

    run_config "resolve"
    return $?
}

#report monitor result information
# Send one monitoring event to the report server with an HTTP GET via wget.
#
# Arguments: $1 - event type (the caller passes 'app' or 'port')
#            $2 - comma-separated list of affected elements
#            $3 - action taken (the caller passes "restart")
# Globals:   ip_inner, install_path (read);
#            report_ip, url_head, response_file, wget_options (written).
# Returns:   the exit status of wget.
#
# The server response is not used, so it is written to /dev/null rather than
# to a fixed, predictable file name under /tmp (which a local user could
# pre-create as a symlink while this script runs with elevated privileges).
# Values are placed in the query string as-is, without URL-encoding.
rpt_info()
{
    local rtype="$1"
    local elem="$2"
    local action="$3"
    report_ip=172.16.211.50
    url_head="http://$report_ip/pkg/monitor_rpt.php"
    response_file="/dev/null"
    wget_options="-T 10 -O $response_file --connect-timeout=5"

    # wget_options is intentionally unquoted so it splits into separate flags.
    wget $wget_options "${url_head}?ip=${ip_inner}&install_path=${install_path}&type=${rtype}&elem=${elem}&action=${action}" > /dev/null 2>&1
}


###### Main Begin ########
# Flow: optional --help, load the shared library, verify the user and the
# parameters, run the health checks, then resolve and report any failures.
case "$1" in
    --help)
        print_help
        exit 0
        ;;
esac

load_lib
check_user
check_params
check_app

# Any failing process or port triggers the resolve step once.
if [ -n "$err_app" ] || [ -n "$err_port" ];then
    resolve_app
fi

# err_app / err_port are space-separated with a leading space; turn them into
# comma-separated lists without leading or trailing commas before reporting.
if [ -n "$err_app" ];then
    err_app_list=$(echo "$err_app" | sed -e 's/ /,/g' -e 's/^,//' -e 's/,$//')
    rpt_info 'app' "$err_app_list" "restart"
fi

if [ -n "$err_port" ];then
    err_port_list=$(echo "$err_port" | sed -e 's/ /,/g' -e 's/^,//' -e 's/,$//')
    rpt_info 'port' "$err_port_list" "restart"
fi
###### Main End   ########



framework_ver=2.0.7
<!--程序包基本信息-->
<base_info>
#程序作者
author="funnychen"
#所属产品
product="third"
#模块
module=""
#软件包名称
name="nginx"
#进程名不能超过15字符,多个进程空格间隔,可指定进程数量.app:min,max,如app_name:1,20
app_name="nginx"
#进程监听TCP端口!仅限TCP端口,多个端口使用空格间隔
port="80"
#TCP端口绑定ip类型!仅限TCP端口 0:内网;1:外网;2:0.0.0.0;3:VIP;4:127.0.0.1
ip_type="2"
#软件包版本
version="0.7.67"
#所属用户
user="root"
#停止进程信号量
kill_sig="KILL"
#是否开机启动[0|1]
auto_start="1"
#开机启动脚本存放文件
boot_path="/usr/local/services/etc/svc.sh"
#日志目录(日志直接存放在安装目录请设为空)
log_dir="/data/log/$name-$version"
#告警特性ID(默认不需要修改)
rpt_port="5570"
#基础安装目录(请勿修改)
install_base="/usr/local/services"
#系统使用变量请勿修改
install_path="/usr/local/services/nginx-0.7.67"
#是否强制在老目录安装(请勿修改)
force_install="false"
</base_info>

<!--安装时创建文件链接-->
<link>
# Inside the install directory, create a "logs" symlink pointing at "log".
cd $install_path
ln -s log logs

# Re-point /usr/local/nginx at this install directory; a failure to remove
# the old entry (e.g. it does not exist) is silenced.
rm /usr/local/nginx 2>/dev/null
ln -s $install_path /usr/local/nginx
</link>







<!--程序启动方式,请使用相对bin目录的路径-->
<start>
# Limit the core file size (the original note says 4k) so that process
# coredumps can be monitored.
# NOTE(review): the unit of "ulimit -c" depends on the shell - confirm.
ulimit -c 4 -S
# Raise the open-file-descriptor limit.
ulimit -n 100001

#----- Packages with a single process name: add the start command here -----
./nginx

#----- Packages with several process names: add the start commands here -----
# Copy the if statement once per process; keep the if logic and replace the
# sample process name app_test.
# app_to_start holds the name of the process to start; it needs no change.
#if [ "$app_to_start" = "app_test" -o "$app_to_start" = "all" ];then
#    ./app_test &
#fi
</start>



<!--自定义crontab调度,请使用/usr/local/services/nginx-0.7.67占位符-->
<!--Example: "0 0 * * * /usr/local/services/nginx-0.7.67/admin/start.sh &" -->
<crontab>
*/3 * * * * /usr/local/services/nginx-0.7.67/admin/monitor.sh > /usr/local/services/nginx-0.7.67/log/crontab.log 2>&1 &
*/15 * * * * /usr/local/services/nginx-0.7.67/admin/clear.sh file > /usr/local/services/nginx-0.7.67/log/crontab.log 2>&1 &
15 8 * * * /usr/local/services/nginx-0.7.67/admin/md5sum.sh check > /usr/local/services/nginx-0.7.67/log/crontab.log 2>&1 &
0 0 * * * /usr/local/services/nginx-0.7.67/tools/log_clear.sh > /usr/local/services/nginx-0.7.67/log/log_clear.log 2>&1 &
</crontab>


<!--程序停止方式-->
<stop>
# Seconds to wait after stopping a process.
sleep_count=1

# app_to_stop holds the name of the process to stop; the value "all" means
# every process listed in app_name.

# More than one process and a specific one requested: stop only that one.
# NOTE(review): app_count and kill_app are defined outside this snippet, and
# the bare "return" implies the snippet runs inside a function or a sourced
# file - confirm against the framework.
if [ $app_count -gt 1 -a "$app_to_stop" != "all" ];then
    kill_app $app_to_stop $kill_sig
    sleep $sleep_count
    return
fi

# Otherwise stop every entry of app_name; the text before ":" is the name.
for app_info in $app_name
do
    app=`echo $app_info | awk -F: '{print $1}'`
    kill_app $app $kill_sig
    sleep $sleep_count
done
</stop>

<!--程序状态检测,若检测异常则添加异常进程名到err_app或添加异常端口到err_port-->
<monitor>
# Process count check: one check_process call per whitespace-separated word
# read from $runing_file.
for app_info in `cat $runing_file`
do
    check_process "$app_info"
done

# Port check: probe every port listed in $port on $bind_ip.
for p in `echo $port`
do
    check_port "$bind_ip" "$p"
done
</monitor>

<!--程序异常处理,根据状态检测是否正常决定-->
<resolve>
# Restart every process recorded in err_app through admin/restart.sh.
for app in `echo $err_app`
do
    $install_path/admin/restart.sh $app
done
</resolve>

<!--需要进行md5校验的文件列表,请使用相对安装目录的路径,带"<空格>-"后缀为不需要进行校验-->
<md5>
bin/*
lib/*
admin/*
bin/*.pid -
bin/*core* -
bin/*.tmp -
bin/*.stat -
bin/*.log -
bin/*.data -
bin/*.dat -
bin/*.bin -
bin/*.info -
</md5>

<!--需要进行安装时替换内容的文件列表,请使用相对安装目录的路径-->
<substitute>
init.xml
conf/*
etc/*
</substitute>

<!--数据清理配置-->
<clear_file>
#目录              #阀值        #命令  #参数 #目标
log                85%:2000M    tar    10    *.log
admin/data/backup  90%:50M      tar    10    *
admin/data/tmp     90%:10M      delete 1     *

#----说明-----
#目录:需要监控的目录,使用相对安装目录路径
#阀值:触发清理操作的条件[分区使用百分比:目录最大空间<M|m>]
#命令:delete(删除指定时间前文件),tar(压缩指定时间前文件),clear(清空超过指定大小文件)
#参数:delete,tar(默认天数,后缀h为小时,m为分钟),clear(文件大小k)
#目标:可以清理的文件,接受通配符

#----示例-----
#目录     #阀值    #命令   #参数  #目标
#log      80%:10M  delete  30     stat*.log
#data     90%:10M  tar     30     */*.dat
#log      90%:10M  clear   50000  debug/err*.log
</clear_file>


<install_on_complete>
# Hand the configuration files to user_00 (group users) after installation.
# Uses the POSIX "owner:group" separator; the "owner.group" form is a
# deprecated GNU extension. The glob stays outside the quotes so it expands.
chown user_00:users "$install_path"/conf/*
</install_on_complete>




分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics