Redis数据持久化机制

一、Redis 数据持久化机制

1. 三种持久化方式

# Redis 数据持久化机制
RDB (Redis Database):
  - 特性: 全量快照,二进制格式
  - 命令: SAVE(阻塞), BGSAVE(后台)
  - 配置: save <seconds> <changes>
  - 文件: dump.rdb
  - 优点: 文件小,恢复快
  - 缺点: 可能丢失最后一次保存后的数据

AOF (Append Only File):
  - 特性: 记录所有写命令
  - 策略: 
    appendfsync always    # 每个命令同步
    appendfsync everysec  # 每秒同步(默认)
    appendfsync no        # 操作系统决定
  - 文件: appendonly.aof(Redis 7.0+ 改为 appendonlydir/ 目录下的多个文件)
  - 优点: 数据完整性高
  - 缺点: 文件大,恢复慢

混合模式 (RDB + AOF):
  - Redis 4.0+ 支持
  - 结合两者优点
  - AOF重写后,文件开头是RDB格式的全量数据,其后追加增量写命令(由 aof-use-rdb-preamble yes 启用)

2. 配置检查

# Show the persistence-related settings of the running instance,
# one CONFIG GET per setting name.
for setting in save appendonly dir dbfilename appendfilename; do
  redis-cli CONFIG GET "$setting"
done

# Print the runtime persistence status section.
redis-cli INFO persistence

二、RDB 文件恢复

1. 自动恢复

# 1. Make sure the RDB file is where Redis expects it.
ls -la /var/lib/redis/dump.rdb
# Expected location: the `dir` setting + the `dbfilename` setting.

# 2. Verify the file's integrity.
redis-check-rdb /var/lib/redis/dump.rdb

# 3. Start Redis; dump.rdb is loaded at startup when AOF is disabled
#    (with `appendonly yes` the AOF is loaded instead).
sudo systemctl start redis-server
# or
redis-server /path/to/redis.conf

# 4. Verify the data.
redis-cli INFO keyspace
redis-cli DBSIZE
# Sample keys with the incremental SCAN iterator instead of KEYS "*":
# KEYS walks the whole keyspace in a single blocking call and can stall a
# freshly restored production instance.
redis-cli --scan --pattern "*" | head -20

2. 从备份恢复

# Scenario: restore the dataset from an RDB backup file.
#
# When `appendonly yes` is configured Redis loads the AOF at startup and
# ignores dump.rdb, so AOF has to be off for the first start after the restore.

# 0. Record whether AOF is enabled (must be read while the server is still up).
aof_enabled=$(redis-cli CONFIG GET appendonly | tail -n 1)

# 1. Stop Redis.
sudo systemctl stop redis-server
# For an instance that is not managed by systemd, use instead:
#   redis-cli SHUTDOWN NOSAVE

# 2. Keep a copy of the current data, if there is any.
if sudo test -f /var/lib/redis/dump.rdb; then
  sudo cp -p /var/lib/redis/dump.rdb "/var/lib/redis/dump.rdb.backup.$(date +%Y%m%d)"
fi

# 3. With AOF enabled: move the old AOF aside and disable AOF for the next start.
if [ "$aof_enabled" = "yes" ]; then
  sudo mv /var/lib/redis/appendonly.aof "/var/lib/redis/appendonly.aof.old.$(date +%Y%m%d)"
  sudo sed -i -E 's/^appendonly[[:space:]]+yes/appendonly no/' /etc/redis/redis.conf
fi

# 4. Validate the backup, then copy it into the data directory.
redis-check-rdb /backup/dump.rdb.20250101
sudo cp /backup/dump.rdb.20250101 /var/lib/redis/dump.rdb

# 5. Set ownership and permissions.
sudo chown redis:redis /var/lib/redis/dump.rdb
sudo chmod 644 /var/lib/redis/dump.rdb

# 6. Start Redis.
sudo systemctl start redis-server

# 7. Re-enable AOF at runtime (this builds a new AOF from the restored
#    dataset) and put the setting back into the config file.
if [ "$aof_enabled" = "yes" ]; then
  redis-cli CONFIG SET appendonly yes
  sudo sed -i -E 's/^appendonly[[:space:]]+no/appendonly yes/' /etc/redis/redis.conf
fi

# 8. Verify the restore. (`redis-cli --stat` streams until interrupted, so
#    one-shot commands are used here.)
redis-cli INFO keyspace
redis-cli DBSIZE

3. 恢复特定时间点的RDB

#!/bin/bash
# restore_rdb.sh - restore the RDB backup taken on a given date.
#
# Usage: restore_rdb.sh [YYYYMMDD]
#   The optional argument selects the backup date (default: 20250101).
#
# The backup is located and validated BEFORE Redis is stopped, so a missing
# or corrupt backup never leaves the service down.
set -euo pipefail

BACKUP_DIR="/backup/redis"
DATA_DIR="/var/lib/redis"
RESTORE_DATE="${1:-20250101}"

echo "查找备份文件..."
# Choose the most recently modified match so the result is deterministic
# when several backups exist for the same date.
BACKUP_FILE=$(
    find "$BACKUP_DIR" -type f -name "*dump.rdb.$RESTORE_DATE*" -printf '%T@ %p\n' \
        | sort -rn \
        | sed -n '1s/^[^ ]* //p'
) || BACKUP_FILE=""

if [ -z "$BACKUP_FILE" ]; then
    echo "错误: 找不到$RESTORE_DATE的备份文件" >&2
    exit 1
fi

echo "检查备份文件完整性..."
if ! redis-check-rdb "$BACKUP_FILE" > /dev/null; then
    echo "错误: 备份文件已损坏: $BACKUP_FILE" >&2
    exit 1
fi

# With AOF enabled Redis loads the AOF at startup and ignores dump.rdb.
if [ "$(redis-cli CONFIG GET appendonly 2>/dev/null | tail -n 1)" = "yes" ]; then
    echo "警告: 当前已启用AOF, Redis启动时会加载AOF而不是dump.rdb" >&2
fi

echo "停止Redis服务..."
sudo systemctl stop redis-server

echo "备份当前数据..."
if sudo test -f "$DATA_DIR/dump.rdb"; then
    sudo cp -p "$DATA_DIR/dump.rdb" "$DATA_DIR/dump.rdb.bak.$(date +%Y%m%d_%H%M%S)"
fi

echo "恢复备份: $BACKUP_FILE"
sudo cp "$BACKUP_FILE" "$DATA_DIR/dump.rdb"

echo "设置权限..."
sudo chown redis:redis "$DATA_DIR/dump.rdb"
sudo chmod 644 "$DATA_DIR/dump.rdb"

echo "启动Redis服务..."
sudo systemctl start redis-server

echo "验证恢复..."
# A large RDB takes a while to load; wait until the server answers PING
# (it replies with a LOADING error until then) instead of a fixed sleep.
for _ in {1..30}; do
    if [ "$(redis-cli PING 2>/dev/null)" = "PONG" ]; then
        break
    fi
    sleep 1
done
redis-cli INFO keyspace
echo "恢复完成!"

三、AOF 文件恢复

1. AOF 文件修复

# 1. Stop Redis so nothing appends to the file while it is checked or repaired.
sudo systemctl stop redis-server

# 2. Read-only integrity check (no --fix): reports whether the AOF is valid.
sudo redis-check-aof /var/lib/redis/appendonly.aof

# 3. Keep a copy, then repair.
#    --fix modifies the file IN PLACE: it truncates it at the last complete
#    command, discarding everything after the first error, and asks for
#    confirmation before doing so.
sudo cp -p /var/lib/redis/appendonly.aof \
  "/var/lib/redis/appendonly.aof.broken.$(date +%Y%m%d_%H%M%S)"
if ! sudo redis-check-aof --fix /var/lib/redis/appendonly.aof; then
  # 4. Repair failed: fall back to the AOF from backup.
  sudo cp /backup/appendonly.aof.20250101 /var/lib/redis/appendonly.aof
  sudo chown redis:redis /var/lib/redis/appendonly.aof
fi

# 5. Start Redis again; the commands below need a running server.
sudo systemctl start redis-server

# 6. If the AOF is very large, rewrite it to compact it.
redis-cli BGREWRITEAOF
# or let Redis rewrite automatically once the file doubles past 64mb
redis-cli CONFIG SET auto-aof-rewrite-percentage 100
redis-cli CONFIG SET auto-aof-rewrite-min-size 64mb

2. AOF 恢复流程

#!/bin/bash
# restore_aof.sh - restore the newest AOF backup, validate it on a temporary
# Redis instance, rewrite it into the hybrid (RDB preamble) format, then hand
# the data directory back to the systemd-managed service.
#
# The temporary instance listens on the default port, so it is shut down
# before the service is started again.
set -euo pipefail

AOF_DIR="/var/lib/redis"
BACKUP_DIR="/backup/redis"
AOF_FILE="$AOF_DIR/appendonly.aof"

# Unpredictable temp config name; removed on every exit path.
RESTORE_CONF=$(mktemp)
trap 'rm -f -- "$RESTORE_CONF"' EXIT
chmod 644 "$RESTORE_CONF"   # must be readable by the redis user

echo "1. 停止Redis服务..."
sudo systemctl stop redis-server

echo "2. 备份当前AOF文件..."
if sudo test -f "$AOF_FILE"; then
    sudo cp -p "$AOF_FILE" "$AOF_FILE.bak.$(date +%Y%m%d_%H%M%S)"
fi

echo "3. 从备份恢复最新AOF..."
# Pick the newest backup by modification time without parsing `ls` output.
LATEST_AOF=""
for candidate in "$BACKUP_DIR"/appendonly.aof.*; do
    [ -f "$candidate" ] || continue
    if [ -z "$LATEST_AOF" ] || [ "$candidate" -nt "$LATEST_AOF" ]; then
        LATEST_AOF=$candidate
    fi
done
if [ -n "$LATEST_AOF" ]; then
    sudo cp "$LATEST_AOF" "$AOF_FILE"
fi
if ! sudo test -f "$AOF_FILE"; then
    echo "错误: 没有可恢复的AOF文件: $AOF_FILE" >&2
    exit 1
fi

echo "4. 修复AOF文件(如果需要)..."
# --fix asks "Continue? [y/N]" on stdin before truncating; answer it so the
# script cannot hang. A valid file is left untouched.
printf 'y\n' | sudo redis-check-aof --fix "$AOF_FILE"

echo "5. 设置权限..."
sudo chown redis:redis "$AOF_FILE"
sudo chmod 644 "$AOF_FILE"

echo "6. 修改配置为只使用AOF..."
cat > "$RESTORE_CONF" << EOF
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
dir $AOF_DIR
EOF

echo "7. 以恢复模式启动..."
# Run as the redis user so files written during the restore keep the
# ownership the service expects.
sudo -u redis redis-server "$RESTORE_CONF" --daemonize yes
# Wait until loading has finished instead of sleeping a fixed time.
for _ in {1..60}; do
    if [ "$(redis-cli PING 2>/dev/null)" = "PONG" ]; then
        break
    fi
    sleep 1
done

echo "8. 验证数据..."
redis-cli INFO persistence
redis-cli DBSIZE

echo "9. 转换为RDB+AOF混合模式..."
# The hybrid format is produced by an AOF rewrite (BGSAVE only writes
# dump.rdb), so trigger one and wait for it to finish.
redis-cli CONFIG SET aof-use-rdb-preamble yes
redis-cli BGREWRITEAOF
while info=$(redis-cli INFO persistence) \
        && [[ "$info" =~ aof_rewrite_(in_progress|scheduled):1 ]]; do
    sleep 1
done

echo "10. 重启为正常模式..."
# Stop the temporary instance first; otherwise the service cannot bind the port.
redis-cli SHUTDOWN NOSAVE || true
while redis-cli PING > /dev/null 2>&1; do
    sleep 1
done
sudo systemctl start redis-server

四、混合模式恢复 (RDB+AOF)

1. 恢复流程

# 1. List the data directory to confirm the persistence files are present.
ls -la /var/lib/redis/
# Expected: dump.rdb and appendonly.aof

# 2. Write the configuration file used for the restore.
cat > /etc/redis/redis-restore.conf << 'EOF'
# 基本配置
port 6379
bind 127.0.0.1
dir /var/lib/redis

# 启用AOF
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec

# 启用RDB
save 900 1
save 300 10
save 60 10000
dbfilename dump.rdb

# 混合模式
aof-use-rdb-preamble yes
aof-load-truncated yes
EOF

# 3. Start Redis with that configuration.
redis-server /etc/redis/redis-restore.conf

# What Redis does at startup with this configuration:
# 1. Because appendonly is yes, the AOF is the file that is loaded; dump.rdb
#    is NOT loaded first (it is only used when AOF is disabled).
# 2. With aof-use-rdb-preamble=yes the AOF may begin with an RDB-format
#    section; that section is loaded, then the commands appended after it
#    are replayed.
# 3. aof-load-truncated=yes lets Redis start even if the AOF ends in an
#    incomplete command.

2. 强制从AOF恢复

# Force recovery from the AOF when the RDB file is corrupted.
#
# Do not use `redis-cli CONFIG SET appendonly yes` for this: on a running
# server it builds a NEW AOF from the in-memory dataset (replacing the file
# to be recovered), and CONFIG SET values are lost on restart unless
# CONFIG REWRITE is run. The setting has to be in the config file.

# 1. Stop Redis.
sudo systemctl stop redis-server

# 2. Rename (or remove) the corrupted RDB file.
sudo mv /var/lib/redis/dump.rdb /var/lib/redis/dump.rdb.bak

# 3. Make sure the config file enables AOF, and show the resulting line.
sudo sed -i -E 's/^appendonly[[:space:]]+no/appendonly yes/' /etc/redis/redis.conf
grep -E '^appendonly[[:space:]]' /etc/redis/redis.conf

# 4. Start Redis; with appendonly yes it loads the AOF.
sudo systemctl start redis-server

# 5. Suspend RDB snapshots at runtime while the recovered data is verified.
redis-cli CONFIG SET save ""

五、从备份文件恢复

1. 定期备份策略

#!/bin/bash
# backup_redis.sh - scheduled backup of the RDB snapshot, the AOF and the
# Redis configuration file, with age-based cleanup of old backups.
set -euo pipefail

BACKUP_DIR="/backup/redis"
DATA_DIR="/var/lib/redis"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=30
BGSAVE_TIMEOUT=600   # seconds to wait for the snapshot before giving up

# Create the backup directory.
mkdir -p "$BACKUP_DIR"

echo "[$(date)] 开始Redis备份..."

# 1. RDB backup.
echo "创建RDB快照..."
# BGSAVE forks and keeps serving clients; SAVE would block the server for
# the whole duration of the dump.
redis-cli BGSAVE
waited=0
while info=$(redis-cli INFO persistence) \
        && [[ "$info" == *"rdb_bgsave_in_progress:1"* ]]; do
    if (( waited >= BGSAVE_TIMEOUT )); then
        echo "错误: BGSAVE超时" >&2
        exit 1
    fi
    sleep 1
    waited=$((waited + 1))
done
# Never copy a snapshot whose save failed (or whose status is unknown).
if [[ "$info" != *"rdb_last_bgsave_status:ok"* ]]; then
    echo "错误: BGSAVE失败" >&2
    exit 1
fi

if [ -f "$DATA_DIR/dump.rdb" ]; then
    cp "$DATA_DIR/dump.rdb" "$BACKUP_DIR/dump.rdb.$DATE"
    echo "RDB备份完成: $BACKUP_DIR/dump.rdb.$DATE"
fi

# 2. AOF backup (single-file layout only).
if [ -f "$DATA_DIR/appendonly.aof" ]; then
    cp "$DATA_DIR/appendonly.aof" "$BACKUP_DIR/appendonly.aof.$DATE"
    echo "AOF备份完成: $BACKUP_DIR/appendonly.aof.$DATE"
fi

# 3. Configuration backup.
if [ -f "/etc/redis/redis.conf" ]; then
    cp "/etc/redis/redis.conf" "$BACKUP_DIR/redis.conf.$DATE"
fi

# 4. Remove backups older than the retention period.
find "$BACKUP_DIR" -type f \
    \( -name "dump.rdb.*" -o -name "appendonly.aof.*" -o -name "redis.conf.*" \) \
    -mtime +"$RETENTION_DAYS" -delete

echo "[$(date)] 备份完成"
echo "磁盘使用:"
du -sh "$BACKUP_DIR"

2. 恢复最新备份

#!/bin/bash
# restore_latest.sh - restore the most recent RDB and AOF backups.
#
# Backups are looked up before Redis is stopped, so an empty backup directory
# does not cause an outage. Which file Redis loads at startup depends on the
# `appendonly` setting: the AOF when it is yes, dump.rdb otherwise.
set -euo pipefail

BACKUP_DIR="/backup/redis"
DATA_DIR="/var/lib/redis"

#######################################
# Print the most recently modified regular file "$BACKUP_DIR/<prefix>.*".
# Arguments: $1 - file name prefix, e.g. dump.rdb
# Outputs:   the path, or an empty line when nothing matches
#######################################
newest_backup() {
    local prefix=$1 candidate newest=""
    for candidate in "$BACKUP_DIR/$prefix".*; do
        [ -f "$candidate" ] || continue
        if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
            newest=$candidate
        fi
    done
    printf '%s\n' "$newest"
}

LATEST_RDB=$(newest_backup dump.rdb)
LATEST_AOF=$(newest_backup appendonly.aof)
if [ -z "$LATEST_RDB" ] && [ -z "$LATEST_AOF" ]; then
    echo "错误: $BACKUP_DIR 中没有可用的备份" >&2
    exit 1
fi

# Stop Redis.
sudo systemctl stop redis-server

# Restore the newest RDB and give it to the redis user.
if [ -n "$LATEST_RDB" ]; then
    echo "恢复RDB: $LATEST_RDB"
    sudo cp "$LATEST_RDB" "$DATA_DIR/dump.rdb"
    sudo chown redis:redis "$DATA_DIR/dump.rdb"
fi

# Restore the newest AOF and give it to the redis user.
if [ -n "$LATEST_AOF" ]; then
    echo "恢复AOF: $LATEST_AOF"
    sudo cp "$LATEST_AOF" "$DATA_DIR/appendonly.aof"
    sudo chown redis:redis "$DATA_DIR/appendonly.aof"
fi

# Start Redis.
sudo systemctl start redis-server

echo "恢复完成,验证数据..."
sleep 2
redis-cli INFO keyspace

六、灾难恢复场景

场景1:服务器完全损坏

# Recovery procedure on a brand-new server.
# Replace the placeholders (新服务器IP, bucket / backup host, backup date)
# and export REDIS_PASSWORD before running.
NEW_SERVER_IP="新服务器IP"
: "${REDIS_PASSWORD:?set REDIS_PASSWORD to a strong secret}"

# 1. Install Redis.
sudo apt update
sudo apt install redis-server redis-tools

# 2. Stop the freshly installed (empty) instance.
sudo systemctl stop redis-server

# 3. Fetch the backup into a private staging directory.
#    Backups are assumed to live on S3/OSS/NFS.
STAGING=$(mktemp -d)
aws s3 cp s3://my-bucket/redis-backup/dump.rdb.latest "$STAGING/dump.rdb"
aws s3 cp s3://my-bucket/redis-backup/appendonly.aof.latest "$STAGING/appendonly.aof"

# Or copy from a remote server with SCP. Backups carry a date suffix, so copy
# the chosen one onto the file name Redis actually loads:
#   scp user@backup-server:/backup/redis/dump.rdb.20250101 "$STAGING/dump.rdb"
#   scp user@backup-server:/backup/redis/appendonly.aof.20250101 "$STAGING/appendonly.aof"

# 4. Check integrity before anything reaches the data directory.
redis-check-rdb "$STAGING/dump.rdb"
redis-check-aof "$STAGING/appendonly.aof"

# 5. Install only these two files with the right owner and mode. (A blanket
#    chmod 644 on /var/lib/redis/* would also strip the search bit from any
#    sub-directory.)
sudo install -o redis -g redis -m 644 "$STAGING/dump.rdb" /var/lib/redis/dump.rdb
sudo install -o redis -g redis -m 644 "$STAGING/appendonly.aof" /var/lib/redis/appendonly.aof
rm -rf -- "${STAGING:?}"

# 6. Adjust the configuration.
#    Listen on the server address in addition to loopback and require a
#    password. Protected mode stays on: removing `bind` and disabling it
#    would expose an unauthenticated Redis to the network.
sudo sed -i -E "s/^bind 127\.0\.0\.1.*/bind 127.0.0.1 ${NEW_SERVER_IP}/" /etc/redis/redis.conf
printf 'requirepass %s\n' "$REDIS_PASSWORD" | sudo tee -a /etc/redis/redis.conf > /dev/null

# 7. Start Redis and enable it at boot.
sudo systemctl start redis-server
sudo systemctl enable redis-server

# 8. Verify. The password goes through REDISCLI_AUTH so it does not show up
#    in the process list.
REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli -h "$NEW_SERVER_IP" INFO replication
REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli -h "$NEW_SERVER_IP" DBSIZE

场景2:误删除数据

# Scenario: keys were deleted by mistake and AOF is enabled.
# This only works while the AOF still contains the history, i.e. no AOF
# rewrite has happened since the mistake. If the file starts with a binary
# RDB preamble, only the plain-text commands after it can be edited.

# 1. Preserve the log, then stop the server.
#    An automatic rewrite would compact the AOF and drop the history, and a
#    snapshot would overwrite dump.rdb with the post-deletion state.
redis-cli CONFIG SET auto-aof-rewrite-percentage 0
redis-cli CONFIG SET save ""
sudo systemctl stop redis-server

# 2. Back up the AOF before touching it.
sudo cp -p /var/lib/redis/appendonly.aof /var/lib/redis/appendonly.aof.before-edit

# 3. Locate the offending command.
#    The AOF stores each command in the Redis protocol, one element per line:
#      *<argc>, then for every argument $<length> followed by the value.
#    The pattern matches only lines that consist of the command name, so key
#    or value contents are not reported; -B2 also shows the "*<argc>" line
#    where the command starts.
sudo grep -naiE -B2 '^(DEL|UNLINK|FLUSHALL|FLUSHDB)[[:space:]]*$' /var/lib/redis/appendonly.aof

# 4. Delete the WHOLE command, from its "*<argc>" line to its last argument.
#    Examples: FLUSHALL is 3 lines (*1, $8, FLUSHALL); "DEL mykey" is 5 lines
#    (*2, $3, DEL, $5, mykey). Removing only part of it corrupts the file.
#    For a FLUSHALL reported at line 1236:
sudo env LC_ALL=C sed -i '1234,1236d' /var/lib/redis/appendonly.aof
#    (or edit interactively: sudo vim /var/lib/redis/appendonly.aof)

# 5. Validate the edited file. Do not use --fix here: it would silently
#    truncate everything after a bad edit. On failure, restore the
#    .before-edit copy and retry.
sudo redis-check-aof /var/lib/redis/appendonly.aof

# 6. Start Redis; it replays the edited AOF.
sudo systemctl start redis-server

# 7. Check that the data is back.
redis-cli DBSIZE
redis-cli INFO keyspace

场景3:RDB文件损坏

# 1. Assess the damage.
redis-check-rdb /var/lib/redis/dump.rdb

# 2. Try to recover from the AOF, if one exists and is recent enough.
sudo systemctl stop redis-server
sudo mv /var/lib/redis/dump.rdb /var/lib/redis/dump.rdb.corrupted
sudo systemctl start redis-server

# 3. If the RDB is all there is, try to salvage what is still readable with
#    redis-rdb-tools. Point the commands below at the damaged file (after
#    step 2 it is dump.rdb.corrupted).
pip install rdbtools python-lzf

# Optional: a JSON dump for inspection. The output is a list with one object
# per database mapping key -> value; it carries no per-key "type" field, so
# it is not suitable as input for a type-dispatching restore script.
rdb --command json /var/lib/redis/dump.rdb > /tmp/dump.json

# 4. Export the readable keys as Redis protocol. All data types and expiry
#    times are emitted as ordinary write commands.
rdb --command protocol /var/lib/redis/dump.rdb > /tmp/dump.resp

# 5. Replay the commands into a running, empty Redis using mass insertion.
redis-cli --pipe < /tmp/dump.resp

# 6. Verify what was recovered.
redis-cli INFO keyspace

七、主从复制恢复

1. 从节点提升为主节点

# The master is down: promote a replica to master.
# Run on the replica being promoted.
redis-cli SLAVEOF NO ONE

# Persist the promotion without restarting. CONFIG REWRITE writes the
# running configuration back to the config file the server was started with.
# Restarting a freshly promoted master is unnecessary: it causes an outage
# and reloads the dataset from disk, losing anything not yet persisted.
redis-cli CONFIG REWRITE

# Fallback for the config file: comment out any remaining replication
# directive (both the old and the new spelling).
sudo sed -i 's/^slaveof.*/# slaveof/' /etc/redis/redis.conf
sudo sed -i 's/^replicaof.*/# replicaof/' /etc/redis/redis.conf

# Point the remaining replicas at the new master.
redis-cli -h 其他从节点 SLAVEOF 新主节点IP 6379

2. 重建主从

# After the original master is back, add it to the new master as a replica.
# 1. Trigger a background snapshot of the current data on the new master.
redis-cli -h 新主节点 BGSAVE

# 2. Check whether the background save is still in progress.
redis-cli -h 新主节点 INFO persistence | grep rdb_bgsave_in_progress

# 3. Make the original master a replica of the new master
#    (no -h given, so this targets the local instance).
redis-cli SLAVEOF 新主节点IP 6379

# 4. Check the replication state.
redis-cli INFO replication

3. 增量恢复(部分同步)

# If the replica is only slightly behind, a partial resynchronization can be used.
# 1. Show the replication offsets.
redis-cli INFO replication | grep -E "(master_repl_offset|slave_repl_offset)"

# 2. If the offset gap is within the replication backlog,
#    a partial resynchronization happens automatically.

# 3. If the gap is too large, a full synchronization is needed:
#    detach the instance, then attach it to the master again.
redis-cli SLAVEOF NO ONE
redis-cli SLAVEOF 主节点IP 6379

八、集群模式恢复

1. Redis Cluster 节点恢复

# Check the cluster state.
redis-cli --cluster check 节点IP:端口

# Recovering a failed node
# 1. Restart the failed node.
sudo systemctl restart redis@节点端口

# 2. Add the node to the cluster again (first argument: the node to add,
#    second argument: any node already in the cluster).
# NOTE(review): presumably only needed when the node lost its cluster state;
# confirm before running against a node that rejoined on its own.
redis-cli --cluster add-node 节点IP:端口 集群中任意节点IP:端口

# 3. Repair and rebalance the slot assignment.
redis-cli --cluster fix 集群中任意节点IP:端口
redis-cli --cluster rebalance 集群中任意节点IP:端口

# 4. Verify.
redis-cli --cluster info 集群中任意节点IP:端口

2. 集群备份恢复

#!/bin/bash
# Back up every node of a Redis Cluster: take a snapshot on each node and
# copy its RDB (and AOF, when present) into a timestamped directory.
# Exits non-zero when any node could not be backed up.
set -uo pipefail

CLUSTER_NODES="node1:7000 node2:7001 node3:7002"
BACKUP_DIR="/backup/redis-cluster"
DATE=$(date +%Y%m%d_%H%M%S)

mkdir -p "$BACKUP_DIR/$DATE"

read -r -a nodes <<< "$CLUSTER_NODES"
failed=0
for node in "${nodes[@]}"; do
    ip=${node%%:*}
    port=${node##*:}

    echo "备份节点 $node..."

    # Snapshot in the background: a blocking SAVE makes the node unresponsive
    # for the duration of the dump.
    redis-cli -h "$ip" -p "$port" BGSAVE > /dev/null
    while info=$(redis-cli -h "$ip" -p "$port" INFO persistence) \
            && [[ "$info" == *"rdb_bgsave_in_progress:1"* ]]; do
        sleep 1
    done
    if [[ "$info" != *"rdb_last_bgsave_status:ok"* ]]; then
        echo "错误: 节点 $node 快照失败" >&2
        failed=1
        continue
    fi

    # Copy the RDB file from the node's data directory.
    NODE_DIR=$(redis-cli -h "$ip" -p "$port" CONFIG GET dir | tail -n 1)
    if ! scp "$ip:$NODE_DIR/dump.rdb" "$BACKUP_DIR/$DATE/dump.$ip.$port.rdb"; then
        echo "错误: 节点 $node 的RDB复制失败" >&2
        failed=1
    fi

    # Copy the AOF file if the node has one; its absence is not an error.
    scp "$ip:$NODE_DIR/appendonly.aof" \
        "$BACKUP_DIR/$DATE/appendonly.$ip.$port.aof" 2>/dev/null || true
done

if (( failed )); then
    echo "备份未全部成功: $BACKUP_DIR/$DATE" >&2
    exit 1
fi
echo "备份完成: $BACKUP_DIR/$DATE"

九、监控与验证

1. 恢复后验证脚本

#!/bin/bash
# verify_restore.sh - print a post-restore verification report: server info,
# key counts, memory, persistence status, a random key sample per database
# and a small PING latency test.
REDIS_HOST="localhost"
REDIS_PORT="6379"
SAMPLE_SIZE=5   # random keys shown per non-empty database

# Run redis-cli against the instance under test.
rcli() {
    redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" "$@"
}

echo "=== Redis恢复验证报告 ==="
echo "生成时间: $(date)"
echo ""

# 1. Basic information.
echo "1. Redis基本信息:"
rcli INFO server | grep -E "(redis_version|uptime_in_days)"
echo ""

# 2. Key statistics.
echo "2. 数据统计:"
rcli INFO keyspace
echo ""

# 3. Memory usage.
echo "3. 内存使用:"
rcli INFO memory | grep -E "(used_memory_human|used_memory_peak_human|mem_fragmentation_ratio)"
echo ""

# 4. Persistence status.
echo "4. 持久化状态:"
rcli INFO persistence | grep -E "(rdb_last_save_time|aof_enabled|aof_rewrite_in_progress|loading)"
echo ""

# 5. Random sample.
echo "5. 随机采样键值:"
# Use the configured number of databases; fall back to 16 if it cannot be read.
db_total=$(rcli CONFIG GET databases | tail -n 1)
[[ "$db_total" =~ ^[0-9]+$ ]] || db_total=16
for (( db = 0; db < db_total; db++ )); do
    count=$(rcli -n "$db" DBSIZE)
    # Skip the database when the reply is not a number (e.g. connection error).
    [[ "$count" =~ ^[0-9]+$ ]] || continue
    if (( count > 0 )); then
        echo "  DB$db: $count 个键"
        # RANDOMKEY returns a single key per call, so call it once per sample.
        for (( i = 0; i < SAMPLE_SIZE && i < count; i++ )); do
            key=$(rcli -n "$db" RANDOMKEY)
            [ -n "$key" ] || continue
            key_type=$(rcli -n "$db" TYPE "$key")
            echo "    - $key ($key_type)"
        done
    fi
done
echo ""

# 6. Performance test.
echo "6. 简单性能测试:"
start=$(date +%s%N)
for i in {1..100}; do
    rcli PING > /dev/null
done
end=$(date +%s%N)
echo "  PING 100次耗时: $(( (end-start)/1000000 ))ms"

2. 监控关键指标

# Live monitoring: re-run the quoted report every 5 seconds. It prints
# selected fields from INFO stats, INFO memory and INFO persistence, followed
# by the full INFO keyspace section.
watch -n 5 '
echo "=== Redis状态监控 ==="
echo "时间: $(date)"
echo ""
redis-cli INFO stats | grep -E "(total_connections_received|total_commands_processed|instantaneous_ops_per_sec)"
echo ""
redis-cli INFO memory | grep -E "(used_memory_human|used_memory_peak_human)"
echo ""
redis-cli INFO persistence | grep -E "(rdb_last_bgsave_status|aof_last_bgrewrite_status|loading)"
echo ""
redis-cli INFO keyspace
'

十、最佳实践与预防措施

1. 预防措施

# 1. Schedule regular backups.
crontab -e
# Add this crontab entry (a cron line, not a shell command):
0 2 * * * /opt/scripts/backup_redis.sh

# 2. Monitoring and alerting.
# Watch the following indicators:
# - memory usage > 80%
# - RDB/AOF failures
# - replicas disconnecting
# - sudden changes in the number of keys

# 3. Configuration tuning: write a config file with the settings below.
cat > /etc/redis/redis-safe.conf << 'EOF'
# 数据安全配置
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes

# AOF配置
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
aof-use-rdb-preamble yes

# 内存管理
maxmemory 2gb
maxmemory-policy allkeys-lru

# 持久化目录权限
dir /var/lib/redis
EOF

2. 灾难恢复计划

# Redis灾难恢复计划

## 1. 恢复优先级
P0(紧急): 主节点完全损坏
P1(高): 从节点损坏,数据部分丢失
P2(中): 单点数据损坏
P3(低): 性能恢复

## 2. 恢复时间目标 (RTO)
- P0: < 15分钟
- P1: < 1小时
- P2: < 4小时
- P3: < 24小时

## 3. 恢复点目标 (RPO)
- 重要数据: < 5分钟数据丢失
- 一般数据: < 1小时数据丢失

## 4. 联系人清单
- 运维负责人: 张三 (13800138000)
- 开发负责人: 李四 (13900139000)
- 数据库DBA: 王五 (13700137000)

## 5. 恢复步骤
1. 评估损坏范围
2. 选择恢复策略
3. 执行恢复操作
4. 验证数据完整性
5. 监控运行状态
6. 记录和分析

3. 自动化恢复工具

#!/usr/bin/env python3
# auto_restore.py - 自动化恢复工具
import subprocess
import sys
import time
import json
from pathlib import Path

class RedisRestore:
    """Restore a Redis RDB backup into the data directory of a local service.

    The backup is located before the service is stopped, every step is
    checked, and the result is verified against the configured host/port.
    """

    def __init__(self, redis_host='localhost', redis_port=6379):
        self.host = redis_host
        self.port = redis_port

    def run_cmd(self, cmd, timeout=30):
        """Run an external command and capture its result.

        Args:
            cmd: A shell command line (str, run through the shell) or an
                argument vector (list/tuple, run without a shell).
            timeout: Seconds before the command is abandoned; None disables
                the limit.

        Returns:
            Tuple (returncode, stdout, stderr). A timeout is reported as
            (1, "", "Command timeout"); a command that cannot be started as
            (127, "", <error text>).
        """
        try:
            result = subprocess.run(
                cmd, shell=isinstance(cmd, str), capture_output=True,
                text=True, timeout=timeout
            )
            return result.returncode, result.stdout, result.stderr
        except subprocess.TimeoutExpired:
            return 1, "", "Command timeout"
        except OSError as exc:
            return 127, "", str(exc)

    def _select_backup(self, backup_dir, backup_type):
        """Return the backup file for backup_type, or None if there is none."""
        if backup_type == 'latest':
            candidates = [p for p in backup_dir.glob("dump.rdb.*") if p.is_file()]
            return max(candidates, key=lambda p: p.stat().st_mtime, default=None)
        chosen = backup_dir / f"dump.rdb.{backup_type}"
        return chosen if chosen.is_file() else None

    def restore_from_backup(self, backup_type='latest',
                            backup_dir="/backup/redis",
                            data_dir="/var/lib/redis"):
        """Restore dump.rdb from a backup and restart Redis.

        Args:
            backup_type: 'latest' for the most recently modified
                dump.rdb.* file, otherwise the suffix of a specific backup
                (dump.rdb.<backup_type>).
            backup_dir: Directory holding the backups.
            data_dir: Redis data directory that receives dump.rdb.

        Returns:
            Tuple (success, message).
        """
        print(f"开始从{backup_type}备份恢复...")

        # 1. Locate the backup first: a missing backup must not take the
        #    service down.
        source = self._select_backup(Path(backup_dir), backup_type)
        if source is None:
            return False, "No backup files found"
        target = Path(data_dir) / "dump.rdb"

        # 2. Stop Redis.
        print("停止Redis服务...")
        code, _, err = self.run_cmd("sudo systemctl stop redis-server")
        if code != 0:
            return False, f"恢复失败: {err}"

        # 3. Copy the backup into place and hand it to the redis user.
        #    No timeout: copying a large RDB can take longer than 30 seconds.
        #    On failure Redis is left stopped rather than started on a
        #    half-written file.
        print(f"恢复文件: {source} -> {target}")
        for argv in (["sudo", "cp", str(source), str(target)],
                     ["sudo", "chown", "redis:redis", str(target)]):
            code, _, err = self.run_cmd(argv, timeout=None)
            if code != 0:
                return False, f"恢复失败: {err}"

        # 4. Start Redis.
        print("启动Redis服务...")
        code, _, err = self.run_cmd("sudo systemctl start redis-server")
        if code != 0:
            return False, f"恢复失败: {err}"

        # 5. Wait until the dataset is loaded (PING answers PONG), then
        #    verify that at least one database contains keys.
        cli = ["redis-cli", "-h", str(self.host), "-p", str(self.port)]
        for _ in range(30):
            code, out, err = self.run_cmd(cli + ["PING"])
            if code == 0 and "PONG" in out:
                break
            time.sleep(1)
        code, out, err = self.run_cmd(cli + ["INFO", "keyspace"])

        if code == 0 and any(line.startswith("db") for line in out.splitlines()):
            return True, "恢复成功"
        return False, f"恢复失败: {err}"

if __name__ == "__main__":
    # The first command-line argument selects the backup; without one the
    # newest backup is restored.
    backup = sys.argv[1] if len(sys.argv) > 1 else 'latest'

    restorer = RedisRestore()
    success, message = restorer.restore_from_backup(backup)
    print(f"结果: {success}, 消息: {message}")

    # Exit status mirrors the outcome: 0 on success, 1 on failure.
    sys.exit(0 if success else 1)

关键要点总结

  1. 定期测试恢复流程 – 至少每季度测试一次
  2. 多重备份 – 本地+异地+云存储
  3. 监控告警 – 实时监控持久化状态
  4. 文档化 – 详细的恢复操作手册
  5. 权限控制 – 限制危险命令(FLUSHALL, CONFIG)
  6. 版本管理 – 备份文件与Redis版本匹配

已发布

分类

来自

标签: