一、Redis 数据持久化机制
1. 三种持久化方式
# Redis 数据持久化机制
RDB (Redis Database):
- 特性: 全量快照,二进制格式
- 命令: SAVE(阻塞), BGSAVE(后台)
- 配置: save <seconds> <changes>
- 文件: dump.rdb
- 优点: 文件小,恢复快
- 缺点: 可能丢失最后一次保存后的数据
AOF (Append Only File):
- 特性: 记录所有写命令
- 策略:
appendfsync always # 每个命令同步
appendfsync everysec # 每秒同步(默认)
appendfsync no # 操作系统决定
- 文件: appendonly.aof
- 优点: 数据完整性高
- 缺点: 文件大,恢复慢
混合模式 (RDB + AOF):
- Redis 4.0+ 支持(配置项 aof-use-rdb-preamble yes,需同时开启 appendonly yes)
- 结合两者优点: 恢复速度接近RDB,数据完整性接近AOF
- AOF重写后,文件开头是RDB格式的全量快照,其后追加的是重写之后的增量写命令
2. 配置检查
# Print every persistence-related setting of the running instance.
for setting in save appendonly dir dbfilename appendfilename; do
  redis-cli CONFIG GET "$setting"
done
# Show the persistence section of INFO.
redis-cli INFO persistence
二、RDB 文件恢复
1. 自动恢复
# 1. Make sure the RDB file is where Redis expects it.
ls -la /var/lib/redis/dump.rdb
# Expected location: the directory from the `dir` setting + the file name from `dbfilename`.
# 2. Verify the file's integrity.
redis-check-rdb /var/lib/redis/dump.rdb
# 3. Start Redis (the RDB is loaded at startup; if `appendonly yes` is set,
#    Redis loads the AOF instead and ignores dump.rdb).
sudo systemctl start redis-server
# Or:
redis-server /path/to/redis.conf
# 4. Verify the data.
redis-cli INFO keyspace
redis-cli DBSIZE
# Sample keys with SCAN: KEYS "*" walks the entire keyspace in one blocking
# call and can stall a production instance.
redis-cli --scan --count 100 | head -20
2. 从备份恢复
# Scenario: restore from a backup file.
# NOTE: with `appendonly yes` Redis loads the AOF at startup and ignores
# dump.rdb; check `redis-cli CONFIG GET appendonly` before relying on this.
# 1. Stop Redis.
sudo systemctl stop redis-server
# Or (NOSAVE: do not write a shutdown snapshot over the data directory):
redis-cli SHUTDOWN NOSAVE
# 2. Keep a copy of the current data, if there is any. The timestamp goes down
#    to the second so repeated attempts on one day do not overwrite each other.
if sudo test -f /var/lib/redis/dump.rdb; then
  sudo cp -p /var/lib/redis/dump.rdb "/var/lib/redis/dump.rdb.backup.$(date +%Y%m%d_%H%M%S)"
fi
# 3. Copy the backup into the data directory.
sudo cp /backup/dump.rdb.20250101 /var/lib/redis/dump.rdb
# 4. Set ownership and permissions.
sudo chown redis:redis /var/lib/redis/dump.rdb
sudo chmod 644 /var/lib/redis/dump.rdb
# 5. Start Redis.
sudo systemctl start redis-server
# 6. Verify the restore (--stat prints live statistics until interrupted with Ctrl-C).
redis-cli INFO keyspace
redis-cli --stat
3. 恢复特定时间点的RDB
#!/bin/bash
# restore_rdb.sh - restore the RDB backup taken on a given date.
#
# Usage: restore_rdb.sh [YYYYMMDD]
#   The date defaults to 20250101 when no argument is given.
#
# The backup is located BEFORE Redis is stopped, so a missing backup aborts
# the script without causing any downtime.
set -u

BACKUP_DIR="/backup/redis"
DATA_DIR="/var/lib/redis"
RESTORE_DATE="${1:-20250101}"

echo "查找备份文件..."
# Several backups may match one date; sorting the paths and taking the last
# one picks the newest timestamp when they live in the same directory.
BACKUP_FILE=$(find "$BACKUP_DIR" -type f -name "*dump.rdb.${RESTORE_DATE}*" 2>/dev/null | sort | tail -n 1)
if [ -z "$BACKUP_FILE" ]; then
  echo "错误: 找不到${RESTORE_DATE}的备份文件" >&2
  exit 1
fi

echo "停止Redis服务..."
if ! sudo systemctl stop redis-server; then
  echo "错误: 无法停止Redis服务" >&2
  exit 1
fi

echo "备份当前数据..."
if sudo test -f "$DATA_DIR/dump.rdb"; then
  sudo cp -p "$DATA_DIR/dump.rdb" "$DATA_DIR/dump.rdb.bak.$(date +%Y%m%d_%H%M%S)"
fi

echo "恢复备份: $BACKUP_FILE"
if ! sudo cp "$BACKUP_FILE" "$DATA_DIR/dump.rdb"; then
  echo "错误: 复制备份文件失败" >&2
  exit 1
fi

echo "设置权限..."
sudo chown redis:redis "$DATA_DIR/dump.rdb"
sudo chmod 644 "$DATA_DIR/dump.rdb"

echo "启动Redis服务..."
sudo systemctl start redis-server

echo "验证恢复..."
# While the dataset is loading, PING answers with a LOADING error rather than
# PONG, so poll instead of sleeping for a fixed time.
for ((attempt = 0; attempt < 60; attempt++)); do
  if redis-cli PING 2>/dev/null | grep -q PONG; then
    break
  fi
  sleep 1
done
redis-cli INFO keyspace
echo "恢复完成!"
三、AOF 文件恢复
1. AOF 文件修复
# 1. Check the AOF WITHOUT modifying it (no --fix: report only).
redis-check-aof /var/lib/redis/appendonly.aof
# NOTE(review): Redis 7+ keeps a multi-part AOF under <dir>/appendonlydir/;
# there, pass the *.manifest file to redis-check-aof instead - confirm for your version.
# 2. If corruption is reported, keep a copy before repairing: --fix truncates
#    the file at the first invalid command and discards everything after it.
cp -p /var/lib/redis/appendonly.aof "/var/lib/redis/appendonly.aof.broken.$(date +%Y%m%d_%H%M%S)"
redis-check-aof --fix /var/lib/redis/appendonly.aof
# The repair:
# - truncates to the last complete command
# - removes the damaged part
# - asks for confirmation before writing
# 3. If the repair fails, fall back to a backup.
cp /backup/appendonly.aof.20250101 /var/lib/redis/appendonly.aof
# 4. If the AOF is very large, compact it with a rewrite (Redis must be running).
redis-cli BGREWRITEAOF
# Or tune automatic rewriting:
redis-cli CONFIG SET auto-aof-rewrite-percentage 100
redis-cli CONFIG SET auto-aof-rewrite-min-size 64mb
2. AOF 恢复流程
#!/bin/bash
# restore_aof.sh - restore the newest AOF backup, repair it, load it in a
# temporary Redis instance, rewrite it in RDB-preamble format, then hand the
# data directory back to the regular redis-server service.
#
# The regular service only loads the AOF if its own redis.conf contains
# `appendonly yes`.
set -u

AOF_DIR="/var/lib/redis"
BACKUP_DIR="/backup/redis"

echo "1. 停止Redis服务..."
sudo systemctl stop redis-server

echo "2. 备份当前AOF文件..."
if sudo test -f "$AOF_DIR/appendonly.aof"; then
  sudo cp -p "$AOF_DIR/appendonly.aof" "$AOF_DIR/appendonly.aof.bak.$(date +%Y%m%d_%H%M%S)"
fi

echo "3. 从备份恢复最新AOF..."
# Choose the most recently modified backup with a glob and -nt rather than
# parsing `ls` output (which breaks on unusual names and on "no match").
LATEST_AOF=""
for candidate in "$BACKUP_DIR"/appendonly.aof.*; do
  [ -f "$candidate" ] || continue
  if [ -z "$LATEST_AOF" ] || [ "$candidate" -nt "$LATEST_AOF" ]; then
    LATEST_AOF="$candidate"
  fi
done
if [ -n "$LATEST_AOF" ]; then
  sudo cp "$LATEST_AOF" "$AOF_DIR/appendonly.aof"
fi
if ! sudo test -f "$AOF_DIR/appendonly.aof"; then
  echo "错误: 没有可恢复的AOF文件" >&2
  exit 1
fi

echo "4. 修复AOF文件(如果需要)..."
# --fix asks for confirmation before truncating; answer it so the script
# cannot hang waiting on stdin. A healthy file is left untouched.
echo y | sudo redis-check-aof --fix "$AOF_DIR/appendonly.aof"

echo "5. 设置权限..."
sudo chown redis:redis "$AOF_DIR/appendonly.aof"
sudo chmod 644 "$AOF_DIR/appendonly.aof"

echo "6. 修改配置为只使用AOF..."
# Unpredictable temp name; removed again on exit.
RESTORE_CONF=$(mktemp) || exit 1
trap 'rm -f -- "$RESTORE_CONF"' EXIT
cat > "$RESTORE_CONF" << EOF
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
dir $AOF_DIR
EOF
# The temporary instance runs as the redis user, so it must be able to read the file.
chmod 644 "$RESTORE_CONF"

echo "7. 以恢复模式启动..."
# Run as the redis user so files written during the rewrite keep the right owner.
sudo -u redis redis-server "$RESTORE_CONF" --daemonize yes
ready=0
for ((attempt = 0; attempt < 300; attempt++)); do
  # PING answers LOADING (not PONG) until the AOF has been replayed.
  if redis-cli PING 2>/dev/null | grep -q PONG; then
    ready=1
    break
  fi
  sleep 1
done
if [ "$ready" -ne 1 ]; then
  echo "错误: 临时Redis实例未能启动" >&2
  exit 1
fi

echo "8. 验证数据..."
redis-cli INFO persistence
redis-cli DBSIZE

echo "9. 转换为RDB+AOF混合模式..."
redis-cli CONFIG SET aof-use-rdb-preamble yes
# Only an AOF rewrite produces the RDB-preamble format; BGSAVE merely writes dump.rdb.
redis-cli BGREWRITEAOF
sleep 1
while redis-cli INFO persistence | grep -Eq '^aof_rewrite_(in_progress|scheduled):1'; do
  sleep 1
done

echo "10. 重启为正常模式..."
# The temporary instance holds port 6379; it must be gone before the service starts.
redis-cli SHUTDOWN
while redis-cli PING > /dev/null 2>&1; do
  sleep 1
done
sudo systemctl start redis-server
四、混合模式恢复 (RDB+AOF)
1. 恢复流程
# 1. List the data directory to see which persistence files are present.
ls -la /var/lib/redis/
# With the configuration below only appendonly.aof is loaded at startup;
# dump.rdb, if present, is NOT read while `appendonly yes` is set.
# 2. Write a dedicated configuration for the restore (/etc/redis needs root).
sudo tee /etc/redis/redis-restore.conf > /dev/null << 'EOF'
# 基本配置
port 6379
bind 127.0.0.1
dir /var/lib/redis
# 启用AOF
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
# 启用RDB
save 900 1
save 300 10
save 60 10000
dbfilename dump.rdb
# 混合模式
aof-use-rdb-preamble yes
aof-load-truncated yes
EOF
# 3. Start Redis (runs in the foreground unless the config daemonizes it).
redis-server /etc/redis/redis-restore.conf
# What Redis does at startup:
# 1. Because appendonly is yes, it loads the AOF and ignores dump.rdb.
# 2. If the AOF begins with an RDB preamble (written by a rewrite while
#    aof-use-rdb-preamble=yes), that snapshot is loaded first, then the
#    commands appended after it are replayed.
# 3. dump.rdb is loaded only when appendonly is no.
2. 强制从AOF恢复
# Force a restore from the AOF when the RDB is corrupt.
# Do this with Redis STOPPED and through the config file:
#  - `CONFIG SET appendonly yes` on a live instance starts a rewrite that
#    replaces the AOF on disk with the current in-memory dataset;
#  - CONFIG SET changes are not persisted and are lost by the restart.
# 1. Stop Redis.
sudo systemctl stop redis-server
# 2. Move the corrupt RDB out of the way.
sudo mv /var/lib/redis/dump.rdb /var/lib/redis/dump.rdb.bak
# 3. Enable AOF in the configuration file and confirm the line is present.
sudo sed -i 's/^appendonly .*/appendonly yes/' /etc/redis/redis.conf
grep -n '^appendonly ' /etc/redis/redis.conf
# 4. Start Redis; it now loads appendonly.aof.
sudo systemctl start redis-server
# 5. Optional: disable RDB snapshots at runtime, as the original procedure did.
redis-cli CONFIG SET save ""
五、从备份文件恢复
1. 定期备份策略
#!/bin/bash
# backup_redis.sh - snapshot Redis and copy RDB, AOF and config into
# BACKUP_DIR with a timestamp suffix, then prune backups older than
# RETENTION_DAYS days.
set -u

BACKUP_DIR="/backup/redis"
DATA_DIR="/var/lib/redis"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=30

# Create the backup directory.
mkdir -p "$BACKUP_DIR" || exit 1
echo "[$(date)] 开始Redis备份..."

# 1. RDB backup.
echo "创建RDB快照..."
# BGSAVE snapshots in a forked child; SAVE would block every client until the
# dump is written. Wait for the child to finish instead of sleeping blindly.
redis-cli BGSAVE
sleep 1
while redis-cli INFO persistence | grep -q '^rdb_bgsave_in_progress:1'; do
  sleep 1
done
if ! redis-cli INFO persistence | grep -q '^rdb_last_bgsave_status:ok'; then
  echo "错误: RDB快照失败" >&2
  exit 1
fi
if [ -f "$DATA_DIR/dump.rdb" ]; then
  cp "$DATA_DIR/dump.rdb" "$BACKUP_DIR/dump.rdb.$DATE"
  echo "RDB备份完成: $BACKUP_DIR/dump.rdb.$DATE"
fi

# 2. AOF backup.
# NOTE(review): the file is copied while Redis may still append to it, so the
# copy can end in a partial command (aof-load-truncated / redis-check-aof cope with that).
if [ -f "$DATA_DIR/appendonly.aof" ]; then
  cp "$DATA_DIR/appendonly.aof" "$BACKUP_DIR/appendonly.aof.$DATE"
  echo "AOF备份完成: $BACKUP_DIR/appendonly.aof.$DATE"
fi

# 3. Configuration backup.
if [ -f "/etc/redis/redis.conf" ]; then
  cp "/etc/redis/redis.conf" "$BACKUP_DIR/redis.conf.$DATE"
fi

# 4. Remove backups older than the retention window.
find "$BACKUP_DIR" -name "dump.rdb.*" -mtime +"$RETENTION_DAYS" -delete
find "$BACKUP_DIR" -name "appendonly.aof.*" -mtime +"$RETENTION_DAYS" -delete
find "$BACKUP_DIR" -name "redis.conf.*" -mtime +"$RETENTION_DAYS" -delete

echo "[$(date)] 备份完成"
echo "磁盘使用:"
du -sh "$BACKUP_DIR"
2. 恢复最新备份
#!/bin/bash
# restore_latest.sh - restore the most recent RDB and/or AOF backup.
#
# Backups are selected before Redis is stopped; if none exist the script
# exits without touching the running service.
set -u

BACKUP_DIR="/backup/redis"
DATA_DIR="/var/lib/redis"

#######################################
# Print the most recently modified regular file whose path starts with the
# given prefix; print nothing when no file matches.
# Arguments: $1 - path prefix, e.g. /backup/redis/dump.rdb.
#######################################
newest_backup() {
  local prefix=$1
  local candidate newest=""
  for candidate in "$prefix"*; do
    [ -f "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest=$candidate
    fi
  done
  printf '%s' "$newest"
}

LATEST_RDB=$(newest_backup "$BACKUP_DIR/dump.rdb.")
LATEST_AOF=$(newest_backup "$BACKUP_DIR/appendonly.aof.")
if [ -z "$LATEST_RDB" ] && [ -z "$LATEST_AOF" ]; then
  echo "错误: 在 $BACKUP_DIR 中找不到任何备份文件" >&2
  exit 1
fi

# Stop Redis.
sudo systemctl stop redis-server

# Restore the newest RDB.
if [ -n "$LATEST_RDB" ]; then
  echo "恢复RDB: $LATEST_RDB"
  sudo cp "$LATEST_RDB" "$DATA_DIR/dump.rdb"
  sudo chown redis:redis "$DATA_DIR/dump.rdb"
fi

# Restore the newest AOF.
if [ -n "$LATEST_AOF" ]; then
  echo "恢复AOF: $LATEST_AOF"
  sudo cp "$LATEST_AOF" "$DATA_DIR/appendonly.aof"
  sudo chown redis:redis "$DATA_DIR/appendonly.aof"
fi

# Start Redis.
sudo systemctl start redis-server
echo "恢复完成,验证数据..."
# PING answers LOADING until the dataset is in memory; poll for PONG.
for ((attempt = 0; attempt < 60; attempt++)); do
  if redis-cli PING 2>/dev/null | grep -q PONG; then
    break
  fi
  sleep 1
done
redis-cli INFO keyspace
六、灾难恢复场景
场景1:服务器完全损坏
# Recovery procedure on a brand-new server.
# 1. Install Redis.
sudo apt update
sudo apt install redis-server redis-tools
# 2. Stop the freshly installed instance.
sudo systemctl stop redis-server
# 3. Fetch the data from backup storage (S3/OSS/NFS ...).
aws s3 cp s3://my-bucket/redis-backup/dump.rdb.latest /var/lib/redis/dump.rdb
aws s3 cp s3://my-bucket/redis-backup/appendonly.aof.latest /var/lib/redis/appendonly.aof
# Or via SCP. Copy ONE chosen backup onto the file name Redis loads; copying
# the whole directory leaves only timestamped files that Redis never reads.
scp user@backup-server:/backup/redis/dump.rdb.20250101_020000 /var/lib/redis/dump.rdb
# 4. Fix ownership and permissions of the restored files only.
chown redis:redis /var/lib/redis/dump.rdb /var/lib/redis/appendonly.aof
chmod 644 /var/lib/redis/dump.rdb /var/lib/redis/appendonly.aof
# 5. Check file integrity (report only; add --fix to redis-check-aof after
#    keeping a copy if corruption is reported).
redis-check-rdb /var/lib/redis/dump.rdb
redis-check-aof /var/lib/redis/appendonly.aof
# 6. Adjust the configuration for remote access.
# Do not just drop `bind` and switch protected-mode off: that exposes an
# unauthenticated Redis to the network. Listen on the server's own address
# and require a password instead.
sudo sed -i 's/^bind 127\.0\.0\.1.*/bind 127.0.0.1 新服务器IP/' /etc/redis/redis.conf
sudo sed -i 's/^# *requirepass .*/requirepass 强密码/' /etc/redis/redis.conf
# Which file is loaded depends on `appendonly`: yes -> appendonly.aof, no -> dump.rdb.
grep -n '^appendonly ' /etc/redis/redis.conf
# 7. Start Redis.
sudo systemctl start redis-server
sudo systemctl enable redis-server
# 8. Verify (REDISCLI_AUTH keeps the password out of the argument list).
REDISCLI_AUTH='强密码' redis-cli -h 新服务器IP INFO replication
REDISCLI_AUTH='强密码' redis-cli -h 新服务器IP DBSIZE
场景2:误删除数据
# 1. Freeze persistence immediately so that neither an AOF rewrite nor an RDB
#    snapshot replaces the history that still contains the lost data.
#    This does NOT stop clients from writing; writes made from now on are not
#    logged and are lost by the restart in step 6.
redis-cli CONFIG SET appendonly no
redis-cli CONFIG SET save ""
# 2. If AOF was enabled, locate the offending command in the AOF.
#    -a: treat the file as text even if it begins with a binary RDB preamble
#    -i: the command name may be logged in whatever case the client used
grep -n -a -i -w -E "DEL|FLUSHALL|FLUSHDB" /var/lib/redis/appendonly.aof
# 3. Back up the AOF before editing.
cp /var/lib/redis/appendonly.aof /tmp/app.aof.bak
# 4. Remove the WHOLE command. The AOF is in RESP format: every command is a
#    "*<argc>" line followed by a "$<len>" line and a value line per argument.
#    FLUSHALL therefore occupies three lines:
#        *1
#        $8
#        FLUSHALL
#    and `DEL key` occupies five. Deleting only the line grep reported
#    corrupts the file.
vim /var/lib/redis/appendonly.aof
# Or, for a FLUSHALL that grep reported on line 1234 (its "*1" is on line 1232):
sed -i '1232,1234d' /var/lib/redis/appendonly.aof
# 5. Validate the edited file. Run without --fix: --fix truncates at the first
#    error and would silently drop every command after a botched edit.
redis-check-aof /var/lib/redis/appendonly.aof
# 6. Restart Redis (redis.conf must have `appendonly yes` for the AOF to be loaded).
sudo systemctl restart redis-server
# 7. Check that the data is back.
redis-cli INFO keyspace
场景3:RDB文件损坏
# 1. Assess the damage.
redis-check-rdb /var/lib/redis/dump.rdb
# 2. Move the corrupt file aside and start Redis without it.
#    If an AOF exists and `appendonly yes` is configured, Redis reloads the
#    data from the AOF here and no further steps are needed.
sudo systemctl stop redis-server
mv /var/lib/redis/dump.rdb /var/lib/redis/dump.rdb.corrupted
sudo systemctl start redis-server
# 3. If the RDB is the only copy, salvage what is still parseable with
#    redis-rdb-tools.
# NOTE(review): rdbtools may not understand the RDB format of recent Redis
# versions, and stops at the damaged region - confirm it can read your file.
pip install rdbtools
# Optional, for inspection only: one JSON object per database, mapping key -> value.
rdb --command json /var/lib/redis/dump.rdb.corrupted > /tmp/dump.json
# 4. Re-emit the readable part as Redis protocol and load it into the running
#    (empty) instance. Unlike the JSON dump this keeps the type of every key
#    (string, hash, list, set, sorted set) and needs no custom loader.
rdb --command protocol /var/lib/redis/dump.rdb.corrupted | redis-cli --pipe
# 5. Check what was recovered.
redis-cli INFO keyspace
七、主从复制恢复
1. 从节点提升为主节点
# The master is down: promote a replica.
# Run on the replica that is to become the new master
# (REPLICAOF NO ONE is the equivalent name on Redis 5+).
redis-cli SLAVEOF NO ONE
# Persist the new role into redis.conf. CONFIG REWRITE updates the file the
# server was started with, so no manual sed edit and no restart of the
# freshly promoted master is required.
redis-cli CONFIG REWRITE
# Point the remaining replicas at the new master and persist that as well.
redis-cli -h 其他从节点 SLAVEOF 新主节点IP 6379
redis-cli -h 其他从节点 CONFIG REWRITE
2. 重建主从
# Once the original master is back, attach it as a replica.
# 1. Take a snapshot of the current data on the new master.
redis-cli -h 新主节点 BGSAVE
# 2. Wait until the snapshot has actually finished.
while redis-cli -h 新主节点 INFO persistence | grep -q '^rdb_bgsave_in_progress:1'; do
  sleep 1
done
# 3. Turn the original master into a replica. Its current dataset is replaced
#    by the new master's data during the initial sync.
redis-cli SLAVEOF 新主节点IP 6379
# Persist the role; otherwise a restart brings it back as a second master.
redis-cli CONFIG REWRITE
# 4. Verify replication.
redis-cli INFO replication
3. 增量恢复(部分同步)
# If a replica is only slightly behind, a partial resynchronization can be used.
# 1. Inspect the replication offsets.
redis-cli INFO replication | grep -E "(master_repl_offset|slave_repl_offset)"
# 2. When the offset gap fits inside the replication backlog,
#    a partial resync happens automatically.
# 3. When the gap is too large, a full resync is needed:
#    detach the replica, then attach it to the master again.
redis-cli SLAVEOF NO ONE
redis-cli SLAVEOF 主节点IP 6379
八、集群模式恢复
1. Redis Cluster 节点恢复
# Inspect the cluster state.
redis-cli --cluster check 节点IP:端口
# Recovering a failed node.
# 1. Restart the failed node.
sudo systemctl restart redis@节点端口
# 2. A node that still has its nodes.conf rejoins the cluster by itself;
#    confirm that it shows up as connected.
redis-cli -h 节点IP -p 端口 CLUSTER NODES
#    Only when the node was rebuilt EMPTY (data and nodes.conf lost) add it
#    again; add-node refuses a node that already knows other nodes or holds keys.
redis-cli --cluster add-node 节点IP:端口 集群中任意节点IP:端口
# 3. Repair slot coverage and rebalance.
redis-cli --cluster fix 集群中任意节点IP:端口
redis-cli --cluster rebalance 集群中任意节点IP:端口
# 4. Verify.
redis-cli --cluster info 集群中任意节点IP:端口
2. 集群备份恢复
#!/bin/bash
# Back up every node of a Redis Cluster: trigger a background snapshot on the
# node, wait for it, then copy the node's dump.rdb (and AOF, when present)
# over scp into a timestamped directory.
set -u

CLUSTER_NODES=(node1:7000 node2:7001 node3:7002)
BACKUP_DIR="/backup/redis-cluster"
DATE=$(date +%Y%m%d_%H%M%S)

mkdir -p "$BACKUP_DIR/$DATE" || exit 1

for node in "${CLUSTER_NODES[@]}"; do
  ip=${node%%:*}
  port=${node##*:}
  echo "备份节点 $node..."

  # BGSAVE instead of SAVE: SAVE blocks the node, and a master that stays
  # unresponsive longer than the cluster node timeout is failed over.
  redis-cli -h "$ip" -p "$port" BGSAVE
  sleep 1
  while redis-cli -h "$ip" -p "$port" INFO persistence | grep -q '^rdb_bgsave_in_progress:1'; do
    sleep 1
  done

  # CONFIG GET prints the setting name and then its value; keep the value.
  NODE_DIR=$(redis-cli -h "$ip" -p "$port" CONFIG GET dir | tail -n 1)
  if [ -z "$NODE_DIR" ]; then
    echo "错误: 无法获取节点 $node 的数据目录" >&2
    continue
  fi

  # Copy the RDB file.
  scp "$ip:$NODE_DIR/dump.rdb" "$BACKUP_DIR/$DATE/dump.$ip.$port.rdb"
  # Copy the AOF if the node has one; its absence is not an error.
  scp "$ip:$NODE_DIR/appendonly.aof" "$BACKUP_DIR/$DATE/appendonly.$ip.$port.aof" 2>/dev/null || true
done

echo "备份完成: $BACKUP_DIR/$DATE"
九、监控与验证
1. 恢复后验证脚本
#!/bin/bash
# verify_restore.sh - print a post-restore report for one Redis instance:
# server info, keyspace, memory, persistence state, a sample of keys per
# database and a rough PING latency figure.
REDIS_HOST="localhost"
REDIS_PORT="6379"
# Shared redis-cli invocation, kept as an array so each argument stays one word.
REDIS_CLI=(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT")

echo "=== Redis恢复验证报告 ==="
echo "生成时间: $(date)"
echo ""

# 1. Basic information.
echo "1. Redis基本信息:"
"${REDIS_CLI[@]}" INFO server | grep -E "(redis_version|uptime_in_days)"
echo ""

# 2. Data statistics.
echo "2. 数据统计:"
"${REDIS_CLI[@]}" INFO keyspace
echo ""

# 3. Memory usage.
echo "3. 内存使用:"
"${REDIS_CLI[@]}" INFO memory | grep -E "(used_memory_human|used_memory_peak_human|mem_fragmentation_ratio)"
echo ""

# 4. Persistence state.
echo "4. 持久化状态:"
"${REDIS_CLI[@]}" INFO persistence | grep -E "(rdb_last_save_time|aof_enabled|aof_rewrite_in_progress|loading)"
echo ""

# 5. Key sampling.
echo "5. 随机采样键值:"
for db in {0..15}; do
  count=$("${REDIS_CLI[@]}" -n "$db" DBSIZE 2>/dev/null)
  # DBSIZE yields an error string instead of a number when the server is
  # unreachable or the database index does not exist; skip those.
  [[ "$count" =~ ^[0-9]+$ ]] || continue
  if [ "$count" -gt 0 ]; then
    echo " DB$db: $count 个键"
    # RANDOMKEY returns a single key per call, so ask five times and drop duplicates.
    for _ in 1 2 3 4 5; do
      "${REDIS_CLI[@]}" -n "$db" RANDOMKEY
    done | sort -u | while IFS= read -r key; do
      if [ -n "$key" ]; then
        type=$("${REDIS_CLI[@]}" -n "$db" TYPE "$key")
        echo " - $key ($type)"
      fi
    done
  fi
done
echo ""

# 6. Rough latency check (includes the cost of spawning redis-cli each time).
echo "6. 简单性能测试:"
start=$(date +%s%N)
for i in {1..100}; do
  "${REDIS_CLI[@]}" PING > /dev/null
done
end=$(date +%s%N)
echo " PING 100次耗时: $(( (end-start)/1000000 ))ms"
2. 监控关键指标
# Live monitoring: every 5 seconds `watch` re-runs the quoted script, which
# prints a timestamp followed by selected INFO fields (stats, memory,
# persistence) and the keyspace summary. The script is single-quoted, so
# $(date) is evaluated on each refresh rather than once by the calling shell.
watch -n 5 '
echo "=== Redis状态监控 ==="
echo "时间: $(date)"
echo ""
redis-cli INFO stats | grep -E "(total_connections_received|total_commands_processed|instantaneous_ops_per_sec)"
echo ""
redis-cli INFO memory | grep -E "(used_memory_human|used_memory_peak_human)"
echo ""
redis-cli INFO persistence | grep -E "(rdb_last_bgsave_status|aof_last_bgrewrite_status|loading)"
echo ""
redis-cli INFO keyspace
'
十、最佳实践与预防措施
1. 预防措施
# 1. Regular backups.
# `crontab -e` opens the current user's crontab in an editor.
crontab -e
# Add the following crontab entry (it is crontab syntax, not a shell command):
0 2 * * * /opt/scripts/backup_redis.sh
# 2. Monitoring and alerting.
# Watch these indicators:
# - memory usage > 80%
# - RDB/AOF failures
# - replicas disconnecting
# - sudden changes in the number of keys
# 3. Configuration tuning.
# The heredoc delimiter is quoted, so the text below is written to the file verbatim.
# NOTE(review): writing under /etc/redis presumably requires root - confirm.
# NOTE(review): maxmemory-policy allkeys-lru lets Redis evict keys once
# maxmemory is reached; confirm that is acceptable for data that must not be lost.
cat > /etc/redis/redis-safe.conf << 'EOF'
# 数据安全配置
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
# AOF配置
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
aof-use-rdb-preamble yes
# 内存管理
maxmemory 2gb
maxmemory-policy allkeys-lru
# 持久化目录权限
dir /var/lib/redis
EOF
2. 灾难恢复计划
# Redis灾难恢复计划
## 1. 恢复优先级
P0(紧急): 主节点完全损坏
P1(高): 从节点损坏,数据部分丢失
P2(中): 单点数据损坏
P3(低): 性能恢复
## 2. 恢复时间目标 (RTO)
- P0: < 15分钟
- P1: < 1小时
- P2: < 4小时
- P3: < 24小时
## 3. 恢复点目标 (RPO)
- 重要数据: < 5分钟数据丢失
- 一般数据: < 1小时数据丢失
## 4. 联系人清单
- 运维负责人: 张三 (13800138000)
- 开发负责人: 李四 (13900139000)
- 数据库DBA: 王五 (13700137000)
## 5. 恢复步骤
1. 评估损坏范围
2. 选择恢复策略
3. 执行恢复操作
4. 验证数据完整性
5. 监控运行状态
6. 记录和分析
3. 自动化恢复工具
#!/usr/bin/env python3
# auto_restore.py - 自动化恢复工具
import subprocess
import sys
import time
import json
from pathlib import Path
class RedisRestore:
    """Restore a Redis RDB snapshot from a backup directory.

    A restore copies ``dump.rdb.<suffix>`` from ``backup_dir`` over
    ``<data_dir>/dump.rdb`` while the ``redis-server`` systemd unit is
    stopped, starts the unit again and checks that keys were loaded.
    """

    def __init__(self, redis_host='localhost', redis_port=6379,
                 backup_dir='/backup/redis', data_dir='/var/lib/redis'):
        """Remember where Redis listens and where backups and data live.

        Args:
            redis_host: Host passed to ``redis-cli -h`` during verification.
            redis_port: Port passed to ``redis-cli -p`` during verification.
            backup_dir: Directory holding ``dump.rdb.<suffix>`` backups.
            data_dir: Redis data directory that receives ``dump.rdb``.
        """
        self.host = redis_host
        self.port = redis_port
        self.backup_dir = Path(backup_dir)
        self.data_dir = Path(data_dir)

    def run_cmd(self, cmd):
        """Run a shell command line.

        Args:
            cmd: Command line, interpreted by the shell.

        Returns:
            Tuple ``(returncode, stdout, stderr)``; ``(1, "", "Command
            timeout")`` when the command runs longer than 30 seconds.
        """
        try:
            result = subprocess.run(
                cmd, shell=True, capture_output=True, text=True, timeout=30
            )
            return result.returncode, result.stdout, result.stderr
        except subprocess.TimeoutExpired:
            return 1, "", "Command timeout"

    def _redis_cli(self, *args):
        """Run redis-cli against the configured host/port without a shell."""
        argv = ["redis-cli", "-h", str(self.host), "-p", str(self.port), *args]
        try:
            result = subprocess.run(
                argv, capture_output=True, text=True, timeout=30
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            return 1, "", str(exc)
        return result.returncode, result.stdout, result.stderr

    def _select_backup(self, backup_type):
        """Return ``(path, None)`` for the chosen backup or ``(None, error)``."""
        if backup_type == 'latest':
            candidates = [
                p for p in self.backup_dir.glob("dump.rdb.*") if p.is_file()
            ]
            if not candidates:
                return None, "No backup files found"
            return max(candidates, key=lambda p: p.stat().st_mtime), None
        chosen = self.backup_dir / f"dump.rdb.{backup_type}"
        if not chosen.is_file():
            return None, f"Backup file not found: {chosen}"
        return chosen, None

    def restore_from_backup(self, backup_type='latest'):
        """Restore ``dump.rdb`` from a backup.

        Args:
            backup_type: ``'latest'`` for the most recently modified backup,
                otherwise the suffix of a specific ``dump.rdb.<suffix>`` file.

        Returns:
            Tuple ``(success, message)``.
        """
        print(f"开始从{backup_type}备份恢复...")

        # Choose and validate the backup first: a missing backup must not
        # leave the Redis service stopped.
        latest_rdb, error = self._select_backup(backup_type)
        if latest_rdb is None:
            return False, error

        # 1. Stop Redis; replacing dump.rdb under a running server would be
        #    undone by its next snapshot.
        print("停止Redis服务...")
        code, _, err = self.run_cmd("sudo systemctl stop redis-server")
        if code != 0:
            return False, f"恢复失败: {err}"

        # 2. Copy the backup into place and hand it to the redis user.
        target = self.data_dir / "dump.rdb"
        print(f"恢复文件: {latest_rdb} -> {target}")
        for argv in (["cp", str(latest_rdb), str(target)],
                     ["chown", "redis:redis", str(target)]):
            step = subprocess.run(argv, capture_output=True, text=True)
            if step.returncode != 0:
                return False, f"恢复失败: {step.stderr.strip()}"

        # 3. Start Redis.
        print("启动Redis服务...")
        code, _, err = self.run_cmd("sudo systemctl start redis-server")
        if code != 0:
            return False, f"恢复失败: {err}"

        # 4. Wait until the server answers PONG (it answers LOADING while the
        #    dataset is being read), then check that some database has keys.
        for _ in range(30):
            code, out, err = self._redis_cli("PING")
            if code == 0 and "PONG" in out:
                break
            time.sleep(1)
        code, out, err = self._redis_cli("INFO", "keyspace")
        # Every non-empty database is reported as "dbN:keys=...", so this
        # also accepts backups whose data is not in db0.
        if code == 0 and "keys=" in out:
            return True, "恢复成功"
        return False, f"恢复失败: {err}"
if __name__ == "__main__":
    # The optional first CLI argument names the backup suffix to restore;
    # without it the most recent backup is used.
    backup = sys.argv[1] if len(sys.argv) > 1 else 'latest'
    restorer = RedisRestore()
    success, message = restorer.restore_from_backup(backup)
    print(f"结果: {success}, 消息: {message}")
    # Exit status mirrors the outcome: 0 on success, 1 on failure.
    sys.exit(0 if success else 1)
关键要点总结
- 定期测试恢复流程 – 至少每季度测试一次
- 多重备份 – 本地+异地+云存储
- 监控告警 – 实时监控持久化状态
- 文档化 – 详细的恢复操作手册
- 权限控制 – 限制危险命令(FLUSHALL, CONFIG)
- 版本管理 – 备份文件与Redis版本匹配