首先是关于删除:把之前的删除代码改进了一下,现在可以批量删除,只保留最近2天(48小时)的数据。
from pymongo import MongoClient
from gridfs import *
import datetime
# Connect to the MongoDB server and open the GridFS bucket ("fs" prefix)
# inside the "mailfile" database.
client = MongoClient("172.25.16.176", 27017)
db = client.mailfile
gfs = GridFS(db, "fs")
def deleteFile(fs=None, max_age=datetime.timedelta(hours=48), batch_size=1000, now=None):
    """Delete expired GridFS files, oldest first, in batches.

    Files are fetched sorted by ascending ``uploadDate`` in batches of
    ``batch_size``. Each file older than ``max_age`` is deleted; the function
    stops at the first file that is still within the retention window (every
    later file is newer), or when the bucket has no files left.

    Args:
        fs: GridFS-like object providing ``find()`` and ``delete(_id)``.
            Defaults to the module-level ``gfs``.
        max_age: Retention period; files uploaded earlier than
            ``now - max_age`` are deleted. Defaults to 48 hours.
        batch_size: Maximum number of files fetched per query.
        now: Reference time as a naive UTC ``datetime``. Defaults to the
            current UTC time.

    Returns:
        The number of files deleted.
    """
    if fs is None:
        fs = gfs
    if now is None:
        # GridFS stores uploadDate in UTC and PyMongo hands it back as a naive
        # UTC datetime by default, so compare against naive UTC rather than
        # local time (local time would shift the cutoff by the UTC offset).
        now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    cutoff = now - max_age

    deleted = 0
    while True:
        batch = fs.find().sort("uploadDate", 1).limit(batch_size)
        seen = 0
        for grid_file in batch:
            seen += 1
            if grid_file.uploadDate >= cutoff:
                # Oldest remaining file is still fresh: nothing more to purge.
                print(grid_file.uploadDate)
                return deleted
            print(grid_file._id, grid_file.uploadDate)
            fs.delete(grid_file._id)
            deleted += 1
        if seen == 0:
            # Bucket is empty; without this check the loop would spin forever.
            return deleted
if __name__ == "__main__":
    # Script entry point: run a single purge pass over the GridFS bucket.
    # The earlier repairDatabase attempts were dropped from here: the server
    # reports that command as removed and points to `compact` instead.
    # A per-run cap on the number of deleted files was sketched but never
    # implemented.
    deleteFile()
删除后发现了一个问题:在Navicat里看到数据占用的空间已经减少,然而磁盘空间并没有被释放(MongoDB删除文档后,空出来的空间会留给后续写入复用,不会自动归还给操作系统)。
下面就是解决方案,直接上干货!!!
方法1
删除的差不多了,就直接给备份出来,把现在的集合(表)给干掉,然后重新恢复回去 ,搞定!
备份
root@debian:/home# mongodump --db mailfile --collection fs.files --out /home/
2020-12-18T13:01:09.732+0800 writing mailfile.fs.files to
2020-12-18T13:01:09.760+0800 done dumping mailfile.fs.files (13680 documents)
root@debian:/home# mongodump --db mailfile --collection fs.chunks --out /home/
2020-12-18T13:01:43.613+0800 writing mailfile.fs.chunks to
2020-12-18T13:01:46.610+0800 [###.....................] mailfile.fs.chunks 3228/23951 (13.5%)
2020-12-18T13:01:49.610+0800 [######..................] mailfile.fs.chunks 6415/23951 (26.8%)
2020-12-18T13:01:52.610+0800 [#########...............] mailfile.fs.chunks 9119/23951 (38.1%)
2020-12-18T13:01:55.610+0800 [############............] mailfile.fs.chunks 12518/23951 (52.3%)
2020-12-18T13:01:58.610+0800 [################........] mailfile.fs.chunks 16840/23951 (70.3%)
2020-12-18T13:02:01.610+0800 [#####################...] mailfile.fs.chunks 21115/23951 (88.2%)
2020-12-18T13:02:03.438+0800 [########################] mailfile.fs.chunks 23951/23951 (100.0%)
2020-12-18T13:02:03.438+0800 done dumping mailfile.fs.chunks (23951 documents)
导入(导入前先在mongo shell里执行 db.fs.files.drop() 和 db.fs.chunks.drop() 把旧集合删掉,或者给mongorestore加上 --drop 参数)
cd /home/mailfile
mongorestore --db mailfile --collection fs.chunks /home/mailfile/fs.chunks.bson
mongorestore --db mailfile --collection fs.files /home/mailfile/fs.files.bson
root@debian:/home/mailfile# mongorestore --db mailfile --collection fs.chunks /home/mailfile/fs.chunks.bson
2020-12-18T13:33:26.517+0800 checking for collection data in /home/mailfile/fs.chunks.bson
2020-12-18T13:33:26.519+0800 restoring to existing collection mailfile.fs.chunks without dropping
2020-12-18T13:33:26.519+0800 reading metadata for mailfile.fs.chunks from /home/mailfile/fs.chunks.metadata.json
2020-12-18T13:33:26.522+0800 restoring mailfile.fs.chunks from /home/mailfile/fs.chunks.bson
2020-12-18T13:33:29.517+0800 [####....................] mailfile.fs.chunks 692MB/3.69GB (18.3%)
2020-12-18T13:33:32.517+0800 [#########...............] mailfile.fs.chunks 1.44GB/3.69GB (39.1%)
2020-12-18T13:33:35.517+0800 [#########...............] mailfile.fs.chunks 1.45GB/3.69GB (39.3%)
2020-12-18T13:33:38.517+0800 [#############...........] mailfile.fs.chunks 2.10GB/3.69GB (57.0%)
2020-12-18T13:33:41.517+0800 [##################......] mailfile.fs.chunks 2.77GB/3.69GB (75.2%)
2020-12-18T13:33:44.517+0800 [###################.....] mailfile.fs.chunks 2.96GB/3.69GB (80.4%)
2020-12-18T13:33:47.517+0800 [#######################.] mailfile.fs.chunks 3.59GB/3.69GB (97.2%)
2020-12-18T13:33:48.333+0800 [########################] mailfile.fs.chunks 3.69GB/3.69GB (100.0%)
2020-12-18T13:33:48.333+0800 restoring indexes for collection mailfile.fs.chunks from metadata
2020-12-18T13:33:48.600+0800 finished restoring mailfile.fs.chunks (1476 documents, 0 failures)
2020-12-18T13:33:48.600+0800 1476 document(s) restored successfully. 0 document(s) failed to restore.
root@debian:/home/mailfile# mongorestore --db mailfile --collection fs.files /home/mailfile/fs.files.bson
2020-12-18T13:34:01.714+0800 checking for collection data in /home/mailfile/fs.files.bson
2020-12-18T13:34:01.731+0800 restoring to existing collection mailfile.fs.files without dropping
2020-12-18T13:34:01.731+0800 reading metadata for mailfile.fs.files from /home/mailfile/fs.files.metadata.json
2020-12-18T13:34:01.749+0800 restoring mailfile.fs.files from /home/mailfile/fs.files.bson
2020-12-18T13:34:01.848+0800 restoring indexes for collection mailfile.fs.files from metadata
2020-12-18T13:34:01.848+0800 finished restoring mailfile.fs.files (13680 documents, 0 failures)
2020-12-18T13:34:01.848+0800 13680 document(s) restored successfully. 0 document(s) failed to restore.
这种方法简单粗暴,用于生产环境可能会丢数据:备份之后、恢复完成之前写入的新数据不在备份文件里,删掉集合后就找不回来了。
方法2
还是得先删除一些数据,然后用compact命令搞定。网上查到的文章都是用repairDatabase命令去修复,然而执行时直接提示这个命令已经被移除了:想压缩数据请使用compact,想重建索引请使用reIndex,如果需要恢复数据,请看
文档http://dochub.mongodb.org/core/repair
> db.runCommand({ repairDatabase: 1 })
{
"ok" : 0,
"errmsg" : "This command has been removed. If you would like to compact your data, use the 'compact' command. If you would like to rebuild indexes, use the 'reIndex' command. If you need to recover data, please see the documentation for repairing your database offline: http://dochub.mongodb.org/core/repair",
"code" : 59,
"codeName" : "CommandNotFound"
}
我们这里只用了GridFS,没有其他集合,所以用别的命令也不好使。
> use mailfile
switched to db mailfile
> db.compact
mailfile.compact
> db.runCommand({compact:"fs"}) // 这里我把 "fs" 当成集合名,根本就不好使
{
"ok" : 0,
"errmsg" : "collection does not exist", // 直接说集合不存在:"fs" 只是GridFS的前缀,不是集合名,真正的集合是 fs.files 和 fs.chunks,不搞清楚这一点就懵逼了。
"code" : 26,
"codeName" : "NamespaceNotFound"
}
> db.runCommand({compact:"fs.files"})#写这个就OK
{ "ok" : 1 }
> db.runCommand({compact:"fs.chunks"})
{ "ok" : 1 }
> #下面重建索引
> db.runCommand({reIndex:"fs.files"})
{
"nIndexesWas" : 2,
"nIndexes" : 2,
"indexes" : [
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "mailfile.fs.files"
},
{
"v" : 2,
"key" : {
"filename" : 1,
"uploadDate" : 1
},
"name" : "filename_1_uploadDate_1",
"ns" : "mailfile.fs.files"
}
],
"ok" : 1
}
> db.runCommand({reIndex:"fs.chunks"})
{
"nIndexesWas" : 2,
"nIndexes" : 2,
"indexes" : [
{
"v" : 2,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "mailfile.fs.chunks"
},
{
"v" : 2,
"unique" : true,
"key" : {
"files_id" : 1,
"n" : 1
},
"name" : "files_id_1_n_1",
"ns" : "mailfile.fs.chunks"
}
],
"ok" : 1
}
>
GridFS其实就是2个集合:
fs.files fs.chunks
fs.files 存文件的元数据,fs.chunks 存文件内容的分块,这两个就是它存文件用的集合,所以compact和reIndex都要对这两个集合分别执行。
后期可以把释放磁盘空间这个写到代码里去一气呵成。