1、拆分:读一个文件写多个文件
2、合并:读多个文件写入到一个文件
3、父子进程:父进程必须等待子进程完成拆分之后才能开始合并(if / else if 结构,如下)
// Skeleton of the fork/wait pattern used below: the child does the work and
// exits, the parent blocks until the child has terminated.
pid = fork();
if (pid == 0)
{
//Child branch: business logic goes here (function calls, etc.)
exit(0);
}
else if (pid > 0)
{
// Parent branch: wait for the child before continuing.
// NOTE(review): POSIX wait() takes an int* status argument, so this line is
// pseudo-code as written; waitpid is the alternative named in the trailing comment.
int res = wait();//waitpid
}
代码版本一:未把路径存入链表进行排序等
// Writes exactly `len` bytes from `data` to `fd`, resuming after short writes.
// Returns false as soon as write() fails or makes no progress.
static bool writeChunkFully(int fd, const char* data, size_t len)
{
    size_t done = 0;
    while (done < len)
    {
        const ssize_t n = write(fd, data + done, len - done);
        if (n <= 0)
        {
            return false;
        }
        done += static_cast<size_t>(n);
    }
    return true;
}

// Splits srcFile into numbered chunk files dstFile/1.avi, dstFile/2.avi, ...
//
// srcFile: path of the file to split (opened read-only).
// dstFile: path used as the directory prefix for the chunk files; the
//          directory itself is not created here.
//
// Each successful read() of up to sizeof(buf) bytes becomes one chunk file.
// Failures are reported with perror() and stop the split; nothing is returned.
void breakFile(string srcFile, string dstFile)
{
    char buf[102400];          // one chunk of binary data
    char number[32] = { 0 };   // decimal text of the current chunk number
    int tpNum = 1;

    // Clear the umask so the 0777 mode requested for the chunks is applied as-is.
    umask(0);

    const int readfd = open(srcFile.c_str(), O_RDONLY);
    if (readfd < 0)
    {
        perror("open file error");
        return;
    }

    ssize_t got = 0;
    while ((got = read(readfd, buf, sizeof(buf))) > 0)
    {
        snprintf(number, sizeof(number), "%d", tpNum);
        const string dstName = dstFile + "/" + string(number) + ".avi";
        cout << "文件名 " << dstName << endl;

        // O_TRUNC: a chunk left over from an earlier run may be longer than the
        // new one; without truncation its tail would survive in the new chunk.
        const int writefd = open(dstName.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0777);
        if (writefd < 0)
        {
            perror("open file error");
            break;
        }

        cout << "res1 = " << got << endl;
        if (!writeChunkFully(writefd, buf, static_cast<size_t>(got)))
        {
            perror("write file error");
            close(writefd);
            break;
        }
        close(writefd);
        tpNum++;
    }

    if (got < 0)
    {
        perror("read file error");
    }
    close(readfd);
}
// Writes exactly `len` bytes from `data` to `fd`, resuming after short writes.
// Returns false as soon as write() fails or makes no progress.
static bool appendFully(int fd, const char* data, size_t len)
{
    size_t done = 0;
    while (done < len)
    {
        const ssize_t n = write(fd, data + done, len - done);
        if (n <= 0)
        {
            return false;
        }
        done += static_cast<size_t>(n);
    }
    return true;
}

// Appends the complete content of chunkPath to outfd.
// Returns the number of bytes appended, or -1 after reporting the failure
// with perror(). The input descriptor is closed on every path.
static long long appendChunkFile(const char* chunkPath, int outfd)
{
    const int infd = open(chunkPath, O_RDONLY);
    if (infd < 0)
    {
        perror("open file error");
        return -1;
    }

    char buf[102400];
    long long total = 0;
    ssize_t got = 0;
    while ((got = read(infd, buf, sizeof(buf))) > 0)
    {
        if (!appendFully(outfd, buf, static_cast<size_t>(got)))
        {
            perror("write file error");
            close(infd);
            return -1;
        }
        total += got;
    }
    if (got < 0)
    {
        perror("read file error");
        close(infd);
        return -1;
    }
    close(infd);
    return total;
}

// Splits srcPath into chunks inside dstPath in a child process, waits for the
// child, then merges the chunks back into dstPath/merge.avi.
//
// srcPath: file to split; a directory is rejected.
// dstPath: existing directory that receives 1.avi, 2.avi, ... and merge.avi.
//
// The chunks are merged by index (1.avi, 2.avi, ...) until as many bytes as
// the source file holds have been copied. This keeps the merge order
// independent of readdir() ordering, never reads merge.avi back into itself,
// and ignores higher-numbered chunks left over from an earlier run.
// All failures are reported on stderr; nothing is returned.
void split_merge_file(string srcPath, string dstPath)
{
    struct stat srcStat;
    if (stat(srcPath.c_str(), &srcStat) != 0)
    {
        perror("open file error");
        return;
    }
    if (S_ISDIR(srcStat.st_mode))
    {
        // errno carries no information here, so perror() would be misleading.
        cerr << "open file error(is dir)" << endl;
        return;
    }

    const long long file_size = srcStat.st_size;
    cout << "file size is: " << file_size << " byte" << endl;

    const pid_t pid = fork();
    if (pid < 0)
    {
        perror("fork error");
        return;
    }

    if (pid == 0)
    {
        // Child: split the file, then leave without running the parent's
        // exit handlers. Exit status 1 is this function's "split finished" code.
        breakFile(srcPath, dstPath);
        cout << "子进程 pid =" << getpid() << "拆分成功" << endl;
        _exit(1);
    }

    // Parent: the merge must not start before the child has finished.
    int status = 0;
    if (waitpid(pid, &status, 0) < 0)
    {
        perror("waitpid error");
        return;
    }
    if (!WIFEXITED(status) || WEXITSTATUS(status) != 1)
    {
        cout << "子进程拆分文件失败 " << endl;
        return;
    }

    struct stat dstStat;
    if (stat(dstPath.c_str(), &dstStat) != 0)
    {
        perror("open dir error");
        return;
    }
    if (!S_ISDIR(dstStat.st_mode))
    {
        cerr << "open dir error: " << dstPath << " is not a directory" << endl;
        return;
    }

    const string mergeFileName = dstPath + "/merge.avi";
    umask(0);
    // O_TRUNC so a previous, longer merge result cannot leave a stale tail.
    const int writefd = open(mergeFileName.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0777);
    if (writefd < 0)
    {
        perror("open file error");
        return;
    }

    long long merged = 0;
    int chunks = 0;
    bool ok = true;
    while (merged < file_size)
    {
        char name[32] = { 0 };
        snprintf(name, sizeof(name), "/%d.avi", chunks + 1);
        const string chunkPath = dstPath + name;

        const long long copied = appendChunkFile(chunkPath.c_str(), writefd);
        if (copied <= 0)
        {
            // Missing, unreadable or empty chunk: the merge cannot be completed.
            cerr << "merge stopped at " << chunkPath << endl;
            ok = false;
            break;
        }
        merged += copied;
        chunks++;
    }

    if (close(writefd) != 0)
    {
        perror("close file error");
        ok = false;
    }

    cout << "the number of splited file is: " << chunks << endl;
    if (ok && merged == file_size)
    {
        cout << "合并完成 " << endl;
    }
    else
    {
        cerr << "merge failed: wrote " << merged << " of " << file_size << " byte" << endl;
    }
}
// Entry point of version one: split the sample video into chunks and merge
// them back, both inside the chunk directory.
int main()
{
    const char* const sourceVideo = "/root/projects/Warcraft3_End.avi";
    const char* const chunkDirectory = "/root/projects/breakFIle";

    split_merge_file(sourceVideo, chunkDirectory);
    return 0;
}
代码版本二:main中使用进程 ,把路径存入链表进行排序,省略拆分
// Entry point of version two: a child process splits the video, the parent
// waits for it and merges the chunks only when the child reports success.
//
// Exit-status protocol between child and parent: 1 = split succeeded
// (breakFile returned a positive value), 0 = split failed.
// Returns 0 when the merge was started, 1 when forking, waiting or the split failed.
int main()
{
    const pid_t pid = fork();
    if (pid < 0)
    {
        perror("fork error");
        return 1;
    }

    if (pid == 0)
    {
        // Child: do the split and report the outcome through the exit status.
        int res = breakFile("/root/projects/Warcraft3_End.avi","/root/projects/breakFIle");
        exit(res > 0 ? 1 : 0);
    }

    // Parent: block until the child has finished splitting.
    cout<< "父进程开始...等待子进程拆分文件结束" << endl;
    int status = 0;
    if (wait(&status) < 0)
    {
        // status is meaningless when wait() itself failed.
        perror("wait error");
        return 1;
    }

    if (!WIFEXITED(status))
    {
        // The child was killed by a signal or otherwise did not exit normally.
        cout << "子进程拆分文件失败" << endl;
        return 1;
    }

    cout << "status=" << WEXITSTATUS(status) << endl;
    if (WEXITSTATUS(status) != 1)
    {
        cout << "子进程拆分文件失败" << endl;
        return 1;
    }

    cout << "子进程拆分文件成功,可以开始合并文件" << endl;
    // mergeFile takes a mutable char array; a string literal must not be
    // passed to it directly, so hand it a writable copy.
    char chunkDir[] = "/root/projects/breakFIle";
    mergeFile(chunkDir);
    return 0;
}
// Returns true when `name` looks like a chunk file written by the split step:
// one or more decimal digits followed by '.' or the end of the name.
static bool isChunkFileName(const char* name)
{
    size_t i = 0;
    while (name[i] >= '0' && name[i] <= '9')
    {
        ++i;
    }
    return i > 0 && (name[i] == '.' || name[i] == '\0');
}

// Returns the chunk number encoded at the start of the file-name component of
// `path` (e.g. ".../12.avi" -> 12). atoi stops at the first non-digit.
static int chunkNumberOf(const string& path)
{
    const string::size_type slash = path.rfind('/');
    const string name = (slash == string::npos) ? path : path.substr(slash + 1);
    return atoi(name.c_str());
}

// Writes exactly `len` bytes from `data` to `fd`, resuming after short writes.
// Returns false as soon as write() fails or makes no progress.
static bool writeAllBytes(int fd, const char* data, size_t len)
{
    size_t done = 0;
    while (done < len)
    {
        const ssize_t n = write(fd, data + done, len - done);
        if (n <= 0)
        {
            return false;
        }
        done += static_cast<size_t>(n);
    }
    return true;
}

// File merge: concatenates the numbered chunk files found in directory
// filePath, in ascending numeric order, into filePath/mergeAll.avi.
//
// filePath: directory holding the chunks (1.avi, 2.avi, ...).
//
// Only regular files whose name starts with a number are merged, so the
// output file itself and unrelated files are never fed back into the result.
// Failures are reported with perror(); nothing is returned.
void mergeFile(char filePath[])
{
    const string dirPath(filePath);
    const string url = dirPath + "/mergeAll.avi";
    cout << "url " << url << endl;

    DIR* dir = opendir(filePath);
    if (dir == nullptr)
    {
        perror("open dir error");
        return;
    }

    // Collect every chunk path first; sorting and copying happen only after
    // the whole directory has been read.
    list<string> pathList;
    struct dirent* dir_stru = nullptr;
    while ((dir_stru = readdir(dir)) != nullptr)
    {
        // "." and ".." are the current and the parent directory entries.
        if (strcmp(dir_stru->d_name, ".") == 0 || strcmp(dir_stru->d_name, "..") == 0)
        {
            continue;
        }
        if (!isChunkFileName(dir_stru->d_name))
        {
            continue;
        }

        const string fullPath = dirPath + "/" + dir_stru->d_name;

        // DT_REG marks a regular file (DT_DIR a directory, DT_LNK a symbolic
        // link). Some file systems report DT_UNKNOWN; ask stat() in that case.
        bool regular = (dir_stru->d_type == DT_REG);
        if (dir_stru->d_type == DT_UNKNOWN)
        {
            struct stat info;
            regular = (stat(fullPath.c_str(), &info) == 0) && S_ISREG(info.st_mode);
        }
        if (!regular)
        {
            continue;
        }

        cout << "dir_stru->d_name = " << dir_stru->d_name << endl;
        pathList.push_back(fullPath);
        cout <<"完整路径 = " << fullPath << endl;
    }
    closedir(dir);

    // readdir() order is unspecified; order the chunks by their number.
    pathList.sort([](const string& a, const string& b) {
        return chunkNumberOf(a) < chunkNumberOf(b);
    });

    umask(0);
    // O_TRUNC so an older, longer result cannot leave a stale tail behind.
    const int writefd = open(url.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0777);
    if (writefd < 0)
    {
        perror("open file error");
        return;
    }
    cout << "url " << url << endl;

    char buf[102400];
    bool ok = true;
    for (list<string>::const_iterator it = pathList.begin(); ok && it != pathList.end(); ++it)
    {
        cout << "排序后路径" << *it << endl;
        const int readfd = open(it->c_str(), O_RDONLY);
        if (readfd < 0)
        {
            perror("open file error");
            ok = false;
            break;
        }

        // Read many, write one: copy this chunk completely before the next.
        ssize_t res = 0;
        while ((res = read(readfd, buf, sizeof(buf))) > 0)
        {
            if (!writeAllBytes(writefd, buf, static_cast<size_t>(res)))
            {
                perror("write file error");
                ok = false;
                break;
            }
        }
        if (res < 0)
        {
            perror("read file error");
            ok = false;
        }
        close(readfd);
    }

    if (close(writefd) != 0)
    {
        perror("close file error");
        ok = false;
    }
    if (ok)
    {
        cout << "合并成功" << endl;
    }
}
优化:如果传进来的路径带有.符号(非法路径)要判断,在if ((dir = opendir(filePath)) == NULL)之前。
3、效果
遇到的问题:在拆分后循环读取文件夹的时候,只读取到文件7.avi,其他都没有读到,导致合并后的文件大小为7.avi的大小
解决:包括路径列表的排序在内的后面的操作要放在while 循环外面【while ((dir_stru = readdir(dir)) !=NULL)外面】,即先循环把路径添加到链表中再进行读写
总结:
1、合并文件步骤
- 根据路径打开目录(opendir),打开失败(例如路径不是文件夹)则退出;打开成功后循环读取(readdir)目录项,过滤掉.和..,只保留普通文件(d_type==8,即DT_REG),然后再拼接成完整路径存入链表。
- 用find函数查找路径中特定字符的下标(.字符),substr函数切割路径(切割出/和.之间的数字字符),sort函数对整型数字排序,达成对路径列表的排序。
- 遍历链表中的路径,循环读取内容写入目标文件,全部写完关闭目标文件
原创不易,转载请注明出处:
c++ 文件拆分与合并——结合linux进程管理wait/waitpid