1.创建文件夹
CreateDirectory(%%1,NULL);
2.创建文件
CFile file;
file.Open(%%1,CFile::modeCreate|CFile::modeWrite);
3.删除文件
DeleteFile(%%1);
4.删除文件夹
RemoveDirectory(%%1);
5.删除一个目录下所有的文件夹
CFileFind finder;
CString path;
path.Format("%s//*.*",%%1);
BOOL bWorking = finder.FindFile(path);
while (bWorking)
{
bWorking = finder.FindNextFile();
if (finder.IsDirectory() &&!finder.IsDots())
{
RemoveDirectory(finder.GetFilePath());
}
}
6.清空文件夹
// Empty the folder %%1 by removing it and then creating it again.
// NOTE(review): presumably this only works when %%1 is already empty, because
// Win32 RemoveDirectory is documented to fail on non-empty directories - confirm.
RemoveDirectory(%%1);
CreateDirectory(%%1,NULL);
7.读取文件
char sRead[5120];
CFile mFile(_T(%%1),CFile::modeRead);
while (sRead!=NULL)
{
mFile.Read(sRead,5120);
CString %%2(sRead);
%%3
}
mFile.Close();
8.写入文件
// Open %%1 with modeWrite|modeCreate and write the buffer %%2 to it.
CFile mFile(_T(%%1),CFile::modeWrite|CFile::modeCreate);
// NOTE(review): sizeof(%%2) is the data size only when %%2 is an array; for a
// pointer or a CString it would be the size of the object itself - confirm.
mFile.Write(%%2,sizeof(%%2));
mFile.Flush();
mFile.Close();
9.写入随机文件
// Create a uniquely named file in the temp directory and write one byte to it.
char szTempPath[_MAX_PATH],szTempfile[_MAX_PATH];
GetTempPath(_MAX_PATH, szTempPath);
GetTempFileName(szTempPath,_T("my_"),0,szTempfile);
CFile m_tempFile(szTempfile,CFile::modeCreate|CFile::modeWrite);
char m_char='a';
// Write exactly the one byte held by m_char; the former count of 2 read one
// byte past the variable.
m_tempFile.Write(&m_char,sizeof(m_char));
m_tempFile.Close();
//循环写入多个值
strTempA;
int i;
int nCount=6;
//共有6个文件名需要保存
for (i=0;i{strTemp.Format("%d",i);
strTempA=文件名;
//文件名可以从数组,列表框等处取得.
::WritePrivateProfileString("UseFileName","FileName"+strTemp,strTempA,
c://usefile//usefile.ini);
}
strTemp.Format("%d",nCount);
::WritePrivateProfileString("FileCount","Count",strTemp,"c://usefile//usefile.ini");
//将文件总数写入,以便读出.
//读出
nCount=::GetPrivateProfileInt("FileCount","Count",0,"c://usefile//usefile.ini");
for(i=0;i{strTemp.Format("%d",i);
strTemp="FileName"+strTemp;
::GetPrivateProfileString("CurrentIni",strTemp,"default.fil",strTempA.GetBuffer(MAX_PATH),MAX_PATH,"c://usefile//usefile.ini");
//使用strTempA中的内容.
}
1.Seek
Seek(100L,CFile::begin);//移动到文件开始后100字节处
Seek(100L,CFile::end);
Seek(100L,CFile::current);
2.GetPosition
得到文件当前位置,返回32位文件指针
// Remember the current file position so that Seek can return to it later.
DWORD pos=file.GetPosition();
file.Seek(pos,CFile::begin);
3.GetLength()
10.读取文件属性
// Query the attributes of %%1: %%2 runs when the read-only flag is set,
// %%3 when the "normal" flag is set.
DWORD dwAttrs = GetFileAttributes(%%1);
// On failure GetFileAttributes returns INVALID_FILE_ATTRIBUTES (all bits set),
// which would make every flag test below succeed, so rule it out first.
if(dwAttrs != INVALID_FILE_ATTRIBUTES) {
    if(dwAttrs & FILE_ATTRIBUTE_READONLY) {
        %%2
    }
    if(dwAttrs & FILE_ATTRIBUTE_NORMAL){
        %%3
    }
}
11.写入属性
SetFileAttributes(%%1,dwAttrs |FILE_ATTRIBUTE_READONLY);
12.枚举一个目录下所有文件夹
CFileFind finder;
CString path;
path.Format("%s//*.*",%%1);
BOOL bWorking = finder.FindFile(path);
while (bWorking) {
bWorking = finder.FindNextFile();
if(finder.IsDirectory() &&!finder.IsDots()){
CString %%1=finder.GetFilePath();
%%2
}
}
13.复制文件夹
/*
#include
using namespace std;
*/
deque
WIN32_FIND_DATA FileData;
HANDLE hSearch;
DWORD dwAttrs;
char szDirPath[] = %%2;
char szNewPath[MAX_PATH];
BOOL fFinished = FALSE;
if (!CreateDirectory(szDirPath, NULL)) {
//不能创建新的目录
return;
}
CString path;
path.Format("%s//*.*",%%1);
hSearch = FindFirstFile(path, &FileData);
if (hSearch == INVALID_HANDLE_VALUE) {
return;
}
while (ctr.size>0) {
if(!fFinished)
lstrcpy(szNewPath, szDirPath);
lstrcat(szNewPath, FileData.cFileName);
if(CopyFile(FileData.cFileName, szNewPath, FALSE)) {
dwAttrs= GetFileAttributes(FileData.cFileName);
if(!(dwAttrs & FILE_ATTRIBUTE_READONLY)) {
SetFileAttributes(szNewPath,
dwAttrs| FILE_ATTRIBUTE_READONLY);
}
}
else {
//不能复制文件
return;
}
if(!FindNextFile(hSearch, &FileData)) {
if(GetLastError() == ERROR_NO_MORE_FILES) {
//遍历文件夹完成
fFinished = TRUE;
}
else {
//找不到下一个文件
return;
}
}
}
}
FindClose(hSearch);
14.复制一个目录下所有的文件夹到另一个文件夹下
/*
#include
using namespace std;
*/
deque
WIN32_FIND_DATA FileData;
HANDLE hSearch;
DWORD dwAttrs;
char szDirPath[] = %%2;
char szNewPath[MAX_PATH];
BOOL fFinished = FALSE;
if (!CreateDirectory(szDirPath,NULL))
{
//不能创建新的目录
return;
}
CString path;
CFileFind finder;
path.Format("%s//*.*",%%1);
BOOL bWorking = finder.FindFile(path);
while (bWorking)
{
bWorking = finder.FindNextFile();
if(finder.IsDirectory() &&!finder.IsDots()){
hSearch =FindFirstFile(finder.GetFilePath()+"//*.*", &FileData);
if (hSearch == INVALID_HANDLE_VALUE)
{
return;
}
while (!fFinished)
{
lstrcpy(szNewPath, szDirPath);
lstrcat(szNewPath, FileData.cFileName);
if (CopyFile(FileData.cFileName, szNewPath,FALSE))
{
dwAttrs =GetFileAttributes(FileData.cFileName);
if (!(dwAttrs & FILE_ATTRIBUTE_READONLY))
{
SetFileAttributes(szNewPath,
dwAttrs | FILE_ATTRIBUTE_READONLY);
}
}
else
{
//不能复制文件
return;
}
if (!FindNextFile(hSearch, &FileData))
{
if (GetLastError() == ERROR_NO_MORE_FILES)
{
//遍历文件夹完成
fFinished = TRUE;
}
else
{
//找不到下一个文件
return;
}
}
}
FindClose(hSearch);
}
}
15.移动文件夹
/*
#include
using namespace std;
*/
deque
WIN32_FIND_DATA FileData;
HANDLE hSearch;
DWORD dwAttrs;
char szDirPath[] = %%2;
char szNewPath[MAX_PATH];
BOOL fFinished = FALSE;
if (!CreateDirectory(szDirPath, NULL))
{
//不能创建新的目录
return;
}
CString path;
path.Format("%s//*.*",%%1);
hSearch = FindFirstFile(path, &FileData);
if (hSearch == INVALID_HANDLE_VALUE)
{
return;
}
while (!fFinished)
{
lstrcpy(szNewPath, szDirPath);
lstrcat(szNewPath, FileData.cFileName);
if (CopyFile(FileData.cFileName, szNewPath,FALSE))
{
dwAttrs =GetFileAttributes(FileData.cFileName);
if (!(dwAttrs & FILE_ATTRIBUTE_READONLY))
{
SetFileAttributes(szNewPath,
dwAttrs | FILE_ATTRIBUTE_READONLY);
}
}
else
{
//不能复制文件
return;
}
if (!FindNextFile(hSearch, &FileData))
{
if (GetLastError() == ERROR_NO_MORE_FILES)
{
//遍历文件夹完成
fFinished = TRUE;
}
else
{
//找不到下一个文件
return;
}
}
}
FindClose(hSearch);
RemoveDirectory(%%1);
16.移动一个文件夹下所有的文件夹到另一个目录下
/*
#include
using namespace std;
*/
deque
WIN32_FIND_DATA FileData;
HANDLE hSearch;
DWORD dwAttrs;
char szDirPath[] = %%2;
char szNewPath[MAX_PATH];
BOOL fFinished = FALSE;
if (!CreateDirectory(szDirPath,NULL))
{
//不能创建新的目录
return;
}
CString path;
path.Format("%s//*.*",%%1);
BOOL bWorking = finder.FindFile(path);
while (bWorking)
{
bWorking = finder.FindNextFile();
if(finder.IsDirectory() &&!finder.IsDots()){
hSearch =FindFirstFile(finder.GetFilePath()+"//*.*", &FileData);
if (hSearch == INVALID_HANDLE_VALUE)
{
return;
}
while (!fFinished)
{
lstrcpy(szNewPath, szDirPath);
lstrcat(szNewPath, FileData.cFileName);
if (CopyFile(FileData.cFileName, szNewPath,FALSE))
{
dwAttrs =GetFileAttributes(FileData.cFileName);
if (!(dwAttrs & FILE_ATTRIBUTE_READONLY))
{
SetFileAttributes(szNewPath,
dwAttrs | FILE_ATTRIBUTE_READONLY);
}
}
else
{
//不能复制文件
return;
}
if (!FindNextFile(hSearch, &FileData))
{
if (GetLastError() == ERROR_NO_MORE_FILES)
{
//遍历文件夹完成
fFinished = TRUE;
}
else
{
//找不到下一个文件
return;
}
}
}
FindClose(hSearch);
RemoveDirectory(finder.GetFilePath().GetBuffer(0));
}
}
17.以一个文件夹的框架在另一个目录创建文件夹和空文件
/*
#include
using namespace std;
*/
deque
WIN32_FIND_DATA FileData;
HANDLE hSearch;
DWORD dwAttrs;
char szDirPath[] = %%2;
char szNewPath[MAX_PATH];
BOOL fFinished = FALSE;
if (!CreateDirectory(szDirPath, NULL))
{
//不能创建新的目录
return;
}
CString path;
path.Format("%s//*.*",%%1);
hSearch = FindFirstFile(path, &FileData);
if (hSearch == INVALID_HANDLE_VALUE)
{
return;
}
while (!fFinished)
{
lstrcpy(szNewPath, szDirPath);
lstrcat(szNewPath, FileData.cFileName);
HANDLE hFile=CreateFileHandlehFile=CreateFile(szNewPath,GENERIC_READ,FILE_SHARE_READ,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL|FILE_FLAG_SEQUENTIAL_SCAN,NULL);
if(!hFile)
{
//不能创建文件
return;
}
if (!FindNextFile(hSearch, &FileData))
{
if (GetLastError() == ERROR_NO_MORE_FILES)
{
//遍历文件夹完成
fFinished = TRUE;
}
else
{
//找不到下一个文件
return;
}
}
}
FindClose(hSearch);
18.复制文件
CopyFile(%%1,%%2,true);
19.复制一个文件夹下所有的文件到另一个目录
// Copy every regular file that sits directly inside %%1 into the directory
// %%2. Sub-directories are skipped.
CFileFind finder;
BOOL bWorking = finder.FindFile(CString(%%1) + _T("\\*.*"));
while (bWorking)
{
    bWorking = finder.FindNextFile();
    if (finder.IsDirectory())
        continue; // also skips "." and ".."
    // GetFileName() has no directory part, so it can be appended directly.
    const CString targetfile = CString(%%2) + _T("\\") + finder.GetFileName();
    // TRUE: fail instead of overwriting an existing target file.
    CopyFile(finder.GetFilePath(), targetfile, TRUE);
}
20.提取扩展名
CString path(%%1);
CString %%2=path.Mid(path.ReverseFind('.'));
21.提取文件名
// Extract the file name of %%1 into %%2: everything after the last backslash.
// ReverseFind returns -1 when there is no backslash, so Mid(0) then yields
// the whole string.
CString path(%%1);
CString %%2=path.Mid(path.ReverseFind('\\')+1);
22.提取文件路径
// Fetch the full path of the running executable into %%1.
char appName[MAX_PATH];
GetModuleFileName(NULL,appName,MAX_PATH);
CString %%1(appName);
23.替换扩展名
/*
#include <string>
using std::string;
*/
// Replace the extension of %%1 with %%2 (given without the leading dot) and
// store the result in the CString %%3. A name without a dot is left unchanged.
string s(%%1);
string newExt(%%2);
string::size_type i=s.rfind('.',s.length());
if(i!=string::npos)
{
    // Replace everything after the dot, so the old and the new extension may
    // differ in length.
    s.replace(i+1,string::npos,newExt);
}
CString %%3(s.c_str());
24.追加路径
/*
#include
#include
#include
#include
using namespace std;
using namespace boost::filesystem;
*/
// Build two paths from %%2 with boost::filesystem complete() (relative to
// %%1) and system_complete(), then run the caller-supplied statements %%4.
try {
path p1=complete(path(%%2,native),
path(%%1,native));
path p2=system_complete(path(%%2,native));
// NOTE(review): p3 is not declared in this snippet (only p1 and p2 are) -
// confirm which of the two paths was meant here.
CString %%3(p3);
%%4
}
catch(exception& e){
// The exception text is available through e.what().
//e.what();
}
25.移动文件
MoveFile(%%1,%%2);
26.移动一个文件夹下所有文件到另一个目录
// Move every regular file that sits directly inside %%1 into the directory
// %%2. Sub-directories are skipped.
CFileFind finder;
BOOL bWorking = finder.FindFile(CString(%%1) + _T("\\*.*"));
while (bWorking)
{
    bWorking = finder.FindNextFile();
    if (finder.IsDirectory())
        continue; // also skips "." and ".."
    // GetFileName() has no directory part, so it can be appended directly.
    const CString targetfile = CString(%%2) + _T("\\") + finder.GetFileName();
    // MoveFile takes exactly two arguments: source and destination.
    MoveFile(finder.GetFilePath(), targetfile);
}
27.指定目录下搜索文件
CString strFileTitle;
CFileFind finder;
BOOL bWorking = finder.FindFile(%%1);//"C://windows//sysbkup//*.cab"
while(bWorking)
{
bWorking=finder.FindNextFile();
strFileTitle=finder.GetFileTitle();
}
28.打开对话框
// Show a file-open dialog and store the selected path in %%1.
CString %%1;
CFileDialog dlg(TRUE); // TRUE shows an Open dialog, FALSE a Save As dialog
//dlg.m_ofn.lpstrInitialDir=_T("d:\\"); // would make drive D the initial directory
// OPENFILENAME filters are NUL-separated "description\0pattern" pairs that end
// with a double NUL.
dlg.m_ofn.lpstrFilter="Document\0*.doc\0AllFiles(*.*)\0*.*\0\0";
if(dlg.DoModal()==IDOK)
    %%1=dlg.GetPathName();
29.文件分割
// Split the file %%1 into two parts named <title>1 and <title>2, where
// <title> is the source file title. The first part receives the extra byte
// when the length is odd.
CFile m_File;
CString m_FileName,m_FileTitle;
m_FileName=%%1;
char pBuf[4096];
if(m_File.Open(m_FileName,CFile::modeRead|CFile::shareDenyWrite))
{
    m_FileTitle=m_File.GetFileTitle();
    DWORD FileLength=(DWORD)m_File.GetLength();
    DWORD PartLength=FileLength/2+FileLength%2;
    CString strName;
    for(int nCount=1;nCount<=2;nCount++)
    {
        CFile wrFile;
        strName.Format("%s%d",(LPCTSTR)m_FileTitle,nCount);
        if(!wrFile.Open(strName,CFile::modeWrite|CFile::modeCreate))
            break;
        // Copy in buffer-sized chunks: a part may be far larger than pBuf, so
        // reading PartLength bytes in one call would overflow the buffer.
        DWORD Remaining=PartLength;
        while(Remaining>0)
        {
            UINT Want=Remaining<sizeof(pBuf)?(UINT)Remaining:(UINT)sizeof(pBuf);
            UINT ReadBytes=m_File.Read(pBuf,Want);
            if(ReadBytes==0)
                break; // end of file (the second part of an odd-length file is shorter)
            wrFile.Write(pBuf,ReadBytes);
            Remaining-=ReadBytes;
        }
        wrFile.Close();
    }
    m_File.Close();
}
else
    AfxMessageBox("不能打开文件");
30.文件合并
//#include
using std::string;
string s(%%1);
char sep='/';
#ifdef _WIN32
sep='//';
#endif
size_t sz=s.rfind(sep,s.length());
if(sz!=string::npos)
{
CFile Out;
CStringstrFilename(s.substr(i+1,s.length()-i));
if(Out.Open(%%2+"//"+strfilename,cfile::modewrite%7ccfile::modecreate)){
for(int i=1;i<=2;i++)
{
CString Filename;
Filename.Format("%s//%s%d",%%2,strfilename,atoi(i));
CFile In;
if(In.Open(Filename,CFile::modeRead)){
char cbBuffer[4096];
int nFilesize=In.GetLength();
while(nFilesize>0){
int nSize=sizeof(cbBuffer);
if(nSize>nFilesize)
nSize=nFilesize;
try{
In.Read(cbBuffer,nSize);
}
catch(CFileException *e){
char *lpMsgBuf;
if(FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM,
NULL,e->m_lOsError,
MAKELANGID(LANG_NEUTRAL,
SUBLANG_DEFAULT),
(LPSTR)&lpMsgBuf,0,NULL)>0){
AfxMessageBox(lpMsgBuf);
LocalFree(lpMsgBuf);
}
e->Delete();
return;
}
try{
Out.Write(cbBuffer,nSize);
}
catch(CFileException *e){
char *lpMsgBuf;
if(FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM,
NULL,e->m_lOsError,
MAKELANGID(LANG_NEUTRAL,
SUBLANG_DEFAULT),
(LPSTR)&lpMsgBuf,0,NULL)>0){
AfxMessageBox(lpMsgBuf);
LocalFree(lpMsgBuf);
}
e->Delete();
return;
}
nFilesize=nSize;
}
}
else
AfxMessageBox("不能打开"+Filename);
}
}
}
else
AfxMessageBox("不能创建输出文件");
31.文件简单加密
//#include
using std::string;
string s(%%1);
char sep='/';
#ifdef _WIN32
sep='//';
#endif
size_t sz=s.rfind(sep,s.length());
CString outfile;
if(sz!=string::npos)
{
CFile Out,In;
int nFIlesize;
char *lpMsgBuf;
CStringstrFilename(s.substr(i+1,s.length()-i));
if(!in.Open(%%1,CFile::modeRead)){
//不能打开输入文件
return;
}
outfile.Format("//enc_",%%2,strfilename);
if(!Out.Open(outfile,CFile::modewrite|CFile::modeCreate)){
//不能打开输出文件
return;
}
}
nFilesize=In.GetLength();
lpBuffer=new char[nFilesize];
if(lpBuffer==NULL){
//不能分配复制缓存
return;
}
CFileStatus rStatus;
In.GetStatus(%%1,rStatus);
try{
In.Read(cbBuffer,nFilesize);
}
catch(CFileException *e){
char *lpMsgBuf;
if(FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM,
NULL,e->m_lOsError,
MAKELANGID(LANG_NEUTRAL,
SUBLANG_DEFAULT),
(LPSTR)&lpMsgBuf,0,NULL)>0){
AfxMessageBox(lpMsgBuf);
LocalFree(lpMsgBuf);
}
e->Delete();
return;
}
for(int i=0;i
{
int ibt=lpBuffer[i];
ibt+=100;
ibt%=256;
bpBuffer[i]=(char)ibt;
}
try{
Out.Write(cbBuffer,nFilesize);
}
catch(CFileException *e){
char *lpMsgBuf;
if(FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM,
NULL,e->m_lOsError,
MAKELANGID(LANG_NEUTRAL,
SUBLANG_DEFAULT),
(LPSTR)&lpMsgBuf,0,NULL)>0){
AfxMessageBox(lpMsgBuf);
LocalFree(lpMsgBuf);
}
e->Delete();
return;
}
Out.Close();
//In.Close();
CFile::SetStatus(outfile,rstatus);
delete[] lpBuffer;
}
32.文件简单解密
//#include
using std::string;
string s(%%1);
char sep='/';
#ifdef _WIN32
sep='//';
#endif
size_t sz=s.rfind(sep,s.length());
CString infile;
if(sz!=string::npos)
{
CFile Out,In;
int nFIlesize;
char *lpMsgBuf;
CStringstrFilename(s.substr(i+1,s.length()-i));
infile.Format("%s//enc_%s",%%2,strfilename)
if(!in.Open(infile,CFile::moderead)){
//不能打开输入文件
return;
}
if(!Out.Open(%%1,CFile::modeWrite|CFile::modeCreate)){
//不能打开输出文件
return;
}
nFilesize=In.GetLength();
lpBuffer=new char[nFilesize];
if(lpBuffer==NULL){
//不能分配复制缓存
return;
}
CFileStatus rStatus;
In.GetStatus(infile,rstatus);
try{
In.Read(cbBuffer,nFilesize);
}
catch(CFileException *e){
char *lpMsgBuf;
if(FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |FORMAT_MESSAGE_FROM_SYSTEM,
NULL,e->m_lOsError,
MAKELANGID(LANG_NEUTRAL,
SUBLANG_DEFAULT),
(LPSTR)&lpMsgBuf,0,NULL)>0){
AfxMessageBox(lpMsgBuf);
LocalFree(lpMsgBuf);
}
e->Delete();
return;
}
for(int i=0;i
{
int ibt=lpBuffer[i];
ibt-=100;ibt+=256;
ibt%=256;
bpBuffer[i]=(char)ibt;
}
try{
Out.Write(cbBuffer,nFilesize);
}
catch(CFileException *e){
char *lpMsgBuf;
if(FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |FORMAT_MESSAGE_FROM_SYSTEM,
NULL,e->m_lOsError,
MAKELANGID(LANG_NEUTRAL,
SUBLANG_DEFAULT),
(LPSTR)&lpMsgBuf,0,NULL)>0){
AfxMessageBox(lpMsgBuf);
LocalFree(lpMsgBuf);
}
e->Delete();
return;
}
Out.Close();
//In.Close();
CFile::SetStatus(%%1,rStatus);
delete[] lpBuffer;
}
33.读取ini文件属性
// Read key %%3 of section %%2 from the INI file %%1 (default value %%4) into
// the CString %%5. The file is opened once only to obtain its full path.
CStdioFile inifile(%%1,CFile::modeRead);
CString path = inifile.GetFilePath();
inifile.Close();
char key[1024];
DWORD bytes = GetPrivateProfileString(%%2,%%3,%%4,key,1024,path);
if(bytes < 1024)
    key[bytes] = '\0'; // NUL terminator
CString %%5(key);
34.合并一个目录下所有的文件
// Concatenate every regular file that sits directly inside %%1 into the
// output file %%2.
CString Directory=CString(%%1)+"\\*.*";
CFileFind FFile;
CFile Out;
if(Out.Open(%%2,CFile::modeWrite|CFile::modeCreate)){
    try {
        BOOL bFound=FFile.FindFile(Directory);
        while(bFound)
        {
            bFound=FFile.FindNextFile();
            if(FFile.IsDirectory())
                continue; // also skips "." and ".."
            // Open by full path; the bare name would be resolved against the
            // current directory.
            CString Filename=FFile.GetFilePath();
            // Never feed the output file back into itself.
            if(Filename.CompareNoCase(Out.GetFilePath())==0)
                continue;
            CFile In;
            if(!In.Open(Filename,CFile::modeRead)){
                AfxMessageBox("不能打开"+Filename);
                continue;
            }
            char cbBuffer[4096];
            UINT nRead;
            // Read returns 0 at end of file, which ends the copy of this file.
            while((nRead=In.Read(cbBuffer,sizeof(cbBuffer)))>0)
                Out.Write(cbBuffer,nRead);
        }
    }
    catch(CFileException *e){
        // Show the system message for the failed read or write, then give up.
        char *lpMsgBuf;
        if(FormatMessage(
            FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
            NULL,e->m_lOsError,
            MAKELANGID(LANG_NEUTRAL,SUBLANG_DEFAULT),
            (LPSTR)&lpMsgBuf,0,NULL)>0){
            AfxMessageBox(lpMsgBuf);
            LocalFree(lpMsgBuf);
        }
        e->Delete();
        return;
    }
}
else
    AfxMessageBox("不能创建输出文件");
35.写入ini文件属性
/*
CStdioFile inifile(%%1,CFile::modeRead);
CString path = inifile.GetFilePath();
inifile.Close();
int bytes =GetPrivateProfileInt(%%2,%%3,%%4,path);
*/
WritePrivateProfileString(%%2,%%3,%%4,path);
36.获得当前路径
TCHAR szDir[MAX_PATH];
GetCurrentDirectory(MAX_PATH,szDir);
CString %%1;
%%1.Format("%s",szDir);
37.读取XML数据库
/*
#include
using namespace std;
*/
char sRead[5192];
const char* name="Name";
const char* name2="author";
const char* root="ProductData";
const char* subNodeTag="Product";
const char* ID="pid";
//%%2="ProductData"//%%4="pid" //%%6="author"
//%%3="Product"//%%5="Name"
char sRead[5192];
CFile mFile(_T(%%1),CFile::modeRead);
mFile.Read(sRead,5192);
if(sRead!=NULL)
{
string tmp;
while(sRead!=NULL)
{
tmp.append(sRead);
mFile.Read(sRead,5192);
}
stringtarget("001"),globalTag;globalTag.append("<");globalTag.appendoot);globalTag.append(">");
stringpropTag1;propTag1.append("<");propTag1.append(name);propTag1.append(">");
string endTag1;endTag1.append("");endTag1.append(name);endTag1.append(">");
stringpropTag2;propTag2.append("<");propTag2.append(name2);propTag2.append(">");
stringendTag2;endTag2.append("");endTag2.append(name2);endTag2.append(">");
int offset=tmp.find_first_of(globalTag);
while(offset)
{
offset=tmp.find_first_of(globalTag);
string description;
tmp.copy(description.begin(),tmp.find_first_of("/"",offset+1)-offset);
if(target.compare(description)==0)
{
string prop,prop2;
offset=tmp.find_first_of(propTag1,offset)+strlen(name)+2;
tmp.copy(prop.begin(),tmp.find_first_of(endTag1,offset)-offset,offset);
offset=tmp.find_first_of(propTag2,offset)+strlen(name2)+2;
tmp.copy(prop2.begin(),tmp.find_first_of(endTag2,offset)-offset,offset);
//CString %%8(prop),%%9(prop2);
//%%10
return 0;
}
}
}
else
return -1;
38.写入XML数据库
/*
#include
using namespace std;
*/
char sRead[5192];
int no;
const char* name="Name";
const char* name2="author";
const char* root="ProductData";
const char* subNodeTag="Product";
const char* ID="pid";
//%%2="ProductData"//%%4="pid" //%%6="port"
//%%3="Product"//%%5="Name" //%%7="author"
CString temp;
char sRead[5192];
string description;
CFile mFile(_T(%%1),CFile::modeRead);
mFile.Read(sRead,5192);
if(sRead!=NULL)
{
string tmp;
while(sRead!=NULL)
{
tmp.append(sRead);
memset(sRead,0,5192);
mFile.Read(sRead,5192);
}
temp.Format("<%s%s",subNodeTag,ID);
intoffset=tmp.find_last_of(temp)+strlen(subNodeTag) +strlen(ID)+4;
temp.Format("/"><%s",name);
tmp.copy(description.begin(),tmp.find_last_of(temp)-offset,offset);
no=atoi(description.c_str())+1;
mFile.Close();
temp.Format("%s>",root);
CString temp2;
temp2.Format("<%s%s=/"%d/"><%s>%s%s><%s>%s%s",subNodeTag,ID,no,name,"bbbbbbbbbbbbbbbb",name,name2,"cccccccccccccc",name2);
tmp.insert(tmp.find_last_of(temp),temp2);
CFilefile(_T("Produces.xml"),CFile::modeWrite);
file.Write(tmp.c_str(),tmp.size());
file.Flush();
file.Close();
}
else
{
CFilefile(_T(%%1),CFile::modeWrite|CFile::modeCreate);
temp.Format("<%s><%s%s=/"0/"><%s>%s%s><%s>%s%s>%s>%s>",root,subNodeTag,ID,name,"bbbbbbbbbbbbbbbb",name,name2,"cccccccccccccc",name2,subNodeTag,root);
file.Write(temp.GetBuffer(0),temp.GetLength());
file.Flush();
file.Close();
}
39.ZIP压缩文件
//www.zlib.net
/*
#ifdef _DEBUG
#pragma comment(lib,"zlibd.lib")
#else
#pragma comment(lib,"zlib.lib")
#endif
#include "zlib.h"
#include "zconf.h"
*/
HANDLE hFile, hFileToWrite;
CString strFilePath;
m_ctrEdit.GetWindowText(strFilePath);
//打开要进行压缩的文件
hFile = CreateFile(strFilePath, // file name
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // no security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL
); // no attr. template
if (hFile == INVALID_HANDLE_VALUE)
{
AfxMessageBox("Could not open file toread"); // process error
return;
}
HANDLE hMapFile, hMapFileToWrite;
//创建一个文件映射
hMapFile = CreateFileMapping(hFile, // Currentfile handle.
NULL, // Default security.
PAGE_READONLY, // Read/write permission.
0, // Max. object size.
0, // Size of hFile.
"ZipTestMappingObjectForRead"
); // Name of mapping object.
if (hMapFile == NULL)
{
AfxMessageBox("Could not create filemapping object");
return;
}
LPVOID lpMapAddress, lpMapAddressToWrite;
//创建一个文件映射的视图用来作为source
lpMapAddress = MapViewOfFile(hMapFile, //Handle to mapping object.
FILE_MAP_READ, // Read/write permission
0, // Max. object size.
0, // Size of hFile.
0); // Map entire file.
if (lpMapAddress == NULL)
{
AfxMessageBox("Could not map view offile");
return;
}
DWORD dwFileLength,dwFileLengthToWrite;
dwFileLength = GetFileSize(hFile, NULL);
m_dwSourceFileLength = dwFileLength;
//因为压缩函数的输出缓冲必须比输入大0.1% + 12然后一个DWORD用来保存压缩前的大小,
//解压缩的时候用,当然还可以保存更多的信息,这里用不到
dwFileLengthToWrite =(double)dwFileLength*1.001 + 12 +sizeof(DWORD);
//以下是创建一个文件,用来保存压缩后的文件
hFileToWrite =CreateFile("demoFile.rar", // demoFile.rar
GENERIC_WRITE|GENERIC_READ, // open for writing
0, // do not share
NULL, // no security
CREATE_ALWAYS, // overwrite existing
FILE_ATTRIBUTE_NORMAL , // normal file
NULL); // no attr. template
if (hFileToWrite == INVALID_HANDLE_VALUE)
{
AfxMessageBox("Could not open file towrite"); // process error
return;
}
hMapFileToWrite =CreateFileMapping(hFileToWrite, // Current file handle.
NULL, // Default security.
PAGE_READWRITE, // Read/write permission.
0, // Max. object size.
dwFileLengthToWrite, // Size of hFile.
"ZipTestMappingObjectForWrite"); //Name of mapping object.
if (hMapFileToWrite == NULL)
{
AfxMessageBox("Could not create filemapping object for write");
return;
}
// Map a writable view of the whole output file. All five arguments must be
// real code: previously two of them were swallowed by a trailing comment.
lpMapAddressToWrite = MapViewOfFile(hMapFileToWrite, // Handle to mapping object.
    FILE_MAP_WRITE, // Read/write permission.
    0, // High-order DWORD of the file offset.
    0, // Low-order DWORD of the file offset.
    0); // Number of bytes to map; 0 maps the entire file.
if (lpMapAddressToWrite == NULL)
{
AfxMessageBox("Could not map view of file");
return;
}
//这里是将压缩前的大小保存在文件的第一个DWORD里面
LPVOID pBuf = lpMapAddressToWrite;
(*(DWORD*)pBuf) = dwFileLength;
pBuf = (DWORD*)pBuf + 1;
//这里就是最重要的,zlib里面提供的一个方法,将源缓存的数据压缩至目的缓存
//原形如下:
//int compress (Bytef *dest, uLongf *destLen,const Bytef*source, uLong sourceLen);
//参数destLen返回实际压缩后的文件大小。
compress((Bytef*)pBuf,&dwFileLengthToWrite,(Bytef*)lpMapAddress, dwFileLength);
UnmapViewOfFile(lpMapAddress);
CloseHandle(hMapFile);
CloseHandle(hFile);
UnmapViewOfFile(lpMapAddressToWrite);
CloseHandle(hMapFileToWrite);
//这里将文件大小重新设置一下
SetFilePointer(hFileToWrite,dwFileLengthToWrite+ sizeof(DWORD) ,NULL,FILE_BEGIN);
SetEndOfFile(hFileToWrite);
CloseHandle(hFileToWrite);
40.ZIP解压缩
//www.zlib.net
/*
#ifdef _DEBUG
#pragma comment(lib,"zlibd.lib")
#else
#pragma comment(lib,"zlib.lib")
#endif
#include "zlib.h"
#include "zconf.h"
*/
HANDLE hFile, hFileToWrite;
CString strFilePath=%%1;
//打开要进行解压缩的文件
hFile = CreateFile(strFilePath, // file name
GENERIC_READ, // open for reading
FILE_SHARE_READ, // share for reading
NULL, // no security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL
); // no attr. template
if (hFile == INVALID_HANDLE_VALUE)
{
AfxMessageBox("Could not open file toread"); // process error
return;
}
HANDLE hMapFile, hMapFileToWrite;
//创建一个文件映射
hMapFile = CreateFileMapping(hFile, // Currentfile handle.
NULL, // Default security.
PAGE_READONLY, // Read/write permission.
0, // Max. object size.
0, // Size of hFile.
"ZipTestMappingObjectForRead"); //Name of mapping object.
if (hMapFile == NULL)
{
AfxMessageBox("Could not create filemapping object");
return;
}
LPVOID lpMapAddress, lpMapAddressToWrite;
//创建一个文件映射的视图用来作为source
// Map a read-only view of the whole compressed input file.
lpMapAddress = MapViewOfFile(hMapFile, // Handle to mapping object.
    FILE_MAP_READ, // Read permission.
    0, // High-order DWORD of the file offset.
    0, // Low-order DWORD of the file offset.
    0); // Number of bytes to map; 0 maps the entire file.
if (lpMapAddress == NULL)
{
AfxMessageBox("Could not map view offile");
return;
}
DWORD dwFileLength,dwFileLengthToWrite;
dwFileLength = GetFileSize(hFile, NULL) -sizeof(DWORD);
//因为压缩函数的输出缓冲必须比输入大0.1% + 12然后一个DWORD用来保存压缩前的大小,
//解压缩的时候用,当然还可以保存更多的信息,这里用不到
// dwFileLengthToWrite =(double)dwFileLength*1.001 + 12 +sizeof(DWORD);
dwFileLengthToWrite = (*(DWORD*)lpMapAddress);
LPVOID pSourceBuf = lpMapAddress;
pSourceBuf = (DWORD*)pSourceBuf + 1;
//以下是创建一个文件,用来保存压缩后的文件
hFileToWrite = CreateFile(%%2, // createdemo.gz
GENERIC_WRITE|GENERIC_READ, // open for writing
0, // do not share
NULL, // no security
CREATE_ALWAYS, // overwrite existing
FILE_ATTRIBUTE_NORMAL , // normal file
NULL
); // no attr. template
if (hFileToWrite == INVALID_HANDLE_VALUE)
{
AfxMessageBox("Could not open file towrite"); //process error
return;
}
hMapFileToWrite =CreateFileMapping(hFileToWrite, // Currentfile handle.
NULL, // Default security.
PAGE_READWRITE, // Read/write permission.
0, // Max. object size.
dwFileLengthToWrite, // Size of hFile.
"ZipTestMappingObjectForWrite"); //Name of mapping object.
if (hMapFileToWrite == NULL)
{
AfxMessageBox("Could not create filemapping object for write");
return;
}
lpMapAddressToWrite =MapViewOfFile(hMapFileToWrite, //Handle to mapping object.
FILE_MAP_WRITE, // Read/write permission
0, // Max. object size.
0, // Size of hFile.
0
); // Map entire file.
if (lpMapAddressToWrite == NULL)
{
AfxMessageBox("Could not map view of file");
return;
}
//这里是将压缩前的大小保存在文件的第一个DWORD里面
LPVOID pBuf = lpMapAddressToWrite;
//这里就是最重要的,zlib里面提供的一个方法,将源缓存的数据压缩至目的缓存
//原形如下:
//int compress (Bytef *dest, uLongf *destLen,const Bytef *source, uLong sourceLen);
//参数destLen返回实际压缩后的文件大小。
uncompress((Bytef*)pBuf,&dwFileLengthToWrite,(Bytef*)pSourceBuf, dwFileLength);
UnmapViewOfFile(lpMapAddress);
CloseHandle(hMapFile);
CloseHandle(hFile);
UnmapViewOfFile(lpMapAddressToWrite);
CloseHandle(hMapFileToWrite);
//这里将文件大小重新设置一下
SetFilePointer(hFileToWrite,dwFileLengthToWrite,NULL,FILE_BEGIN);
SetEndOfFile(hFileToWrite);
CloseHandle(hFileToWrite);
41.获得应用程序完整路径
char appName[MAX_PATH];
GetModuleFileName(NULL,appName,MAX_PATH);
CString %%1(appName);
42.递归删除目录中的文件
CString Directory(%%1);
CStringArray csa;
int count=0;
if(Directory.Right(1)!="//")
Directory+="//";
Directory+="*.*";
CFileFInd FFile;
csa.add(Directory);
while(count
{
if(FFile.FindFile(csa.GetAt(i)))
{
bFound=FFile.FindNextFile();
if(!FFile.IsDirectory() &&!FFile.IsDots())
{
DeleteFile(FFile.GetFilePath());
}
else if(FFile.IsDirectory())
{
csa.Add(FilePath+"//*.*");
}
}
else
count++;
}
43.ZIP压缩文件夹
//www.zlib.net
/*
#include
#include
#include
#include
#include
#include
#if defined(MSDOS) || defined(OS2) ||defined(WIN32) ||
defined(__CYGWIN__)
# include
# include
# define SET_BINARY_MODE(file)setmode(fileno(file),O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif
#define CHUNK 16384
//#define USE_TAG
#ifdef USE_TAG
#define COMPRESS_FILE_TAG_HEAD"<<<"
#define COMPRESS_FILE_TAG_TAIL">>>"
#define COMPRESS_FILE_TAG_END_LEN 3 // must bestrlen
(COMPRESS_FILE_TAG_HEAD) =strlen(COMPRESS_FILE_TAG_TAIL)
#else
#define COMPRESS_FILE_TAG_HEAD ""
#define COMPRESS_FILE_TAG_TAIL ""
#define COMPRESS_FILE_TAG_END_LEN 0 // must bestrlen
(COMPRESS_FILE_TAG_HEAD) =strlen(COMPRESS_FILE_TAG_TAIL)
#endif
*/
/**//**//**//* Compress from file source tofile dest until EOF on source.
def() returns Z_OK on success, Z_MEM_ERROR ifemory could not be allocated for processing, Z_STREAM_ERROR if an invalidcompression
level is supplied, Z_VERSION_ERROR if theversion of zlib. And the version of the library linked do not matc, or Z_ERRNOif there isan error reading or writing the files. */
static it def(FILE *source, FILE *dest, intlevel)
int ret, flush;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
/**//**//**//* allocate deflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
ret = deflateInit(&strm, level);
if (ret != Z_OK)
return ret;
/**//**//**//* compress until end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
strm.next_in = in;
/**//**//**//* run deflate() on input untiloutput
buffernot full, finish
compression if all of source has been read in*/
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = deflate(&strm, flush); /**//**//**//*no
bad return value */
assert(ret != Z_STREAM_ERROR); /**//**//**//*
state not clobbered */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have ||ferror
(dest)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
assert(strm.avail_in == 0); /**//**//**//* all
input will be used */
/**//**//**//* done when last data in fileprocessed
*/
} while (flush != Z_FINISH);
assert(ret == Z_STREAM_END); /**//**//**//*stream
will be complete */
/**//**//**//* clean up and return */
(void)deflateEnd(&strm);
return Z_OK;
}
/* Decompress from file source to file dest until stream ends or EOF.
   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
   allocated for processing, Z_DATA_ERROR if the deflate data is
   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
   the version of the library linked do not match, or Z_ERRNO if there
   is an error reading or writing the files. */
static int inf(FILE *source, FILE *dest)
{
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
/* allocate inflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit(&strm);
if (ret != Z_OK)
return ret;
/* decompress until deflate stream ends or end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
/* no more input: leave the loop; the final return then reports
   Z_DATA_ERROR unless the stream end was already reached */
if (strm.avail_in == 0)
break;
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
/* write out whatever inflate() produced in this round */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have ||ferror
(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);
/* clean up and return */
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK :Z_DATA_ERROR;
}
/* Print a description of the zlib or I/O error code ret to stderr. */
static void zerr(int ret)
{
    fputs("zpipe: ", stderr);
    if (ret == Z_ERRNO) {
        /* An I/O failure: report whichever standard stream is in error. */
        if (ferror(stdin))
            fputs("error reading stdin ", stderr);
        if (ferror(stdout))
            fputs("error writing stdout ", stderr);
    }
    else if (ret == Z_STREAM_ERROR) {
        fputs("invalid compression level ", stderr);
    }
    else if (ret == Z_DATA_ERROR) {
        fputs("invalid or incomplete deflate data", stderr);
    }
    else if (ret == Z_MEM_ERROR) {
        fputs("out of memory ", stderr);
    }
    else if (ret == Z_VERSION_ERROR) {
        fputs("zlib version mismatch! ", stderr);
    }
}
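// The functions above are the main ones from zpipe.c: def(), inf() and zerr(). def() is the compression function and mainly uses zlib's deflate() interface; inf() is the decompression function and mainly uses zlib's inflate() interface; zerr() prints error messages.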
/* Write the entry header for file to the archive stream zfile: the head tag,
   the path itself, then the tail tag.
   Returns 0 on success and 1 when any of the writes fails. */
static int write_zfile_file_header(const char*file,FILE *zfile)
{
    const size_t len = strlen(file);
    /* Short-circuit evaluation keeps the three writes in order and stops at
       the first failure. */
    if (fwrite(COMPRESS_FILE_TAG_HEAD,1,COMPRESS_FILE_TAG_END_LEN,zfile) != COMPRESS_FILE_TAG_END_LEN
        || fwrite(file,1,len,zfile) != len
        || fwrite(COMPRESS_FILE_TAG_TAIL,1,COMPRESS_FILE_TAG_END_LEN,zfile) != COMPRESS_FILE_TAG_END_LEN
        || ferror(zfile))
    {
        fprintf(stderr,"When writing file or dirheader to zfile: write error. ");
        return 1;
    }
    return 0;
}
/**//* compress or decompress from stdin tostdout */
/*
 * Recursively add the directory `file_in` to the archive stream `fd_out`.
 *
 * A header naming the directory is written first. Every entry matched by the
 * wildcard search under the directory is then visited, except "." and "..":
 * sub-directories recurse, and every other entry gets its own header followed
 * by the output of def().
 *
 * Returns 0 when every entry was written. Otherwise returns the first error
 * seen: Z_ERRNO for header, path-length and open failures, or the code
 * returned by def(). A failing entry is reported and skipped so that the
 * remaining entries are still processed.
 */
static int compress_dir(char *file_in,FILE*fd_out)
{
    enum { PATH_CAPACITY = 1024 };
    struct _finddata_t find_data;
    char file[PATH_CAPACITY];
    FILE *fd_in;
    intptr_t lf;            /* _findfirst() returns intptr_t; a long truncates it on 64-bit Windows */
    int ret;
    int first_error = 0;

    if (write_zfile_file_header(file_in,fd_out) != 0)
        return Z_ERRNO;

    /* Two characters for the wildcard suffix plus the terminating NUL. */
    if (strlen(file_in) + 3 > sizeof(file))
    {
        fprintf(stderr,"When reading dir: path %s is too long. ",file_in);
        return Z_ERRNO;
    }
    sprintf(file,"%s%s",file_in,"/*");

    lf = _findfirst(file,&find_data);
    if (lf == -1)
    {
        fprintf(stdout,"file not found. ");
        return 0;
    }

    do
    {
        if(!strcmp(find_data.name,".") ||!strcmp(find_data.name,".."))
            continue;
        fprintf(stdout,"%s",find_data.name);

        /* Directory, separator, entry name and NUL must fit in the buffer. */
        if (strlen(file_in) + strlen(find_data.name) + 2 > sizeof(file))
        {
            fprintf(stderr,"When reading dir: path %s/%s is too long. ",file_in,find_data.name);
            if (first_error == 0)
                first_error = Z_ERRNO;
            continue;
        }
        sprintf(file,"%s%s%s",file_in,"/",find_data.name);

        if(find_data.attrib & _A_SUBDIR)
        {
            fprintf(stdout," ---directory--- ");
            ret = compress_dir(file,fd_out);
        }
        else if (write_zfile_file_header(file,fd_out) != 0)
        {
            ret = Z_ERRNO;
        }
        else
        {
            /* The input is only read, so open it read-only; read-only files then need no attribute changes. */
            fd_in = fopen(file,"rb");
            if (fd_in == NULL)
            {
                fprintf(stderr,"When reading file: cannot open file %s. ",file);
                ret = Z_ERRNO;
            }
            else
            {
                SET_BINARY_MODE(fd_in);
                ret = def(fd_in, fd_out,Z_DEFAULT_COMPRESSION);
                if (ret != Z_OK)
                    zerr(ret);
                else
                    fprintf(stdout," zip over ");
                fclose(fd_in);
            }
        }

        if (ret != 0 && first_error == 0)
            first_error = ret;
    }while( _findnext(lf, &find_data ) == 0 );

    /* Release the search handle obtained from _findfirst(). */
    _findclose(lf);
    return first_error;
}
//int argc, char **argv
// Body of the program entry point (parameters as named on the line above):
// compresses the single file or directory named by argv[1] into "<name>.z",
// reports the outcome, then waits for a key press.
struct _finddata_t find_data;
FILE *fd_in;
FILE *fd_out;
char *file_dir;         // non-const because compress_dir() takes char *
char file_out[100];
intptr_t probe;         // search handle returned by _findfirst()
int ret;
if (argc == 2)
{
    file_dir = argv[1];
    probe = _findfirst(file_dir,&find_data);
    if(probe == -1)
    {
        fprintf(stderr,"File or dir %s notfound.",file_dir);
        return 1;
    }
    // Only find_data.attrib is needed from the search; release the handle right away.
    _findclose(probe);

    // sizeof(".z") counts the suffix and the terminating NUL.
    if(strlen(file_dir) + sizeof(".z") > sizeof(file_out))
    {
        fprintf(stderr,"File or dir name %s is too long.",file_dir);
        return 1;
    }
    sprintf(file_out,"%s%s",file_dir,".z");
    fd_out = fopen(file_out,"wb+");
    if(fd_out == NULL)
    {
        fprintf(stderr,"Cannot create output file %s.",file_out);
        return 1;
    }
    SET_BINARY_MODE(fd_out);

    if(find_data.attrib & _A_SUBDIR)
    {
        fprintf(stdout,"Dir %s being Compressed...",file_dir);
        ret = compress_dir(file_dir,fd_out);
    }
    else
    {
        fprintf(stdout,"File %s being Compressed...",file_dir);
        // The input is only read, so a read-only open is sufficient.
        fd_in = fopen(file_dir,"rb");
        if(fd_in == NULL)
        {
            fprintf(stderr,"Cannot open input file %s.",file_dir);
            ret = Z_ERRNO;
        }
        else
        {
            SET_BINARY_MODE(fd_in);
            ret = def(fd_in, fd_out,Z_DEFAULT_COMPRESSION);
            fclose(fd_in);
        }
    }
    fclose(fd_out);

    if (ret != 0)
    {
        fprintf(stderr,"Compress Error!!!!!!!!!!!!!! ");
        zerr(ret);
    }
    else
        fprintf(stdout,"Compress OK---------------");
}
else {
    fprintf(stdout,"zod usage: zod[file]/[directory] ");
}
getch();
44.验证DTD
/*
#include
#include
*/
using namespace std;
using namespace xercesc;
// Parse "animals.xml" with DTD validation switched on; violations are
// delivered to the registered error handler, and any exception ends the
// enclosing function with EXIT_FAILURE.
try {
    // Initialize Xerces and obtain a SAX2 parser
    XercesInitializer init;
    auto_ptr<SAX2XMLReader>
        parser(XMLReaderFactory::createXMLReader());
    // Enable validation
    parser->setFeature(XMLUni::fgSAX2CoreValidation,true);
    // Register error handler to receive notifications
    // of DTD violations
    CircusErrorHandler error;
    parser->setErrorHandler(&error);
    parser->parse("animals.xml");
} catch (const SAXException& e) {
    // getMessage() yields XMLCh text; transcode it to the local code page
    // for cout and release the buffer that transcode() allocated.
    char* message = XMLString::transcode(e.getMessage());
    cout << "xml error: " << message << "\n";
    XMLString::release(&message);
    return EXIT_FAILURE;
} catch (const XMLException& e) {
    char* message = XMLString::transcode(e.getMessage());
    cout << "xml error: " << message << "\n";
    XMLString::release(&message);
    return EXIT_FAILURE;
} catch (const exception& e) {
    cout << e.what() << "\n";
    return EXIT_FAILURE;
}
45.Schema验证
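/*
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>
Handy definitions of constants.
#include <xercesc/util/XMLUni.hpp>
Create a SAX2 parser object.
*/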
SAX2XMLReader* parser =XMLReaderFactory::createXMLReader();
// Set the appropriate features on the parser.Enable namespaces, schema validation, and the checking of all Schemaconstraints. We refer to these as "common features" in followingexamples.
parser->setFeature(XMLUni::fgSAX2CoreNameSpaces,true);
parser->setFeature(XMLUni::fgSAX2CoreValidation,true);
parser->setFeature(XMLUni::fgXercesDynamic,false);
parser->setFeature(XMLUni::fgXercesSchema,true);
parser->setFeature(XMLUni::fgXercesSchemaFullChecking,true);
// Set appropriate ContentHandler,ErrorHandler, and EntityResolver.
// These will be referred to as "commonhandlers" in subsequent examples.
// You will use a default handler provided byXerces-C++ (no op action).
// Users should write their own handlers andinstall them.
DefaultHandler handler;
parser->setContentHandler(&handler);
// The object parser calls when it detectsviolations of the schema.
parser->setErrorHandler(&handler);
// The object parser calls to find the schemaand
// resolve schema imports/includes.
parser->setEntityResolver(&handler);
// Parse the XML document.
// Document content sent to registeredContentHandler instance.
parser->parse(xmlFile);
// Delete the parser instance.
delete parser;
46.Grep
/*
#include
#include "regexpr2.h"
using namespace std;
using namespace regex;
*/
match_results results;
rpattern pat(%%2);
char sRead[5120];
CFile mFile(_T(%%1),CFile::modeRead);
CString content;
while(sRead!=NULL)
{
mFile.Read(sRead,5120);
content+=CString(sRead);
}
mFile.Close();
CString line;
CString sub;
char seperator='/n';
for(intpos=0;AfxExtractSubString(line,content,pos,seperator);++pos)
{
if(line.Trim()!="")
{
string str(line);
match_results::backref_typebr=pat.match(str,results);
if(br.matched){
//br
}
}
}
/*
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: restack.h
//
// Functions: a quick-'n'-dirty, type-unsafestack used by the iterative
// regular expression algorithm
//
// Notes: Care must be taken when using thisstack. You must pop off
// the correct type of object, otherwise youget garbage. Also,
// if you push anything that has a non-trivialdestructor, then
// be sure to explicitely pop everything offthe stack and don't
// use the unsafe_long_jump method.
//
// Author: Eric Niebler ( [email protected])
//
// History: 11/15/2001 ericne Created
//
//----------------------------------------------------------------------------
#ifndef HETERO_STACK_H
#define HETERO_STACK_H
#include
#include
#include
#include
#include
#ifndef REGEX_CDECL
#ifdef _MSC_VER
#define REGEX_CDECL __cdecl
#else
#define REGEX_CDECL
#endif
#endif
#define COMMA ,
#if !defined(_MSC_VER) | 1200 < _MSC_VER
# define REGEX_VC6(x)
# define REGEX_NVC6(x) x
#else
# define REGEX_VC6(x) x
# define REGEX_NVC6(x)
#endif
namespace regex
{
namespace detail
{
// For compile-time assertions that generate
// no run-time overhead.
template< bool f > struct static_assert;
template<> structstatic_assert
// Work-around for a template-templateparameter problem on VC7.0
template< typename T > struct type2type {typedef T type; };
template< bool F > struct bool2type {enum { value = F }; };
typedef bool2type
typedef bool2type
#ifdef _MSC_VER
// warning C4127: conditional expression isconstant
// warning C4189: local variable is initializedbut not referenced
// warning C4244: conversion from 'T' to 'int',possible loss of data
// warning C4510: default constructor could notbe generated
// warning C4610: struct can never beinstantiated - user defined constructor required
// warning C4800: forcing value to bool 'true'or 'false' (performance warning)
#pragma warning( push )
#pragma warning( disable : 4127 4189 4244 45104610 4800 )
// Make sure nobody has tampered with thepacking before defining the
// alignof structure
#pragma pack( push )
#pragma pack() // use the default packing
#endif
template< typename T >
class alignof
{
struct helper
{
helper();
char m_c;
T m_t;
};
public:
enum { value = sizeof(helper)-sizeof(T)
};
#ifdef _MSC_VER
#pragma pack( pop )
#endif
//
// Type traits
//
typedef char (&yes_type)[1];
typedef char (&no_type)[2];
template< bool >
struct select_helper
{
template< typename T, typename U >
struct nested
{
typedef T type;
};
};
template<>
struct select_helper
{
template< typename T, typename U >
struct nested
{
typedef U type;
};
};
// For use in conditional typedefs
template< bool F, typename T, typename U>
struct select
{
typedef typenameselect_helper
};
template< typename U >
struct convertible_helper
{
static yes_type check( U );
static no_type REGEX_CDECL check(...);
};
template< typename T >
struct factory
{
static T& make();
};
template< typename T, typename U >
struct is_convertible
{
enum { value =(sizeof(convertible_helper::check(factory
};
// Compile-time test: value is non-zero exactly when N is a power of two.
// An even N defers to N/2; the recursion succeeds when it reaches 1, fails on
// any other odd value, and is terminated by the <0> specialization.
template< size_t N >
struct is_power_of_two
{
    enum { value = ( 1==N ) || ( 0==(N%2) && is_power_of_two<N/2>::value ) };
};
// Zero is not a power of two; this also stops the recursion above.
template<>
struct is_power_of_two<0>
{
    enum { value = false };
};
// Very primative implementation of is_scalar.This doesn't work
// for void, reference types, array types orfunction types, but
// we don't use those types from hetero_stack.
struct bool_convertible {bool_convertible(bool); };
template< typename T >
struct is_scalar
{
enum { value =is_convertible
};
template< typename T >
struct has_trivial_copy
{
enum { value = is_scalar
};
template< typename T >
struct has_trivial_assignment
{
enum { value = is_scalar
};
template< typename T >
struct has_trivial_destructor
{
enum { value = is_scalar
};
// Primary template: destroy() runs T's destructor in place on the object that
// pT points to. The storage itself is not released.
template< bool >
struct destroyer_helper
{
    template< typename T >
    static void destroy( T const * pT )
    {
        // Explicit use of pT; presumably guards against unreferenced-parameter
        // warnings when ~T() compiles to nothing.
        (void)pT;
        pT->~T();
    }
};
template<> structdestroyer_helper
{
template< typename T >
static void destroy( T const * )
{
}
};
template< typename T >
void destroy( T const * pT )
{
destroyer_helper
}
struct type_vtable
{
std::type_info const * typeinfo_ptr;
size_t size;
size_t aligned_size;
void (*destroy)( void * );
void (*copy)( void *, void const * );
};
template< typename T, size_t AlignmentT >
class type_info_ex
{
static void destroy( void * pv )
{
T const * pT = static_cast
regex::detail::destroy( pT );
(void)pv;
(void)pT;
}
static void copy( void * dst, void const * src)
{
new ( dst ) T( *static_cast
}
public:
static type_vtable const vtable;
static bool equals( type_vtable const * ptm )
{
return ptm == & vtable ||*ptm->typeinfo_ptr == typeid(T);
}
};
template< typename T,size_t AlignmentT >
type_vtable consttype_info_ex
{
&typeid(T),
sizeof(T),
( sizeof(T) + AlignmentT - 1 ) & ~(AlignmentT - 1 ),
has_trivial_destructor
&type_info_ex
};
// Reinterpret the untyped address pv as an object of type T and return a
// reference to it. The caller is responsible for pv really addressing a T.
template< typename T >
inline T & to_type( void * pv )
{
    return *static_cast<T*>( pv );
}
} // namespace detail
//--------------------------------------------------------------------------
//
// Class: hetero_stack
//
// Description: Fast, heterogeneous stack.
//
// Methods: allocate - reserve space on stack
// unwind - unwind the stack
// hetero_stack - c'tor
// ~hetero_stack - d'tor, release all dynamicmemory
// push - push an object on the stack
// pop - pop an object from the stack
//
// Members: m_first_node -
// m_current_node -
//
// Typedefs: byte_t -
//
// History: 10/19/2001 - ericne - Created
//
//--------------------------------------------------------------------------
template
<
size_t AlignmentT = sizeof(void*),
bool RuntimeTypeCheckT = true, // should weperform run-time type checking?
bool AssumePodT = false, // assume non-throwingcopy/assign/destroy for better perf
size_t DynamicBlockSizeT = 4096, // blocksallocated from heap are this size
size_t StaticBlockSizeT = 1024 // initial blockon stack is this size
>
class hetero_stack
{
typedef unsigned char byte_t;
typedef detail::type_vtable const* vtable_ptr;
public:
typedefhetero_stack
template< typename T >
struct aligned_sizeof
{
enum
{
// round up sizeof(T) to the nearest multipleof AlignmentT
no_rtti = ( sizeof( T ) + AlignmentT - 1 )& ~( AlignmentT - 1 ),
with_rtti = RuntimeTypeCheckT ?
no_rtti +aligned_sizeof
no_rtti
};
};
private:
struct stack_node
{
struct header
{
stack_node * m_back;
stack_node * m_next;
byte_t * m_current; // ptr into m_mem. allocfrom here
byte_t * m_end; // ptr to last+1 byte_t inm_mem
};
union
{
header m_head;
byte_t m_align[ aligned_sizeof
};
// This is the buffer into which values will bepushed and popped.
// It is guaranteed to meet the AlignmentTrequirements because of
// the union above.
byte_t m_mem[1];
size_t size() const // throw()
{
return static_cast
}
};
enum
{
DYNAMIC_BLOCK_SIZE =
DynamicBlockSizeT > sizeof( stack_node ) ?
DynamicBlockSizeT : sizeof( stack_node )
};
union
{
stack_node m_node;
byte_t m_buf[aligned_sizeof
} m_first_node;
stack_node * m_current_node;
// Cache these for faster access
byte_t * m_begin;
byte_t * m_current;
byte_t * m_end;
byte_t * grow( size_t size ) //throw(std::bad_alloc)
{
// write the cached value of current into thenode.
// OK to do this even if later statementsthrow.
m_current_node->m_head.m_current =m_current;
// Do we have a node with available memoryalready?
if( m_current_node->m_head.m_next )
{
// Does this node have enough room?
if( size <=m_current_node->m_head.m_next->size() )
{
m_current_node =m_current_node->m_head.m_next;
m_current = m_current_node->m_head.m_current= m_current_node->m_mem + size;
m_end = m_current_node->m_head.m_end;
return m_begin = m_current_node->m_mem;
}
// Create a new node and insert it into thelist
stack_node * new_node =static_cast
::operator new( size + offsetof( stack_node,m_mem ) ) );
new_node->m_head.m_back = m_current_node;
new_node->m_head.m_next =m_current_node->m_head.m_next;
m_current = m_end =new_node->m_head.m_current =
new_node->m_head.m_end = new_node->m_mem+ size;
m_current_node->m_head.m_next->m_head.m_back= new_node;
m_current_node->m_head.m_next = new_node;
m_current_node = new_node;
return m_begin = m_current_node->m_mem;
}
// We need to create a new node from scratch
size_t new_size = detail::regex_max( size,
static_cast
stack_node * new_node =static_cast
::operator new( new_size + offsetof(stack_node, m_mem ) ) );
new_node->m_head.m_back = m_current_node;
new_node->m_head.m_next = 0;
m_current = new_node->m_head.m_current =new_node->m_mem + size;
m_end = new_node->m_head.m_end =new_node->m_mem + new_size;
m_current_node->m_head.m_next = new_node;
m_current_node = new_node;
return m_begin = m_current_node->m_mem;
}
byte_t * allocate( size_t size ) //throw(std::bad_alloc)
{
// This is the ptr to return
byte_t * mem = m_current;
// Advance the high-water mark
m_current += size;
// Check to see if we have overflowed thisbuffer
if( std::less
{
// oops, back this out.
m_current = mem;
// allocate a new block and return a ptr to thenew memory
return grow( size );
}
return mem;
}
byte_t * unwind( byte_t * pb ) // throw()
{
// roll back the stack
m_current = pb;
// If we've unwound this whole block, then makethe
// previous node the current node
if( m_current == m_begin )
{
// write the cached value of m_current intom_current_node
m_current_node->m_head.m_current =m_current;
m_current_node =m_current_node->m_head.m_back;
// update the cache
m_begin = m_current_node->m_mem;
m_current =m_current_node->m_head.m_current;
m_end = m_current_node->m_head.m_end;
}
return pb;
}
byte_t * unwind( size_t size ) // throw()
{
return unwind( m_current - size );
}
void long_jump_impl( void * jump_ptr,detail::bool2type
{
safe_long_jump( jump_ptr );
}
void long_jump_impl( void * jump_ptr,detail::bool2type
{
unsafe_long_jump( jump_ptr );
}
struct real_unwinder;
friend struct real_unwinder;
// Scope guard: unless dismiss() has been called, the destructor asks the
// owning stack to unwind m_size bytes.
struct real_unwinder
{
    real_unwinder( stack_type * pstack, size_t size ) // throw()
        : m_pstack(pstack), m_size(size) {}
    ~real_unwinder() // throw()
    {
        if( m_pstack )
            m_pstack->unwind( m_size );
    }
    // Disarm the guard so that the destructor leaves the stack untouched.
    void dismiss() // throw()
    {
        m_pstack = 0;
    }
private:
    // Copying is disallowed: declared here without a definition.
    real_unwinder( real_unwinder const & );
    real_unwinder & operator=( real_unwinder const & );
    stack_type * m_pstack;
    size_t m_size;
};
struct dummy_unwinder
{
dummy_unwinder( stack_type *, size_t ) {} //throw()
void dismiss() {} // throw()
};
// Disallow these for now. Might implement themlater.
hetero_stack( hetero_stack const & );
hetero_stack & operator=( hetero_stackconst & );
public:
// Exception describing a mismatch between the type that was requested and the
// type that is actually present. what() carries the caller's text followed by
// the implementation-defined names of both types.
class type_error : public std::logic_error
{
    std::type_info const * m_prequested_type;
    std::type_info const * m_pactual_type;

    // Builds "<prefix> (requested type:<requested>, actual type: <actual>)".
    static std::string make_message( std::string const & prefix,
                                     std::type_info const & requested,
                                     std::type_info const & actual )
    {
        std::string text( prefix );
        text += " (requested type:";
        text += requested.name();
        text += ", actual type: ";
        text += actual.name();
        text += ")";
        return text;
    }
public:
    type_error
    (
        std::type_info const & requested_type,
        std::type_info const & actual_type,
        std::string const & s = "type error inhetero_stack"
    )
        : std::logic_error( make_message( s, requested_type, actual_type ) )
        , m_prequested_type( &requested_type )
        , m_pactual_type( &actual_type )
    {
    }
    // The type the caller asked for.
    std::type_info const & requested_type() const // throw()
    {
        return *m_prequested_type;
    }
    // The type that was found instead.
    std::type_info const & actual_type() const // throw()
    {
        return *m_pactual_type;
    }
};
hetero_stack() // throw()
: m_current_node( &m_first_node.m_node )
{
m_first_node.m_node.m_head.m_back = &m_first_node.m_node;
m_first_node.m_node.m_head.m_next = 0;
m_begin = m_current =m_first_node.m_node.m_head.m_current = m_first_node.m_node.m_mem;
m_end = m_first_node.m_node.m_head.m_end =m_first_node.m_buf + sizeof( m_first_node );
}
~hetero_stack() // throw()
{
// AlignmentT must be a power of two
detail::static_assert
// Call any destructors for objects still onthe stack
if( RuntimeTypeCheckT && ! AssumePodT )
{
long_jump( m_first_node.m_node.m_mem );
}
// delete all the memory blocks
m_current_node =m_first_node.m_node.m_head.m_next;
for( stack_node * next_node; m_current_node;m_current_node = next_node )
{
next_node = m_current_node->m_head.m_next;
::operator delete( static_cast
}
}
template< typename T >
inline void push( T const & t ) //throw(std::bad_alloc,...)
{
// Make sure that the alignment for type T isnot worse
// than our declared alignment.
detail::static_assert<( AlignmentT >=detail::alignof
static_cast
// If T won't throw in copy c'tor then we don'tneed to use an unwinder object.
typedef typename detail::select< AssumePodT|| detail::has_trivial_copy
dummy_unwinder, real_unwinder >::typeunwinder;
// If this throws, it doesn't change state,
// so there is nothing to roll back.
byte_t * pb = allocate(aligned_sizeof
// Rolls back the allocate if later steps throw
// BUGBUG we can do the alloc, but not updatem_current until after
// the copy c'tor to avoid the need for anunwinder object
unwinder guard( this, aligned_sizeof
new ( pb ) T( t ); // Could throw if !has_trivial_copy
// If we are debugging the stack, then push apointer to the type_info
// for this type T. It will be checked inpop().
if( RuntimeTypeCheckT )
{
detail::to_type
}
// ok, everything succeeded -- dismiss theguard
guard.dismiss();
}
template< typename T >
inline void pop( T & t ) // throw(...)
{
detail::static_assert<( AlignmentT >= detail::alignof
static_cast
// If we are debugging the stack, then inpush() we pushed a pointer
// to the type_info struct for this type T.Check it now.
if( RuntimeTypeCheckT )
{
byte_t * pti = m_current -aligned_sizeof
if( !detail::type_info_ex
throw type_error( typeid( T ),*detail::to_type
}
// Don't change state yet because assignment opcould throw!
byte_t * pT = m_current -aligned_sizeof
t = detail::to_type
T const & ref = detail::to_type
regex::detail::destroy( &ref );
unwind( pT );
}
// Call this version of pop when you don't needthe popped value
template< typename T >
inline void pop(REGEX_VC6(detail::type2type
{
detail::static_assert<( AlignmentT >=detail::alignof
static_cast
// If we are debugging the stack, then inpush() we pushed a pointer
// to the type_info struct for this type T.Check it now.
if( RuntimeTypeCheckT )
{
byte_t * pti = m_current -aligned_sizeof
if( ! detail::type_info_ex
throw type_error( typeid( T ),*detail::to_type
}
byte_t * pv = unwind(aligned_sizeof
T const & ref = detail::to_type
regex::detail::destroy( &ref );
}
// Call this version of pop when you don't needthe popped value and
// throwing an exception isn't an option
template< typename T >
inline bool pop( std::nothrow_t const & )// throw()
{
detail::static_assert<( AlignmentT >=detail::alignof
static_cast
// If we are debugging the stack, then inpush() we pushed a pointer
// to the type_info struct for this type T.Check it now.
if( RuntimeTypeCheckT )
{
byte_t * pti = m_current - aligned_sizeof
if( !detail::type_info_ex
return false; // type error, can't throw sobail.
}
byte_t * pv = unwind(aligned_sizeof
T const & ref = detail::to_type
regex::detail::destroy( &ref );
return true;
}
template< typename T >
inline T & top(REGEX_VC6(detail::type2type
{
detail::static_assert<( AlignmentT >=detail::alignof
static_cast
if( RuntimeTypeCheckT )
{
// If we are debugging the stack, then the topof the stack is a
// pointer to a type_info struct. Assert thatwe have the correct type.
byte_t * pti = m_current -aligned_sizeof
if( !detail::type_info_ex
throw type_error( typeid( T ),*detail::to_type
}
byte_t * pT = m_current -aligned_sizeof
return detail::to_type
}
// Fetch the type_info for the element at thetop of the stack
std::type_info const & top_type() const //throw()
{
detail::static_assert< RuntimeTypeCheckT> const type_check;
static_cast
byte_t * pti = m_current - aligned_sizeof
return *detail::to_type
}
// Get a pointer to the top of the stack
void * set_jump() const // throw()
{
return m_current;
}
// Quick and dirty stack unwind. Does not calldestructors.
void unsafe_long_jump( void *const jump_ptr )// throw()
{
for( ;; )
{
if( std::less
std::less
{
m_current_node->m_head.m_current =m_current_node->m_mem;
m_current_node =m_current_node->m_head.m_back;
}
else
{
m_begin = m_current_node->m_mem;
m_current = m_current_node->m_head.m_current= static_cast
m_end = m_current_node->m_head.m_end;
return;
}
}
}
// Safe long jump; does call destructors ifRuntimeTypeCheckT is true.
void safe_long_jump( void *const jump_ptr ) //throw()
{
detail::static_assert< RuntimeTypeCheckT> const type_check;
static_cast
while( m_current != jump_ptr )
{
// The top of the stack is a pointer to atype_vtable struct.
m_current -=aligned_sizeof
vtable_ptr pvtable =detail::to_type
// find the start of the object
m_current -= pvtable->aligned_size;
// call the destructor for T
if( pvtable->destroy )
{
pvtable->destroy( m_current );
}
// move to the previous buffer if necessary
if( m_current == m_begin && m_current!= jump_ptr )
{
m_current_node->m_head.m_current =m_current;
m_current_node =m_current_node->m_head.m_back;
m_begin = m_current_node->m_mem;
m_current =m_current_node->m_head.m_current;
m_end = m_current_node->m_head.m_end;
}
}
}
// Stack unwind. If RuntimeTypeCheckT&& !AssumePodT, then destructors
// are called. Otherwise they are not.
void long_jump( void * jump_ptr ) // throw()
{
long_jump_impl( jump_ptr,detail::bool2type
}
struct stack_guard
{
stack_type * m_ps;
void * m_jump_ptr;
explicit stack_guard( stack_type * ps )
: m_ps( ps )
, m_jump_ptr( ps->set_jump() )
{
}
~stack_guard()
{
m_ps->long_jump( m_jump_ptr );
}
};
bool empty() const // throw()
{
return m_current == m_first_node.m_node.m_mem;
}
// Use scoped_push for automaticallypushing/popping
// things to and from the stack. This isespecially useful
// if you want to push a bunch of things"atomically". For
// instance:
//
// typedef hetero_stack<>::scoped_popscoped_pop;
// scoped_pop p1 = stack.scoped_push( int(1) );// could throw
// scoped_pop p2 = stack.scoped_push(std::string("foo") ); // could throw
// stack.push( float(3.14159) ); // could throw
// p2.dismiss(); // ok, nothing threw, so ...
// p1.dismiss(); // ... dismiss the scoped_pops
//
// If p2 and p1 are not dismissed, as in thecase when an
// exception gets thrown, then theyautomatically pop their
// arguments from the stack.
class scoped_pop_base
{
scoped_pop_base & operator=(scoped_pop_base const & ); // disallow assignment
protected:
mutable stack_type * m_pstack;
explicit scoped_pop_base( stack_type * pstack )// throw(std::bad_alloc,...)
: m_pstack( pstack )
{
}
scoped_pop_base( scoped_pop_base const &right ) // throw() // destructive copy
: m_pstack( right.m_pstack )
{
right.dismiss();
}
public:
void dismiss() const // throw()
{
m_pstack = 0;
}
};
template< typename T >
class scoped_pop_t : public scoped_pop_base
{
scoped_pop_t & operator=( scoped_pop_tconst & ); // disallow assignment
public:
scoped_pop_t( stack_type * pstack, T const& t ) // throw(std::bad_alloc,...)
: scoped_pop_base( pstack )
{
// Note that if this throws an exception thedestructor
// will not get called, which is what we want.
m_pstack->push( t );
}
~scoped_pop_t() // throw()
{
// If we own this stack space, pop it.
if( m_pstack )
m_pstack->template pop
}
};
template< typename T >
scoped_pop_t
{
return scoped_pop_t
}
typedef scoped_pop_base const & scoped_pop;
};
#ifdef _MSC_VER
#pragma warning( pop )
#endif
} // namespace regex
#endif
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: syntax2.h
//
// Contents: syntax modules for regexpr
//
// Classes: perl_syntax, posix_syntax
//
// Author: Eric Niebler ( [email protected])
//
// History: 3-29-00ericne Created
//
//----------------------------------------------------------------------------
#ifndef SYNTAX_H
#define SYNTAX_H
#ifdef _MSC_VER
#pragma warning( push )
// warning C4786: identifier was truncated to'255' characters in the debug information
#pragma warning( disable : 4786 )
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#ifndef ARRAYSIZE
# define ARRAYSIZE( a )(sizeof(a)/sizeof((a)[0]))
#endif
#ifndef UCHAR_MAX
# define UCHAR_MAX 0xff
#endif
#ifndef WCHAR_MAX
# define WCHAR_MAX ((wchar_t)-1)
#endif
#ifdef _MSC_VER
# include
# define REGEX_ASSERT(x) _ASSERTE(x)
# define REGEX_FORCEINLINE __forceinline
# define REGEX_SELECTANY __declspec(selectany)
# define REGEX_CDECL __cdecl
# define REGEX_SEH_TRY __try
# define REGEX_SEH_EXCEPT(x) __except( x )
# define REGEX_RESET_STK_OFLW() _resetstkoflw()
# if 1200 < _MSC_VER
# define REGEX_NOINLINE __declspec(noinline)
# define REGEX_DEPRECATED__declspec(deprecated)
# define REGEX_DEPENDENT_TYPENAME typename
# else
# define REGEX_NOINLINE
# define REGEX_DEPRECATED
# define REGEX_DEPENDENT_TYPENAME
# endif
#else
# include
# define REGEX_ASSERT(x) assert(x)
# define REGEX_NOINLINE
# define REGEX_FORCEINLINE inline
# define REGEX_SELECTANY
# define REGEX_CDECL
# define REGEX_SEH_TRY
# define REGEX_SEH_EXCEPT(x) if( false )
# define REGEX_RESET_STK_OFLW() ((void)0)
# define REGEX_DEPRECATED
# define REGEX_DEPENDENT_TYPENAME typename
#endif
#define REGEX_STRING(CharT,sz) (::regex::detail::literal
#define REGEX_CHAR(CharT,ch)(static_cast
#if defined(_MSC_VER) & _CPPLIB_VER <=310
namespace std
{
// Explicit iterator_traits specializations for raw narrow and wide character
// pointers. The pointer-to-const forms expose const-qualified pointer and
// reference types, so dereferencing through the traits never drops constness.
template<>
struct iterator_traits< char * >
{ // get traits from iterator _Iter
    typedef random_access_iterator_tag iterator_category;
    typedef char value_type;
    typedef ptrdiff_t difference_type;
    typedef difference_type distance_type; // retained
    typedef char * pointer;
    typedef char & reference;
};
template<>
struct iterator_traits< char const * >
{ // get traits from iterator _Iter
    typedef random_access_iterator_tag iterator_category;
    typedef char value_type;
    typedef ptrdiff_t difference_type;
    typedef difference_type distance_type; // retained
    typedef char const * pointer;
    typedef char const & reference;
};
template<>
struct iterator_traits< wchar_t * >
{ // get traits from iterator _Iter
    typedef random_access_iterator_tag iterator_category;
    typedef wchar_t value_type;
    typedef ptrdiff_t difference_type;
    typedef difference_type distance_type; // retained
    typedef wchar_t * pointer;
    typedef wchar_t & reference;
};
template<>
struct iterator_traits< wchar_t const * >
{ // get traits from iterator _Iter
    typedef random_access_iterator_tag iterator_category;
    typedef wchar_t value_type;
    typedef ptrdiff_t difference_type;
    typedef difference_type distance_type; // retained
    typedef wchar_t const * pointer;
    typedef wchar_t const & reference;
};
}
#endif
namespace regex
{
// Exception type for invalid regular expressions; the text passed to the
// constructor is forwarded to std::invalid_argument and returned by what().
class bad_regexpr : public std::invalid_argument
{
public:
    explicit bad_regexpr( std::string const & s )
        : std::invalid_argument( s ) {}
    virtual ~bad_regexpr() throw() {}
};
//
// Flags to control how matching occurs
//
enum REGEX_FLAGS
{
    NOFLAGS       = 0x0000,
    NOCASE        = 0x0001, // ignore case
    GLOBAL        = 0x0002, // match everywhere in the string
    MULTILINE     = 0x0004, // ^ and $ can match internal line breaks
    SINGLELINE    = 0x0008, // . can match newline character
    RIGHTMOST     = 0x0010, // start matching at the right of the string
    NOBACKREFS    = 0x0020, // only meaningful when used with GLOBAL and substitute
    FIRSTBACKREFS = 0x0040, // only meaningful when used with GLOBAL
    ALLBACKREFS   = 0x0080, // only meaningful when used with GLOBAL
    NORMALIZE     = 0x0100, // Preprocess patterns: "\\n" => "\n", etc.
    EXTENDED      = 0x0200  // ignore whitespace in pattern
};
// For backwards compatibility
REGEX_FLAGS const noflags = NOFLAGS;
// Helper operators that combine flags without leaving the REGEX_FLAGS type.
// Each one works on the unsigned bit pattern and converts the result back.
inline REGEX_FLAGS operator|( REGEX_FLAGS f1, REGEX_FLAGS f2 )
{
    return static_cast<REGEX_FLAGS>( static_cast<unsigned>( f1 ) | static_cast<unsigned>( f2 ) );
}
inline REGEX_FLAGS & operator|=( REGEX_FLAGS & f1, REGEX_FLAGS f2 )
{
    return f1 = ( f1 | f2 );
}
inline REGEX_FLAGS operator&( REGEX_FLAGS f1, REGEX_FLAGS f2 )
{
    return static_cast<REGEX_FLAGS>( static_cast<unsigned>( f1 ) & static_cast<unsigned>( f2 ) );
}
inline REGEX_FLAGS & operator&=( REGEX_FLAGS & f1, REGEX_FLAGS f2 )
{
    return f1 = ( f1 & f2 );
}
// operator~ is left out for MSVC versions up to 1200.
#if !defined(_MSC_VER) || 1200 < _MSC_VER
inline REGEX_FLAGS operator~( REGEX_FLAGS f )
{
    return static_cast<REGEX_FLAGS>( ~static_cast<unsigned>( f ) );
}
#endif
//
// The following are the tokens that can beemitted by the syntax module.
// Don't reorder this list!!!
//
enum TOKEN
{
NO_TOKEN = 0,
// REGULAR TOKENS
BEGIN_GROUP,
END_GROUP,
ALTERNATION,
BEGIN_LINE,
END_LINE,
BEGIN_CHARSET,
MATCH_ANY,
ESCAPE,
// QUANTIFICATION TOKENS
ONE_OR_MORE,
ZERO_OR_MORE,
ZERO_OR_ONE,
ONE_OR_MORE_MIN,
ZERO_OR_MORE_MIN,
ZERO_OR_ONE_MIN,
BEGIN_RANGE,
RANGE_SEPARATOR,
END_RANGE,
END_RANGE_MIN,
// ESCAPE SEQUENCES
ESC_DIGIT,
ESC_NOT_DIGIT,
ESC_SPACE,
ESC_NOT_SPACE,
ESC_WORD,
ESC_NOT_WORD,
ESC_BEGIN_STRING,
ESC_END_STRING,
ESC_END_STRING_z,
ESC_WORD_BOUNDARY,
ESC_NOT_WORD_BOUNDARY,
ESC_WORD_START,
ESC_WORD_STOP,
ESC_QUOTE_META_ON,
ESC_QUOTE_META_OFF,
// SUBSTITUTION TOKENS
SUBST_BACKREF,
SUBST_PREMATCH,
SUBST_POSTMATCH,
SUBST_MATCH,
SUBST_ESCAPE,
SUBST_QUOTE_META_ON,
SUBST_UPPER_ON,
SUBST_UPPER_NEXT,
SUBST_LOWER_ON,
SUBST_LOWER_NEXT,
SUBST_ALL_OFF,
// CHARSET TOKENS
CHARSET_NEGATE,
CHARSET_ESCAPE,
CHARSET_RANGE,
CHARSET_BACKSPACE,
CHARSET_END,
CHARSET_ALNUM,
CHARSET_NOT_ALNUM,
CHARSET_ALPHA,
CHARSET_NOT_ALPHA,
CHARSET_BLANK,
CHARSET_NOT_BLANK,
CHARSET_CNTRL,
CHARSET_NOT_CNTRL,
CHARSET_DIGIT,
CHARSET_NOT_DIGIT,
CHARSET_GRAPH,
CHARSET_NOT_GRAPH,
CHARSET_LOWER,
CHARSET_NOT_LOWER,
CHARSET_PRINT,
CHARSET_NOT_PRINT,
CHARSET_PUNCT,
CHARSET_NOT_PUNCT,
CHARSET_SPACE,
CHARSET_NOT_SPACE,
CHARSET_UPPER,
CHARSET_NOT_UPPER,
CHARSET_XDIGIT,
CHARSET_NOT_XDIGIT,
// EXTENSION TOKENS
EXT_NOBACKREF,
EXT_POS_LOOKAHEAD,
EXT_NEG_LOOKAHEAD,
EXT_POS_LOOKBEHIND,
EXT_NEG_LOOKBEHIND,
EXT_INDEPENDENT,
EXT_COMMENT,
EXT_CONDITION,
EXT_RECURSE,
EXT_UNKNOWN
};
namespace detail
{
template< typename CharT > structliteral;
template<> struct literal
{
static char const * string( char const * sz,wchar_t const * ) { return sz; }
template< char ch, wchar_t > structcharacter { enum { value = ch }; };
};
template<> struct literal
{
static wchar_t const * string( char const *,wchar_t const * sz ) { return sz; }
template< char, wchar_t ch > structcharacter { enum { value = ch }; };
};
struct posix_charset_type
{
char const * m_szcharset;
size_t cchars;
};
extern posix_charset_type constg_rgposix_charsets[];
extern size_t const g_cposix_charsets;
// Returns true when the NUL-terminated name szcharset matches the start of
// the range [icur, iend), i.e. every character of the name is present, in
// order, before the range runs out. An empty name always matches.
template< typename IterT >
bool is_posix_charset( IterT icur, IterT iend,char const * szcharset )
{
    while( char() != *szcharset )
    {
        // Input exhausted with name characters still pending: no match.
        if( iend == icur )
            return false;
        if( *icur != *szcharset )
            return false;
        ++icur;
        ++szcharset;
    }
    return true;
}
// Forward-declare the class that holds all theinformation
// about the set of characters that can bematched by a charset
struct charset;
void free_charset( charset const * );
template< typename CharT >
struct charset_map_node
{
std::basic_string
charset const * m_rgcharsets[2]; // 0==case,1==nocase
charset_map_node()
{
m_rgcharsets[0] = m_rgcharsets[1] = 0;
}
charset_map_node( charset_map_node const &node )
{
*this = node;
}
charset_map_node & operator=(charset_map_node const & node )
{
m_str = node.m_str;
m_rgcharsets[0] = node.m_rgcharsets[0];
m_rgcharsets[1] = node.m_rgcharsets[1];
return *this;
}
void set( std::basic_string
{
clear();
m_str = str;
}
void clear()
{
std::basic_string
free_charset( m_rgcharsets[0] );
free_charset( m_rgcharsets[1] );
m_rgcharsets[0] = m_rgcharsets[1] = 0;
}
};
template< typename CharT >
class charset_map
{
std::map
public:
typedef typename std::map
~charset_map()
{
for( iterator iter = m_map.begin(); m_map.end()!= iter; ++iter )
iter->second.clear();
}
charset_map_node
iterator begin() { return m_map.begin(); }
iterator end() { return m_map.end(); }
iterator find( CharT ch ) { return m_map.find(ch ); }
void erase( iterator iter ) { m_map.erase( iter); }
};
inline detail::charset_map
{
static detail::charset_map
return s_charset_map;
}
inline detail::charset_map
{
static detail::charset_map
return s_charset_map;
}
inline detail::charset_map
{
static detail::charset_map
return s_charset_map;
}
inline detail::charset_map
{
static detail::charset_map
return s_charset_map;
}
// True when ch is classified as whitespace by isspace().
inline bool regex_isspace( char ch )
{
    using namespace std;
    // isspace() requires a value representable as unsigned char (or EOF).
    // A plain char may be negative, so convert before the call.
    return 0 != isspace( static_cast<unsigned char>( ch ) );
}
// True when wch is classified as whitespace by iswspace().
inline bool regex_isspace( wchar_t wch )
{
    using namespace std;
    return 0 != iswspace( wch );
}
// Returns a reference to the larger argument as ordered by operator>;
// when neither is greater, rhs is returned.
template< typename T >
T const & regex_max( T const & lhs, T const & rhs )
{
    return ( lhs > rhs ) ? lhs : rhs;
}
// Returns a reference to the smaller of the two arguments, compared with
// operator<. When neither is smaller, `rhs` is returned.
// (The second parameter type had lost the space between `T` and `const`,
// which made the declaration ill-formed.)
template< typename T >
T const & regex_min( T const & lhs, T const & rhs )
{
    return ( lhs < rhs ) ? lhs : rhs;
}
} // namespace detail
//
// The perl_syntax class encapsulates the Perl5 regular expression syntax. It is
// used as a template parameter to basic_rpattern. To customize regex syntax, create
// your own syntax class and use it as a template parameter instead.
//
// Non-template base of perl_syntax. Declares two TOKEN lookup tables with
// UCHAR_MAX + 1 entries each and the look_up() helpers that index them by
// character.
class perl_syntax_base
{
protected:
// Only derived classes may construct the base.
perl_syntax_base()
{
}
// Token tables declared here and defined elsewhere; reg_token() consults
// s_rgreg first and s_rgescape for the character following an ESCAPE token.
static TOKEN const s_rgreg[ UCHAR_MAX + 1 ];
static TOKEN const s_rgescape[ UCHAR_MAX + 1 ];
// Narrow-character lookup.
// NOTE(review): the index expression is truncated after "static_cast" in
// this copy; restore it from the upstream header.
static TOKEN look_up( char ch, TOKEN const rg[])
{
return rg[ static_cast
}
// Wide-character lookup: characters above UCHAR_MAX have no table entry and
// map to NO_TOKEN.
// NOTE(review): the index expression is truncated after "static_cast".
static TOKEN look_up( wchar_t ch, TOKEN const rg[])
{
return UCHAR_MAX < ch ? NO_TOKEN : rg[static_cast
}
};
//--------------------------------------------------------------------------
//
// Class: perl_syntax
//
// Description: Module that encapsulates thePerl syntax
//
// Methods: eat_whitespace -
// min_quant -
// perl_syntax -
// perl_syntax -
// set_flags -
// get_flags -
// reg_token -
// quant_token -
// charset_token -
// subst_token -
// ext_token -
// get_charset_map -
// invalid_charset -
// register_intrinsic_charset -
// _invalid_charset -
// _invalid_charset -
//
// Members: m_flags -
// s_charset_map -
//
// Typedefs: iterator -
// const_iterator -
// char_type -
//
// History: 11/16/2001 - ericne - Created
//
//--------------------------------------------------------------------------
template< typename CharT >
class perl_syntax : protected perl_syntax_base
{
public:
typedef typenamestd::basic_string
typedef typenamestd::basic_string
typedef CharT char_type;
template< typename OtherT > struct rebind{ typedef perl_syntax
private:
REGEX_FLAGS m_flags;
// Skips ignorable text in front of the next token and returns the resulting
// position (icur is advanced in place). Does nothing unless the EXTENDED
// flag is set. In extended mode it skips runs of whitespace and '#'
// comments, where a comment extends up to and including the next newline.
//
// Fix: the inner comment-skipping loop had been mangled to
// "&®EX_CHAR(CharT,'/n')"; it is restored to "&& REGEX_CHAR(CharT,'\n')".
const_iterator eat_whitespace( iterator & icur, const_iterator iend )
{
    if( m_flags & EXTENDED )
    {
        while( iend != icur && ( REGEX_CHAR(CharT,'#') == *icur || detail::regex_isspace( *icur ) ) )
        {
            // The post-increment consumes the character that was just tested.
            if( REGEX_CHAR(CharT,'#') == *icur++ )
            {
                // Comment: consume through the terminating newline (or to the end).
                while( iend != icur && REGEX_CHAR(CharT,'\n') != *icur++ ) {}
            }
            else
            {
                // Whitespace run: consume the remaining whitespace characters.
                for( ; iend != icur && detail::regex_isspace( *icur ); ++icur ) {}
            }
        }
    }
    return icur;
}
// Called with icur on a quantifier character. Steps past it, skips ignorable
// whitespace, and reports whether a '?' follows (the "minimal" modifier).
// The '?' is consumed when present.
bool min_quant( iterator & icur, const_iterator iend )
{
    if( iend == eat_whitespace( ++icur, iend ) )
        return false;
    if( REGEX_CHAR(CharT,'?') != *icur )
        return false;
    ++icur;
    return true;
}
public:
perl_syntax( REGEX_FLAGS flags )
: m_flags( flags )
{
}
perl_syntax( perl_syntax
: m_flags( sy.m_flags )
{
}
void set_flags( REGEX_FLAGS flags )
{
m_flags = flags;
}
REGEX_FLAGS get_flags() const
{
return m_flags;
}
// Classifies the character at icur using the s_rgreg table, after skipping
// ignorable whitespace. A recognised character is consumed. When the token
// is ESCAPE and more input follows, the next character is looked up in
// s_rgescape: a hit consumes it and becomes the result, a miss leaves it in
// place and the result stays ESCAPE.
TOKEN reg_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );
    if( iend == eat_whitespace( icur, iend ) )
        return NO_TOKEN;

    TOKEN const first = look_up( *icur, s_rgreg );
    if( first )
        ++icur;
    if( ESCAPE != first || iend == icur )
        return first;

    TOKEN const escaped = look_up( *icur, s_rgescape );
    if( !escaped )
        return ESCAPE;
    ++icur;
    return escaped;
}
// Classifies a quantifier token at icur, after skipping ignorable
// whitespace. For '*', '+', '?' and '}' the character is consumed by
// min_quant(), which also consumes a trailing '?' and selects the *_MIN
// variant. '{' and ',' are consumed directly. Anything else yields NO_TOKEN
// and is left in place.
TOKEN quant_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );
    if( iend == eat_whitespace( icur, iend ) )
        return NO_TOKEN;

    switch( *icur )
    {
    case REGEX_CHAR(CharT,'*'):
        return min_quant( icur, iend ) ? ZERO_OR_MORE_MIN : ZERO_OR_MORE;
    case REGEX_CHAR(CharT,'+'):
        return min_quant( icur, iend ) ? ONE_OR_MORE_MIN : ONE_OR_MORE;
    case REGEX_CHAR(CharT,'?'):
        return min_quant( icur, iend ) ? ZERO_OR_ONE_MIN : ZERO_OR_ONE;
    case REGEX_CHAR(CharT,'}'):
        return min_quant( icur, iend ) ? END_RANGE_MIN : END_RANGE;
    case REGEX_CHAR(CharT,'{'):
        ++icur;
        return BEGIN_RANGE;
    case REGEX_CHAR(CharT,','):
        ++icur;
        return RANGE_SEPARATOR;
    default:
        return NO_TOKEN;
    }
}
// Classifies the token at icur while parsing the inside of a character set.
// Recognised characters are consumed; NO_TOKEN means the character at icur
// has no special meaning here and is left in place.
// NOTE(review): this copy is damaged in two ways -- the escape case label
// reads '//' (presumably a backslash literal before the text was mangled),
// and the POSIX-class loop header and its condition are truncated.
TOKEN charset_token( iterator & icur,const_iterator iend )
{
REGEX_ASSERT( iend != icur );
TOKEN tok = NO_TOKEN;
switch( *icur )
{
case REGEX_CHAR(CharT,'-'):
tok = CHARSET_RANGE;
++icur;
break;
case REGEX_CHAR(CharT,'^'):
tok = CHARSET_NEGATE;
++icur;
break;
case REGEX_CHAR(CharT,']'):
tok = CHARSET_END;
++icur;
break;
// NOTE(review): '//' is a multi-character literal as written; presumably
// the escape character -- confirm against upstream.
case REGEX_CHAR(CharT,'//'):
tok = CHARSET_ESCAPE;
// An escape character at the very end of input stays a bare CHARSET_ESCAPE.
if( iend == ++icur )
break;
// Otherwise a recognised escape letter replaces the token and is consumed;
// an unrecognised one leaves tok == CHARSET_ESCAPE with icur on that letter.
switch( *icur )
{
case REGEX_CHAR(CharT,'b'):
tok = CHARSET_BACKSPACE;
++icur;
break;
case REGEX_CHAR(CharT,'d'):
tok = ESC_DIGIT;
++icur;
break;
case REGEX_CHAR(CharT,'D'):
tok = ESC_NOT_DIGIT;
++icur;
break;
case REGEX_CHAR(CharT,'s'):
tok = ESC_SPACE;
++icur;
break;
case REGEX_CHAR(CharT,'S'):
tok = ESC_NOT_SPACE;
++icur;
break;
case REGEX_CHAR(CharT,'w'):
tok = ESC_WORD;
++icur;
break;
case REGEX_CHAR(CharT,'W'):
tok = ESC_NOT_WORD;
++icur;
break;
}
break;
case REGEX_CHAR(CharT,'['):
// Peeks at the character after '[': the pre-increment moves forward, the
// post-decrement restores icur. There is no check against iend before this
// dereference.
if( REGEX_CHAR(CharT,':') == *( ++icur )-- )
{
// NOTE(review): loop header truncated after "i"; it iterates over the
// detail::g_rgposix_charsets table until a token is found.
for( size_t i=0; !tok && i
{
// NOTE(review): call truncated after "is_posix_charset".
if(detail::is_posix_charset
{
// Token values for the POSIX classes are offsets from CHARSET_ALNUM; the
// matched class name (cchars characters) is consumed.
tok = TOKEN( CHARSET_ALNUM + i );
std::advance( icur,detail::g_rgposix_charsets[i].cchars );
}
}
}
break;
}
return tok;
}
// Classifies the token at icur inside a substitution string.
//   backslash + Q/U/u/L/l/E -> the corresponding SUBST_* case/quote operator
//   backslash + anything else (or at end of input) -> SUBST_ESCAPE
//   '$' + '&' / '`' / '\'' -> SUBST_MATCH / SUBST_PREMATCH / SUBST_POSTMATCH
//   '$' + anything else (or at end of input) -> SUBST_BACKREF
// The introducing character is always consumed; the following character is
// consumed only when it was recognised. Any other character yields NO_TOKEN
// and is left in place.
//
// Fix: the backslash case label had been mangled to '//' (a multi-character
// literal that never matches a backslash) and the quote label to '/''
// (a syntax error); both are restored to proper escape sequences.
TOKEN subst_token( iterator & icur, const_iterator iend )
{
    REGEX_ASSERT( iend != icur );
    TOKEN tok = NO_TOKEN;
    switch( *icur )
    {
    case REGEX_CHAR(CharT,'\\'):
        tok = SUBST_ESCAPE;
        if( iend != ++icur )
            switch( *icur )
            {
            case REGEX_CHAR(CharT,'Q'):
                tok = SUBST_QUOTE_META_ON;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'U'):
                tok = SUBST_UPPER_ON;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'u'):
                tok = SUBST_UPPER_NEXT;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'L'):
                tok = SUBST_LOWER_ON;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'l'):
                tok = SUBST_LOWER_NEXT;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'E'):
                tok = SUBST_ALL_OFF;
                ++icur;
                break;
            }
        break;
    case REGEX_CHAR(CharT,'$'):
        tok = SUBST_BACKREF;
        if( iend != ++icur )
            switch( *icur )
            {
            case REGEX_CHAR(CharT,'&'):
                tok = SUBST_MATCH;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'`'):
                tok = SUBST_PREMATCH;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'\''):
                tok = SUBST_POSTMATCH;
                ++icur;
                break;
            }
        break;
    }
    return tok;
}
// Classifies an extension introduced by '?' at icur (after skipping
// ignorable whitespace). Returns NO_TOKEN when icur is not on '?'.
// Otherwise the '?' is consumed and the following character selects the
// EXT_* token; EXT_UNKNOWN is returned when nothing recognisable follows.
// The default branch parses inline flag letters and updates m_flags as a
// side effect.
TOKEN ext_token( iterator & icur,const_iterator iend )
{
REGEX_ASSERT( iend != icur );
if( iend == eat_whitespace( icur, iend ) )
return NO_TOKEN;
// Only meaningful inside the default branch: true while flag letters are
// being switched on, false once a '-' has been seen.
bool finclude;
TOKEN tok = NO_TOKEN;
if( REGEX_CHAR(CharT,'?') == *icur )
{
tok = EXT_UNKNOWN;
++icur;
// In extended mode plain whitespace (but not '#' comments) may follow the
// '?'; a '#' here is the comment-group marker handled below.
if( m_flags & EXTENDED )
for( ; iend != icur &&detail::regex_isspace( *icur ); ++icur ) {}
if( iend != icur )
{
switch( *icur )
{
case REGEX_CHAR(CharT,':'):
tok = EXT_NOBACKREF;
++icur;
break;
case REGEX_CHAR(CharT,'='):
tok = EXT_POS_LOOKAHEAD;
++icur;
break;
case REGEX_CHAR(CharT,'!'):
tok = EXT_NEG_LOOKAHEAD;
++icur;
break;
case REGEX_CHAR(CharT,'#'):
tok = EXT_COMMENT;
++icur;
break;
case REGEX_CHAR(CharT,'('):
tok = EXT_CONDITION;
++icur;
break;
case REGEX_CHAR(CharT,'R'):
tok = EXT_RECURSE;
++icur;
break;
case REGEX_CHAR(CharT,'<'):
// '<' must be followed by '=' or '!'; otherwise tok stays EXT_UNKNOWN.
if( iend == eat_whitespace( ++icur, iend ) )
break;
switch( *icur )
{
case REGEX_CHAR(CharT,'='):
tok = EXT_POS_LOOKBEHIND;
++icur;
break;
case REGEX_CHAR(CharT,'!'):
tok = EXT_NEG_LOOKBEHIND;
++icur;
break;
}
break;
case REGEX_CHAR(CharT,'>'):
tok = EXT_INDEPENDENT;
++icur;
break;
default:
// Inline flags: a sequence of i / m / s / x letters, optionally with one
// '-' that switches from setting to clearing flags.
finclude = true;
do
{
// ':' ends the flag list and is consumed.
if( REGEX_CHAR(CharT,':') == *icur )
{
tok = EXT_NOBACKREF;
++icur;
break;
}
// ')' also ends the flag list but is left in place for the caller.
if( REGEX_CHAR(CharT,')') == *icur )
{
tok = EXT_NOBACKREF;
break;
}
// Only the first '-' is accepted; a second one falls through to the final
// else and terminates the loop with tok still EXT_UNKNOWN.
if( REGEX_CHAR(CharT,'-') == *icur &&finclude )
finclude = false;
else if( REGEX_CHAR(CharT,'i') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? (m_flags | NOCASE ) : ( m_flags & ~NOCASE ) );
else if( REGEX_CHAR(CharT,'m') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? (m_flags | MULTILINE ) : ( m_flags & ~MULTILINE ) );
else if( REGEX_CHAR(CharT,'s') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? (m_flags | SINGLELINE ) : ( m_flags & ~SINGLELINE ) );
else if( REGEX_CHAR(CharT,'x') == *icur )
m_flags = ( REGEX_FLAGS ) ( finclude ? (m_flags | EXTENDED ) : ( m_flags & ~EXTENDED ) );
else
break;
} while( iend != eat_whitespace( ++icur, iend ));
break;
}
}
}
return tok;
}
// Functions used for making user-defined intrinsic character sets
static detail::charset_map
{
return detail::get_perl_charset_map( CharT() );
}
static bool invalid_charset( CharT ch )
{
return _invalid_charset( ch );
}
// Registers a user-defined intrinsic charset under the character `ch`.
// Throws bad_regexpr when invalid_charset( ch ) is true, or when the first
// token of the pattern is not BEGIN_CHARSET.
// NOTE(review): the signature and five statements are truncated in this
// copy (template arguments and the rest of each line are missing), so the
// exact map update cannot be read from here; restore from upstream.
// NOTE(review): the message string literals below have lost a space
// ("characterspecified", "ofcharset"); they are runtime text and are left
// untouched here.
static void register_intrinsic_charset( CharTch, std::basic_string
{
perl_syntax sy( NOFLAGS );
if( invalid_charset( ch ) )
throw bad_regexpr( "invalid characterspecified to register_intrinsic_charset" );
// NOTE(review): truncated -- presumably a local copy of the pattern (`pat`).
std::basic_string
// NOTE(review): truncated -- presumably the `ibegin` iterator into `pat`.
typenamestd::basic_string
if( BEGIN_CHARSET != sy.reg_token( ibegin,pat.end() ) )
throw bad_regexpr( "expecting beginning ofcharset" );
// NOTE(review): the next three statements are truncated.
regex::detail::charset_map
regex::detail::charset_map_node
map_node.set( std::basic_string
}
private:
// Narrow overload: a character is rejected when it already has an entry in
// s_rgescape, is a digit, or is one of 'e', 'x', 'c'.
// NOTE(review): the table index expression is truncated after "static_cast".
// NOTE(review): isdigit( ch ) receives a plain char; negative values are
// undefined behaviour for <cctype> functions -- consider casting to
// unsigned char once the block is restored.
static bool _invalid_charset( char ch )
{
using namespace std;
return NO_TOKEN != s_rgescape[static_cast
|| isdigit( ch ) || 'e' == ch || 'x' == ch ||'c' == ch;
}
// Wide overload: only characters up to UCHAR_MAX can be invalid; those are
// forwarded to another _invalid_charset call.
// NOTE(review): the forwarding call is truncated after "static_cast".
static bool _invalid_charset( wchar_t ch )
{
return UCHAR_MAX >= ch &&_invalid_charset( static_cast
}
};
//--------------------------------------------------------------------------
//
// Class: posix_syntax
//
// Description: Implements the basic POSIX regular expression syntax
//
// Methods: posix_syntax -
// posix_syntax -
// get_flags -
// set_flags -
// reg_token -
// quant_token -
// charset_token -
// subst_token -
// ext_token -
// get_charset_map -
// invalid_charset -
// register_intrinsic_charset -
//
// Members: m_flags -
// s_charset_map -
//
// Typedefs: iterator -
// const_iterator -
// char_type -
//
// History: 11/16/2001 - ericne - Created
//
// --------------------------------------------------------------------------
template< typename CharT >
class posix_syntax
{
REGEX_FLAGS m_flags;
public:
typedef typenamestd::basic_string
typedef typenamestd::basic_string
typedef CharT char_type;
template< typename OtherT > struct rebind{ typedef posix_syntax
posix_syntax( REGEX_FLAGS flags )
: m_flags( flags )
{
}
posix_syntax( posix_syntax
: m_flags( sy.m_flags )
{
}
REGEX_FLAGS get_flags() const
{
return m_flags;
}
void set_flags( REGEX_FLAGS flags )
{
m_flags = flags;
}
// Classifies the regular token at icur for the basic POSIX syntax.
//   '.'  -> MATCH_ANY        '^' -> BEGIN_LINE
//   '$'  -> END_LINE         '[' -> BEGIN_CHARSET
//   backslash followed by '(' / ')' / '|' -> BEGIN_GROUP / END_GROUP /
//   ALTERNATION; backslash followed by anything else (or at end of input)
//   -> ESCAPE, with only the backslash consumed.
// Any other character yields NO_TOKEN and is left in place.
//
// Fix: the backslash case label had been mangled to '//' (a multi-character
// literal that never compares equal to a backslash); restored to '\\'.
TOKEN reg_token( iterator & icur, const_iterator iend )
{
    TOKEN tok = NO_TOKEN;
    switch( *icur )
    {
    case REGEX_CHAR(CharT,'.'):
        tok = MATCH_ANY;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'^'):
        tok = BEGIN_LINE;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'$'):
        tok = END_LINE;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'['):
        tok = BEGIN_CHARSET;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'\\'):
        tok = ESCAPE;
        ++icur;
        if( iend != icur )
        {
            switch( *icur )
            {
            case REGEX_CHAR(CharT,'('):
                tok = BEGIN_GROUP;
                ++icur;
                break;
            case REGEX_CHAR(CharT,')'):
                tok = END_GROUP;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'|'):
                tok = ALTERNATION;
                ++icur;
                break;
            }
        }
        break;
    }
    return tok;
}
// Classifies a quantifier token at icur for the basic POSIX syntax.
//   '*' -> ZERO_OR_MORE, ',' -> RANGE_SEPARATOR (both consumed)
//   backslash followed by '?' / '+' / '{' / '}' -> ZERO_OR_ONE /
//   ONE_OR_MORE / BEGIN_RANGE / END_RANGE (both characters consumed)
// A backslash followed by anything else, or at end of input, is not a
// quantifier: icur is moved back onto the backslash and NO_TOKEN is
// returned.
//
// Fix: the backslash case label had been mangled to '//' (a multi-character
// literal that never compares equal to a backslash); restored to '\\'.
TOKEN quant_token( iterator & icur, const_iterator iend )
{
    TOKEN tok = NO_TOKEN;
    switch( *icur )
    {
    case REGEX_CHAR(CharT,'*'):
        tok = ZERO_OR_MORE;
        ++icur;
        break;
    case REGEX_CHAR(CharT,','):
        tok = RANGE_SEPARATOR;
        ++icur;
        break;
    case REGEX_CHAR(CharT,'\\'):
        ++icur;
        if( iend != icur )
        {
            switch( *icur )
            {
            case REGEX_CHAR(CharT,'?'):
                tok = ZERO_OR_ONE;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'+'):
                tok = ONE_OR_MORE;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'{'):
                tok = BEGIN_RANGE;
                ++icur;
                break;
            case REGEX_CHAR(CharT,'}'):
                tok = END_RANGE;
                ++icur;
                break;
            default:
                // Not a quantifier escape: un-consume the backslash.
                --icur;
                break;
            }
        }
        else
        {
            // Trailing backslash: un-consume it.
            --icur;
        }
    }
    return tok;
}
// Classifies the token at icur while parsing the inside of a character set
// (basic POSIX syntax). Recognised characters are consumed; NO_TOKEN means
// the character at icur is left in place.
// NOTE(review): the POSIX-class loop header and its condition are truncated
// in this copy; restore them from the upstream header.
TOKEN charset_token( iterator & icur,const_iterator iend )
{
TOKEN tok = NO_TOKEN;
switch( *icur )
{
case REGEX_CHAR(CharT,'^'):
tok = CHARSET_NEGATE;
++icur;
break;
case REGEX_CHAR(CharT,'-'):
tok = CHARSET_RANGE;
++icur;
break;
case REGEX_CHAR(CharT,']'):
tok = CHARSET_END;
++icur;
break;
case REGEX_CHAR(CharT,'['):
// Peeks at the character after '[': the pre-increment moves forward, the
// post-decrement restores icur. There is no check against iend before this
// dereference.
if( REGEX_CHAR(CharT,':') == *( ++icur )-- )
{
// NOTE(review): loop header truncated after "i"; it iterates over the
// detail::g_rgposix_charsets table until a token is found.
for( size_t i=0; !tok && i
{
// NOTE(review): call truncated after "is_posix_charset".
if(detail::is_posix_charset
{
// Token values for the POSIX classes are offsets from CHARSET_ALNUM; the
// matched class name (cchars characters) is consumed.
tok = TOKEN( CHARSET_ALNUM + i );
std::advance( icur,detail::g_rgposix_charsets[i].cchars );
}
}
}
break;
}
return tok;
}
// Classifies the token at icur inside a substitution string (basic POSIX
// syntax). A backslash is consumed and yields SUBST_BACKREF when a decimal
// digit follows (the digit itself is left for the caller), SUBST_ESCAPE
// otherwise. Any other character yields NO_TOKEN and is left in place.
//
// Fix: the backslash literal had been mangled to '//' and the digit test to
// "&®EX_CHAR(...)"; restored to '\\' and "&& REGEX_CHAR(...)".
TOKEN subst_token( iterator & icur, const_iterator iend )
{
    TOKEN tok = NO_TOKEN;
    if( REGEX_CHAR(CharT,'\\') == *icur )
    {
        tok = SUBST_ESCAPE;
        ++icur;
        if( iend != icur && REGEX_CHAR(CharT,'0') <= *icur && REGEX_CHAR(CharT,'9') >= *icur )
        {
            tok = SUBST_BACKREF;
        }
    }
    return tok;
}
TOKEN ext_token( iterator &, const_iterator)
{
return NO_TOKEN;
}
// Functions for making user-defined intrinsic character sets
static detail::charset_map
{
return detail::get_posix_charset_map( CharT());
}
static bool invalid_charset( CharT ch )
{
return _invalid_charset( ch );
}
// Registers a user-defined intrinsic charset under the character `ch`.
// Throws bad_regexpr when invalid_charset( ch ) is true, or when the first
// token of the pattern is not BEGIN_CHARSET.
// NOTE(review): the signature and five statements are truncated in this
// copy (template arguments and the rest of each line are missing), so the
// exact map update cannot be read from here; restore from upstream.
// NOTE(review): the message string literals below have lost a space
// ("characterspecified", "ofcharset"); they are runtime text and are left
// untouched here.
static void register_intrinsic_charset( CharTch, std::basic_string
{
posix_syntax sy( NOFLAGS );
if( invalid_charset( ch ) )
throw bad_regexpr( "invalid characterspecified to register_intrinsic_charset" );
// NOTE(review): truncated -- presumably a local copy of the pattern (`pat`).
std::basic_string
// NOTE(review): truncated -- presumably the `ibegin` iterator into `pat`.
typenamestd::basic_string
if( BEGIN_CHARSET != sy.reg_token( ibegin,pat.end() ) )
throw bad_regexpr( "expecting beginning ofcharset" );
// NOTE(review): the next three statements are truncated.
regex::detail::charset_map
regex::detail::charset_map_node
map_node.set( std::basic_string
}
private:
// Narrow overload: tests `ch` against the fixed list s_invalid.
// NOTE(review): the return statement is truncated after "std::char_traits".
// NOTE(review): the "//" inside s_invalid is presumably a mangled backslash
// escape, as elsewhere in this copy; it is runtime text and is left
// untouched here -- confirm against upstream.
static bool _invalid_charset( char ch )
{
static char const s_invalid[] ="0123456789()|?+{}//exc";
return 0 !=std::char_traits
}
// Wide overload: only characters up to UCHAR_MAX can be invalid; those are
// forwarded to another _invalid_charset call.
// NOTE(review): the forwarding call is truncated after "static_cast".
static bool _invalid_charset( wchar_t ch )
{
return UCHAR_MAX >= ch && _invalid_charset(static_cast
}
};
} // namespace regex
#ifdef _MSC_VER
#pragma warning( pop )
#endif
#endif
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: reimpl2.h
//
// Functions: helpers for matching and substituting regular expressions
//
// Notes: implementation details that really belong in a cpp file,
// but can't because of template weirdness
//
// Author: Eric Niebler ( [email protected])
//
// History: 8/15/2001 ericne Created
//
//----------------------------------------------------------------------------
#ifndef REIMPL_H
#define REIMPL_H
//
// Helper functions for match and substitute
//
namespace detail
{
// For use while doing uppercase/lowercase conversions:
// Upper-cases a single narrow character with toupper().
// Fixes: "usingnamespace" had lost its space (ill-formed), and the argument
// is now converted to unsigned char because passing a negative char to the
// <cctype> functions is undefined behaviour.
inline char regex_toupper( char ch )
{
    using namespace std;
    return static_cast<char>( toupper( static_cast<unsigned char>( ch ) ) );
}
// Lower-cases a single narrow character with tolower().
// Fixes: "usingnamespace" had lost its space (ill-formed), and the argument
// is now converted to unsigned char because passing a negative char to the
// <cctype> functions is undefined behaviour.
inline char regex_tolower( char ch )
{
    using namespace std;
    return static_cast<char>( tolower( static_cast<unsigned char>( ch ) ) );
}
// Upper-cases a single wide character with towupper().
inline wchar_t regex_toupper( wchar_t ch )
{
    using namespace std;
    return static_cast<wchar_t>( towupper( ch ) );
}
// Lower-cases a single wide character with towlower().
inline wchar_t regex_tolower( wchar_t ch )
{
    using namespace std;
    return static_cast<wchar_t>( towlower( ch ) );
}
// Upper-cases every element of the range [ibegin, iend) in place, assigning
// through traits_type::assign and converting with the single-character
// regex_toupper overloads.
// NOTE(review): both typedef lines are truncated in this copy (template
// arguments and the typedef names are missing); the second one presumably
// defines traits_type, which the loop uses.
template< typename IBeginT, typename IEndT>
inline void regex_toupper( IBeginT ibegin,IEndT iend )
{
typedef typenamestd::iterator_traits
typedef std::char_traits
for( ; iend != ibegin; ++ibegin )
traits_type::assign( *ibegin, regex_toupper(*ibegin ) );
}
// Lower-cases every element of the range [ibegin, iend) in place, assigning
// through traits_type::assign and converting with the single-character
// regex_tolower overloads.
// NOTE(review): both typedef lines are truncated in this copy (template
// arguments and the typedef names are missing); the second one presumably
// defines traits_type, which the loop uses.
template< typename IBeginT, typename IEndT>
inline void regex_tolower( IBeginT ibegin,IEndT iend )
{
typedef typenamestd::iterator_traits
typedef std::char_traits
for( ; iend != ibegin; ++ibegin )
traits_type::assign( *ibegin, regex_tolower(*ibegin ) );
}
//
// Helper fn for swapping two auto_ptr's
//
// Exchanges what two std::auto_ptr objects own, going through a temporary
// (`temp`) because auto_ptr assignment transfers ownership.
// NOTE(review): the parameter list and the declaration of `temp` are
// truncated in this copy; the body uses parameters named left and right.
// NOTE(review): std::auto_ptr was removed in C++17; this code needs an older
// language mode or a port to std::unique_ptr.
template< typename T >
inline void swap_auto_ptr(std::auto_ptr
{
std::auto_ptr
left = right;
right = temp;
}
// Assigns a freshly declared std::auto_ptr (`temp`) to `left`, which
// releases whatever `left` owned before.
// NOTE(review): the parameter list and the declaration of `temp` are
// truncated in this copy, so what `temp` is initialised with cannot be read
// from here.
template< typename T >
inline void reset_auto_ptr(std::auto_ptr
{
std::auto_ptr
left = temp;
}
// Two-type overload: assigns a freshly declared std::auto_ptr (`temp`) to
// `left`, which releases whatever `left` owned before.
// NOTE(review): the parameter list and the declaration of `temp` are
// truncated in this copy; presumably `temp` wraps a pointer argument of
// type U -- confirm against upstream.
template< typename T, typename U >
inline void reset_auto_ptr(std::auto_ptr
{
std::auto_ptr
left = temp;
}
typedef int instantiator;
// Accepts any number of arguments of any type and returns a
// default-constructed `instantiator`. The instantiate() member of
// basic_rpattern_base_impl passes member-function addresses to it.
//
// Fix: the calling-convention macro and the function name had been fused
// into the single token "REGEX_CDECLinstantiator_helper", so no function
// named instantiator_helper was declared.
inline instantiator REGEX_CDECL instantiator_helper( ... )
{
    return instantiator();
}
//--------------------------------------------------------------------------
//
// Class: match_param
//
// Description: Struct that contains the state of the matching operation.
// Passed by reference to all recursive_match_all and recursive_match_this routines.
//
// Methods: match_param - ctor
//
// Members: ibufferbegin - start of the buffer
// ibegin - start of this iteration
// iend - end of the string
// prgbackrefs - pointer to backref array
//
// History: 8/14/2000 - ericne - Created
//
//--------------------------------------------------------------------------
// Plain state bundle for one matching operation: buffer boundaries, current
// position, the backref array and its length, and the sub-expression
// pointers used while walking the pattern.
// NOTE(review): the two typedef lines are truncated in this copy; they
// presumably define backref_type and sub_expr_ptr, which the members use.
template< typename IterT >
struct match_param
{
typedef backref_tag
typedef sub_expr_base
// for performance reasons, the most frequently used fields
// are placed at offsets which are a power of 2 (assuming
// a 32-bit architecture, and iterators which are 32 bits).
backref_type * m_prgbackrefs; // offsetof == 0
IterT m_iend; // offsetof == 4
IterT m_icur; // offsetof == 8
size_t m_cbackrefs;
sub_expr_ptr m_pnext; // offsetof == 16
IterT m_ibufferbegin;
IterT m_imatchbegin;
sub_expr_ptr m_pfirst;
unsafe_stack * m_pstack; // offsetof == 32
bool m_no0len;
bool m_reserved;
// Stores the given boundaries and backref array. m_icur starts at
// imatchbegin; the sub-expression pointers and the stack pointer start
// null; both bool flags start false.
match_param
(
IterT ibufferbegin,
IterT imatchbegin,
IterT iend,
backref_type * prgbackrefs,
size_t cbackrefs
)
: m_prgbackrefs( prgbackrefs )
, m_iend( iend )
, m_icur( imatchbegin )
, m_cbackrefs( cbackrefs )
, m_pnext( 0 )
, m_ibufferbegin( ibufferbegin )
, m_imatchbegin( imatchbegin )
, m_pfirst( 0 )
, m_pstack( 0 )
, m_no0len( false )
, m_reserved( false )
{
}
};
//--------------------------------------------------------------------------
//
// Class: arena_allocator
//
// Description: A small, fast allocator for speeding up pattern compilation.
// Every basic_rpattern object has an arena as a member.
// sub_expr objects can only be allocated from this arena.
// Memory is alloc'ed in chunks using the underlying allocator.
// Chunks are freed en-masse when clear() or finalize() is called.
//
// History: 8/17/2001 - ericne - Created
//
// Notes: This is NOT a std-compliant allocator and CANNOT be used with
// STL containers. arena_allocator objects maintain state, and
// STL containers are allowed to assume their allocators do
// not maintain state. In regexpr2.cpp, I define slist<>, a simple
// arena-friendly singly-linked list for use with the arena
// allocator.
//
//--------------------------------------------------------------------------
// Backing store for arena_allocator: a singly linked list of memory blocks
// obtained from a char allocator.
// NOTE(review): the template header's default argument, the
// char_allocator_type typedef and the body of get_allocator() const are
// truncated in this copy; restore them from the upstream header.
template< typename AllocT =std::allocator
struct pool_impl
{
// NOTE(review): truncated -- presumably defines char_allocator_type.
typedef typename rebind
// Header placed at the start of every block. m_data marks where the
// payload begins (new_block() sizes a block as offsetof( mem_block, m_data )
// plus the payload size).
struct mem_block
{
size_t m_offset;
size_t m_blocksize;
mem_block * m_pnext;
unsigned char m_data[ 1 ];
};
// Two layouts of pool_data, selected by compiler: on anything other than
// MSVC up to version 1200 it derives from the allocator; otherwise it holds
// the allocator as a member.
#if !defined(_MSC_VER) | 1200 < _MSC_VER
struct pool_data : char_allocator_type
{
pool_data( size_t default_size,char_allocator_type const & alloc )
: char_allocator_type( alloc )
, m_pfirst( 0 )
, m_default_size( default_size )
{
}
mem_block * m_pfirst;
size_t m_default_size;
char_allocator_type & get_allocator()
{
return *this;
}
} m_data;
#else
struct pool_data
{
pool_data( size_t default_size,char_allocator_type const & alloc )
: m_alloc( alloc )
, m_pfirst( 0 )
, m_default_size( default_size )
{
}
char_allocator_type m_alloc;
mem_block * m_pfirst;
size_t m_default_size;
char_allocator_type & get_allocator()
{
return m_alloc;
}
} m_data;
#endif
// Defined out of line further down in this file.
void new_block( size_t size );
void clear();
void * allocate( size_t size );
explicit pool_impl( size_t default_size,char_allocator_type const & alloc = char_allocator_type() );
~pool_impl();
// Returns a copy of the allocator.
// NOTE(review): the return statement is truncated after "const_cast".
char_allocator_type get_allocator() const
{
returnconst_cast
}
};
// Allocator front end over a pool_impl held through the raw pointer m_pool.
// Copies of an arena_allocator share the same pool; the pool is destroyed
// only by an explicit finalize() call, never by the destructor.
// NOTE(review): the template header's default argument, three typedefs, the
// converting constructor's parameter type and several statements are
// truncated in this copy; restore them from the upstream header.
template< typename T, typename AllocT =std::allocator
class arena_allocator
{
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef T *pointer;
typedef T const *const_pointer;
typedef T & reference;
typedef T const & const_reference;
typedef T value_type;
// NOTE(review): the next three typedefs are truncated; they presumably
// define char_alloc_type, pool_impl_t and pool_alloc_type, which are used
// below.
typedef typename rebind
typedef pool_impl
typedef typename rebind
// Allocates and constructs the shared pool using an allocator converted
// from `alloc`.
// NOTE(review): the pool_alloc initialiser is truncated.
explicit arena_allocator( size_t default_size,char_alloc_type const & alloc = char_alloc_type() )
: m_pool( 0 )
{
char_alloc_type char_alloc( alloc );
pool_alloc_type pool_alloc(convert_allocator
m_pool = pool_alloc.allocate( 1, 0 );
pool_alloc.construct( m_pool, pool_impl_t(default_size, char_alloc ) ); // can't throw
}
// Copying shares the pool pointer.
#if !defined(_MSC_VER) | 1200 < _MSC_VER
arena_allocator( arena_allocator const &that )
: m_pool( that.m_pool )
{
}
#endif
// NOTE(review): presumably the converting constructor from
// arena_allocator of another element type; the parameter's template
// arguments are missing in this copy.
template< typename U >
arena_allocator( arena_allocator const& that )
: m_pool( that.m_pool )
{
}
~arena_allocator()
{ // Many arena_allocators may point to m_pool, so don't delete it.
} // Rather, wait for someone to call finalize().
// NOTE(review): the return statement is truncated after "static_cast".
pointer allocate( size_type size, void const *=0 )
{
return static_cast
}
void deallocate( void *, size_type )
{ // no-op. deallocation happens when pool is finalized or cleared.
}
// NOTE(review): the placement-new expression is truncated.
void construct( pointer p, T const & t )
{
new( static_cast
}
void destroy( pointer p )
{
regex::detail::destroy( p );
}
#if !defined(_MSC_VER) | 1200 < _MSC_VER
// NOTE(review): `other` presumably named arena_allocator with U as its
// element type before the template arguments were lost.
template< typename U > struct rebind
{
typedef arena_allocator other;
};
#endif
// Forwards to the pool's clear().
void clear()
{
m_pool->clear();
}
// Destroys and deallocates the shared pool and nulls m_pool. Other
// arena_allocator copies still hold the old pointer afterwards.
// NOTE(review): the pool_alloc initialiser is truncated.
void finalize()
{
char_alloc_type char_alloc(m_pool->get_allocator() );
pool_alloc_type pool_alloc(convert_allocator
pool_alloc.destroy( m_pool );
pool_alloc.deallocate( m_pool, 1 );
m_pool = 0;
}
// Exchanges the pool pointers of two allocators.
void swap( arena_allocator & that )
{
using std::swap;
swap( m_pool, that.m_pool );
}
// the pool lives here
pool_impl_t * m_pool;
};
// Placeholder polymorphic type: the pool allocator uses it when aligning
// the pointers it hands out. Its only member is a virtual destructor.
struct not_pod
{
    virtual ~not_pod()
    {
    }
};
// Out-of-line definition that forwards default_size and alloc to m_data.
// NOTE(review): the declarator is truncated after "pool_impl"; judging by
// the initialiser list this is the pool_impl constructor -- confirm against
// upstream.
template< typename AllocT >
inline pool_impl
: m_data( default_size, alloc )
{
}
// Out-of-line definition whose whole body is a call to clear().
// NOTE(review): the declarator is truncated after "pool_impl"; presumably
// this is the pool_impl destructor -- confirm against upstream.
template< typename AllocT >
inline pool_impl
{
clear();
}
// Walks the block list from m_data.m_pfirst and hands every block back to
// the allocator; m_pfirst is null when the loop ends.
// NOTE(review): the declarator is truncated after "pool_impl" (presumably
// pool_impl::clear), and the deallocate() call is truncated after
// "reinterpret_cast".
template< typename AllocT >
inline void pool_impl
{
for( mem_block * pnext; m_data.m_pfirst;m_data.m_pfirst = pnext )
{
// Read the link before the block is released.
pnext = m_data.m_pfirst->m_pnext;
m_data.get_allocator().deallocate(reinterpret_cast
}
}
// Obtains a new block and pushes it onto the front of the block list. The
// block size is the larger of the pool's default size and the requested
// size, plus the mem_block header up to m_data. Throws std::bad_alloc when
// the allocation yields a null pointer.
// NOTE(review): the declarator is truncated after "pool_impl" (presumably
// pool_impl::new_block, taking `size`), and the allocation expression is
// truncated after "reinterpret_cast".
template< typename AllocT >
inline void pool_impl
{
size_t blocksize = regex_max(m_data.m_default_size, size ) + offsetof( mem_block, m_data );
mem_block * pnew =reinterpret_cast
if( 0 == pnew )
{
throw std::bad_alloc();
}
pnew->m_offset = 0;
pnew->m_blocksize = blocksize;
pnew->m_pnext = m_data.m_pfirst;
m_data.m_pfirst = pnew;
}
// Carves `size` bytes out of the current (first) block and returns a
// pointer to them, starting a new block when there is no block yet or the
// request does not fit. A zero-byte request is treated as one byte.
// NOTE(review): the declarator is truncated after "pool_impl" (presumably
// pool_impl::allocate), and the offset-rounding expression is truncated
// after "alignof".
// NOTE(review): the fit test compares against m_default_size, not the
// current block's m_blocksize -- confirm this is intended for blocks that
// new_block() made larger than the default.
template< typename AllocT >
inline void *pool_impl
{
if( 0 == size )
size = 1;
if( 0 == m_data.m_pfirst ||m_data.m_pfirst->m_offset + size > m_data.m_default_size )
new_block( size );
void * pnew = m_data.m_pfirst->m_data +m_data.m_pfirst->m_offset;
// ensure returned pointers are always suitably aligned
m_data.m_pfirst->m_offset += ( ( size +alignof
& ~( alignof
return pnew;
}
// The regex_arena is a basic, vanilla arena_allocator.
typedef arena_allocator
template< typename T >
type_with_size<3> allocator_picker(arena_allocator
template<> struct rebind_helper<3>
{
template< typename, typename ElemT>
struct inner
{
typedef arena_allocator
};
};
//--------------------------------------------------------------------------
//
// Class: sub_expr_base
//
// Description: patterns are "compiled" into a directed graph of sub_expr_base
// structs. Matching is accomplished by traversing this graph.
//
// Methods: ~sub_expr_base - virt dtor so cleanup happens correctly
// recursive_match_all - match this sub-expression and all following
// sub-expression
//
// History: 8/14/2000 - ericne - Created
//
// --------------------------------------------------------------------------
// Abstract base of every node in a compiled pattern. It declares six
// virtual matching entry points (an _s and a _c variant of each of three
// operations), arena-based operator new/delete, and non-virtual overloads
// that forward to the _s or _c variant.
// NOTE(review): every signature mentioning match_param is truncated in this
// copy (template arguments and the rest of the parameter list are missing);
// restore them from the upstream header.
template< typename IterT >
struct sub_expr_base
{
virtual bool recursive_match_all_s(match_param
virtual bool recursive_match_all_c(match_param
virtual bool iterative_match_this_s(match_param
virtual bool iterative_match_this_c(match_param
virtual bool iterative_rematch_this_s(match_param
virtual bool iterative_rematch_this_c(match_param
// Pure virtual destructor; a definition is supplied out of line.
virtual ~sub_expr_base() = 0; // (offset 24)
// Use the regex_arena for memory management
static void * operator new( size_t size,regex_arena & arena )
{
return arena.allocate( size );
}
// Matching placement delete: intentionally does nothing.
static void operator delete( void *,regex_arena & )
{
}
// Invoke the d'tor, but don't bother freeing memory. That will
// happen automatically when the arena object gets destroyed.
static void operator delete( void * )
{
}
// For choosing an appropriate virtual function based on a compile time constant
// NOTE(review): the overloads in each pair differ only in a truncated
// trailing parameter, which presumably carries that constant.
bool recursive_match_all(match_param
{
return recursive_match_all_s( param, icur );
}
bool recursive_match_all(match_param
{
return recursive_match_all_c( param, icur );
}
bool iterative_match_this(match_param
{
return iterative_match_this_s( param );
}
bool iterative_match_this(match_param
{
return iterative_match_this_c( param );
}
bool iterative_rematch_this(match_param
{
return iterative_rematch_this_s( param );
}
bool iterative_rematch_this(match_param
{
return iterative_rematch_this_c( param );
}
private:
// don't allocate sub-expressions directly on the heap; they should
// be allocated from an arena
static void * operator new( size_t size )throw( std::bad_alloc );
// disable all the vector new's and delete's.
static void * operator new[]( size_t size,regex_arena & arena ) throw( std::bad_alloc );
static void operator delete[]( void *, regex_arena& );
static void * operator new[]( size_t size )throw( std::bad_alloc );
static void operator delete[]( void * );
};
// Empty out-of-line definition.
// NOTE(review): the declarator is truncated and fused ("inlinesub_expr_base");
// presumably this is the definition of the pure virtual destructor declared
// in sub_expr_base -- confirm against upstream.
template< typename IterT >
inlinesub_expr_base
{
}
//--------------------------------------------------------------------------
//
// Class: subst_node
//
// Description: Substitution strings are parsed into an array of these
// structures in order to speed up subst operations.
//
// Members: stype - type of this struct
// .m_subst_string - do a string substitution
// .m_subst_backref - do a backref substitution
// op - execute an operation
//
// History: 8/14/2000 - ericne - Created
//
//--------------------------------------------------------------------------
// One element of a parsed substitution string. m_stype says which member of
// the anonymous union is meaningful.
struct subst_node
{
// Special negative values; presumably stored in m_subst_backref to select
// the text before / after the match rather than a numbered group -- TODO
// confirm against the code that builds the node list.
enum
{
PREMATCH = -1,
POSTMATCH = -2
};
// Discriminator for the union below.
enum subst_type
{
SUBST_STRING,
SUBST_BACKREF,
SUBST_OP
};
// Case-conversion operations; the values mirror the SUBST_* tokens of the
// same name.
enum op_type
{
UPPER_ON = SUBST_UPPER_ON,
UPPER_NEXT = SUBST_UPPER_NEXT,
LOWER_ON = SUBST_LOWER_ON,
LOWER_NEXT = SUBST_LOWER_NEXT,
ALL_OFF = SUBST_ALL_OFF
};
// A start offset and a length; presumably relative to the substitution
// string -- TODO confirm.
struct string_offsets
{
ptrdiff_t m_rstart;
ptrdiff_t m_rlength;
};
subst_type m_stype;
union
{
string_offsets m_subst_string;
size_t m_subst_backref;
op_type m_op;
};
};
typedef std::list
size_t DEFAULT_BLOCK_SIZE();
template< typename IterT >
class boyer_moore;
//--------------------------------------------------------------------------
//
// Class: basic_rpattern_base_impl
//
// Description:
//
// Methods: basic_rpattern_base_impl - ctor
// flags - get the state of the flags
// uses_backrefs - true if the backrefs arereferenced
// get_first_subexpression - return ptr tofirst sub_expr struct
// get_width - get min/max nbr chars thispattern can match
// loops - if false, we only need to try tomatch at 1st position
// cgroups - number of visible groups
// _cgroups_total - total number of groups,including hidden ( ?: ) groups
// get_pat - get string representing thepattern
// get_subst - get string representing thesubstitution string
// get_subst_list - get the list of subst nodes
// _normalize_string - perform characterescaping
//
// Members: m_fuses_backrefs - true if subststring refers to backrefs
// m_floop - false if pat only needs to bematched in one place
// m_cgroups - total count of groups
// m_cgroups_visible - count of visible groups
// m_flags - the flags
// m_nwidth - width of this pattern
// m_pat - pattern string
// m_subst - substitution string
// m_subst_list - list of substitution nodes
// m_pfirst - ptr to first subexpression tomatch
//
// Typedefs: char_type -
// string_type -
// size_type -
//
// History: 8/14/2000 - ericne - Created
//
//--------------------------------------------------------------------------
// Non-copyable base that owns the state of a compiled pattern: the arena
// the sub-expressions live in, the pattern and substitution strings, the
// flags and mode, group counts, and the first sub-expression pointer.
// NOTE(review): many declarations in this class are truncated in this copy
// (template arguments and the rest of the line are missing); restore them
// from the upstream header before compiling.
template< typename IterT >
class basic_rpattern_base_impl
{
// Private and (as far as visible) undefined: copying is disabled.
// NOTE(review): both declarations are truncated.
basic_rpattern_base_impl(basic_rpattern_base_impl
basic_rpattern_base_impl & operator=(basic_rpattern_base_impl
protected:
// NOTE(review): the typedefs below are truncated; they presumably define
// char_type, traits_type, string_type, backref_type and a backref vector
// type (names taken from their uses in regex_access).
typedef typenamestd::iterator_traits
typedef std::char_traits
typedef std::basic_string
typedef size_t size_type;
typedef backref_tag
typedef std::vector
friend struct regex_access
// Initialises an empty pattern: counts at zero, m_floop and
// m_fok_to_recurse true, heap-allocated copies of pat and subst, and no
// first sub-expression or search object.
explicit basic_rpattern_base_impl
(
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT,
string_type const & pat = string_type(),
string_type const & subst = string_type()
) //throw()
: m_arena( DEFAULT_BLOCK_SIZE() )
, m_fuses_backrefs( false )
, m_floop( true )
, m_fok_to_recurse( true )
, m_cgroups( 0 )
, m_cgroups_visible( 0 )
, m_flags( flags )
, m_mode( mode )
, m_nwidth( uninit_width() )
, m_pat( new string_type( pat ) )
, m_subst( new string_type( subst ) )
, m_subst_list()
, m_pfirst( 0 )
, m_invisible_groups()
, m_search( 0 )
{
}
virtual ~basic_rpattern_base_impl()
{
// We're not going to be calling destructors because all allocated
// memory associated with the parsed pattern resides in the arena.
// The memory will be freed when the arena gets destroyed.
//delete m_pfirst;
reset_auto_ptr( m_pat );
reset_auto_ptr( m_subst );
m_arena.finalize();
}
regex_arena m_arena; // The sub_expr arena
bool m_fuses_backrefs; // true if the substitution uses backrefs
bool m_floop; // false if m_pfirst->recursive_match_all only needs to be called once
bool m_fok_to_recurse; // false if the pattern would recurse too deeply
size_t m_cgroups; // number of groups ( always at least one )
size_t m_cgroups_visible; // number of visible groups
REGEX_FLAGS m_flags; // flags used to customize search/replace
REGEX_MODE m_mode; // Used to pick the fast or safe algorithm
width_type m_nwidth; // width of the pattern
// NOTE(review): the next two declarations are truncated; presumably m_pat
// and m_subst.
std::auto_ptr
std::auto_ptr
subst_list_type m_subst_list; // used to speed up substitution
// NOTE(review): the next three declarations are truncated; presumably
// m_pfirst, m_invisible_groups and m_search.
sub_expr_base
std::list
boyer_moore
// Total number of groups, including invisible ones.
size_t _cgroups_total() const //throw()
{
return m_cgroups;
}
bool _loops() const //throw()
{
return m_floop;
}
// Returns the current group count and then increments it.
size_t _get_next_group_nbr()
{
return m_cgroups++;
}
// Processes escapes in str only when the NORMALIZE flag is set.
void _normalize_string( string_type & str )const //throw()
{
if( NORMALIZE & flags() )
process_escapes( str, true );
}
// Backrefs are saved when the substitution uses them or NOBACKREFS is not
// set.
bool _save_backrefs() const //throw()
{
return m_fuses_backrefs || ! ( flags() &NOBACKREFS );
}
// NOTE(review): truncated declarator; the body returns m_pfirst.
sub_expr_base
{
return m_pfirst;
}
REGEX_FLAGS flags() const //throw()
{
return m_flags;
}
REGEX_MODE mode() const // throw()
{
return m_mode;
}
width_type get_width() const //throw()
{
return m_nwidth;
}
// Number of visible groups.
size_t cgroups() const //throw()
{
return m_cgroups_visible;
}
string_type const & get_pat() const//throw()
{
return *m_pat;
}
string_type const & get_subst() const//throw()
{
return *m_subst;
}
bool _ok_to_recurse() const; //throw();
// NOTE(review): the swap declaration and the npos enumerator are truncated.
void swap(basic_rpattern_base_impl
enum { npos = static_cast
// Passes the addresses of the out-of-line members to
// instantiator_helper, presumably to force their instantiation -- TODO
// confirm.
static instantiator instantiate()
{
typedef basic_rpattern_base_impl this_type;
return instantiator_helper
(
&this_type::_ok_to_recurse,
&this_type::swap
);
}
};
template< typename IterT >
struct regex_access
{
typedef basic_rpattern_base_impl< IterT >rpattern_type;
typedef typename rpattern_type::size_typesize_type;
typedef typename rpattern_type::char_typechar_type;
typedef typename rpattern_type::traits_typetraits_type;
typedef typename rpattern_type::backref_typebackref_type;
static bool _do_match_iterative_helper_s
(
sub_expr_base
match_param
IterT icur
);
static bool _do_match_iterative_helper_c
(
sub_expr_base
match_param
IterT icur
);
static bool _do_match_recursive_s
(
sub_expr_base
match_param
IterT icur
);
static bool _do_match_recursive_c
(
sub_expr_base
match_param
IterT icur
);
static bool _do_match_impl
(
rpattern_type const & pat,
match_param
bool const use_null
);
static bool _do_match_with_stack
(
rpattern_type const & pat,
match_param
bool const use_null
);
// Removes the entries of "invisible" groups from the backref vector so that
// only visible groups remain.
// NOTE(review): the two parameter declarations and the iter_type typedef
// are truncated in this copy; the body uses parameters named rgbackrefs (a
// vector) and invisible (a list whose elements are used as indices into
// rgbackrefs).
// NOTE(review): the copy offsets assume `invisible` is in ascending order
// -- confirm against the code that fills it.
template< typename Alloc1T, typename Alloc2T>
static void _fixup_backrefs
(
std::vector
std::list
)
{
typedef typenamestd::list
// Remove information about the "invisible" groups
if( rgbackrefs[0].matched )
{
// Compact in place: for each invisible index, slide the entries that
// follow it (up to the next invisible index, or to the end) left by the
// number of entries dropped so far plus one.
size_t dropped = 0;
iter_type const end = invisible.end();
iter_type curr = invisible.begin(), next =invisible.begin();
for( ; end != curr; curr = next, ++dropped )
{
if( end == ++next )
{
std::copy(
rgbackrefs.begin() + *curr + 1,
rgbackrefs.end(),
rgbackrefs.begin() + *curr - dropped );
}
else
{
std::copy(
rgbackrefs.begin() + *curr + 1,
rgbackrefs.begin() + *next,
rgbackrefs.begin() + *curr - dropped );
}
}
rgbackrefs.resize( rgbackrefs.size() - dropped);
}
else
{
// No match: the contents are not compacted, only the size is reduced.
rgbackrefs.resize( rgbackrefs.size() -invisible.size() );
}
}
// Runs one match attempt. Sizes the backref vector to the pattern's total
// group count, points param at it, picks the recursive or the stack-based
// matcher depending on pat._ok_to_recurse(), and finally strips the
// invisible groups from the results.
// NOTE(review): the param and rgbackrefs parameter declarations and the
// reset value in the overflow handler are truncated in this copy.
template< typename AllocT >
static bool _do_try_match
(
rpattern_type const & pat,
match_param
std::vector
bool const use_null
)
{
bool success;
rgbackrefs.resize( pat._cgroups_total() );
param.m_prgbackrefs = & rgbackrefs[0];
param.m_cbackrefs = rgbackrefs.size();
REGEX_SEH_TRY
{
if( pat._ok_to_recurse() )
{
success = _do_match_impl( pat, param, use_null);
}
else
{
success = _do_match_with_stack( pat, param,use_null );
}
}
REGEX_SEH_EXCEPT( REGEX_SEH_STACK_OVERFLOW ==_exception_code() )
{
// we have overflowed the stack. reset the guard page.
REGEX_RESET_STK_OFLW();
// This match fails silently.
// Every backref is overwritten before reporting failure.
for( size_t i=0; i < param.m_cbackrefs; ++i)
{
param.m_prgbackrefs[i] =static_init
}
success = false;
}
_fixup_backrefs( rgbackrefs,pat.m_invisible_groups );
return success;
}
// Matches pat against [ibegin, iend) and stores the backrefs in results.
// Without the GLOBAL flag this is a single _do_try_match call. With GLOBAL
// it matches repeatedly, each attempt starting where the previous match
// ended, and what is kept depends on the backref flags:
//   FIRSTBACKREFS - backref 0 of every match,
//   ALLBACKREFS   - all backrefs of every match,
//   neither       - the backrefs of the most recent match only.
// In global mode the function returns true when the final backref vector is
// non-empty.
// NOTE(review): the results parameter, the backref_vector typedef and the
// match_param declaration are truncated in this copy.
template< typename AllocT >
static bool _do_match
(
rpattern_type const & pat,
basic_match_results
IterT ibegin,
IterT iend,
bool use_null
)
{
typedef typenamebasic_match_results
results.m_ibegin = ibegin;
match_param
if( GLOBAL & pat.flags() ) // do a global find
{
// The NOBACKREFS flag is ignored in the match method.
bool const fAll = ( ALLBACKREFS == (ALLBACKREFS & pat.flags() ) );
bool const fFirst = ( FIRSTBACKREFS == (FIRSTBACKREFS & pat.flags() ) );
backref_vector rgtempbackrefs(results.m_rgbackrefs.get_allocator() );
while( _do_try_match( pat, param,results.m_rgbackrefs, use_null ) )
{
backref_type const & br =param.m_prgbackrefs[0];
// Handle specially the backref flags
if( fFirst )
{
rgtempbackrefs.push_back( br );
}
else if( fAll )
{
rgtempbackrefs.insert(
rgtempbackrefs.end(),
results.m_rgbackrefs.begin(),
results.m_rgbackrefs.end() );
}
else
{
rgtempbackrefs.swap( results.m_rgbackrefs );
}
// Resume after this match; m_no0len records that this match was empty.
param.m_imatchbegin = br.second;
param.m_no0len = ( br.first == br.second );
}
// restore the backref vectors
results.m_rgbackrefs.swap( rgtempbackrefs );
return ! results.m_rgbackrefs.empty();
}
else
{
return _do_try_match( pat, param, results.m_rgbackrefs,use_null );
}
}
// Matches `pat` against the NUL-terminated string szbegin by forwarding to
// _do_match. For RIGHTMOST patterns the end of the string is computed up front
// and passed with use_null == false; otherwise a null end iterator is passed
// with use_null == true.
// NOTE(review): the `basic_match_results` parameter line is cut off (template
// arguments, the parameter name `results` and the trailing comma appear lost)
// -- restore from the upstream source.
template< typename AllocT >
static bool _do_match_c
(
rpattern_type const & pat,
basic_match_results
char_type const * szbegin
)
{
if( RIGHTMOST & pat.flags() )
{
// We need to know the end of the string if we're doing a
// RIGHTMOST match.
char_type const * szend = szbegin;
std::advance( szend, traits_type::length(szbegin ) );
return _do_match( pat, results, szbegin, szend,false );
}
else
{
return _do_match( pat, results, szbegin, 0,true );
}
}
// Counts the matches of `pat` in [ibegin, iend): match attempts are repeated,
// each starting where the previous match ended, until one fails.
// NOTE(review): the `std::vector` and `match_param` declarations are cut off
// (the declarations of `rgbackrefs` and `param` appear lost) -- restore from
// the upstream source.
static size_t _do_count
(
rpattern_type const & pat,
IterT ibegin,
IterT iend,
bool use_null
)
{
size_t cmatches = 0;
std::vector
// If your compile breaks here, it is because CharT const * is not
// convertible to type IterT. Check the declaration of your rpattern object.
match_param
while( _do_try_match( pat, param, rgbackrefs,use_null ) )
{
backref_type const & br =param.m_prgbackrefs[0];
++cmatches;
// Resume after this match; forbid another empty match at the same spot.
param.m_imatchbegin = br.second;
param.m_no0len = ( br.first == br.second );
}
return cmatches;
}
// Splits [ibegin, iend) on matches of `pat`, filling results.strings() with
// the text between matches followed by the text of each group (index >= 1) of
// the delimiter match. Returns the number of strings produced.
// `limit` is decremented per field while positive and stops the loop at 1;
// when it ends up 0, trailing empty strings are removed.
// NOTE(review): the `basic_split_results` parameter line, the typedefs, the
// `std::vector` / `convert_allocator` lines and the `match_param` declaration
// are cut off (template arguments and the declarations of results, string_type,
// char_allocator_type, rgbackrefs and param appear lost) -- restore from the
// upstream source.
template< typename CharT, typename TraitsT,typename AllocT >
static size_t _do_split
(
rpattern_type const & pat,
basic_split_results
IterT ibegin,
IterT iend,
int limit,
bool use_null
)
{
typedef typename basic_split_results
typedef typename rebind
std::vector
convert_allocator
typedef typename rebind
char_allocator_type char_allocator =
convert_allocator
// reserve some initial space
results.strings().clear();
results.strings().reserve( 10 );
match_param
while( 1 != limit && _do_try_match(pat, param, rgbackrefs, use_null ) )
{
backref_type const & br =param.m_prgbackrefs[0];
param.m_no0len = ( br.first == br.second );
// discard zero-width matches at the beginning and end of the buffer
if( param.m_no0len )
{
// if we're at the beginning, skip
if( br.first == param.m_ibufferbegin )
continue;
// if we're at the end, break
if( use_null ? 0 == *param.m_imatchbegin :param.m_imatchbegin == param.m_iend )
break;
}
// The field is the text from the current start position to the delimiter.
string_type tmp( param.m_imatchbegin, br.first,char_allocator );
results.strings().push_back( tmp );
param.m_imatchbegin = br.second;
// add any groups
for( size_t i = 1; i < rgbackrefs.size();++i )
{
backref_type const & br = rgbackrefs[i];
string_type tmp( br.first, br.second,char_allocator );
results.strings().push_back( tmp );
}
if( limit > 0 )
--limit;
}
// append the last string, unless it's empty and limit is 0
if( use_null )
{
if( *param.m_imatchbegin || 0 != limit )
results.strings().push_back( string_type(&*param.m_imatchbegin, char_allocator ) );
}
else
{
if( param.m_imatchbegin != param.m_iend || 0 !=limit )
results.strings().push_back( string_type(param.m_imatchbegin, param.m_iend, char_allocator ) );
}
// remove trailing empty fields
if( 0 == limit )
{
while( results.size() &&results.back().empty() )
{
results.strings().pop_back();
}
}
return results.size();
}
// Performs one substitution in `str`: the `strlen` characters starting at
// `strpos` are replaced by the expansion of pat.m_subst_list (literal pieces,
// back-references, and upper/lower-casing operators). Returns the length of
// the text that was substituted in.
// The first expanded piece replaces the matched range; later pieces are
// inserted behind it. If the list yields no piece at all the matched range is
// simply erased.
// NOTE(review): the `std::basic_string` / `basic_subst_results` parameter lines
// and every `typename std::basic_string` local declaration are cut off
// (template arguments and the declarations of str, results, subst, itstrlen,
// itstrpos, itsubpos1/2, itsublen1/2, ibegin and iend appear lost) -- restore
// from the upstream source.
// NOTE(review): the parameter named `strlen` shadows the C library function of
// the same name inside this body.
template< typename CharT, typename TraitsT,typename AllocT >
static size_t _do_subst_internal
(
std::basic_string
basic_subst_results
rpattern_type const & pat,
size_type strpos,
size_type strlen
)
{
typedef subst_list_type::const_iterator iter_type;
// `next` affects only the first character of the next piece and is then
// cleared; `rest` stays in effect until changed by another operator.
enum { UPPER = -1, NIL, LOWER } next = NIL,rest = NIL;
bool first = true;
size_t old_strpos = strpos;
typename std::basic_string
std::advance( itstrlen, strpos + strlen );
std::basic_string
for( iter_type isubst =pat.m_subst_list.begin(); pat.m_subst_list.end() != isubst; ++isubst )
{
size_t sublen = 0;
typename std::basic_string
typename std::basic_string
typenamestd::basic_string
typenamestd::basic_string
typename std::basic_string
std::advance( itstrpos, strpos );
switch( isubst->m_stype )
{
case subst_node::SUBST_STRING:
// Literal piece: the range [m_rstart, m_rstart + m_rlength) of `subst`.
itsubpos2 = subst.begin();
std::advance( itsubpos2,isubst->m_subst_string.m_rstart );
itsublen2 = itsubpos2;
std::advance( itsublen2,isubst->m_subst_string.m_rlength );
if( first )
str.replace( itstrpos, itstrlen, itsubpos2,itsublen2 );
else
str.insert( itstrpos, itsubpos2, itsublen2 );
sublen = std::distance( itsubpos2, itsublen2 );
break;
case subst_node::SUBST_BACKREF:
// Back-reference piece: select a range of results.backref_str().
switch( isubst->m_subst_backref )
{
case subst_node::PREMATCH:
// everything before the match
itsubpos1 = results.backref_str().begin();
itsublen1 = itsubpos1;
std::advance( itsublen1, sublen =results.rstart() );
break;
case subst_node::POSTMATCH:
// everything after the match
itsubpos1 = results.backref_str().begin();
std::advance( itsubpos1, results.rstart() +results.rlength() );
itsublen1 = results.backref_str().end();
break;
default:
// a numbered group
itsubpos1 = results.backref_str().begin();
std::advance( itsubpos1, results.rstart(isubst->m_subst_backref ) );
itsublen1 = itsubpos1;
std::advance( itsublen1, results.rlength(isubst->m_subst_backref ) );
break;
}
if( first )
str.replace( itstrpos, itstrlen, itsubpos1,itsublen1 );
else
str.insert( itstrpos, itsubpos1, itsublen1 );
sublen = std::distance( itsubpos1, itsublen1 );
break;
case subst_node::SUBST_OP:
// Case-conversion operator: only records state, emits no text.
switch( isubst->m_op )
{
case subst_node::UPPER_ON:
rest = UPPER;
break;
case subst_node::UPPER_NEXT:
next = UPPER;
break;
case subst_node::LOWER_ON:
rest = LOWER;
break;
case subst_node::LOWER_NEXT:
next = LOWER;
break;
case subst_node::ALL_OFF:
rest = NIL;
break;
default:
REGEX_ASSERT(false);
break;
}
continue; // jump to the next item in the list
default:
REGEX_ASSERT(false);
break;
}
first = false;
// Are we upper- or lower-casing this string?
if( rest )
{
typename std::basic_string
std::advance( ibegin, strpos );
typename std::basic_string
std::advance( iend, sublen );
switch( rest )
{
case UPPER:
regex_toupper( ibegin, iend );
break;
case LOWER:
regex_tolower( ibegin, iend );
break;
default:
REGEX_ASSERT(false);
break;
}
}
// Are we upper- or lower-casing the next character?
if( next )
{
// NOTE(review): str[strpos] is touched even when the piece just written is
// empty (sublen == 0) -- confirm that this is intended.
switch( next )
{
case UPPER:
traits_type::assign( str[strpos],regex_toupper( str[strpos] ) );
break;
case LOWER:
traits_type::assign( str[strpos],regex_tolower( str[strpos] ) );
break;
default:
REGEX_ASSERT(false);
break;
}
next = NIL;
}
// Later pieces are written behind the one just emitted.
strpos += sublen;
}
// If *first* is still true, then we never called str.replace, and the substitution
// string is empty. Erase the part of the string that the pattern matched.
if( first )
str.erase( strpos, strlen );
// return length of the substitution
return strpos - old_strpos;
}
// Substitutes matches of `pat` inside `str`, considering the `len` characters
// that start at `pos` (through the end of the string when len is npos), and
// returns the number of substitutions made. With the GLOBAL flag every match
// in the range is substituted, otherwise at most one.
// When pat._save_backrefs() is true the string is first copied into
// results.m_backref_str and matching runs over that copy while `str` is
// edited; otherwise matching runs over `str` itself.
// NOTE(review): the `std::basic_string` / `basic_subst_results` parameter
// lines, both typedefs and the `match_param` declaration are cut off (template
// arguments and the declarations of str, results, backref_type/backref_vector
// and param appear lost) -- restore from the upstream source.
template< typename CharT, typename TraitsT,typename AllocT >
static size_t _do_subst
(
rpattern_type const & pat,
std::basic_string
basic_subst_results
size_type pos,
size_type len
)
{
typedef std::basic_string
typedef typename basic_subst_results
results.m_pbackref_str = pat._save_backrefs() ?&( results.m_backref_str = str ) : &str;
results.m_ibegin =results.m_pbackref_str->begin();
size_t csubst = 0;
// Offset at which matching must stop: end of string, or pos + len if smaller.
size_type stop_offset =results.m_pbackref_str->size();
if( len != rpattern_type::npos )
stop_offset = regex_min( size_t( pos + len ),stop_offset );
match_param
std::advance( param.m_imatchbegin, pos );
std::advance( param.m_iend, stop_offset );
param.m_ibufferbegin = param.m_imatchbegin;
if( GLOBAL & pat.flags() )
{
bool const fAll = ( ALLBACKREFS == (ALLBACKREFS & pat.flags() ) );
bool const fFirst = ( FIRSTBACKREFS == (FIRSTBACKREFS & pat.flags() ) );
backref_vector rgtempbackrefs(results.m_rgbackrefs.get_allocator() ); // temporary vector used if fsave_backrefs
size_type pos_offset = 0; // keep track of how much the backref_str and
// the current string are out of sync
while( _do_try_match( pat, param,results.m_rgbackrefs, false ) )
{
backref_type const & br =param.m_prgbackrefs[0];
++csubst;
size_type match_length = std::distance(br.first, br.second );
pos = std::distance( results.m_ibegin, br.first);
size_type subst_length = _do_subst_internal(str, results, pat, pos + pos_offset, match_length );
if( pat._save_backrefs() )
{
// Matching runs over the untouched copy, so step past the match in the copy
// and remember how far `str` has drifted from it.
pos += match_length;
pos_offset += ( subst_length - match_length );
// Handle specially the backref flags
if( fFirst )
{
rgtempbackrefs.push_back( br );
}
else if( fAll )
{
rgtempbackrefs.insert(
rgtempbackrefs.end(),
results.m_rgbackrefs.begin(),
results.m_rgbackrefs.end() );
}
else
{
rgtempbackrefs.swap( results.m_rgbackrefs );
}
}
else
{
// Matching runs over `str` itself, which has just been edited: step past the
// substituted text, move the stop offset, and refresh the begin iterator.
pos += subst_length;
stop_offset += ( subst_length - match_length );
results.m_ibegin =results.m_pbackref_str->begin();
// we're not saving backref information, so we don't
// need to do any special backref maintenance here
}
// prevent a pattern that matches 0 characters from matching
// again at the same point in the string
param.m_no0len = ( 0 == match_length );
param.m_imatchbegin = results.m_ibegin;
std::advance( param.m_imatchbegin, pos ); //ineffecient for bidirectional iterators.
param.m_iend = results.m_ibegin;
std::advance( param.m_iend, stop_offset ); //ineffecient for bidirectional iterators.
}
// If we did special backref handling, swap the backref vectors
if( pat._save_backrefs() )
{
results.m_rgbackrefs.swap( rgtempbackrefs );
}
else if( ! results.m_rgbackrefs[0].matched )
{
results.m_rgbackrefs.clear();
}
}
else if( _do_try_match( pat, param,results.m_rgbackrefs, false ) )
{
// Non-global: substitute the single match that was found.
backref_type const & br =param.m_prgbackrefs[0];
++csubst;
_do_subst_internal(
str, results, pat,
std::distance( results.m_ibegin, br.first ),
std::distance( br.first, br.second ) );
results.m_ibegin =results.m_pbackref_str->begin();
}
if( NOBACKREFS == ( pat.flags() &NOBACKREFS ) )
{
results.m_rgbackrefs.clear();
}
return csubst;
}
// Returns the `instantiator` produced by instantiator_helper from the
// addresses of the six matching entry points of regex_access.
// NOTE(review): taking these addresses presumably exists to force the member
// functions to be instantiated for this IterT -- confirm against the
// definition of instantiator_helper.
static instantiator instantiate()
{
    return instantiator_helper
    (
        & regex_access::_do_match_iterative_helper_s,
        & regex_access::_do_match_iterative_helper_c,
        & regex_access::_do_match_recursive_s,
        & regex_access::_do_match_recursive_c,
        & regex_access::_do_match_with_stack,
        & regex_access::_do_match_impl
    );
}
};
//
// Some helper functions needed byprocess_escapes
//
// Returns true when ch is one of the hexadecimal digits 0-9, a-f or A-F.
// Each range test compares ch against the REGEX_CHAR literals of its bounds.
template< typename CharT >
inline bool regex_isxdigit( CharT ch )
{
    return ( REGEX_CHAR(CharT,'0') <= ch && REGEX_CHAR(CharT,'9') >= ch )
        || ( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'f') >= ch )
        || ( REGEX_CHAR(CharT,'A') <= ch && REGEX_CHAR(CharT,'F') >= ch );
}
// Converts a hexadecimal digit to its numeric value: a-f and A-F map to 10-15,
// anything else is treated as a decimal digit and mapped to ch - '0'.
// The caller is expected to have validated ch (e.g. with regex_isxdigit); no
// range check is made for the decimal case.
template< typename CharT >
inline int regex_xdigit2int( CharT ch )
{
    if( REGEX_CHAR(CharT,'a') <= ch && REGEX_CHAR(CharT,'f') >= ch )
        return ch - REGEX_CHAR(CharT,'a') + 10;
    if( REGEX_CHAR(CharT,'A') <= ch && REGEX_CHAR(CharT,'F') >= ch )
        return ch - REGEX_CHAR(CharT,'A') + 10;
    return ch - REGEX_CHAR(CharT,'0');
}
} // namespace detail
//--------------------------------------------------------------------------
//
// Function: process_escapes
//
// Description: Turn the escape sequences \f \n \r \t \v \\ into their
// ASCII character equivalents. Also, optionally process
// perl escape sequences.
//
// Returns: void
//
// Arguments: str - the string to process
// fPattern - true if the string is to be processed as a regex
//
// Notes: When fPattern is true, the perl escape sequences are not
// processed. If there is an octal or hex escape sequence, we
// don't want to turn it into a regex metacharacter here. We
// leave it unescaped so the regex parser correctly interprets
// it as a character literal.
//
// History: 8/1/2001 - ericne - Created
//
//--------------------------------------------------------------------------
template< typename CharT, typename TraitsT,typename AllocT >
inline void process_escapes(std::basic_string
{
typedef typename std::basic_string
size_type i = 0;
size_type const npos =std::basic_string
if( str.empty() )
return;
while( npos != ( i = str.find(REGEX_CHAR(CharT,'//'), i ) ) )
{
if( str.size() - 1 == i )
return;
switch( str[i+1] )
{
case REGEX_CHAR(CharT,'a'):
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/a') );
break;
case REGEX_CHAR(CharT,'b'):
if( ! fPattern )
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/b') );
else
++i;
break;
case REGEX_CHAR(CharT,'e'):
str.replace( i, 2, 1, CharT( 27 ) );
break;
case REGEX_CHAR(CharT,'f'):
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/f') );
break;
case REGEX_CHAR(CharT,'n'):
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/n') );
break;
case REGEX_CHAR(CharT,'r'):
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/r') );
break;
case REGEX_CHAR(CharT,'t'):
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/t') );
break;
case REGEX_CHAR(CharT,'v'):
str.replace( i, 2, 1, REGEX_CHAR(CharT,'/v') );
break;
case REGEX_CHAR(CharT,'//'):
if( fPattern )
{
if( i+3 < str.size() &®EX_CHAR(CharT,'//') == str[i+2] && REGEX_CHAR(CharT,'//') ==str[i+3] )
str.erase( i, 2 );
++i;
}
else
str.erase( i, 1 );
break;
case REGEX_CHAR(CharT,'0'): caseREGEX_CHAR(CharT,'1'): case REGEX_CHAR(CharT,'2'): case REGEX_CHAR(CharT,'3'):
case REGEX_CHAR(CharT,'4'): caseREGEX_CHAR(CharT,'5'): case REGEX_CHAR(CharT,'6'): case REGEX_CHAR(CharT,'7'):
if( ! fPattern )
{
size_t j=i+2;
CharT ch = CharT( str[i+1] -REGEX_CHAR(CharT,'0') );
for( ; j-i < 4 && j < str.size()&& REGEX_CHAR(CharT,'0') <= str[j] && REGEX_CHAR(CharT,'7')>= str[j]; ++j )
ch = CharT( ch * 8 + ( str[j] -REGEX_CHAR(CharT,'0') ) );
str.replace( i, j-i, 1, ch );
}
break;
case REGEX_CHAR(CharT,'x'):
if( ! fPattern )
{
CharT ch = 0;
size_t j=i+2;
for( ; j-i < 4 && j < str.size()&& detail::regex_isxdigit( str[j] ); ++j )
ch = CharT( ch * 16 + detail::regex_xdigit2int(str[j] ) );
str.replace( i, j-i, 1, ch );
}
break;
case REGEX_CHAR(CharT,'c'):
if( ! fPattern && i+2 < str.size() )
{
CharT ch = str[i+2];
if( REGEX_CHAR(CharT,'a') <= ch &®EX_CHAR(CharT,'z') >= ch )
ch = detail::regex_toupper( ch );
str.replace( i, 3, 1, CharT( ch ^ 0x40 ) );
}
break;
default:
if( fPattern )
++i;
else
str.erase( i, 1 );
break;
}
++i;
if( str.size() <= i )
return;
}
}
#endif
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: syntax2.cpp
//
// Contents: data definitions for the syntax modules
//
// Classes:
//
// Functions:
//
// Coupling:
//
// Notes:
//
// Author: Eric Niebler ( [email protected])
//
// History: 3-29-00ericne Created
//
//----------------------------------------------------------------------------
#include "syntax2.h"
namespace regex
{
// Token table for pattern characters that are not escaped, indexed by
// character code. Only the first 125 entries are listed; the remaining
// elements of the array are zero-initialised.
REGEX_SELECTANY TOKEN const perl_syntax_base::s_rgreg[ UCHAR_MAX + 1 ] =
{
    /*  0*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /*  8*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 16*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 24*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 36 is '$'
    /* 32*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, END_LINE, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 40 is '(', 41 is ')', 46 is '.'
    /* 40*/ BEGIN_GROUP, END_GROUP, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, MATCH_ANY, NO_TOKEN,
    /* 48*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 56*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 64*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 72*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 80*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 91 is '[', 92 is the backslash, 94 is '^'
    /* 88*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, BEGIN_CHARSET, ESCAPE, NO_TOKEN, BEGIN_LINE, NO_TOKEN,
    /* 96*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /*104*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /*112*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 124 is '|'
    /*120*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN, ALTERNATION, NO_TOKEN, NO_TOKEN, NO_TOKEN
    // and the rest are 0...
};
// Token table for the character that follows an escape character, indexed by
// character code. Only the first 128 entries are listed; the remaining
// elements of the array are zero-initialised.
REGEX_SELECTANY TOKEN const perl_syntax_base::s_rgescape[ UCHAR_MAX + 1 ] =
{
    /*  0*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /*  8*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 16*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 24*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 32*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 40*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 48*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /* 56*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 65 'A', 66 'B', 68 'D', 69 'E'
    /* 64*/ NO_TOKEN, ESC_BEGIN_STRING, ESC_NOT_WORD_BOUNDARY, NO_TOKEN,
            ESC_NOT_DIGIT, ESC_QUOTE_META_OFF, NO_TOKEN, NO_TOKEN,
    /* 72*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 81 'Q', 83 'S', 87 'W'
    /* 80*/ NO_TOKEN, ESC_QUOTE_META_ON, NO_TOKEN, ESC_NOT_SPACE,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, ESC_NOT_WORD,
    // 90 'Z'
    /* 88*/ NO_TOKEN, NO_TOKEN, ESC_END_STRING, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 98 'b', 100 'd'
    /* 96*/ NO_TOKEN, NO_TOKEN, ESC_WORD_BOUNDARY, NO_TOKEN,
            ESC_DIGIT, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    /*104*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN,
    // 115 's', 119 'w'
    /*112*/ NO_TOKEN, NO_TOKEN, NO_TOKEN, ESC_SPACE,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, ESC_WORD,
    // 122 'z'
    /*120*/ NO_TOKEN, NO_TOKEN, ESC_END_STRING_z, NO_TOKEN,
            NO_TOKEN, NO_TOKEN, NO_TOKEN, NO_TOKEN
    // and the rest are 0...
};
namespace detail
{
// Names of the POSIX character classes, each in plain and negated ("^") form,
// paired with the length of the name in characters.
REGEX_SELECTANY extern posix_charset_type const g_rgposix_charsets[] =
{
    { "[:alnum:]", 9 },
    { "[:^alnum:]", 10 },
    { "[:alpha:]", 9 },
    { "[:^alpha:]", 10 },
    { "[:blank:]", 9 },
    { "[:^blank:]", 10 },
    { "[:cntrl:]", 9 },
    { "[:^cntrl:]", 10 },
    { "[:digit:]", 9 },
    { "[:^digit:]", 10 },
    { "[:graph:]", 9 },
    { "[:^graph:]", 10 },
    { "[:lower:]", 9 },
    { "[:^lower:]", 10 },
    { "[:print:]", 9 },
    { "[:^print:]", 10 },
    { "[:punct:]", 9 },
    { "[:^punct:]", 10 },
    { "[:space:]", 9 },
    { "[:^space:]", 10 },
    { "[:upper:]", 9 },
    { "[:^upper:]", 10 },
    { "[:xdigit:]", 10 },
    { "[:^xdigit:]", 11 }
};
// Number of entries in g_rgposix_charsets.
REGEX_SELECTANY extern size_t const g_cposix_charsets = ARRAYSIZE( g_rgposix_charsets );
} // namespace detail
} // namespace regex
/***
*resetstk - Recover from Stack overflow.
*
* Copyright (c) Microsoft Corporation. Allrights reserved.
*
*Purpose:
* Defines the _resetstkoflw() function.
*
*******************************************************************************/
#if defined(_MSC_VER) & _MSC_VER < 1300
#include
#include
#include
// Minimum distance, in bytes, that _resetstkoflw requires between the base of
// the stack region and the new guard page, for Win9x and for other platforms.
#define MIN_STACK_REQ_WIN9X 0x11000
#define MIN_STACK_REQ_WINNT 0x2000
// Unsigned integer type used for pointer arithmetic on stack addresses:
// 64 bits wide when _WIN64 is defined, 32 bits otherwise.
#ifdef _WIN64
typedef unsigned __int64 REGEX_DWORD_PTR;
#else
typedef unsigned __int32 REGEX_DWORD_PTR;
#endif
struct osplatform_getter
{
int m_osplatform;
osplatform_getter() : m_osplatform( 0 )
{
OSVERSIONINFOA osvi;
osvi.dwOSVersionInfoSize =sizeof(OSVERSIONINFOA);
if( GetVersionExA( & osvi ) )
m_osplatform = osvi.dwPlatformId;
}
};
// Returns the platform id of the running operating system. The value is
// obtained by a function-local static osplatform_getter, so the OS is queried
// only the first time this function runs.
inline int get_osplatform()
{
    static osplatform_getter const s_osplatform_getter;
    return s_osplatform_getter.m_osplatform;
}
/***
* int _resetstkoflw(void) - Recovers from Stack Overflow
*
* Purpose:
* Sets the guard page to its position before the stack overflow.
*
* Exit:
* Returns nonzero on success, zero on failure
*
*******************************************************************************/
extern "C" int __cdecl_resetstkoflw(void)
{
LPBYTE pStack, pGuard, pStackBase, pMinGuard;
MEMORY_BASIC_INFORMATION mbi;
SYSTEM_INFO si;
DWORD PageSize;
DWORD flNewProtect;
DWORD flOldProtect;
// Use _alloca() to get the current stackpointer
pStack = static_cast
// Find the base of the stack.
if (VirtualQuery(pStack, &mbi, sizeof mbi)== 0)
return 0;
pStackBase = static_cast
// Find the page just below where the stackpointer currently points.
// This is the new guard page.
GetSystemInfo(&si);
PageSize = si.dwPageSize;
pGuard = (LPBYTE) (((REGEX_DWORD_PTR)pStack& ~(REGEX_DWORD_PTR)(PageSize - 1))
- PageSize);
// If the potential guard page is too close tothe start of the stack
// region, abandon the reset effort for lack ofspace. Win9x has a
// larger reserved stack requirement.
pMinGuard = pStackBase + ((get_osplatform() ==VER_PLATFORM_WIN32_WINDOWS)
? MIN_STACK_REQ_WIN9X
: MIN_STACK_REQ_WINNT);
if (pGuard < pMinGuard)
return 0;
// On a non-Win9x system, release the stackregion below the new guard
// page. This can't be done for Win9x becauseof OS limitations.
if (get_osplatform() !=VER_PLATFORM_WIN32_WINDOWS) {
if (pGuard > pStackBase)
VirtualFree(pStackBase, pGuard - pStackBase,MEM_DECOMMIT);
VirtualAlloc(pGuard, PageSize, MEM_COMMIT,PAGE_READWRITE);
}
// Enable the new guard page.
flNewProtect = get_osplatform() ==VER_PLATFORM_WIN32_WINDOWS
? PAGE_NOACCESS
: PAGE_READWRITE | PAGE_GUARD;
return VirtualProtect(pGuard, PageSize,flNewProtect, &flOldProtect);
}
#endif
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: regexpr2.h
//
// Contents: classes for regular expression pattern matching a-la perl
//
// Classes: basic_rpattern_base
//
// Functions: rpattern::match
// rpattern::substitute
// match_results::cbackrefs
// match_results::backref
// match_results::all_backrefs
// match_results::backref_str
//
// Author: Eric Niebler ( [email protected])
//
//----------------------------------------------------------------------------
#ifndef REGEXPR_H
#define REGEXPR_H
#ifdef _MSC_VER
// Warnings silenced for the rest of this header:
// warning C4189: local variable is initialized but not referenced
// warning C4290: C++ exception specification ignored except to indicate a function is not __declspec(nothrow)
// warning C4702: unreachable code
// warning C4710: function 'blah' not inlined
// warning C4786: identifier was truncated to '255' characters in the debug information
# pragma warning( push )
# pragma warning( disable : 4189 4290 4702 4710 4786 )
// Structured-exception code compared against _exception_code() to detect a
// stack overflow.
# define REGEX_SEH_STACK_OVERFLOW 0xC00000FDL
// Compilers newer than VC6 (_MSC_VER 1200) get _resetstkoflw from <malloc.h>;
// for older ones it is declared here.
# if 1200 < _MSC_VER
# include <malloc.h>
# else
extern "C" int __cdecl _resetstkoflw(void);
# endif
extern "C" unsigned long __cdecl _exception_code(void);
#endif
#include
#include
#include
#include
#include
#include
#include "syntax2.h"
#include "restack.h"
namespace regex
{
// This is the default alignment for the unsafe heterogeneous stack.
// If you are getting a compiler error in one of the unsafe_stack
// methods, then compile with -DREGEX_STACK_ALIGNMENT=16 or 32
#ifndef REGEX_STACK_ALIGNMENT
# define REGEX_STACK_ALIGNMENT sizeof( void* )
#endif
// REGEX_DEBUG becomes 1 when it is not already defined and one of DEBUG,
// _DEBUG or DBG is defined, and 0 otherwise.
// NOTE(review): when REGEX_DEBUG is already defined by the build, the #else
// branch below defines it again as 0 -- confirm that this is intended.
#if !defined( REGEX_DEBUG ) & ( defined(DEBUG ) | defined( _DEBUG ) | defined( DBG ) )
# define REGEX_DEBUG 1
#else
# define REGEX_DEBUG 0
#endif
// REGEX_DEBUG_ITERATORS becomes 1 when it is not already defined and
// _HAS_ITERATOR_DEBUGGING is defined, and 0 otherwise.
// NOTE(review): same redefinition concern as for REGEX_DEBUG above.
#if !defined( REGEX_DEBUG_ITERATORS ) &defined( _HAS_ITERATOR_DEBUGGING )
# define REGEX_DEBUG_ITERATORS 1
#else
# define REGEX_DEBUG_ITERATORS 0
#endif
// Implementation helpers: the stack typedef, static_init, forward
// declarations, smart_iter / iter_select, and the allocator conversion and
// rebind utilities.
// NOTE(review): many lines in this namespace stop right after a template name
// (`typedefhetero_stack`, `conststatic_init`, `std::allocator`,
// `typedef typename select`, `rebind_helper`, ...). Template argument lists and
// the rest of those lines appear to have been lost in extraction, and several
// keywords are fused with the following identifier (`classsub_expr`,
// `structsmart_iter`, `REGEX_CDECLconvert_allocator`). Restore from the
// upstream source before compiling.
namespace detail
{
#if REGEX_DEBUG | REGEX_DEBUG_ITERATORS
// Turn on hetero_stack's run-time typechecking
typedefhetero_stack
#else
// Assume that all types pushed on stack have trivial destructors.
typedefhetero_stack
#endif
// Used to initialize variables with the same value they would have
// if they were initialized as a static global.( Ptrs get NULL,
// integer types get 0, etc, etc )
template< typename T > struct static_init{ static T const value; };
template< typename T > T conststatic_init
//
// Forward declarations
//
template< typename IterT > classsub_expr;
template< typename IterT > classmatch_group_base;
template< typename IterT > classbasic_rpattern_base_impl;
template< typename IterT > structmatch_param;
template< typename IterT > structsub_expr_base;
template< typename IterT > structregex_access;
// an iterator that keeps track of whether it is singular or not.
template< typename IterT > structsmart_iter
{
IterT m_iter;
// false for a default-constructed smart_iter, true once built from an IterT
bool m_valid;
smart_iter()
: m_iter( static_init
, m_valid( false )
{
}
smart_iter( smart_iter const & rhs )
: m_iter( rhs.m_iter )
, m_valid( rhs.m_valid )
{
}
smart_iter( IterT iter ) // implicit conversion OK!
: m_iter( iter )
, m_valid( true )
{
}
smart_iter & operator=( smart_iter const& rhs )
{
m_iter = rhs.m_iter;
m_valid = rhs.m_valid;
return *this;
}
// If either side is not valid the wrapped iterators are not compared: the
// two are equal only when both are invalid.
friend bool operator==( smart_iter const &lhs, smart_iter const & rhs )
{
if( !lhs.m_valid || !rhs.m_valid )
return lhs.m_valid == rhs.m_valid;
else
return lhs.m_iter == rhs.m_iter;
}
friend bool operator!=( smart_iter const &lhs, smart_iter const & rhs )
{
return ! operator==( lhs, rhs );
}
};
// Chooses between smart_iter and the plain IterT; the condition involves
// REGEX_DEBUG_ITERATORS and is_scalar (the rest of the condition is cut off).
template< typename IterT > structiter_select
{
typedef typename select
<
REGEX_DEBUG_ITERATORS &&!is_scalar
smart_iter
IterT
>::type type;
};
// A type whose size is SizeT bytes; used below as the return type of the
// allocator_picker overloads so that sizeof can tell them apart.
template< int SizeT > structtype_with_size { char buffer[ SizeT ]; };
// make up for the fact that the VC6 std::allocator does
// not have template constructors
template< typename ToT, typename FromT >
std::allocator
{
return std::allocator
}
// Fallback overload: returns the allocator it was given, unchanged.
template< typename ToT, typename FromT >
FromT const & REGEX_CDECLconvert_allocator( FromT const & from, ... )
{
return from;
}
template< int > struct rebind_helper;
// unknown allocator
template< typename T >
type_with_size<1> REGEX_CDECLallocator_picker( T const &, ... );
template<> struct rebind_helper<1>
{
template< typename AllocT, typename ElemT>
struct inner
{
REGEX_NVC6( typedef typename AllocT::templaterebind
};
};
// std::allocator
template< typename T >
type_with_size<2> allocator_picker(std::allocator
template<> struct rebind_helper<2>
{
template< typename, typename ElemT >
struct inner
{
typedef std::allocator
};
};
// Selects a rebind_helper specialisation from the size of the type returned
// by allocator_picker.
template< typename AllocT, typename ElemT>
struct rebind
{
enum { alloc_type =sizeof(allocator_picker(factory
typedef typenamerebind_helper
};
}
//--------------------------------------------------------------------------
//
// Class: width_type
//
// Description: represents the width of a sub-expression
//
// Members: m_min - smallest number of characters a sub-expr can span
// m_max - largest number of characters a sub-expr can span
//
// History: 8/14/2000 - ericne - Created
//
//--------------------------------------------------------------------------
// Width of a sub-expression, measured in characters.
struct width_type
{
    size_t m_min; // smallest number of characters the sub-expression can span
    size_t m_max; // largest number of characters the sub-expression can span
};

// Returns the width value whose bounds are both size_t( -1 ).
inline width_type const uninit_width()
{
    size_t const unset = static_cast<size_t>( -1 );
    width_type result = { unset, unset };
    return result;
}
// Helper function for processing escape sequences (forward declaration).
// NOTE(review): the declaration stops after `std::basic_string`; the template
// argument list, the remaining parameters and the closing `);` appear to have
// been lost in extraction -- restore from the upstream source.
template< typename CharT, typename TraitsT,typename AllocT >
void process_escapes( std::basic_string
//--------------------------------------------------------------------------
//
// Class: backref_tag
//
// Description: Struct which contains aback-reference. It is a template
// on the iterator type.
//
// Methods: backref_tag - c'tor
// operator bool - so that if( br ) is true ifthis br matched
// operator! - inverse of operator bool()
//
// Members: reserved - move along, nothing tosee here
//
// History: 8/9/2001 - ericne - Created
//
//--------------------------------------------------------------------------
// A back-reference: a pair of iterators (first, second) delimiting matched
// text, plus a `matched` flag and book-keeping fields used by the library.
// NOTE(review): the base-class list, several typedefs, the constructor's
// default arguments and initialisers, the ostreambuf_iterator declaration, the
// sout.write call and the print member's signature are cut off right after a
// template name; template arguments and the rest of those lines appear to
// have been lost in extraction -- restore from the upstream source.
template< typename IterT >
class backref_tag : public std::pair
{
// Helper for the "safe bool" conversion further down.
struct detail_t { detail_t * d; };
// Generic printing: copies the characters of [first, second) to the stream
// one at a time through an output iterator.
template< typename OStreamT, typename OtherT>
void REGEX_CDECL _do_print( OStreamT &sout, OtherT, ... ) const
{
typedef typename OStreamT::char_type char_type;
typedef typename OStreamT::traits_typetraits_type;
std::ostreambuf_iterator
for( IterT iter = first; iter != second; ++iter,++iout )
*iout = *iter;
}
// overload that is optimized for bare char*
template< typename OStreamT >
void _do_print( OStreamT & sout, typenameOStreamT::char_type const *, int ) const
{
sout.write( first,static_cast
}
public:
typedef IterT iterator_type;
typedef typenamestd::iterator_traits
typedef std::basic_string
typedef typenamedetail::iter_select
// Builds an unmatched back-reference from two iterators (both defaulted).
explicit backref_tag
(
IterT i1 =detail::static_init
IterT i2 =detail::static_init
)
: std::pair
, matched( false )
, reserved1( i1 )
, reserved2( 0 )
, reserved3( false )
, reserved4( detail::static_init
, reserved5(detail::static_init
{
}
// Start of the referenced text.
IterT begin() const
{
return first;
}
// End of the referenced text.
IterT end() const
{
return second;
}
// Copy of the referenced text, or an empty string when nothing matched.
string_type const str() const
{
return matched ? string_type( first, second ) :string_type();
}
// Use the "safe bool" idiom. This allows implicit conversion to bool,
// but not to int. It also disallows conversion to void*.
typedef detail_t * detail_t::* bool_type;
operator bool_type() const //throw()
{
return matched ? &detail_t::d : 0;
}
bool operator!() const //throw()
{
return ! matched;
}
// Writes the referenced text to `sout` via _do_print and returns the stream.
template< typename CharT, typename TraitsT>
std::basic_ostream
{
_do_print( sout, IterT(), 0 );
return sout;
}
// true when this back-reference took part in a successful match
bool matched;
//private:
IterT reserved1; // used for internal book-keeping
size_t reserved2; // used for internal book-keeping
bool reserved3; // used for internal book-keeping
smart_iter_type reserved4; // used for internal book-keeping
smart_iter_type reserved5; // used for internal book-keeping
};
//namespace detail
//{
// indexing into the backref vector is fasterif the backref_tag struct
// has a size that is a power of 2.
//staticstatic_assert<32==sizeof(backref_tag
//}
// --------------------------------------------------------------------------
//
// Class: basic_match_results
//
// Description: Use this structure forreturning match/substitute results
// out from the match()/substitute() methods.
//
// Methods: cbackrefs -
// backref -
// all_backrefs -
// rlength -
//
// Members: m_rgbackrefs -
//
// Typedefs: const_iterator -
// backref_type -
// backref_vector -
//
// History: 8/8/2001 - ericne - Created
//
//--------------------------------------------------------------------------
// Holds the back-references produced by a match operation, together with the
// iterator marking the beginning of the searched text (m_ibegin), which
// rstart() uses as the origin for offsets.
// NOTE(review): the default template argument, the backref_type /
// allocator_type / backref_vector typedefs and the friend declaration are cut
// off right after a template name; the lost text must be restored from the
// upstream source.
template
<
typename IterT,
typename AllocT = std::allocator
>
struct basic_match_results
{
// const_iterator is deprecated. Use iterator_type instead.
REGEX_DEPRECATED typedef IterT const_iterator;
typedef IterT iterator_type;
typedef backref_tag
typedef typename detail::rebind
typedef std::vector
friend struct detail::regex_access
explicit basic_match_results( allocator_typeconst & alloc = allocator_type() )
: m_rgbackrefs( alloc )
{
}
virtual ~basic_match_results()
{
}
// Number of stored back-references.
size_t cbackrefs() const //throw()
{
return m_rgbackrefs.size();
}
// Bounds-checked access to back-reference number `cbackref`.
backref_type const & backref( size_t cbackref) const //throw( std::out_of_range )
{
return m_rgbackrefs.at( cbackref );
}
// Read-only access to the whole back-reference vector.
backref_vector const & all_backrefs() const//throw()
{
return m_rgbackrefs;
}
// Offset of back-reference `cbackref` from the beginning of the searched text.
size_t rstart( size_t cbackref = 0 ) const//throw( std::out_of_range )
{
return std::distance( m_ibegin,m_rgbackrefs.at( cbackref ).first );
}
// Length of back-reference `cbackref`.
size_t rlength( size_t cbackref = 0 ) const//throw( std::out_of_range )
{
return std::distance( m_rgbackrefs.at( cbackref).first, m_rgbackrefs.at( cbackref ).second );
}
private:
backref_vector m_rgbackrefs;
IterT m_ibegin;
};
// Unnecessary and deprecated
// Thin wrapper over basic_match_results that re-exports the base typedefs and
// forwards the allocator to the base constructor.
// NOTE(review): the default template argument, the base-class name, the `base`
// typedef and the base-constructor call are cut off after the template name,
// and several typedef lines have the type fused with the new name (e.g.
// `base::iterator_typeiterator_type`) -- restore from the upstream source.
template< typename CharT, typename AllocT =std::allocator
struct basic_match_results_c : publicbasic_match_results
{
typedef basic_match_results
REGEX_DEPRECATED typedef typenamebase::const_iterator const_iterator;
typedef typename base::iterator_typeiterator_type;
typedef typename base::backref_typebackref_type;
typedef typename base::allocator_typeallocator_type;
typedef typename base::backref_vectorbackref_vector;
explicit basic_match_results_c( allocator_typeconst & alloc = allocator_type() )
: basic_match_results
{
}
};
// Computes the types shared by the substitution results: a string type, its
// const_iterator (as iterator_type), and a basic_match_results typedef.
// NOTE(review): three of the four typedef lines stop after a template name, so
// the typedef names and template arguments are not visible here -- restore
// from the upstream source.
template< typename CharT, typename TraitsT,typename AllocT >
struct subst_results_base
{
typedef typename detail::rebind
typedef std::basic_string
typedef typename string_type::const_iteratoriterator_type;
typedef basic_match_results
};
//
// For storing the results of a substitute()operation
//
// Results of a substitute() operation: the back-references of the base class
// plus access to the string those back-references point into.
// m_pbackref_str initially points at the member m_backref_str; backref_str()
// returns whatever m_pbackref_str currently points to.
// NOTE(review): the default template arguments, the base-class list, several
// typedefs, the friend declaration and the m_backref_str initialiser are cut
// off right after a template name -- restore from the upstream source.
template
<
typename CharT,
typename TraitsT =std::char_traits
typename AllocT = std::allocator
>
struct basic_subst_results : publicsubst_results_base
{
typedef typename detail::rebind
typedef std::basic_string
typedef typename string_type::const_iteratoriterator_type;
typedef basic_match_results
typedef typename base::backref_typebackref_type;
typedef typename base::allocator_typeallocator_type;
typedef typename base::backref_vectorbackref_vector;
friend structdetail::regex_access
explicit basic_subst_results( allocator_typeconst & alloc = allocator_type() )
: basic_match_results< iterator_type, AllocT>( alloc )
, m_backref_str(detail::convert_allocator
, m_pbackref_str( &m_backref_str )
{
}
// The string that the stored back-references refer to.
string_type const & backref_str() const//throw()
{
return *m_pbackref_str;
}
private:
string_type m_backref_str;
string_type const * m_pbackref_str;
};
// Typedef-only helper struct used as the base of basic_split_results; it
// has no data members or functions.
// NOTE(review): the template argument lists and typedef names on the
// truncated lines below appear to have been lost in extraction; restore
// them from the original header before compiling.
template< typename CharT, typename TraitsT,typename AllocT >
struct split_results_base
{
typedef typename detail::rebind
typedef std::basic_string
typedef typename detail::rebind
typedefstd::vector
};
//
// For storing the results of a split() operation
//
// NOTE(review): the template argument lists (and some inter-token spaces)
// on several lines below appear to have been lost in extraction; restore
// them from the original header before compiling.
template
<
typename CharT,
typename TraitsT =std::char_traits
typename AllocT = std::allocator
>
struct basic_split_results : privatesplit_results_base
{
typedef CharT char_type;
typedef typename detail::rebind
typedef std::basic_string
typedef typename detail::rebind
typedef std::vector
// The container whose operations are re-exported below.
typedef string_vector base;
// Constructs an empty result set; alloc is passed on to the string vector.
explicit basic_split_results( allocator_typeconst & alloc = allocator_type() )
: base( alloc )
{
}
// Take the pointer typedefs from the allocator, except on MSVC builds with
// _MSC_VER <= 1200, where plain pointers to string_type are used instead.
#if !defined(_MSC_VER) | 1200 < _MSC_VER
typedef typename allocator_type::pointerpointer;
typedef typename allocator_type::const_pointerconst_pointer;
#else
typedef string_type * pointer;
typedef string_type const * const_pointer;
#endif
// shortcuts to the most basic read-only container operations
typedef typename base::size_type size_type;
typedef typename base::difference_typedifference_type;
typedef typename base::value_type value_type;
typedef typename base::reference reference;
typedef typename base::const_referenceconst_reference;
typedef typename base::iterator iterator;
typedef typename base::const_iteratorconst_iterator;
typedef typename base::reverse_iteratorreverse_iterator;
typedef typename base::const_reverse_iteratorconst_reverse_iterator;
using base::begin;
using base::end;
using base::rbegin;
using base::rend;
using base::operator[];
using base::at;
using base::size;
using base::front;
using base::back;
// Gives access to the underlying string vector (this object viewed as its
// string_vector base).
string_vector & strings()
{
return *this;
}
// Const overload of strings().
string_vector const & strings() const
{
return *this;
}
};
//
// REGEX_MODE selects which matching algorithm a pattern uses.
//
enum REGEX_MODE
{
    MODE_FAST  = 0, // Fast, recursive algorithm. Could overflow the stack.
    MODE_SAFE  = 1, // Slow, iterative algorithm. Can't overflow the stack.
    MODE_MIXED = 2, // Uses a heuristic to pick whichever algorithm is the
                    // most appropriate for the pattern.
    // MS VC++ has structured exception handling, which makes the
    // consequences of a stack overflow much less severe. Because of this,
    // the "fast" algorithm is the default on MS platforms; every other
    // compiler defaults to the mixed heuristic.
#if defined( _MSC_VER )
    MODE_DEFAULT = MODE_FAST
#else
    MODE_DEFAULT = MODE_MIXED
#endif
};
//
// Helper function for resetting the intrinsic character sets.
// This should be called after changing the locale with setlocale().
// Declaration only; the argument defaults to CharT( 0 ).
//
template< typename CharT >
void reset_intrinsic_charsets( CharT ch =CharT( 0 ) );
// This is for implementation details that really belong in the
// cpp file, but can't go there because of template strangeness.
#include "reimpl2.h"
//--------------------------------------------------------------------------
//
// Class: basic_rpattern_base
//
// Description:
//
// Methods: basic_rpattern_base - c'tor
// basic_rpattern_base -
// basic_rpattern_base -
// init - ( re )initialize the pattern
// init -
// set_substitution - set the substitution string
// _find_next_group - parse the next group of the pattern
// _find_next - parse the next sub_expr of the pattern
// _find_atom - parse the next atom of the pattern
// _quantify - quantify the sub_expr
// _common_init - perform some common initialization tasks
// _parse_subst - parse the substitution string
// _add_subst_backref - add a backref node to the subst list
//
// Members: m_invisible_groups - list of hidden groups
//
// Typedefs: syntax_type -
// backref_type -
// backref_vector -
// string_type -
// size_type -
//
// History: 8/14/2000 - ericne - Created
// 8/5/2001 - ericne - complete overhaul
//
//--------------------------------------------------------------------------
template< typename IterT, typename SyntaxT>
class basic_rpattern_base : protected detail::basic_rpattern_base_impl
{
protected:
typedefdetail::basic_rpattern_base_impl
public:
typedef SyntaxT syntax_type;
typedef typename impl::char_type char_type;
typedef typename impl::traits_type traits_type;
typedef typename impl::string_type string_type;
typedef typename impl::size_type size_type;
typedef typename impl::backref_typebackref_type;
typedef typename impl::backref_vectorbackref_vector;
void init
(
string_type const & pat,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
); //throw( bad_regexpr, std::bad_alloc );
void init
(
string_type const & pat,
string_type const & subst,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
); //throw( bad_regexpr, std::bad_alloc );
void set_substitution
(
string_type const & subst
); //throw( bad_regexpr, std::bad_alloc );
using impl::flags;
using impl::mode;
using impl::get_width;
using impl::cgroups;
using impl::get_pat;
using impl::get_subst;
using impl::swap;
using impl::npos;
protected:
basic_rpattern_base() //throw()
:detail::basic_rpattern_base_impl
{
}
basic_rpattern_base(basic_rpattern_base
:detail::basic_rpattern_base_impl
{
// Don't call _normalize_string(). Ifthat.flags()&NORMALIZE,
// then subst has already been normalized.
_common_init( this->m_flags );
_parse_subst( *this->m_subst,this->m_fuses_backrefs, this->m_subst_list ); // must come after_common_init
}
explicit basic_rpattern_base
(
string_type const & pat,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
) //throw( bad_regexpr, std::bad_alloc )
:detail::basic_rpattern_base_impl
{
_common_init( this->m_flags );
}
basic_rpattern_base
(
string_type const & pat,
string_type const & subst,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
) //throw( bad_regexpr, std::bad_alloc )
:detail::basic_rpattern_base_impl
{
_common_init( this->m_flags );
_normalize_string( *this->m_subst );
_parse_subst( *this->m_subst,this->m_fuses_backrefs, this->m_subst_list ); // must come after_common_init
}
basic_rpattern_base & operator=
(
basic_rpattern_base
) //throw( bad_regexpr, std::bad_alloc )
{
basic_rpattern_base
swap( temp );
return *this;
}
detail::match_group_base
(
typename string_type::iterator & ipat,
detail::match_group_base
std::vector
);
bool _find_next
(
typename string_type::iterator & ipat,
detail::match_group_base
std::vector
);
void _find_atom
(
typename string_type::iterator & ipat,
detail::match_group_base
syntax_type & sy
);
void _quantify
(
std::auto_ptr
typename string_type::iterator & ipat,
bool is_group,
syntax_type & sy
);
void _add_subst_backref
(
detail::subst_node & snode,
size_t nbackref,
ptrdiff_t rstart,
bool & uses_backrefs,
detail::subst_list_type & subst_list
) const;
void _parse_subst
(
string_type & subst,
bool & uses_backrefs,
detail::subst_list_type & subst_list
) const;
void _common_init( REGEX_FLAGS flags );
static detail::instantiator instantiate()
{
typedef basic_rpattern_base this_type;
return detail::instantiator_helper
(
&detail::basic_rpattern_base_impl
static_cast
static_cast
&this_type::set_substitution,
&this_type::_find_next_group,
&this_type::_find_next,
&this_type::_find_atom,
&this_type::_add_subst_backref,
&this_type::_parse_subst,
&this_type::_common_init
);
}
};
//--------------------------------------------------------------------------
//
// Class: basic_rpattern
//
// Description: generic regex pattern object
//
// Methods: basic_rpattern - c'tor
// basic_rpattern -
// basic_rpattern -
// match - match from begin iter to end iter
// match - match a null-terminated string
// match - match a std::string
// count - count matches from begin iter to end iter
// count - count matches in a null-terminated string
// count - count matches in a std::string
// substitute - do substitutions in a std::string
// _do_match - internal implementation
// _do_count - internal implementation
//
// History: 8/13/2001 - ericne - Created
//
//--------------------------------------------------------------------------
template
<
typename IterT,
typename SyntaxT =perl_syntax
>
class basic_rpattern : public basic_rpattern_base
{
typedefdetail::basic_rpattern_base_impl
template< typename CharT >
static void same_char_types( CharT, CharT ) {}
public:
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
basic_rpattern() //throw()
: basic_rpattern_base
{
}
basic_rpattern( basic_rpattern const & that)
: basic_rpattern_base
{
}
explicit basic_rpattern
(
string_type const & pat,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
) //throw( bad_regexpr, std::bad_alloc )
: basic_rpattern_base
{
}
basic_rpattern
(
string_type const & pat,
string_type const & subst,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
) //throw( bad_regexpr, std::bad_alloc )
: basic_rpattern_base
{
}
basic_rpattern & operator=(basic_rpattern
{
basic_rpattern_base
return *this;
}
// Iter2 must be convertible to type IterT
template< typename OtherT, typename AllocT>
backref_type const & match
(
OtherT ibegin,
OtherT iend,
basic_match_results
) const
{
// If your compile breaks here, it is becauseOtherT is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
detail::static_assert
( void ) iterator_types_are_not_convertible;
if( detail::regex_access
{
return results.backref(0);
}
else
{
returndetail::static_init
}
}
template< typename CharT, typename AllocT>
backref_type const & match
(
CharT * szbegin,
basic_match_results
) const
{
// If your compile breaks here, it is becauseCharT* is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
detail::static_assert
( void ) iterator_types_are_not_convertible;
if(detail::regex_access
{
return results.backref(0);
}
else
{
returndetail::static_init
}
}
template< typename CharT, typename TraitsT,typename AllocT >
backref_type const & match
(
std::basic_string
basic_match_results
size_type pos = 0,
size_type len =static_cast
) const
{
// If your compile breaks here, it is becauseiter_type is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
typedef typename std::basic_string
detail::static_assert< detail::is_convertible
( void ) iterator_types_are_not_convertible;
IterT ibegin = str.begin(), iend = str.begin();
if( len == npos || pos + len >= str.size() )
iend = IterT(str.end());
else
std::advance( iend, pos + len );
std::advance( ibegin, pos );
return match( ibegin, iend, results );
}
template< typename OtherT >
size_t count( OtherT ibegin, OtherT iend )const
{
// If your compile breaks here, it is becauseOtherT is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
detail::static_assert
( void ) iterator_types_are_not_convertible;
returndetail::regex_access
}
template< typename CharT >
size_t count( CharT * szbegin ) const
{
// If your compile breaks here, it is becauseCharT* is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
detail::static_assert
( void ) iterator_types_are_not_convertible;
returndetail::regex_access
}
template< typename CharT, typename TraitsT,typename AllocT >
size_t count
(
std::basic_string
size_type pos = 0,
size_type len =static_cast
) const
{
// If your compile breaks here, it is becauseiter_type is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
typedef typename std::basic_string
detail::static_assert
( void ) iterator_types_are_not_convertible;
IterT ibegin = str.begin(), iend = str.begin();
if( len == npos || pos + len >= str.size() )
iend = IterT(str.end());
else
std::advance( iend, pos + len );
std::advance( ibegin, pos );
return count( ibegin, iend );
}
template< typename OtherT, typename CharT,typename TraitsT, typename AllocT >
size_t split
(
OtherT ibegin,
OtherT iend,
basic_split_results
int limit = 0
) const
{
// If your compile breaks here, it is becauseOtherT is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
detail::static_assert
( void ) iterator_types_are_not_convertible;
return detail::regex_access
}
template< typename Char1T, typename Char2T,typename TraitsT, typename AllocT >
size_t split
(
Char1T * szbegin,
basic_split_results
int limit = 0
) const
{
// If your compile breaks here, it is becauseIter2 is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
detail::static_assert
( void ) iterator_types_are_not_convertible;
// If your compile breaks here, it's becausethe string you passed in doesn't have
// the same character type as yoursplit_results struct
same_char_types( Char1T(), Char2T() );
// If your compile breaks here, it is becauseCharT const * is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
returndetail::regex_access
}
template< typename CharT, typename TraitsT,typename AllocT >
size_t split
(
std::basic_string
basic_split_results
int limit = 0,
size_type pos = 0,
size_type len =static_cast
) const
{
// If your compile breaks here, it is becauseiter_type is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
typedef typename std::basic_string
detail::static_assert
( void ) iterator_types_are_not_convertible;
IterT ibegin = str.begin(), iend = str.begin();
if( len == npos || pos + len >= str.size() )
iend = IterT(str.end());
else
std::advance( iend, pos + len );
std::advance( ibegin, pos );
return split( ibegin, iend, results, limit );
}
template< typename CharT, typename TraitsT,typename AllocT >
size_t substitute
(
std::basic_string
basic_subst_results
size_type pos = 0,
size_type len =static_cast
) const
{
// If your compile breaks here, it is becauseiter_type is not
// convertible to type IterT. Check thedeclaration of your rpattern object.
typedef typename std::basic_string
detail::static_assert
( void ) iterator_types_are_not_convertible;
return detail::regex_access
}
};
//--------------------------------------------------------------------------
//
// Class: basic_rpattern_c
//
// Description: a pattern object optimized for matching C-style, NULL-
// terminated strings. It treats the null-terminator as
// the end-of-string condition.
//
// Methods: basic_rpattern_c - c'tor
// basic_rpattern_c -
// basic_rpattern_c -
// match - match a null-terminated string
// count - count matches in a null-terminated string
// _do_match_c - internal implementation
//
// History: 8/13/2001 - ericne - Created
//
//--------------------------------------------------------------------------
template< typename CharT, typename SyntaxT =perl_syntax
class basic_rpattern_c : public basic_rpattern_base
{
typedefdetail::basic_rpattern_base_impl
public:
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
typedef typename basic_rpattern_base
basic_rpattern_c() //throw()
: basic_rpattern_base
{
}
basic_rpattern_c( basic_rpattern_c const &that )
: basic_rpattern_base
{
}
explicit basic_rpattern_c
(
string_type const & pat,
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT
) //throw( bad_regexpr, std::bad_alloc )
: basic_rpattern_base
{
}
basic_rpattern_c & operator=(basic_rpattern_c
{
basic_rpattern_base
return *this;
}
template< typename AllocT >
backref_type const & match
(
CharT const * szbegin,
basic_match_results_c
) const
{
if( detail::regex_access
{
return results.backref(0);
}
else
{
returndetail::static_init
}
}
size_t count( CharT const * szbegin ) const
{
return detail::regex_access
}
};
#if defined(UNICODE) | defined(_UNICODE)
typedef wchar_t rechar_t;
#else
typedef char rechar_t;
#endif
typedef std::basic_string
// On many implementations of the STL, string::iterator is not a typedef
// for char*. Rather, it is a wrapper class. As a result, the regex code
// gets instantiated twice, once for bare pointers (rpattern_c) and once for
// the wrapped pointers (rpattern). But if there is a conversion from the
// bare ptr to the wrapped ptr, then we only need to instantiate the template
// for the wrapped ptr, and the code will work for the bare ptrs, too.
// This can be a significant space savings. The REGEX_FOLD_INSTANTIATIONS
// macro controls this optimization. The default is "off" for backwards
// compatibility. To turn the optimization on, compile with:
// -DREGEX_FOLD_INSTANTIATIONS=1
#ifndef REGEX_FOLD_INSTANTIATIONS
#define REGEX_FOLD_INSTANTIATIONS 0
#endif
typedef ::regex::detail::select
<
REGEX_FOLD_INSTANTIATIONS &&
detail::is_convertible
restring::const_iterator,
rechar_t const *
>::type lpctstr_t;
// For matching against null-terminated strings
typedef basic_rpattern
typedef basic_rpattern
// For matching against std::strings
typedefbasic_rpattern
typedefbasic_rpattern
// Default to perl syntax
typedef perl_rpattern rpattern;
typedef perl_rpattern_c rpattern_c;
// typedefs for the commonly used match_resultsand subst_results
typedefbasic_match_results
typedef basic_match_results
typedef basic_subst_results
typedef basic_split_results
#if defined(_MSC_VER) & 1200 < _MSC_VER
// These are no longer useful, and will go awayin a future release
// You should be using the version without the_c
# pragma deprecated( basic_rpattern_c )
# pragma deprecated( basic_match_results_c )
#endif
// Declares `static type const var` followed by `params` (typically a
// parenthesized constructor argument list) and a terminating semicolon.
// The line continuations must be backslashes: with '/' the macro body is
// just "/" and the declaration line is left as stray code.
#define STATIC_RPATTERN_EX( type, var, params ) \
    static type const var params;
// Shorthand for STATIC_RPATTERN_EX with type regex::rpattern.
#define STATIC_RPATTERN( var, params ) \
    STATIC_RPATTERN_EX( regex::rpattern, var, params )
// Shorthand for STATIC_RPATTERN_EX with type regex::rpattern_c.
#define STATIC_RPATTERN_C( var, params ) \
    STATIC_RPATTERN_EX( regex::rpattern_c, var, params )
// On MSVC newer than 1200, flag direct uses of STATIC_RPATTERN_EX as deprecated.
#if defined(_MSC_VER) & 1200 < _MSC_VER
#pragma deprecated(STATIC_RPATTERN_EX)
#endif
//
// ostream inserter operator forback-references
//
template< typename CharT, typename TraitsT,typename IterT >
inline std::basic_ostream
(
std::basic_ostream
backref_tag
)
{
return br.print( sout );
}
} // namespace regex
//
// specializations for std::swap
//
namespace std
{
template<>
inline void swap( regex::detail::regex_arena& left, regex::detail::regex_arena & right )
{
left.swap( right );
}
template< typename IterT, typename SyntaxT>
inline void swap(regex::basic_rpattern_base
{
left.swap( right );
}
}
#ifdef _MSC_VER
#pragma warning( pop )
#endif
#endif
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft, 1994 - 2002.
//
// File: regexpr2.cpp
//
// Contents: implementation for rpattern methods, definitions for all the
// subexpression types used to perform the matching, the
// charset class definition.
//
// Classes: too many to list here
//
// Functions:
//
// Author: Eric Niebler ( [email protected])
//
// History: 12-11-1998 ericne Created
// 01-05-2001 ericne Removed dependency on VC's choice
// of STL iterator types.
// 08-15-2001 ericne Removed regexpr class, moved match
// state to match_results container.
// 09-17-2001 nathann Add DEBUG_HEAP_SUPPORT
// 11-16-2001 ericne Add stack-conservative algorithm
//
//----------------------------------------------------------------------------
#ifdef _MSC_VER
// unlimited inline expansion ( compile with /Ob1 or /Ob2 )
# pragma inline_recursion( on )
# pragma inline_depth( 255 )
// warning C4127: conditional expression is constant
// warning C4355: 'this' : used in base member initializer list
// warning C4702: unreachable code
// warning C4710: function 'blah' not inlined
// warning C4786: identifier was truncated to '255' characters in the debug information
# pragma warning( push )
// 4710 and 4786 are two separate warning numbers (see the list above);
// they must not be fused into a single token.
# pragma warning( disable : 4127 4355 4702 4710 4786 )
#endif
#include
#include
#include
#include
#include
#include
#include
#ifdef __MWERKS__
# include
#endif
// If the implementation file has been included in the header, then we
// need to mark some functions as inline to prevent them from being multiply
// defined. But if the implementation file is not included in the header,
// we can't mark them as inline, otherwise the linker won't find them.
#ifdef REGEXPR_H
# define REGEXPR_H_INLINE inline
#else
# define REGEXPR_H_INLINE
# include "regexpr2.h"
#endif
#ifdef REGEX_TO_INCLUDE
# include REGEX_TO_INCLUDE
#endif
// $PORT$
// _alloca is not standard
#ifndef alloca
# define alloca _alloca
#endif
namespace regex
{
namespace detail
{
// Forwards to wctype() to obtain the classification descriptor for the
// character-class name sz. The using-directive lets the call resolve
// whether the library declares wctype in namespace std or globally.
inline wctype_t REGEX_CDECL regex_wctype( char const * sz )
{
    using namespace std;
    return wctype( sz );
}
namespace
{
#ifdef __GLIBC__
struct regex_ctype_t
{
int m_ctype;
wctype_t m_wctype;
};
#define REGEX_DECL_CTYPE(desc) /
inline regex_ctype_t const & wct_ ## desc()/
{ /
static regex_ctype_t const s_wct = { _IS ##desc, regex_wctype(#desc) };/
return s_wct; /
}
REGEX_DECL_CTYPE(alnum)
REGEX_DECL_CTYPE(alpha)
REGEX_DECL_CTYPE(blank)
REGEX_DECL_CTYPE(cntrl)
REGEX_DECL_CTYPE(digit)
REGEX_DECL_CTYPE(graph)
REGEX_DECL_CTYPE(lower)
REGEX_DECL_CTYPE(print)
REGEX_DECL_CTYPE(punct)
REGEX_DECL_CTYPE(space)
REGEX_DECL_CTYPE(upper)
REGEX_DECL_CTYPE(xdigit)
regex_ctype_t const wct_zero = { 0, 0 };
inline regex_ctype_t & operator |= (regex_ctype_t & lhs, regex_ctype_t const & rhs )
{
lhs.m_ctype |= rhs.m_ctype;
lhs.m_wctype |= rhs.m_wctype;
return lhs;
}
inline regex_ctype_t operator | ( regex_ctype_tlhs, regex_ctype_t const & rhs )
{
return lhs |= rhs;
}
inline int REGEX_CDECL regex_isctype( int ch,regex_ctype_t const & desc )
{
return __isctype( ch, desc.m_ctype );
}
inline int REGEX_CDECL regex_iswctype( wint_twc, regex_ctype_t desc )
{
using namespace std;
return iswctype( wc, desc.m_wctype );
}
inline bool operator == ( regex_ctype_t const& lhs, regex_ctype_t const & rhs )
{
return lhs.m_ctype == rhs.m_ctype &&lhs.m_wctype == rhs.m_wctype;
}
inline bool operator != ( regex_ctype_t const& lhs, regex_ctype_t const & rhs )
{
return lhs.m_ctype != rhs.m_ctype ||lhs.m_wctype != rhs.m_wctype;
}
#else
typedef wctype_t regex_ctype_t;
#define REGEX_DECL_CTYPE(desc) /
inline regex_ctype_t const wct_ ## desc() /
{ /
static regex_ctype_t const s_wct =regex_wctype(#desc); /
return s_wct; /
}
REGEX_DECL_CTYPE(alnum)
REGEX_DECL_CTYPE(alpha)
REGEX_DECL_CTYPE(cntrl)
REGEX_DECL_CTYPE(digit)
REGEX_DECL_CTYPE(graph)
REGEX_DECL_CTYPE(lower)
REGEX_DECL_CTYPE(print)
REGEX_DECL_CTYPE(punct)
REGEX_DECL_CTYPE(space)
REGEX_DECL_CTYPE(upper)
REGEX_DECL_CTYPE(xdigit)
regex_ctype_t const wct_zero = 0;
#if defined(_MSC_VER) & ( _MSC_VER==1200 |defined(_CPPLIB_VER) )
inline regex_ctype_t const wct_blank() { return_BLANK; } // work around for bug in VC++
inline int REGEX_CDECL regex_isctype( int ch,regex_ctype_t desc )
{
return _isctype( ch, static_cast
}
#else
REGEX_DECL_CTYPE(blank)
inline int REGEX_CDECL regex_isctype( int ch,regex_ctype_t desc )
{
using namespace std;
return iswctype( btowc( ch ), desc );
}
#endif
inline int REGEX_CDECL regex_iswctype( wint_twc, regex_ctype_t desc )
{
using namespace std;
return iswctype( wc, desc );
}
#endif
} // unnamed namespace
template< typename CStringsT, typename IterT>
bool _do_match_iterative(sub_expr_base
// NathanN:
// By defining the symbol REGEX_DEBUG_HEAP the allocator object
// no longer sub allocates memory. This enables heap checking tools like
// AppVerifier & PageHeap to find errors like buffer overruns
// NOTE(review): when REGEX_DEBUG_HEAP is already defined, the #else branch
// below redefines it to 0 - confirm that this is the intended behavior.
#if !defined( REGEX_DEBUG_HEAP ) & REGEX_DEBUG
# define REGEX_DEBUG_HEAP 1
#else
# define REGEX_DEBUG_HEAP 0
#endif
// Returns the default mem_block size: 1 when REGEX_DEBUG_HEAP is enabled,
// so each allocation lands in its own mem_block, and 352 otherwise, so
// several allocations share one mem_block.
REGEXPR_H_INLINE size_t DEFAULT_BLOCK_SIZE()
{
#if REGEX_DEBUG_HEAP
    size_t const block_size = 1;
#else
    size_t const block_size = 352;
#endif
    return block_size;
}
template< typename IBeginT, typename IEndT>
inline size_t parse_int( IBeginT & ibegin,IEndT iend, size_t const max_ = size_t( -1 ) )
{
typedef typenamestd::iterator_traits
size_t retval = 0;
while( iend != ibegin &®EX_CHAR(char_type,'0') <= *ibegin && REGEX_CHAR(char_type,'9')>= *ibegin && max_ > retval )
{
retval *= 10;
retval += static_cast
++ibegin;
}
if( max_ < retval )
{
retval /= 10;
--ibegin;
}
return retval;
}
//--------------------------------------------------------------------------
//
// Class: boyer_moore
//
// Description: fast sub-string search algorithm
//
// Members: m_begin - iter to first char in pattern sequence
// m_last - iter to last char in pattern sequence
// m_len - length of the pattern sequence
// m_off - array of offsets, indexed by ASCII char values
//
// History: 6/8/2003 - ericne - Created
//
//--------------------------------------------------------------------------
template< typename IterT >
class boyer_moore
{
typedef typenamestd::iterator_traits
typedef typename std::char_traits
enum { OFFSET_SIZE = UCHAR_MAX + 1 };
IterT m_begin;
IterT m_last;
char_type const* m_low_last;
unsigned char m_len;
unsigned char m_off[ OFFSET_SIZE ];
static unsigned char hash_char( char ch ) {return static_cast
static unsigned char hash_char( signed char ch) { return static_cast
static unsigned char hash_char( unsigned charch ) { return ch; }
static unsigned char hash_char( wchar_t ch ) {return static_cast
template< typename CharT >
static unsigned char REGEX_VC6(REGEX_CDECL)hash_char( CharT ch REGEX_VC6(...) )
{
return static_cast
}
// case-sensitive Boyer-Moore search
template< typename OtherT >
OtherT find_with_case( OtherT begin, OtherT end) const
{
typedef typenamestd::iterator_traits
diff_type const endpos = std::distance( begin,end );
diff_type offset = m_len;
for( diff_type curpos = offset; curpos
{
std::advance( begin, offset );
IterT pat_tmp = m_last;
OtherT str_tmp = begin;
for( ; traits_type::eq( *str_tmp, *pat_tmp );
--pat_tmp, --str_tmp )
{
if( pat_tmp == m_begin )
{
return str_tmp;
}
}
offset = m_off[ hash_char( *begin ) ];
}
return end;
}
// case-insensitive Boyer-Moore search
template< typename OtherT >
OtherT find_without_case( OtherT begin, OtherTend ) const
{
typedef typenamestd::iterator_traits
diff_type const endpos = std::distance( begin,end );
diff_type offset = m_len;
for( diff_type curpos = offset; curpos
{
std::advance( begin, offset );
IterT pat_tmp = m_last;
char_type const* low_tmp = m_low_last;
OtherT str_tmp = begin;
for( ; traits_type::eq( *str_tmp, *pat_tmp ) ||traits_type::eq( *str_tmp, *low_tmp );
--pat_tmp, --str_tmp, --low_tmp )
{
if( pat_tmp == m_begin )
{
return str_tmp;
}
}
offset = m_off[ hash_char( *begin ) ];
}
return end;
}
public:
// initialize the Boyer-Moore search datastructure, using the
// search sub-sequence to prime the pump.
boyer_moore( IterT begin, IterT end, char_typeconst* lower = 0 )
: m_begin( begin )
, m_last( begin )
, m_low_last( lower )
{
typedef typename std::iterator_traits
diff_type diff = std::distance( begin, end );
m_len = static_cast
std::fill_n( m_off, ARRAYSIZE( m_off ), m_len);
--m_len;
for( unsigned char offset = m_len; offset;--offset, ++m_last )
{
m_off[ hash_char( *m_last ) ] = offset;
}
if( m_low_last )
{
for( unsigned char offset = m_len; offset;--offset, ++m_low_last )
{
unsigned char hash = hash_char( *m_low_last );
m_off[ hash ] = regex_min( m_off[ hash ],offset );
}
}
}
template< typename OtherT >
OtherT find( OtherT begin, OtherT end ) const
{
if( m_low_last )
{
return find_without_case( begin, end );
}
else
{
return find_with_case( begin, end );
}
}
static void * operator new( size_t size,regex_arena & arena )
{
return arena.allocate( size );
}
static void operator delete( void *,regex_arena & )
{
}
};
// This class is used to speed up character set matching by providing
// a bitset with one bit for every unsigned char value (UCHAR_MAX+1 bits).
// std::bitset is not used because the range-checking slows it down.
// Note: The division and modulus operations are optimized by the compiler
// into bit-shift operations.
// Objects are not copyable: the copy constructor and copy assignment are
// declared private.
class ascii_bitvector
{
    typedef unsigned int elem_type;
    enum
    {
        CBELEM = CHAR_BIT * sizeof( elem_type ), // count of bits per element
        CELEMS = ( UCHAR_MAX+1 ) / CBELEM        // number of elements in array
    };
    elem_type m_rg[ CELEMS ];

    // Used to inline operations like: bv1 |= ~bv2; without creating temp bit vectors.
    // It only holds a reference to the vector whose complement is wanted.
    struct not_ascii_bitvector
    {
        ascii_bitvector const & m_ref;
        not_ascii_bitvector( ascii_bitvector const & ref )
            : m_ref( ref ) {}
    private:
        not_ascii_bitvector & operator=( not_ascii_bitvector const & );
    };

    ascii_bitvector( ascii_bitvector const & );
    ascii_bitvector & operator=( ascii_bitvector const & );

public:
    // Starts with every bit cleared.
    ascii_bitvector()
    {
        zero();
    }

    // Clears every bit.
    void zero()
    {
        std::fill_n( m_rg, static_cast<size_t>( CELEMS ), elem_type( 0 ) );
    }

    // Sets the bit for character ch.
    void set( unsigned char ch )
    {
        m_rg[ ( ch / CBELEM ) ] |= ( ( elem_type )1U << ( ch % CBELEM ) );
    }

    // Returns true if the bit for character ch is set.
    bool operator[]( unsigned char ch ) const
    {
        return 0 != ( m_rg[ ( ch / CBELEM ) ] & ( ( elem_type )1U << ( ch % CBELEM ) ) );
    }

    // Returns a lightweight "complement of *this" marker for use with operator|=.
    not_ascii_bitvector const operator~() const
    {
        return not_ascii_bitvector( *this );
    }

    // Merges in every bit that is set in that.
    ascii_bitvector & operator|=( ascii_bitvector const & that )
    {
        for( int i=0; i<CELEMS; ++i )
            m_rg[ i ] |= that.m_rg[ i ];
        return *this;
    }

    // Merges in every bit that is clear in the vector referenced by that.
    ascii_bitvector & operator|=( not_ascii_bitvector const & that )
    {
        for( int i=0; i<CELEMS; ++i )
            m_rg[ i ] |= ~that.m_ref.m_rg[ i ];
        return *this;
    }
};
typedef std::pair
// Strict ordering on ranges: rg1 is less than rg2 only when rg1 ends
// (second) before rg2 begins (first).
// Used in binary search of the range vector.
struct range_less
{
    bool operator()( range_type const & rg1, range_type const & rg2 ) const
    {
        bool const ends_before = rg1.second < rg2.first;
        return ends_before;
    }
};
// A singly-linked list, which works even ifthe allocator
// has per-instance state.
template< typename T, typenameAllocT=std::allocator
class slist
{
struct cons
{
T car;
cons * cdr;
cons( T const & t, cons * nxt )
: car( t )
, cdr( nxt )
{
}
};
typedef typename rebind
typedef typename rebind
#if !defined(_MSC_VER) | 1200 < _MCS_VER
// Use the empty base optimization to avoidreserving
// space for the allocator if it is empty.
struct slist_impl : cons_allocator
{
cons * m_lst;
slist_impl( cons_allocator const & alloc,cons *lst )
: cons_allocator( alloc )
, m_lst( lst )
{
}
cons_allocator & allocator()
{
return *this;
}
};
#else
struct slist_impl
{
cons_allocator m_alloc;
cons *m_lst;
slist_impl( cons_allocator const & alloc,cons *lst )
: m_alloc( alloc )
, m_lst( lst )
{
}
cons_allocator & allocator()
{
return m_alloc;
}
};
#endif
slist_impl m_impl;
// find the previous node in the list(*prev(lst)==lst)
cons ** prev( cons *lst, cons *hint = 0 )
{
if( m_impl.m_lst == lst )
return &m_impl.m_lst;
if( !hint || hint->cdr != lst )
for( hint=m_impl.m_lst; hint->cdr != lst;hint=hint->cdr )
{}
return &hint->cdr;
}
public:
typedef T value_type;
typedef T* pointer;
typedef T& reference;
typedef T const* const_pointer;
typedef T const& const_reference;
typedef size_t size_type;
struct iterator : publicstd::iterator
{
friend class slist
explicit iterator( cons * pcons = 0 )
: m_pcons( pcons )
{
}
T & operator*() const
{
return m_pcons->car;
}
T * operator->() const
{
return &m_pcons->car;
}
iterator & operator++()
{
m_pcons = m_pcons->cdr;
return *this;
}
iterator operator++( int )
{
iterator i( *this );
++*this;
return i;
}
bool operator==( iterator it )
{
return m_pcons == it.m_pcons;
}
bool operator!=( iterator it )
{
return m_pcons != it.m_pcons;
}
private:
cons * m_pcons;
};
// not ideal, but good enough for gov'mentwork....
typedef iterator const_iterator;
explicit slist( char_allocator const & al =char_allocator() )
: m_impl( convert_allocator
{
}
~slist()
{
clear();
}
void clear()
{
for( cons *nxt; m_impl.m_lst; m_impl.m_lst=nxt)
{
nxt = m_impl.m_lst->cdr;
m_impl.allocator().destroy( m_impl.m_lst );
m_impl.allocator().deallocate( m_impl.m_lst, 1);
}
}
void push_front( T const & t )
{
cons * lst = m_impl.allocator().allocate( 1, 0);
try
{
m_impl.allocator().construct( lst, cons( t,m_impl.m_lst ) );
}
catch(...)
{
m_impl.allocator().deallocate( lst, 1 );
throw;
}
m_impl.m_lst = lst;
}
template< typename PredT >
void sort( PredT pred )
{
// simple insertion sort
cons *rst=m_impl.m_lst;
m_impl.m_lst = 0;
while( rst )
{
cons *cur=m_impl.m_lst, *prv=0;
while( cur && ! pred( rst->car,cur->car ) )
prv=cur, cur=cur->cdr;
if( prv )
prv->cdr=rst, rst=rst->cdr,prv->cdr->cdr=cur;
else
m_impl.m_lst=rst, rst=rst->cdr, m_impl.m_lst->cdr=cur;
}
}
void sort()
{
this->sort( std::less
}
iterator begin() const
{
return iterator( m_impl.m_lst );
}
iterator end() const
{
return iterator();
}
bool empty() const
{
return 0 == m_impl.m_lst;
}
size_t size() const
{
size_t len=0;
for( cons *lst=m_impl.m_lst; lst;lst=lst->cdr, ++len )
{}
return len;
}
iterator erase( iterator it, iterator hint =iterator() )
{
cons **prv = prev( it.m_pcons, hint.m_pcons );// *prv==it.p
*prv = it.m_pcons->cdr;
m_impl.allocator().destroy( it.m_pcons );
m_impl.allocator().deallocate( it.m_pcons, 1 );
return iterator( *prv );
}
void reverse()
{
cons *prv=0, *nxt;
while( m_impl.m_lst )
nxt = m_impl.m_lst->cdr,m_impl.m_lst->cdr = prv, prv = m_impl.m_lst, m_impl.m_lst = nxt;
m_impl.m_lst = prv;
}
};
template< typename AllocT >
struct basic_charset;
// Predicate holding one character; operator() reports whether that character
// does NOT belong to the given ctype class. The two local_isctype overloads
// pick the narrow or wide classification routine from the character type.
template< typename CharT >
struct posixcharsoff_pred
{
    CharT m_ch;

    posixcharsoff_pred( CharT ch )
        : m_ch( ch )
    {
    }

    static int local_isctype( char ch, regex_ctype_t desc )
    {
        return regex_isctype( ch, desc );
    }

    static int local_isctype( wchar_t ch, regex_ctype_t desc )
    {
        return regex_iswctype( ch, desc );
    }

    bool operator()( regex_ctype_t desc ) const
    {
        return 0 == local_isctype( m_ch, desc );
    }
};
// Predicate holding one character; operator() forwards to the `in` member of
// the charset it is handed (see the pcs->in calls below).
// NOTE(review): operator()'s parameter list and both return statements are
// truncated in this copy -- everything from the first template argument list
// onwards was stripped. Restore from the original source before compiling.
template< typename CharT, bool CaseT >
struct in_charset_pred
{
// The character being tested.
CharT m_ch;
in_charset_pred( CharT ch )
: m_ch( ch )
{
}
template< typename AllocT >
bool operator()( basic_charset
{
// One form is for VC6, the other for conforming compilers (macro names).
REGEX_VC6( return pcs->in( m_ch COMMAbool2type
REGEX_NVC6( return pcs->templatein
}
};
// A character set: a bit vector for characters up to UCHAR_MAX, a list of
// ranges for wider characters, a ctype mask of classes that are "on", a list
// of ctype classes that are "off", and a list of nested charsets.
// Copying is disabled (copy constructor and assignment are private).
//
// NOTE(review): this copy of the class is damaged. Template argument lists,
// and whatever followed them on the same line, were stripped (the typedefs
// at the top, every static_cast, the optimize()/in()/extended_check()
// signatures, ...), and some tokens are fused (e.g. "other_typeconst",
// "bool constfnocase"). It will not compile until restored from the
// original source.
template< typename AllocT >
struct basic_charset
{
// NOTE(review): the six typedefs below are truncated. Names used later in the
// class are other_type, other_ranges_type, ranges_type, posixcharsoff_type,
// nestedcharsets_type and char_allocator_type -- presumably these lines
// defined them; confirm against the original.
typedefbasic_charset
typedefslist
typedef slist
typedef slist
typedef slist
typedef typename rebind
// When true, in() inverts the result of the membership test.
bool m_fcompliment;
// Cached flag: true when both m_posixcharsoff and m_nestedcharsets are empty.
bool m_fskip_extended_check;
// Membership bits for characters in the range [0, UCHAR_MAX].
ascii_bitvector m_ascii_bitvector;
// Mask of ctype classes whose members belong to the set.
regex_ctype_t m_posixcharson;
// Ranges of characters above UCHAR_MAX that belong to the set.
ranges_type m_ranges;
// ctype classes whose NON-members match (see posixcharsoff_pred usage).
posixcharsoff_type m_posixcharsoff;
// Pointers to nested charsets that could not be merged directly.
nestedcharsets_type m_nestedcharsets;
// Creates an empty, non-complemented set; the three lists share allocator al.
explicit basic_charset( char_allocator_typeconst & al = char_allocator_type() )
: m_fcompliment( false )
, m_fskip_extended_check( false )
, m_ascii_bitvector()
, m_posixcharson( wct_zero )
, m_ranges( al )
, m_posixcharsoff( al )
, m_nestedcharsets( al )
{
}
// We'll be inheriting from this, so a virtual d'tor is regrettably necessary.
virtual ~basic_charset()
{
}
// Resets the set to the same state the constructor establishes.
void clear()
{
m_fcompliment = false;
m_fskip_extended_check = false;
m_ascii_bitvector.zero();
m_posixcharson = wct_zero;
m_ranges.clear();
m_posixcharsoff.clear();
m_nestedcharsets.clear();
}
// merge one charset into another
basic_charset & operator|=( other_typeconst & that )
{
if( that.m_fcompliment )
{
// If no posix-style character sets are used, then we can merge this
// nested character set directly into the enclosing character set.
if( wct_zero == that.m_posixcharson &&
that.m_posixcharsoff.empty() &&
that.m_nestedcharsets.empty() )
{
m_ascii_bitvector |= ~ that.m_ascii_bitvector;
// append the inverse of that.m_ranges to this->m_ranges
// chlow tracks the upper bound of the last range seen; each gap between
// consecutive ranges becomes a range of this set.
// NOTE(review): this appears to assume that.m_ranges is sorted ascending
// -- confirm that optimize() has been run on `that`.
wchar_t chlow = UCHAR_MAX;
typedef typenameother_ranges_type::const_iterator iter_type;
for( iter_type prg = that.m_ranges.begin();that.m_ranges.end() != prg; ++prg )
{
if( UCHAR_MAX + 1 != prg->first )
m_ranges.push_front( range_type( wchar_t(chlow+1 ), wchar_t( prg->first-1 ) ) );
chlow = prg->second;
}
if( WCHAR_MAX != chlow )
m_ranges.push_front( range_type( wchar_t(chlow+1 ), WCHAR_MAX ) );
}
else
{
// There is no simple way to merge this nested character
// set into the enclosing character set, so we must save
// a pointer to the nested character set in a list.
m_nestedcharsets.push_front( &that );
}
}
else
{
// Not complemented: take the union of every component.
m_ascii_bitvector |= that.m_ascii_bitvector;
std::copy( that.m_ranges.begin(),
that.m_ranges.end(),
std::front_inserter( m_ranges ) );
m_posixcharson |= that.m_posixcharson;
std::copy( that.m_posixcharsoff.begin(),
that.m_posixcharsoff.end(),
std::front_inserter( m_posixcharsoff ) );
std::copy( that.m_nestedcharsets.begin(),
that.m_nestedcharsets.end(),
std::front_inserter( m_nestedcharsets ) );
}
return *this;
}
// Note overloading based on first parameter
// Narrow-character version: sets bit(s) in the bit vector; two bits are set
// when fnocase is true, one otherwise.
// NOTE(review): the static_cast expressions below are truncated.
void set_bit( char ch, bool const fnocase )
{
if( fnocase )
{
m_ascii_bitvector.set( static_cast
m_ascii_bitvector.set( static_cast
}
else
{
m_ascii_bitvector.set( static_cast
}
}
// Note overloading based on first parameter
// Wide-character version: characters up to UCHAR_MAX go through the narrow
// overload, anything wider is stored as a one-character range.
// NOTE(review): the static_cast expression below is truncated.
void set_bit( wchar_t ch, bool const fnocase )
{
if( UCHAR_MAX >= ch )
set_bit( static_cast
else
m_ranges.push_front( range_type( ch, ch ) );
}
// Note overloading based on first two parameters
// Narrow-character range; throws bad_regexpr when the range check fails.
// NOTE(review): the condition, loop headers and static_casts are truncated.
void set_bit_range( char ch1, char ch2, bool constfnocase )
{
if( static_cast
throw bad_regexpr( "invalid rangespecified in character set" );
if( fnocase )
{
// i is unsigned int to prevent overflow if ch2 is UCHAR_MAX
for( unsigned int i = static_cast
i <= static_cast
{
m_ascii_bitvector.set( static_cast
m_ascii_bitvector.set( static_cast
}
}
else
{
// i is unsigned int to prevent overflow if ch2 is UCHAR_MAX
for( unsigned int i = static_cast
i <= static_cast
{
m_ascii_bitvector.set( static_cast
}
}
}
// Note overloading based on first two parameters
// Wide-character range; throws bad_regexpr when ch1 > ch2. The part at or
// below UCHAR_MAX is delegated to the narrow overload, the part above it is
// pushed onto m_ranges.
// NOTE(review): both calls below are truncated after static_cast.
void set_bit_range( wchar_t ch1, wchar_t ch2,bool const fnocase )
{
if( ch1 > ch2 )
throw bad_regexpr( "invalid rangespecified in character set" );
if( UCHAR_MAX >= ch1 )
set_bit_range( static_cast
if( UCHAR_MAX < ch2 )
m_ranges.push_front( range_type( regex_max(static_cast
}
// Prepares the set for matching: sorts and coalesces m_ranges, folds the
// m_posixcharson classes into the bit vector, and refreshes
// m_fskip_extended_check.
// NOTE(review): the parameter list (a type2type tag) is truncated.
void optimize( type2type
{
if( m_ranges.begin() != m_ranges.end() )
{
// this sorts on range_type.m_pfirst ( uses operator<() for pair templates )
m_ranges.sort();
// merge ranges that overlap
typename ranges_type::iteratoricur=m_ranges.begin(), iprev=icur++;
while( icur != m_ranges.end() )
{
// Overlapping or directly adjacent: absorb icur into iprev and erase it.
if( icur->first <= iprev->second + 1 )
{
iprev->second = regex_max( iprev->second,icur->second );
icur = m_ranges.erase( icur, iprev );
}
else
{
iprev=icur++;
}
}
}
// For the ASCII range, merge the m_posixcharson info
// into the ascii_bitvector
if( wct_zero != m_posixcharson )
{
// BUGBUG this is kind of expensive. Think of a better way.
for( unsigned int i=0; i<=UCHAR_MAX; ++i )
if( regex_isctype( i, m_posixcharson ) )
m_ascii_bitvector.set( static_cast
}
// m_fskip_extended_check is a cache which tells us whether we
// need to check the m_posixcharsoff and m_nestedcharsets vectors,
// which would only be used in nested user-defined character sets
m_fskip_extended_check =m_posixcharsoff.empty() && m_nestedcharsets.empty();
}
// Second optimize overload: runs the other overload, then clears
// m_posixcharson.
// NOTE(review): the parameter list and the inner call are truncated.
void optimize( type2type
{
optimize( type2type
// the posixcharson info was merged into the ascii bitvector,
// so we don't need to ever call regex_isctype ever again.
m_posixcharson = wct_zero;
}
// Slow-path membership test: true when ch fails one of the "off" ctype
// classes or is found in one of the nested charsets. Returns false at once
// when the cached flag says both lists are empty.
// NOTE(review): the signature and both predicate constructions are truncated.
template< bool CaseT, typename CharT >
bool extended_check( CharT ch REGEX_VC6(COMMAbool2type
{
REGEX_ASSERT( m_fskip_extended_check == (m_posixcharsoff.empty() && m_nestedcharsets.empty() ) );
if( m_fskip_extended_check )
{
return false;
}
return ( m_posixcharsoff.end() !=
std::find_if( m_posixcharsoff.begin(),m_posixcharsoff.end(),
posixcharsoff_pred
|| ( m_nestedcharsets.end() !=
std::find_if( m_nestedcharsets.begin(),m_nestedcharsets.end(),
in_charset_pred
}
// Range lookup selected by the true_t tag: a single binary search for ch
// itself, with no case conversion.
inline bool in_ranges( wchar_t ch, true_t )const
{
typedef typename ranges_type::const_iteratoriter_type;
iter_type ibegin = m_ranges.begin(), iend =m_ranges.end();
return ibegin != iend &&
std::binary_search( ibegin, iend, range_type(ch, ch ), range_less() );
}
// Range lookup selected by the false_t tag: searches for the upper-case form
// of ch and, if that differs from the lower-case form, for the lower-case
// form as well.
inline bool in_ranges( wchar_t ch, false_t )const
{
typedef typename ranges_type::const_iteratoriter_type;
iter_type ibegin = m_ranges.begin(), iend =m_ranges.end();
if( ibegin == iend )
return false;
wchar_t const chup = regex_toupper( ch );
if( std::binary_search( ibegin, iend,range_type( chup, chup ), range_less() ) )
return true;
wchar_t const chlo = regex_tolower( ch );
if( chup == chlo )
return false;
return std::binary_search( ibegin, iend,range_type( chlo, chlo ), range_less() );
}
// Note overloading based on parameter
// Narrow-character membership test; the result is inverted when
// m_fcompliment is set.
// NOTE(review): the signature and the tested expressions are truncated.
template< bool CaseT >
bool in( char ch REGEX_VC6(COMMAbool2type
{
// Whoops, forgot to call optimize() on this charset
REGEX_ASSERT( wct_zero == m_posixcharson );
return m_fcompliment !=
(
( m_ascii_bitvector[ static_cast
|| ( extended_check REGEX_NVC6(
);
}
// Note overloading based on parameter
// Wide-character membership test: characters up to UCHAR_MAX use the bit
// vector, wider ones use in_ranges() and the m_posixcharson classes; the
// result is inverted when m_fcompliment is set.
// NOTE(review): the signature and several sub-expressions are truncated, and
// the "&&" before regex_iswctype has been corrupted into a stray character.
template< bool CaseT >
bool in( wchar_t ch REGEX_VC6(COMMAbool2type
{
// use range_match_type to see if this character is within one of the
// ranges stored in m_rgranges.
return m_fcompliment !=
(
( ( UCHAR_MAX >= ch ) ?
( m_ascii_bitvector[ static_cast
( ( in_ranges( ch, bool2type
|| ( wct_zero != m_posixcharson &®ex_iswctype( ch, m_posixcharson ) ) ) )
|| ( extended_check REGEX_NVC6(
);
}
private:
// Declared private to disable copying.
basic_charset & operator=( basic_charsetconst & that );
basic_charset( basic_charset const & that);
};
// Intrinsic character sets are allocated onthe heap with the standard allocator.
// They are either the built-in character sets,or the user-defined ones.
struct charset : publicbasic_charset
{
charset()
{
}
private:
charset( charset const & );
charset & operator=( charset const & );
};
// charset is no longer an incomplete type so we now
// know how to destroy one. free_charset() is used in syntax2.h
//
// Deletes the charset through its (virtual) destructor. The parameter type
// had been fused into the single token "charsetconst"; it is "charset const *".
REGEXPR_H_INLINE void free_charset( charset const * pcharset )
{
    delete pcharset;
}
// Custom character sets are the ones that appear in patterns between
// square brackets. They are allocated in a regex_arena to speed up
// pattern compilation and to make rpattern clean-up faster.
//
// NOTE(review): this copy is damaged. The base-class specifier is fused and
// has lost its template argument ("publicbasic_charset"), the base
// initializer in the constructor is truncated, and the assignment operator's
// parameter type is fused ("custom_charsetconst"). Restore from the original
// source before compiling.
struct custom_charset : publicbasic_charset
{
// Placement form: storage comes from the arena.
static void * operator new( size_t size,regex_arena & arena )
{
return arena.allocate( size );
}
// Both delete forms are deliberately empty: nothing is released here.
// Presumably the arena reclaims the storage itself -- TODO confirm.
static void operator delete( void *,regex_arena & ) {}
static void operator delete( void * ) {}
custom_charset( regex_arena & arena )
: basic_charset
{
}
private:
// Declared private to disable copying.
custom_charset( custom_charset const & );
custom_charset & operator=( custom_charsetconst & );
};
// Provides the six built-in character sets (word, digit, space and their
// complements) as function-local statics, plus reset() to rebuild all of them.
//
// NOTE(review): this copy is damaged. In intrinsic_charset::reset() the
// static_cast and the optimize() call are truncated, and "regex_ctype_tdesc"
// is a fusion of the parameter's type and name. Restore from the original
// source before compiling.
template< typename CharT >
class intrinsic_charsets
{
// A charset that can be (re)built from a complement flag, a ctype mask and a
// string of extra characters.
struct intrinsic_charset : public charset
{
intrinsic_charset( bool fcompliment, regex_ctype_tdesc, char const * sz )
{
reset( fcompliment, desc, sz );
}
// Clears the set, then installs the complement flag, the ctype mask and one
// bit per character of sz, and finally calls optimize().
void reset( bool fcompliment, regex_ctype_tdesc, char const * sz )
{
clear();
m_fcompliment = fcompliment;
m_posixcharson = desc;
for( ; *sz; ++sz )
m_ascii_bitvector.set( static_cast
optimize( type2type
}
private:
// Declared private to disable copying.
intrinsic_charset( intrinsic_charset const& );
intrinsic_charset & operator=(intrinsic_charset const & );
};
// Each accessor below owns one function-local static charset, constructed on
// first call.
// Alphabetic or digit characters, plus '_'.
static intrinsic_charset &_get_word_charset()
{
static intrinsic_charset s_word_charset( false,wct_alpha()|wct_digit(), "_" );
return s_word_charset;
}
// Digit characters.
static intrinsic_charset &_get_digit_charset()
{
static intrinsic_charset s_digit_charset(false, wct_digit(), "" );
return s_digit_charset;
}
// Space characters.
static intrinsic_charset & _get_space_charset()
{
static intrinsic_charset s_space_charset(false, wct_space(), "" );
return s_space_charset;
}
// Complement of the word set.
static intrinsic_charset &_get_not_word_charset()
{
static intrinsic_charset s_not_word_charset(true, wct_alpha()|wct_digit(), "_" );
return s_not_word_charset;
}
// Complement of the digit set.
static intrinsic_charset &_get_not_digit_charset()
{
static intrinsic_charset s_not_digit_charset(true, wct_digit(), "" );
return s_not_digit_charset;
}
// Complement of the space set.
static intrinsic_charset &_get_not_space_charset()
{
static intrinsic_charset s_not_space_charset(true, wct_space(), "" );
return s_not_space_charset;
}
public:
// Read-only views of the sets above.
static charset const & get_word_charset()
{
return _get_word_charset();
}
static charset const & get_digit_charset()
{
return _get_digit_charset();
}
static charset const & get_space_charset()
{
return _get_space_charset();
}
static charset const &get_not_word_charset()
{
return _get_not_word_charset();
}
static charset const &get_not_digit_charset()
{
return _get_not_digit_charset();
}
static charset const &get_not_space_charset()
{
return _get_not_space_charset();
}
// Rebuilds all six sets with the same arguments their constructors use.
// Presumably intended for use after a locale change -- TODO confirm.
static void reset()
{
_get_word_charset().reset( false,wct_alpha()|wct_digit(), "_" );
_get_digit_charset().reset( false, wct_digit(),"" );
_get_space_charset().reset( false, wct_space(),"" );
_get_not_word_charset().reset( true,wct_alpha()|wct_digit(), "_" );
_get_not_digit_charset().reset( true,wct_digit(), "" );
_get_not_space_charset().reset( true,wct_space(), "" );
}
};
//
// Operator implementations
//
// Evaluates the beginning-of-string condition
template< typename CStringsT >
struct bos_t
{
template< typename IterT >
static bool eval( match_param
{
return param.m_ibufferbegin == iter;
}
};
// Find the beginning of a line, eitherbeginning of a string, or the character
// immediately following a newline
template< typename CStringsT >
struct bol_t
{
template< typename IterT >
static bool eval( match_param
{
typedef typenamestd::iterator_traits
typedef std::char_traits
return param.m_ibufferbegin == iter ||traits_type::eq( REGEX_CHAR(char_type,'/n'), *--iter );
}
};
// Evaluates end-of-string condition forstring's
template< typename CStringsT >
struct eos_t
{
template< typename IterT >
static bool eval( match_param
{
return param.m_iend == iter;
}
};
template<>
struct eos_t
{
template< typename IterT >
static bool eval( match_param
{
typedef typename std::iterator_traits
typedef std::char_traits
return traits_type::eq( *iter, char_type() );
}
};
// Evaluates end-of-line conditions, either theend of the string, or a
// newline character.
template< typename CStringsT >
struct eol_t
{
template< typename IterT >
static bool eval( match_param
{
typedef typenamestd::iterator_traits
typedef std::char_traits
return param.m_iend == iter
|| traits_type::eq( REGEX_CHAR(char_type,'/n'),*iter );
}
};
template<>
struct eol_t
{
template< typename IterT >
static bool eval( match_param
{
typedef typenamestd::iterator_traits
typedef std::char_traits
return traits_type::eq( *iter, char_type() )
|| traits_type::eq( *iter,REGEX_CHAR(char_type,'/n') );
}
};
// Evaluates perl's end-of-string conditions,either the end of the string, or a
// newline character followed by end of string.( Only used by $ and /Z assertions )
template< typename CStringsT >
struct peos_t
{
template< typename IterT >
static bool eval( match_param
{
typedef typenamestd::iterator_traits
typedef std::char_traits
return param.m_iend == iter
|| ( traits_type::eq(REGEX_CHAR(char_type,'/n'), *iter ) && param.m_iend == ++iter );
}
};
template<>
struct peos_t
{
template< typename IterT >
static bool eval( match_param
{
typedef typenamestd::iterator_traits
typedef std::char_traits
return traits_type::eq( *iter, char_type() )
|| ( traits_type::eq( *iter, REGEX_CHAR(char_type,'/n'))
&& traits_type::eq( *++iter,char_type() ) );
}
};
// Compare two characters, case-sensitive: eval() returns true when they
// differ according to std::char_traits<CharT>::eq.
// Restored here: the traits_type typedef (its template argument and name had
// been stripped) and the second parameter (fused into "registerCharT"). The
// `register` specifier is dropped: it was only ever a hint and is no longer
// valid in C++17.
template< typename CharT >
struct ch_neq_t
{
    typedef CharT char_type;
    typedef std::char_traits<CharT> traits_type;
    static bool eval( CharT ch1, CharT ch2 )
    {
        return ! traits_type::eq( ch1, ch2 );
    }
};
// Compare two characters, disregarding case: eval() returns true when their
// regex_toupper() forms differ according to std::char_traits<CharT>::eq.
// Restored here: the traits_type typedef (its template argument and name had
// been stripped) and the second parameter (fused into "registerCharT"). The
// `register` specifier is dropped: it was only ever a hint and is no longer
// valid in C++17.
template< typename CharT >
struct ch_neq_nocase_t
{
    typedef CharT char_type;
    typedef std::char_traits<CharT> traits_type;
    static bool eval( CharT ch1, CharT ch2 )
    {
        return ! traits_type::eq( regex_toupper( ch1 ), regex_toupper( ch2 ) );
    }
};
//
// helper functions for dealing with widths.
//
// Adds two widths, where size_t( -1 ) means "unbounded".
// Returns size_t( -1 ) if either operand is unbounded, or if the true sum does
// not fit in size_t. Previously such a sum wrapped around and came back as a
// small finite width; an unrepresentably large width is reported as unbounded
// instead.
inline size_t width_add( size_t a, size_t b )
{
    size_t const unbounded = size_t( -1 );
    if( unbounded == a || unbounded == b )
        return unbounded;
    // a + b > unbounded, rearranged so the test itself cannot overflow
    if( a > unbounded - b )
        return unbounded;
    return a + b;
}
// Multiplies two widths, where size_t( -1 ) means "unbounded".
// Zero times anything (including unbounded) is zero. Otherwise the result is
// size_t( -1 ) if either operand is unbounded, or if the true product does not
// fit in size_t. Previously such a product wrapped around and came back as a
// small (possibly zero) finite width.
inline size_t width_mult( size_t a, size_t b )
{
    size_t const unbounded = size_t( -1 );
    if( 0 == a || 0 == b )
        return 0;
    if( unbounded == a || unbounded == b )
        return unbounded;
    // a * b > unbounded, rearranged so the test itself cannot overflow
    // (b is non-zero here)
    if( a > unbounded / b )
        return unbounded;
    return a * b;
}
// Two widths are equal when both their minimum and maximum agree.
inline bool operator==( width_type const & rhs,width_type const & lhs )
{
    if( !( rhs.m_min == lhs.m_min ) )
        return false;
    return rhs.m_max == lhs.m_max;
}
// Two widths differ when either their minimum or their maximum differs.
inline bool operator!=( width_type const &rhs, width_type const & lhs )
{
    if( rhs.m_min != lhs.m_min )
        return true;
    return rhs.m_max != lhs.m_max;
}
// Component-wise sum of two widths, computed with width_add.
inline width_type operator+( width_type const& rhs, width_type const & lhs )
{
    size_t const min_total = width_add( rhs.m_min, lhs.m_min );
    size_t const max_total = width_add( rhs.m_max, lhs.m_max );
    width_type const total = { min_total, max_total };
    return total;
}
// Component-wise product of two widths, computed with width_mult.
inline width_type operator*( width_type const& rhs, width_type const & lhs )
{
    size_t const min_product = width_mult( rhs.m_min, lhs.m_min );
    size_t const max_product = width_mult( rhs.m_max, lhs.m_max );
    width_type const product = { min_product, max_product };
    return product;
}
// Adds lhs into rhs component-wise (via width_add) and returns rhs.
inline width_type & operator+=( width_type& rhs, width_type const & lhs )
{
    size_t const new_min = width_add( rhs.m_min, lhs.m_min );
    size_t const new_max = width_add( rhs.m_max, lhs.m_max );
    rhs.m_min = new_min;
    rhs.m_max = new_max;
    return rhs;
}
// Multiplies rhs by lhs component-wise (via width_mult) and returns rhs.
inline width_type & operator*=( width_type& rhs, width_type const & lhs )
{
    size_t const new_min = width_mult( rhs.m_min, lhs.m_min );
    size_t const new_max = width_mult( rhs.m_max, lhs.m_max );
    rhs.m_min = new_min;
    rhs.m_max = new_max;
    return rhs;
}
namespace
{
    // Width of something that consumes no characters: min 0, max 0.
    width_type const zero_width = { 0, 0 };
    // Least informative width: min 0, max size_t( -1 ).
    width_type const worst_width = { 0, static_cast<size_t>( -1 ) };
}
// Bundle of state handed to the width computations (see get_width /
// width_this): a group collection, a list of invisible groups, and a running
// width that starts at zero_width. Assignment is disabled.
//
// NOTE(review): this copy is damaged. The two std::vector / std::list member
// declarations and the matching constructor parameters are truncated --
// element types and names were stripped. The initializer list shows the
// members are m_rggroups and m_invisible_groups, initialised from parameters
// rggroups and invisible_groups. Restore from the original source.
template< typename IterT >
struct width_param
{
std::vector
std::list
width_type m_width;
width_param
(
std::vector
std::list
)
: m_rggroups( rggroups )
, m_invisible_groups( invisible_groups )
, m_width( zero_width )
{
}
private:
// Declared private to disable assignment.
width_param & operator=( width_param const& );
};
// Plain record of a string that is known to be required: a flag saying
// whether such a string exists, an iterator pair into it, and a pointer to a
// character buffer (m_lower).
// Restored here: the string_type typedef (its template argument and name had
// been stripped; the name is fixed by its use on the following line) and the
// const_iterator typedef, whose two identifiers had been fused.
template< typename CharT >
struct must_have
{
    typedef std::basic_string<CharT> string_type;
    typedef typename string_type::const_iterator const_iterator;
    bool m_has;
    const_iterator m_begin;
    const_iterator m_end;
    CharT const * m_lower;
};
// Information gathered by peek_this() about how a sub-expression can begin.
//
// NOTE(review): the final member declaration is truncated -- the template
// argument of must_have and the member's name were stripped. Restore from
// the original source before compiling.
template< typename CharT >
struct peek_param
{
// "chars" is a list of characters. If every alternate in a group
// begins with a character or string literal, the "chars" list can
// be used to speed up the matching of a group.
size_t m_cchars;
// Either up to two characters stored inline, or a pointer to a longer list.
// Presumably m_cchars selects which union member is active -- TODO confirm.
union
{
CharT m_rgchars[2];
CharT const * m_pchars;
};
// "must" is a string that must appear in the match. It is used
// to speed up the search.
must_have
};
//--------------------------------------------------------------------------
//
// Class: sub_expr
//
// Description: patterns are "compiled" into a directed graph of sub_expr
// structs. Matching is accomplished by traversing this graph.
//
// Methods: sub_expr - construct a sub_expr
// recursive_match_this - does this sub_expr match at the given location
// width_this - what is the width of this sub_expr
// ~sub_expr - recursively delete the sub_expr graph
// pnext - address of the pointer to the next node in the graph
// next - pointer to the next node in the graph
// recursive_match_next - match the rest of the graph
// recursive_match_all - recursive_match_this and recursive_match_next
// is_assertion - true if this sub_expr is a zero-width assertion
// get_width - find the width of the graph at this sub_expr
//
// Members: m_pnext - pointer to the next node in the graph
//
// History: 8/14/2000 - ericne - Created
//
// NOTE(review): this copy of the class is damaged. The base-class specifier
// is fused and has lost its template argument, the two typedefs are
// truncated, and every member function taking a match_param, width_param or
// peek_param has lost the rest of its signature. Restore from the original
// source before compiling.
//
//--------------------------------------------------------------------------
template< typename IterT >
class sub_expr : publicsub_expr_base
{
sub_expr * m_pnext;
protected:
// Only derived classes can instantiate sub_expr's
sub_expr()
: m_pnext( 0 )
{
}
public:
typedef IterT iterator_type;
typedef typenamestd::iterator_traits
typedef std::char_traits
// Deleting a node deletes its successor, and so on down the chain.
virtual ~sub_expr()
{
delete m_pnext;
}
// Address of the next-pointer, so a caller can link a node in.
sub_expr ** pnext()
{
return & m_pnext;
}
sub_expr const * next() const
{
return m_pnext;
}
// Default: this kind of node cannot be quantified; always throws bad_regexpr.
virtual sub_expr * quantify( size_t, size_t,bool, regex_arena & )
{
throw bad_regexpr( "sub-expression cannotbe quantified" );
}
// Match this object and all subsequent objects
// If recursive_match_all returns false, it must not change any of param's state
// (_s variants pass false_t to recursive_match_next, _c variants pass true_t.)
virtual bool recursive_match_all_s(match_param
{
return ( recursive_match_this_s( param, icur )&& recursive_match_next( param, icur, false_t() ) );
}
virtual bool recursive_match_all_c(match_param
{
return ( recursive_match_this_c( param, icur )&& recursive_match_next( param, icur, true_t() ) );
}
// match this object only
// The base implementations match unconditionally.
virtual bool recursive_match_this_s(match_param
{
return true;
}
virtual bool recursive_match_this_c(match_param
{
return true;
}
// Match all subsequent objects
template< typename CStringsT >
bool recursive_match_next(match_param
{
return m_pnext->recursive_match_all( param,icur, CStringsT() );
}
// Iterative matching: the base implementations record the next node in
// param.m_pnext and succeed.
virtual bool iterative_match_this_s(match_param
{
param.m_pnext = next();
return true;
}
virtual bool iterative_match_this_c(match_param
{
param.m_pnext = next();
return true;
}
// The base implementations of rematch always fail.
virtual bool iterative_rematch_this_s(match_param
{
return false;
}
virtual bool iterative_rematch_this_c(match_param
{
return false;
}
virtual bool is_assertion() const
{
return false;
}
// Width of this node plus, when there is a successor, the width of the rest
// of the chain.
width_type get_width( width_param
{
width_type temp_width = width_this( param );
if( m_pnext )
temp_width += m_pnext->get_width( param );
return temp_width;
}
virtual width_type width_this(width_param
// The base implementation of peek_this returns false.
virtual bool peek_this(peek_param
{
return false;
}
};
// An object of type end_of_pattern is used to mark the
// end of the pattern. (Duh!) It is responsible for ending
// the recursion, or for letting the search continue if
// the match is zero-width and we are trying to find a
// non-zero-width match
//
// NOTE(review): this copy is damaged. The base-class specifier is fused and
// has lost its template argument, and every member function has lost the
// rest of its signature after match_param / width_param. Restore from the
// original source before compiling.
template< typename IterT >
class end_of_pattern : publicsub_expr
{
// Succeeds unless param.m_no0len is set and the current position equals the
// position where the match began.
bool _do_match_this( match_param
{
return ! param.m_no0len || param.m_imatchbegin!= icur;
}
public:
// Recursive matching ends here: no successor is consulted.
virtual bool recursive_match_all_s(match_param
{
return _do_match_this( param, icur );
}
virtual bool recursive_match_all_c(match_param
{
return _do_match_this( param, icur );
}
// Iterative matching: clear param.m_pnext, then apply the same test at
// param.m_icur.
virtual bool iterative_match_this_s(match_param
{
param.m_pnext = 0;
return _do_match_this( param, param.m_icur );
}
virtual bool iterative_match_this_c(match_param
{
param.m_pnext = 0;
return _do_match_this( param, param.m_icur );
}
virtual width_type width_this(width_param
{
return zero_width;
}
};
// Base class for sub-expressions which are zero-width
// ( i.e., assertions eat no characters during matching )
// Assertions cannot be quantified.
//
// NOTE(review): the base class has lost its template argument, and
// width_this / peek_this have lost the rest of their signatures. Restore
// from the original source before compiling.
template< typename IterT >
class assertion : public sub_expr
{
public:
virtual bool is_assertion() const
{
return true;
}
virtual width_type width_this(width_param
{
return zero_width;
}
// An assertion contributes nothing itself, so the question is passed on to
// the next node.
virtual bool peek_this(peek_param
{
return this->next()->peek_this( peek );
}
};
// Pairs two operator types under the member names op_type and opc_type.
template< class OpT, class OpCT >
struct opwrap
{
    typedef OpCT opc_type;
    typedef OpT  op_type;
};
// Expands to the opwrap pairing of an operator template's two flavours:
// op_type is x<false_t> (used by the *_s code paths) and opc_type is
// x<true_t> (used by the *_c code paths). The macro's argument list had been
// stripped, leaving just "opwrap".
#define REGEX_OP(x) opwrap< x<false_t>, x<true_t> >
template< typename IterT, typename OpWrapT>
class assert_op : public assertion
{
public:
virtual bool recursive_match_all_s(match_param
{
return ( assert_op::recursive_match_this_s(param, icur ) && this->recursive_match_next( param, icur, false_t()) );
}
virtual bool recursive_match_all_c(match_param
{
return ( assert_op::recursive_match_this_c(param, icur ) && this->recursive_match_next( param, icur, true_t() ));
}
virtual bool recursive_match_this_s(match_param
{
return OpWrapT::op_type::eval( param, icur );
}
v