An Anatomy of CLucene Index Merging

  In CLucene's IndexWriter::addDocument, every added document gets a fresh segment name and a new SegmentInfo, which is appended to the segmentInfos list. Once the merge threshold is reached, a segment merge is triggered.
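
  To fix the overall picture before reading the code, here is a minimal standalone sketch of this flow. The names (SegmentInfo, segmentInfos, addDocumentModel) are illustrative stand-ins rather than CLucene's real API; the only point is that every added document is first flushed as its own one-document segment, and the merge check runs afterwards.

#include <string>
#include <vector>

// Toy stand-ins for CLucene's SegmentInfo / segmentInfos (names are illustrative).
struct SegmentInfo { std::string name; int docCount; };
std::vector<SegmentInfo> segmentInfos;
int segmentCounter = 0;

std::string newSegmentName() { return "_" + std::to_string(segmentCounter++); }

// Conceptual shape of addDocument(): flush the document as a fresh 1-doc segment,
// register it, then check whether a merge is due (method (1) below).
void addDocumentModel(/* const Document& doc */) {
    segmentInfos.push_back({newSegmentName(), /*docCount=*/1});
    // maybeMergeSegments();
}

int main() {
    for (int i = 0; i < 3; ++i) addDocumentModel();
    return 0;   // segmentInfos now holds three 1-doc segments: _0, _1, _2
}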

  (1). void IndexWriter::maybeMergeSegments() method

//Check whether segments need to be merged

void IndexWriter::maybeMergeSegments()

{

    //Right after indexing, each freshly flushed segment holds a single document

    int64_t targetMergeDocs = minMergeDocs;  // starts at minMergeDocs (10 by default)

    // find segments smaller than current target size

    // never merge more than maxMergeDocs documents at once

    while (targetMergeDocs <= maxMergeDocs)

    {

        int32_t minSegment = segmentInfos->size();

        int32_t mergeDocs = 0;

        //Walk the segments from the back, accumulating the documents of the trailing small segments

        //(1). Example: one merged segment of 10 documents already exists and a new document arrives as

        //     a new 1-doc segment. --minSegment stops at that merged segment, whose docCount already

        //     equals targetMergeDocs, so the inner loop breaks; mergeDocs stays below the target and

        //     no merge is triggered.

        //(2). Every 10 newly added documents are merged into one new 10-doc segment. Once ten such

        //     10-doc segments exist, the target is raised (targetMergeDocs *= mergeFactor ===> 100)

        //     and those 100 documents are merged into a single 100-doc segment, and so on, level by level

        while (--minSegment >= 0)

        {

            SegmentInfo* si = segmentInfos->info(minSegment);

            if (si->docCount >= targetMergeDocs)

            {

                break;

            }

            mergeDocs += si->docCount;

        }

        if (mergeDocs >= targetMergeDocs)

        {

            // Enough trailing small segments have accumulated for this level; merge them into one

            mergeSegments(minSegment+1);

        }

        else

        {

            break;

        }

        //increase target size: multiply by mergeFactor for the next merge level

        targetMergeDocs *= mergeFactor;

    }

}
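
  To see when mergeSegments(minSegment+1) actually fires, the loop can be replayed outside CLucene. The sketch below is a self-contained simulation, not library code: it uses a toy SegmentInfo list and assumes the usual defaults minMergeDocs = 10 and mergeFactor = 10 (consistent with the targetMergeDocs *= mergeFactor ===> 100 step noted above). Adding 100 documents one at a time prints a merge into a 10-doc segment after every 10 buffered documents and a second-level merge into a single 100-doc segment at document 100.

#include <cstdio>
#include <vector>

struct SegmentInfo { int docCount; };            // toy stand-in for CLucene's SegmentInfo

static std::vector<SegmentInfo> segmentInfos;
static const int minMergeDocs = 10;              // assumed default
static const int maxMergeDocs = 0x7FFFFFFF;
static const int mergeFactor  = 10;              // assumed default

// Stand-in for the real merge: collapse segments [minSegment, end) into one segment.
static void mergeSegments(size_t minSegment) {
    int merged = 0;
    for (size_t i = minSegment; i < segmentInfos.size(); ++i) merged += segmentInfos[i].docCount;
    segmentInfos.resize(minSegment);
    segmentInfos.push_back({merged});
    std::printf("  merged into a %d-doc segment, %zu segment(s) left\n", merged, segmentInfos.size());
}

// Same control flow as IndexWriter::maybeMergeSegments() above.
static void maybeMergeSegments() {
    long long targetMergeDocs = minMergeDocs;
    while (targetMergeDocs <= maxMergeDocs) {
        int minSegment = (int)segmentInfos.size();
        int mergeDocs = 0;
        while (--minSegment >= 0) {
            if (segmentInfos[minSegment].docCount >= targetMergeDocs) break;
            mergeDocs += segmentInfos[minSegment].docCount;
        }
        if (mergeDocs >= targetMergeDocs) mergeSegments(minSegment + 1);
        else break;
        targetMergeDocs *= mergeFactor;
    }
}

int main() {
    for (int doc = 1; doc <= 100; ++doc) {
        segmentInfos.push_back({1});             // addDocument: one new 1-doc segment
        maybeMergeSegments();
    }
    return 0;
}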

  (2). void IndexWriter::mergeSegments() method

void IndexWriter::mergeSegments(const uint32_t minSegment, const uint32_t end)

{

    CLVector<SegmentReader*> segmentsToDelete(false);

    const char* mergedName = newSegmentName();

    //SegmentMerger drives the actual merge into the new segment mergedName

    SegmentMerger merger(this, mergedName);

    //Add a reader for each segment to the merger; readers over our own directories are queued for deletion

    for (size_t i = minSegment; i < end; i++)

    {

        SegmentInfo* si = segmentInfos->info(i);

        //Open a SegmentReader over each segment's info

        SegmentReader* reader = _CLNEW SegmentReader(si);

        merger.add(reader);

        if ((reader->getDirectory() == this->directory) || // if we own the directory

            (reader->getDirectory() == this->ramDirectory))

        {

                segmentsToDelete.push_back(reader);   // queue segment for deletion

        }

    }

    //Perform the actual merge of the segment indexes

    int32_t mergedDocCount = merger.merge();

#ifdef _CL_DEBUG_INFO

    fprintf(_CL_DEBUG_INFO,"\n into %s (%d docs)\n",mergedName, mergedDocCount);

#endif

    //Drop the infos of the segments that have just been merged

    segmentInfos->clearto(minSegment);// remove old infos & add new

    //Register the info of the newly merged segment

    segmentInfos->add( _CLNEW SegmentInfo(mergedName, mergedDocCount, directory) );

    merger.closeReaders();

    LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);

    LockWith2 with ( lock, commitLockTimeout,this, &segmentsToDelete, true );

 

    {

        SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync

            with.run();

    }

    _CLDELETE( lock );

    if (useCompoundFile)

    {

        char cmpdTmpName[CL_MAX_PATH];

        strcpy(cmpdTmpName,mergedName);

        strcat(cmpdTmpName,".tmp");

        AStringArrayWithDeletor filesToDelete;

        merger.createCompoundFile(cmpdTmpName, filesToDelete);

        LuceneLock* lock = directory->makeLock(IndexWriter::COMMIT_LOCK_NAME);

        LockWithCFS with ( lock,commitLockTimeout,directory, this, mergedName, &filesToDelete);

        {

            SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync

                with.run();

        }

        _CLDELETE(lock);

    }

    _CLDELETE_CaARRAY( mergedName );

}
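
  Both locked sections follow the same pattern: prepare the new data without holding the commit lock, then publish it while holding the lock. In the classic Lucene design that CLucene follows, LockWith2 rewrites the segments file and deletes the replaced segment files, and LockWithCFS renames the temporary compound file to <segment>.cfs and deletes the files it absorbed. The sketch below is only a simplified, self-contained model of that publish step (std::mutex instead of a lock file, std::filesystem instead of Directory); none of it is CLucene code.

#include <filesystem>
#include <fstream>
#include <mutex>
#include <string>
#include <vector>

namespace fs = std::filesystem;

static std::mutex commitLock;   // stand-in for the COMMIT_LOCK lock file

// Publish an already-built compound file: rename the .tmp and drop the files it replaced.
void commitCompound(const fs::path& dir, const std::string& mergedName,
                    const std::vector<std::string>& filesToDelete) {
    fs::path tmp = dir / (mergedName + ".tmp");
    fs::path cfs = dir / (mergedName + ".cfs");
    std::lock_guard<std::mutex> guard(commitLock);   // roughly SCOPED_LOCK_MUTEX + LockWithCFS::run()
    fs::rename(tmp, cfs);                            // make the compound file visible
    for (const std::string& f : filesToDelete)       // remove the now-redundant per-segment files
        fs::remove(dir / f);
}

int main() {
    fs::path dir = fs::temp_directory_path() / "clucene_merge_demo";
    fs::create_directories(dir);
    std::ofstream(dir / "_3.tmp") << "compound data";
    std::ofstream(dir / "_3.frq") << "old postings";
    commitCompound(dir, "_3", {"_3.frq"});
    return 0;
}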

  (3). int32_t SegmentMerger::merge() method

//Perform the actual merge

int32_t SegmentMerger::merge()

{

    int32_t value = mergeFields();

    mergeTerms();

    mergeNorms();

    if (fieldInfos->hasVectors())

    {

        mergeVectors();

    }

    return value;

}

  (4). int32_t SegmentMerger::mergeFields() method

int32_t SegmentMerger::mergeFields()

{

    //Build the merged FieldInfos

    fieldInfos = _CLNEW FieldInfos();         // merge field names

    //Condition check to see if fieldInfos points to a valid instance

    CND_CONDITION(fieldInfos != NULL,"Memory allocation for fieldInfos failed");

    IndexReader* reader = NULL;

    int32_t docCount = 0;

    //Iterate through all readers

    for (uint32_t i = 0; i < readers.size(); i++){

        //get the i-th reader

        reader = readers[i];

        //Condition check to see if reader points to a valid instance

        CND_CONDITION(reader != NULL,"No IndexReader found");

        StringArrayWithDeletor tmp;

        tmp.clear();

        //Calls the segment reader's getFieldNames(); SegmentReader is a subclass of IndexReader

        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);

        addIndexed(reader, fieldInfos, tmp, true, true, true);

        tmp.clear();

        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);

        addIndexed(reader, fieldInfos, tmp, true, true, false);

        tmp.clear();

        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);

        addIndexed(reader, fieldInfos, tmp, true, false, true);

        tmp.clear();

        reader->getFieldNames(IndexReader::TERMVECTOR, tmp);

        addIndexed(reader, fieldInfos, tmp, true, false, false);

        tmp.clear();

        reader->getFieldNames(IndexReader::INDEXED, tmp);

        addIndexed(reader, fieldInfos, tmp, false, false, false);

        tmp.clear();

        reader->getFieldNames(IndexReader::UNINDEXED, tmp);

        if ( tmp.size() > 0 )

        {

            TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);

            tmp.toArray(arr);

            fieldInfos->add((const TCHAR**)arr, false);

            _CLDELETE_ARRAY(arr); //no need to delete the contents, since tmp is responsible for it

        }

    }

    const char* buf = Misc::segmentname(segment,".fnm");

    //Write out the merged field infos (.fnm)

    fieldInfos->write(directory, buf );

    _CLDELETE_CaARRAY(buf);

    FieldsWriter* fieldsWriter = _CLNEW FieldsWriter(directory, segment, fieldInfos);

    CND_CONDITION(fieldsWriter != NULL,"Memory allocation for fieldsWriter failed");

    try

    { 

        IndexReader* reader = NULL;

        int32_t maxDoc          = 0;

        //Iterate over the SegmentReader instances

        for (uint32_t i = 0; i < readers.size(); i++) {

            reader = readers[i];

            CND_CONDITION(reader != NULL, "No IndexReader found");

            //After merging, the single new segment will hold the documents of all source segments

            int32_t maxDoc = reader->maxDoc();

            //document buffer

            Document doc;

            //Iterate through all the documents managed by the current reader

            for (int32_t j = 0; j < maxDoc; j++){

                //Check if the j-th document has been deleted, if so skip it

                //Only copy the document if it has not been deleted in its source segment

                if (!reader->isDeleted(j)){

                    //Get the document

                    if ( reader->document(j, &doc) ){  //see bool SegmentReader::document(), which reads stored fields through its FieldsReader

                        //Add the document to the new FieldsWriter

                        //Write the document's stored field values into the merged segment

                        fieldsWriter->addDocument( &doc );

                        docCount++;

                        //doc is cleared for re-use

                        doc.clear();

                    }

                }

            }

        }

    }_CLFINALLY(        fieldsWriter->close();

    _CLDELETE( fieldsWriter );

    );

    //Return the number of documents in the merged segment

    return docCount;

}
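
  Note that skipping deleted documents here silently renumbers the survivors, which is exactly why appendPostings() later needs a per-segment docMap. A minimal standalone sketch of how such a map can be derived from a deletion bitset (names are illustrative, not CLucene's):

#include <cstdio>
#include <vector>

// Map old (per-segment) doc ids to new, gap-free ids; deleted docs map to -1.
// This mirrors the role of SegmentMergeInfo::getDocMap() used in appendPostings().
std::vector<int> buildDocMap(const std::vector<bool>& deleted) {
    std::vector<int> docMap(deleted.size());
    int newId = 0;
    for (size_t oldId = 0; oldId < deleted.size(); ++oldId)
        docMap[oldId] = deleted[oldId] ? -1 : newId++;
    return docMap;
}

int main() {
    // Segment with 5 docs where docs 1 and 3 are deleted: survivors become 0, 1, 2.
    std::vector<int> m = buildDocMap({false, true, false, true, false});
    for (size_t i = 0; i < m.size(); ++i)
        std::printf("old %zu -> new %d\n", i, m[i]);
    return 0;
}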

  (5). void SegmentReader::initialize() method

//Initialize the segment reader's state

void SegmentReader::initialize(SegmentInfo* si)

{

    deletedDocs      = NULL;

    ones               = NULL;

    deletedDocsDirty = false;

    normsDirty=false;

    undeleteAll=false;

    //Segment name

    segment          = STRDUP_AtoA(si->name);

    //Frequency stream and position (prox) stream

    freqStream       = NULL;

    proxStream       = NULL;

    //instantiate a buffer large enough to hold a directory path

    char buf[CL_MAX_PATH];

    // Use compound file directory for some files, if it exists

    Directory* cfsDir = getDirectory();  //the directory is passed to the constructor; getDirectory() simply returns directory

    SegmentName(buf, CL_MAX_PATH, ".cfs");

    if (cfsDir->fileExists(buf)) {

        cfsReader = _CLNEW CompoundFileReader(cfsDir, buf);

        cfsDir = cfsReader;

    }else

        cfsReader = NULL;

    //Create the name of the field info file with suffix .fnm in buf

    //Build the .fnm file name from the segment prefix to open the field-info stream

    SegmentName(buf, CL_MAX_PATH, ".fnm");

    //The FieldInfos constructor opens an input stream on that file and reads the field infos back

    fieldInfos = _CLNEW FieldInfos(cfsDir, buf );

    //Condition check to see if fieldInfos points to a valid instance

    CND_CONDITION(fieldInfos != NULL,"No memory could be allocated for fieldInfos");

    //Create the name of the frequence file with suffix .frq in buf

    SegmentName(buf,CL_MAX_PATH, ".frq");

    //Open an IndexInput freqStream to the frequency file

#ifdef LUCENE_FS_MMAP

    if ( cfsDir->getDirectoryType() == FSDirectory::DirectoryType() ){

        FSDirectory* fsdir = (FSDirectory*)cfsDir;

        freqStream = fsdir->openMMapFile( buf );

    } else if (strcmp(cfsDir->getDirectoryType(), "CFS") == 0) { //todo: we should have a CFS Directory

        freqStream = cfsDir->openInput(buf,true);

    }else

#endif

        //Frequency input stream

        freqStream = cfsDir->openInput( buf );

    //Condition check to see if freqStream points to a valid instance and was able to open the

    //frequency file

    CND_CONDITION(freqStream != NULL, "IndexInput freqStream could not open the frequency file");

    //Create the name of the prox file with suffix .prx in buf

    SegmentName(buf, CL_MAX_PATH,".prx");

    //Open an IndexInput proxStream to the prox file

#ifdef LUCENE_FS_MMAP

    if (cfsDir->getDirectoryType() == FSDirectory::DirectoryType()) {

        FSDirectory* fsdir = (FSDirectory*)cfsDir;

        proxStream = fsdir->openMMapFile( buf );

    } else if (strcmp(cfsDir->getDirectoryType(), "CFS") == 0) {

        proxStream = cfsDir->openInput(buf,true);

    } else

#endif

        //Position (prox) input stream

        proxStream = cfsDir->openInput( buf );

    //Condition check to see if proxStream points to a valid instance and was able to open the

    //prox file

    CND_CONDITION(proxStream != NULL, "IndexInput proxStream could not open proximity file");

    //Instantiate a FieldsReader for reading the Field Info File

    //Create the stored-fields reader

    fieldsReader = _CLNEW FieldsReader(cfsDir, segment, fieldInfos);

    CND_CONDITION(fieldsReader != NULL,"No memory could be allocated for fieldsReader");

    //Create the term-dictionary reader

    tis = _CLNEW TermInfosReader(cfsDir, segment, fieldInfos);

    //Condition check to see if tis points to a valid instance

    CND_CONDITION(tis != NULL,"No memory could be allocated for tis");

 

    //Check if the segment has deletions according to the SegmentInfo instance si

    // NOTE: the bitvector is stored using the regular directory, not cfs

    if (hasDeletions(si)){

        //Build the name of the deletion file with suffix .del

        SegmentName(buf, CL_MAX_PATH,".del");

        //Instantiate a BitVector that manages which documents have been deleted

        deletedDocs = _CLNEW BitSet(getDirectory(), buf );

    }

    openNorms(cfsDir);

    //Create the term vector (positions/offsets) reader only when needed

    if (fieldInfos->hasVectors()) { // open term vector files only as needed

        termVectorsReaderOrig = _CLNEW TermVectorsReader(cfsDir, segment, fieldInfos);

    }else

        termVectorsReaderOrig = NULL;

}
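
  For orientation, the block below summarizes, in code form, the per-segment files that this initializer (together with FieldsReader, TermInfosReader and TermVectorsReader) ends up reading. The list reflects the classic Lucene/CLucene file format; the stored-field, term-dictionary, norm and term-vector extensions are not shown in the snippet above and are included here from that format description, so treat them as background rather than something proven by this code.

// Per-segment files of the classic Lucene/CLucene index format, opened by
// SegmentReader::initialize() either directly or through the .cfs compound file.
static const char* const kSegmentFiles[][2] = {
    {".fnm",               "field infos (names and flags)"},
    {".fdx / .fdt",        "stored fields, read via FieldsReader"},
    {".tii / .tis",        "term dictionary, read via TermInfosReader"},
    {".frq",               "per-term document/frequency postings (freqStream)"},
    {".prx",               "per-term position postings (proxStream)"},
    {".fN",                "field norms, one file per indexed field (openNorms)"},
    {".tvx / .tvd / .tvf", "term vectors, read via TermVectorsReader when present"},
    {".del",               "deleted-document bitvector, always kept outside the .cfs"},
    {".cfs",               "compound file wrapping the per-segment files when enabled"},
};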

 

  (6). void SegmentMerger::mergeTerms() method

//Merge the terms

void SegmentMerger::mergeTerms()

{

    CND_PRECONDITION(fieldInfos != NULL, "fieldInfos is NULL");

    try{

        //create a filename for the new Frequency File for segment

        const char* buf = Misc::segmentname(segment,".frq");

        //Open an IndexOutput to the new Frequency File

        freqOutput = directory->createOutput( buf );

        //Destroy the buffer of the filename

        _CLDELETE_CaARRAY(buf);

        //create a filename for the new Prox File for segment

        buf = Misc::segmentname(segment,".prx");

        //Open an IndexOutput to the new Prox File

        proxOutput = directory->createOutput( buf );

        //delete buffer

        _CLDELETE_CaARRAY( buf );

        //Instantiate  a new termInfosWriter which will write in directory

        //for the segment name segment using the new merged fieldInfos

        termInfosWriter = _CLNEW TermInfosWriter(directory, segment, fieldInfos, termIndexInterval); 

        //Condition check to see if termInfosWriter points to a valid instance

        CND_CONDITION(termInfosWriter != NULL,"Memory allocation for termInfosWriter failed")    ;

        //Use the configured skip interval

        skipInterval = termInfosWriter->skipInterval;

        //Priority queue over the segments being merged

        queue = _CLNEW SegmentMergeQueue(readers.size());

        //And merge the Term Infos

        mergeTermInfos();        

    }_CLFINALLY(

        if (freqOutput != NULL)         { freqOutput->close(); _CLDELETE(freqOutput); }

        if (proxOutput != NULL)         { proxOutput->close(); _CLDELETE(proxOutput); }

        if (termInfosWriter != NULL)    { termInfosWriter->close(); _CLDELETE(termInfosWriter); }

        if (queue != NULL)            { queue->close(); _CLDELETE(queue);}

        );

}

(7). void SegmentMerger::mergeTermInfos() method

 

//The actual term-merging procedure (a standalone sketch of this loop follows the function below):

//(1). Take the first term of every segment and put it into the priority queue;

//(2). Pop the top of the queue; while the new top holds the same term, keep popping and collect

//     all of them in the match array;

//(3). Merge the postings of these identical terms (mergeTermInfo);

//(4). Advance each matched segment to its next term and push it back into the queue

//     (the matched entries were already popped);

//(5). Repeat until the queue is empty

void SegmentMerger::mergeTermInfos()

{

    CND_CONDITION(queue != NULL, "Memory allocation for queue failed");

    //base is the id of the first document in a segment

    int32_t base = 0;

    IndexReader* reader = NULL;

    SegmentMergeInfo* smi = NULL;

    //iterate through all the readers

    //Take the first term of each segment and seed the priority queue

    for (uint32_t i = 0; i < readers.size(); i++)

    {

        //Get the i-th reader

        reader = readers[i];

        //Condition check to see if reader points to a valid instance

        CND_CONDITION(reader != NULL, "No IndexReader found");

        //Get the term enumeration of the reader

        //Get the term enumerator for this reader

        TermEnum* termEnum = reader->terms();

        //Instantiate a new SegmentMerginfo for the current reader and enumeration

        //Construct a SegmentMergeInfo from the enumerator

        //(reader is actually a SegmentReader, a subclass of IndexReader)

        smi = _CLNEW SegmentMergeInfo(base, termEnum, reader);

        //Condition check to see if smi points to a valid instance

        CND_CONDITION(smi != NULL, "Memory allocation for smi failed")  ;

        //Increase the base by the number of documents that have not been marked deleted

        //so base will contain a new value for the first document of the next iteration

        base += reader->numDocs();

        //Get the next current term

        //After next(), SegmentMergeInfo::term holds the segment's first term, fetched through the enumerator

        if (smi->next()){

            //Store the SegmentMergeInfo smi with the initialized SegmentTermEnum TermEnum

            //into the queue

            queue->put(smi);

        }else{

            //Apparently the end of the TermEnum of the SegmentTerm has been reached so

            //close the SegmentMergeInfo smi

            smi->close();

            //And destroy the instance and set smi to NULL (It will be used later in this method)

            _CLDELETE(smi);

        }

    }

    //Instantiate an array of SegmentMergeInfo instances called match

    SegmentMergeInfo** match = _CL_NEWARRAY(SegmentMergeInfo*,readers.size()+1);

    //Condition check to see if match points to a valid instance

    CND_CONDITION(match != NULL, "Memory allocation for match failed")  ;

    SegmentMergeInfo* top = NULL;

    //As long as there are SegmentMergeInfo instances stored in the queue

    //Loop: repeatedly process the smallest current term drawn from each segment's .tis term dictionary

    while (queue->size() > 0) {

        int32_t matchSize = 0;           

        // pop matching terms

        //Pop the first SegmentMergeInfo from the queue

        match[matchSize++] = queue->pop();

        //Get the Term of match[0]

        Term* term = match[0]->term;

        //Condition check to see if term points to a valid instance

        CND_CONDITION(term != NULL,"term is NULL")  ;

        //Get the current top of the queue

        top = queue->top();

        //For each SegmentMergInfo still in the queue

        //Check if term matches the term of the SegmentMergeInfo instances in the queue

        //Every SegmentMergeInfo currently positioned on the same term is collected into the match

        //array, so all entries in match share one term; the size of match is the number of segments

        //that contain this term (the term's document frequency is computed later in appendPostings)

        while (top != NULL && term->equals(top->term) )

        {

            //A match has been found so add the matching SegmentMergeInfo to the match array

            match[matchSize++] = queue->pop();

            //Get the next SegmentMergeInfo

            top = queue->top();

        }

        match[matchSize]=NULL;

        //add new TermInfo

        mergeTermInfo(match); //matchSize 

        //Restore the SegmentTermInfo instances in the match array back into the queue

        //Advance each matched segment to its next term; push it back if it still has terms

        while (matchSize > 0){

            smi = match[--matchSize];

            //Condition check to see if smi points to a valid instance

            CND_CONDITION(smi != NULL,"smi is NULL")    ;

            //Move to the next term in the enumeration of SegmentMergeInfo smi

            //so the next iteration can process it

            if (smi->next()){

                //There still are some terms so restore smi in the queue

                queue->put(smi);

            }else{

                //Done with a segment

                //No terms anymore so close this SegmentMergeInfo instance

                smi->close();                

                _CLDELETE( smi );

            }

        }

    }

    _CLDELETE_ARRAY(match);

}
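
  The pop-equal-terms / merge / advance / re-insert cycle described above can be shown with a self-contained example built on std::priority_queue over plain strings. It is only an analogy for SegmentMergeQueue and SegmentMergeInfo, not CLucene code, but the control flow is the same: the queue always exposes the smallest current term, every segment positioned on that term is pulled into a match array, and each matched segment is advanced and re-inserted.

#include <cstdio>
#include <queue>
#include <string>
#include <vector>

// One "segment": a sorted term list plus a cursor, playing the role of SegmentMergeInfo.
struct Cursor {
    const std::vector<std::string>* terms;
    size_t pos;
    const std::string& term() const { return (*terms)[pos]; }
    bool next() { return ++pos < terms->size(); }    // advance; false once exhausted
};

// Order the heap so the smallest term is on top (what SegmentMergeQueue does).
struct ByTerm {
    bool operator()(const Cursor& a, const Cursor& b) const { return a.term() > b.term(); }
};

int main() {
    std::vector<std::string> seg0 = {"apple", "cat", "dog"};
    std::vector<std::string> seg1 = {"cat", "zebra"};
    std::priority_queue<Cursor, std::vector<Cursor>, ByTerm> queue;
    queue.push(Cursor{&seg0, 0});
    queue.push(Cursor{&seg1, 0});

    while (!queue.empty()) {
        std::vector<Cursor> match;                   // all segments positioned on the same term
        match.push_back(queue.top()); queue.pop();
        while (!queue.empty() && queue.top().term() == match[0].term()) {
            match.push_back(queue.top()); queue.pop();
        }
        std::printf("term %-5s found in %zu segment(s)\n",
                    match[0].term().c_str(), match.size());
        // mergeTermInfo(match) would append the merged postings here.
        for (Cursor& c : match)                      // advance and re-insert the survivors
            if (c.next()) queue.push(c);
    }
    return 0;
}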

(8). void SegmentMerger::mergeTermInfo() method

//Merge a term that several segments have in common

void SegmentMerger::mergeTermInfo( SegmentMergeInfo** smis)

{

    CND_PRECONDITION(smis != NULL, "smis is NULL");

    CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");

    CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");

    //Get the file pointer of the IndexOutput to the Frequency File

    int64_t freqPointer = freqOutput->getFilePointer();

    //Get the file pointer of the IndexOutput to the Prox File

    int64_t proxPointer = proxOutput->getFilePointer();

    //Process postings from multiple segments all positioned on the same term.

    int32_t df = appendPostings(smis); 

    int64_t skipPointer = writeSkip();

    //df contains the number of documents across all segments where this term was found

    if (df > 0) {

        //add an entry to the dictionary with pointers to prox and freq files

        termInfo.set(df, freqPointer, proxPointer, (int32_t)(skipPointer - freqPointer));

        //Precondition check for to be sure that the reference to

        //smis[0]->term will be valid

        CND_PRECONDITION(smis[0]->term != NULL, "smis[0]->term is NULL");

        //Write a new TermInfo

        termInfosWriter->add(smis[0]->term, &termInfo);

    }

}
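
  What termInfosWriter->add() records for each surviving term is essentially a small dictionary entry. The plain struct below is an illustrative restatement of the four values passed to termInfo.set() above, not CLucene's actual TermInfo class.

#include <cstdint>

// Illustrative layout of the per-term dictionary entry written by TermInfosWriter::add().
struct TermDictEntry {
    int32_t docFreq;      // df: documents containing the term across all merged segments
    int64_t freqPointer;  // offset of this term's postings in the new .frq file
    int64_t proxPointer;  // offset of this term's positions in the new .prx file
    int32_t skipOffset;   // start of the skip data, stored relative to freqPointer
};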

(9). int32_t SegmentMerger::appendPostings() method

//Several segments carry postings for the same term; append their docs, frequencies and positions

int32_t SegmentMerger::appendPostings(SegmentMergeInfo** smis)

{

    CND_PRECONDITION(smis != NULL, "smis is NULL");

    CND_PRECONDITION(freqOutput != NULL, "freqOutput is NULL");

    CND_PRECONDITION(proxOutput != NULL, "proxOutput is NULL");

    int32_t lastDoc = 0;

    int32_t df = 0;       //document counter: docs seen for this term across all segments

    resetSkip();

    SegmentMergeInfo* smi = NULL;

    //Iterate through all SegmentMergeInfo instances in smis

    int32_t i = 0;

    while ( (smi=smis[i]) != NULL ){

        //Get the i-th SegmentMergeInfo

        //Condition check to see if smi points to a valid instance

        CND_PRECONDITION(smi!=NULL,"smi is NULL");

        //Get the term positions

        TermPositions* postings = smi->getPositions();

        //Get the base of this segment

        int32_t base = smi->base;

        //Get the docMap so we can see which documents have been deleted

        int32_t* docMap = smi->getDocMap();

        //Position the TermPositions on the current term

        postings->seek(smi->termEnum);

        while (postings->next())

        {

            int32_t doc = postings->doc();

            //Check if there are deletions

            //If this segment has deletions, remap the doc id around them

            if (docMap != NULL)

            {

                doc=docMap[doc]; // map around deletions

            }

            doc+= base;           // convert to merged space

            //Condition check to see doc is equal to or bigger than lastDoc

            CND_CONDITION(doc >= lastDoc,"docs out of order");

            //Increase the document frequency: one more document contains this term

            df++;

            if ((df % skipInterval) == 0)

            {

                bufferSkip(lastDoc);

            }

            //Calculate a new docCode

            //use low bit to flag freq=1

            int32_t docCode = (doc - lastDoc) << 1;  

            lastDoc = doc;

            //Get the frequency of the Term

            int32_t freq = postings->freq();

            if (freq == 1){

                //write doc & freq=1

                freqOutput->writeVInt(docCode | 1);  

            }else{

                //write doc

                freqOutput->writeVInt(docCode);  

                //write frequency in doc

                freqOutput->writeVInt(freq);         

            }

            int32_t lastPosition = 0;            

            // write position deltas

            for (int32_t j = 0; j < freq; j++)

            {

                //Get the next position

                int32_t position = postings->nextPosition(); //next position from the positions iterator

                //Write the difference between position and the last position

                proxOutput->writeVInt(position - lastPosition); //write the position delta

                lastPosition = position;

            }

        }

        i++;

    }

    //Return total number of documents across all segments where term was found   

    //Return the number of documents, across all segments, in which this term occurs

    return df;

}
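
  The docCode trick used above (delta-encode the doc id, reuse the low bit as a freq == 1 flag, then write variable-length integers) can be demonstrated in isolation. The writeVInt below follows the standard Lucene variable-byte scheme, seven data bits per byte with a continuation bit; it is a self-contained sketch, not CLucene's IndexOutput.

#include <cstdint>
#include <cstdio>
#include <vector>

// Lucene-style VInt: low 7 bits first, high bit set on every byte except the last.
static void writeVInt(std::vector<uint8_t>& out, uint32_t v) {
    while (v > 0x7F) { out.push_back((uint8_t)((v & 0x7F) | 0x80)); v >>= 7; }
    out.push_back((uint8_t)v);
}

// Encode one posting exactly the way appendPostings() writes the .frq stream.
static void writePosting(std::vector<uint8_t>& out, int doc, int lastDoc, int freq) {
    uint32_t docCode = (uint32_t)(doc - lastDoc) << 1;   // doc delta, low bit kept free
    if (freq == 1) writeVInt(out, docCode | 1);          // low bit flags freq == 1
    else { writeVInt(out, docCode); writeVInt(out, (uint32_t)freq); }
}

int main() {
    std::vector<uint8_t> frq;
    int lastDoc = 0;
    writePosting(frq, /*doc=*/3,  lastDoc, /*freq=*/1); lastDoc = 3;    // emits VInt(7)
    writePosting(frq, /*doc=*/10, lastDoc, /*freq=*/4); lastDoc = 10;   // emits VInt(14), VInt(4)
    for (uint8_t b : frq) std::printf("%02X ", b);                      // prints: 07 0E 04
    std::printf("\n");
    return 0;
}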

 
