在上個星期面試一家公司的筆試題上面的最后一道題就是寫程序查找一個字符串的最長重復子串。當時想了很長時間沒想出什么好方法,就把一個算法復雜度比較高的算法寫上去了?;貋砩蠙C把那個算法寫了一遍測試沒問題,然后自己又到網上面查查還有什么方法,然后發現好像有種叫做后綴樹的方法,然后看那個方法,當時沒給出代碼,看圖看了老半天加之自己想了好幾個小時終于知道后綴樹是個什么東西。然后自己萌生了一個自己寫一個后綴樹算法解決那個重復子串的問題。然后寫了一天終于寫出來了。后續有做了一些測試,發現自己寫的一個只有幾十個字母的字符串比之前的那個算法慢了好幾十倍,想了很久想不出原因,后來自己隨機生成了一個10000個字符的字符串使用后綴樹算法比舊的方法快了4倍,所以后綴樹算法還是一個比較優秀的算法的。但是為了以后能夠回來看下自己寫的東西,所以就寫這篇博客記錄一下自己寫的后綴樹算法的源代碼。一下是代碼
class SuffixTreeNode;
typedef SuffixTreeNode* SuffixTreeNodePtr;
class SuffixTreeNode {
public:
SuffixTreeNode();
void initNode();
SuffixTreeNodePtr& returnChildsAt(int i);
int cmpSameLength(const char *start);
void setHead(const char *start);
const char* getHead();
void setLen(int length);
int getLen();
void setStartPos(int pos);
int getStartPos();
private:
const char *pHead;
int len;
int start;
SuffixTreeNode* childs[256];
};
class SuffixTree {
public:
SuffixTree();
~SuffixTree();
int insert(const char *start, int pos);
private:
SuffixTreeNode* allocNode();
bool allocBufferNode(int size = 1024);
int innerInsert(SuffixTreeNode *pNode, const char *start, int pos, int preSameLength);
SuffixTreeNode* root;
SuffixTreeNode* freeNode;
int maxStrLen;
std::vector<SuffixTreeNode*> nodeBuff;
};
SuffixTreeNode::SuffixTreeNode() {
initNode();
}
void SuffixTreeNode::initNode() {
memset(this, 0, sizeof(SuffixTreeNode));
}
SuffixTreeNodePtr& SuffixTreeNode::returnChildsAt(int i) {
return childs[i];
}
int SuffixTreeNode::cmpSameLength(const char *start) {
int length = 0;
if (pHead != NULL)
for (; (length < len) && (pHead[length] == start[length]); length++);
else
return 0;
return length;
}
void SuffixTreeNode::setHead(const char *start) {
pHead = start;
}
const char* SuffixTreeNode::getHead() {
return pHead;
}
void SuffixTreeNode::setLen(int length) {
len = length;
}
int SuffixTreeNode::getLen() {
return len;
}
void SuffixTreeNode::setStartPos(int pos) {
start = pos;
}
int SuffixTreeNode::getStartPos() {
return start;
}
SuffixTree::SuffixTree() : root(NULL), freeNode(NULL){
}
SuffixTree::~SuffixTree() {
for (size_t i = 0; i < nodeBuff.size(); i++) {
SuffixTreeNode* pNode = nodeBuff.at(i);
if (pNode != NULL) {
free(pNode);
}
}
}
bool SuffixTree::allocBufferNode(int size) {
SuffixTreeNode *pNode = (SuffixTreeNode*)malloc(sizeof(SuffixTreeNode) * size);
if (pNode == NULL) {
return false;
}
nodeBuff.push_back(pNode);
for (int i = 0; i < size; i++) {
pNode->returnChildsAt(0) = freeNode;
freeNode = pNode;
pNode++;
}
return true;
}
SuffixTreeNode* SuffixTree::allocNode() {
if (freeNode == NULL) {
if (!allocBufferNode())
return NULL;
}
assert(freeNode != NULL);
SuffixTreeNode* pNode = freeNode;
freeNode = freeNode->returnChildsAt(0);
return pNode;
}
int SuffixTree::insert(const char *start, int pos) {
if (root == NULL) {
root = allocNode();
if (root == NULL) {
return 0;
}
root->initNode();
maxStrLen = strlen(start);
}
return innerInsert(root, start, pos, 0);
}
int SuffixTree::innerInsert(SuffixTreeNode *pNode, const char *start, int pos, int preSameLength) {
if (pNode == NULL)
return 0;
int sameLength = pNode->cmpSameLength(start);
if (sameLength < pNode->getLen()) {
SuffixTreeNode *pRetNode = allocNode();
if (pRetNode == NULL) {
return 0;
}
pRetNode->initNode();
pRetNode->setHead(pNode->getHead() + sameLength);
pRetNode->setLen(pNode->getLen() - sameLength);
pRetNode->setStartPos(pNode->getStartPos());
pNode->setLen(sameLength);
for (int i = 0; pNode->returnChildsAt(i) != NULL; i++) {
pRetNode->returnChildsAt(i) = pNode->returnChildsAt(i);
pNode->returnChildsAt(i) = NULL;
}
pNode->returnChildsAt(0) = pRetNode;
pRetNode = allocNode();
if (pRetNode == NULL) {
return 0;
}
pRetNode->initNode();
pRetNode->setHead(start + sameLength);
pRetNode->setLen(maxStrLen - (pos + preSameLength + sameLength));
pRetNode->setStartPos(pos);
pNode->returnChildsAt(1) = pRetNode;
}
else if (sameLength == pNode->getLen()) {
int index = 0;
for (;pNode->returnChildsAt(index) != NULL; index++) {
if ((pNode->returnChildsAt(index)->getHead())[0] == start[sameLength]) {
return sameLength + innerInsert(pNode->returnChildsAt(index), start + sameLength, pos, preSameLength + sameLength);
}
}
SuffixTreeNode *pRetNode = allocNode();
if (pRetNode == NULL) {
return 0;
}
pRetNode->initNode();
pRetNode->setHead(start + sameLength);
pRetNode->setLen(maxStrLen - (pos + preSameLength + sameLength));
pRetNode->setStartPos(pos);
pNode->returnChildsAt(index) = pRetNode;
}
return sameLength;
}
string findMax(string &ret) {
int maxLen = 0;
int maxPos = 0;
SuffixTree tree;
const char *str = ret.c_str();
for (int i = 0; str[i] != '\0'; i++) {
int curLen = tree.insert(str + i, i);
if (curLen > maxLen) {
maxLen = curLen;
maxPos = i;
}
}
return ret.substr(maxPos, maxLen);
}findMax函數就是那個找到最長重復子串那個函數了。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。