Commit ded78c59 authored by ahoms
Browse files

* added SimpleRegEx with basic regular expression support


git-svn-id: https://scm.blissgarden.org/svn/lima/trunk@63 45c4679d-1946-429d-baad-37912b19538b
parent d7a50a74
# Top-level build: forwards the "all" and "clean" goals to sub-makes.
# NOTE(review): each goal below invokes the src sub-make twice (once through
# MAKE_CMD, once explicitly) -- the MAKE_CMD lines look like leftovers from the
# previous revision; confirm before relying on them.
MAKE_CMD = $(MAKE) -C src
# Build the src directory, then the test directory.
all:
$(MAKE_CMD) all
$(MAKE) -C src all
$(MAKE) -C test all
# Clean the src directory, then the test directory.
clean:
$(MAKE_CMD) clean
$(MAKE) -C src clean
$(MAKE) -C test clean
#ifndef REGEX_H
#define REGEX_H
#include "Exceptions.h"
#include <string>
#include <vector>
#include <sys/types.h>
#include <regex.h>
namespace lima
{
// Wraps a POSIX regex_t (<regex.h>) together with the pattern string it was
// compiled from, and reports matches as iterator ranges.
class SimpleRegEx
{
public:
    // One matched span, delimited by the iterators start and end. The search
    // functions fill these with iterators into the string they were given.
    struct SingleMatch {
        std::string::const_iterator start;
        std::string::const_iterator end;
    };
    typedef SingleMatch SingleMatchType;

    // The spans reported for one match (entry 0 is the whole match).
    typedef std::vector<SingleMatchType> FullMatchType;
    // One FullMatchType per match found by multiSearch().
    typedef std::vector<FullMatchType> MatchListType;

    // An object built without a pattern has an empty pattern string and
    // nothing compiled; searching with it throws.
    SimpleRegEx();
    SimpleRegEx(const std::string& regex_str);
    SimpleRegEx(const SimpleRegEx& regex);
    ~SimpleRegEx();

    SimpleRegEx& operator =(const SimpleRegEx& regex);
    // Appends regex's pattern string to this object's pattern and recompiles.
    SimpleRegEx& operator +=(const SimpleRegEx& regex);

    // Pattern string currently held (empty if none is set).
    const std::string& getRegExStr() const;

    // Stores the match number match_idx (0-based) in `match`; returns false
    // if fewer than match_idx + 1 matches were found.
    bool singleSearch(const std::string& str, FullMatchType& match,
                      int nb_groups = 0, int match_idx = 0);

    // Fills match_list with the matches found in str. nb_groups == 0 selects
    // 255 result slots; max_nb_match == 0 puts no limit on the match count.
    void multiSearch(const std::string& str, MatchListType& match_list,
                     int nb_groups = 0, int max_nb_match = 0);

    // True only if the first match found in str starts at str.begin().
    bool match(const std::string& str, FullMatchType& match,
               int nb_groups = 0);

private:
    // Replaces the held pattern, compiling it with REG_EXTENDED.
    void set(const std::string& regex_str);
    // Releases the compiled pattern (if any) and clears the pattern string.
    void free();
    // Throws an Exception carrying the regerror() text for `ret`.
    void throwError(int ret, std::string file, std::string func, int line);

    std::string m_str;  // pattern source; empty means m_regex is not compiled
    regex_t m_regex;    // compiled form of m_str
};
// Returns a new expression whose pattern is re1's pattern followed by re2's.
inline SimpleRegEx operator +(const SimpleRegEx& re1, const SimpleRegEx& re2)
{
    // Return the named local rather than the reference yielded by +=, so the
    // compiler may elide the return copy instead of copy-constructing (and
    // therefore recompiling the pattern) one more time.
    SimpleRegEx re(re1);
    re += re2;
    return re;
}
} // namespace lima
#endif // REGEX_H
# Object files grouped under common-objs; RegEx.o is the entry for SimpleRegEx.
# NOTE(review): the "BufferSave.o MemUtils.o" continuation appears twice below,
# once without and once with RegEx.o -- this looks like the old and the new
# diff line side by side; only one of them should remain.
common-objs := Constants.o SizeUtils.o Timestamp.o ThreadUtils.o Exceptions.o \
BufferSave.o MemUtils.o
BufferSave.o MemUtils.o RegEx.o
# Header search paths, all warnings, and pthread support.
CXXFLAGS += -I../include -I../../hardware/include -Wall -pthread
......
#include "RegEx.h"
using namespace lima;
using namespace std;
// Evaluates `ret` exactly once and, if the result is non-zero, hands it to
// throwError() together with the call site's file, function and line.
// The do { } while (0) wrapper makes the expansion a single statement, so
// "if (c) CHECK_CALL(x); else ..." parses as intended.
#define CHECK_CALL(ret) \
    do { \
        const int aux_ret = (ret); \
        if (aux_ret != 0) \
            throwError(aux_ret, __FILE__, __FUNCTION__, \
                       __LINE__); \
    } while (0)
// Creates an object with no pattern: set() sees an empty string equal to the
// (empty) current one and returns without compiling anything.
SimpleRegEx::SimpleRegEx()
{
    set(string());
}
// Creates an object holding regex_str, compiled through set().
SimpleRegEx::SimpleRegEx(const string& regex_str)
    : m_str()
{
    set(regex_str);
}
// Copies by recompiling the other object's pattern string; the compiled
// regex_t itself is never copied.
SimpleRegEx::SimpleRegEx(const SimpleRegEx& regex)
    : m_str()
{
    const string& pattern = regex.m_str;
    set(pattern);
}
// Releases the compiled pattern, if one is held.
SimpleRegEx::~SimpleRegEx()
{
    this->free();
}
// Takes over the other object's pattern string via set(); when the strings
// are already equal (self-assignment included) set() does nothing.
SimpleRegEx& SimpleRegEx::operator =(const SimpleRegEx& regex)
{
    SimpleRegEx& self = *this;
    self.set(regex.m_str);
    return self;
}
// Concatenates the other object's pattern string onto this one's and
// recompiles the result.
SimpleRegEx& SimpleRegEx::operator +=(const SimpleRegEx& regex)
{
    // Build the combined pattern in a separate string first: set() clears
    // m_str before compiling.
    set(string(m_str).append(regex.m_str));
    return *this;
}
// Makes regex_str the held pattern. Nothing happens if it equals the current
// one; otherwise the old compiled pattern is released and, unless regex_str
// is empty, the new one is compiled with REG_EXTENDED. A regcomp() failure is
// thrown through CHECK_CALL, leaving the object without a pattern.
void SimpleRegEx::set(const string& regex_str)
{
    const bool unchanged = (regex_str == m_str);
    if (unchanged)
        return;

    free();

    const bool has_pattern = !regex_str.empty();
    if (has_pattern) {
        CHECK_CALL(regcomp(&m_regex, regex_str.c_str(), REG_EXTENDED));
    }

    m_str = regex_str;
}
// Releases the compiled pattern and clears the pattern string. A non-empty
// m_str is the marker that m_regex holds a compiled pattern, so nothing is
// done when it is empty.
void SimpleRegEx::free()
{
    if (!m_str.empty()) {
        regfree(&m_regex);
        m_str.clear();
    }
}
// Returns a reference to the pattern string currently held (empty if none).
const string& SimpleRegEx::getRegExStr() const
{
    return this->m_str;
}
bool SimpleRegEx::singleSearch(const string& str, FullMatchType& match,
int nb_groups, int match_idx)
{
if (match_idx < 0)
throw LIMA_COM_EXC(InvalidValue, "Invalid match index");
MatchListType match_list;
multiSearch(str, match_list, nb_groups, match_idx + 1);
if (int(match_list.size()) <= match_idx)
return false;
match = match_list[match_idx];
return true;
}
void SimpleRegEx::multiSearch(const string& str, MatchListType& match_list,
int nb_groups, int max_nb_match)
{
if (m_str.empty())
throw LIMA_COM_EXC(InvalidValue, "Regular expression not set");
match_list.clear();
typedef string::const_iterator StrIt;
StrIt sbegin = str.begin();
StrIt send = str.end();
if (nb_groups == 0)
nb_groups = 255;
regmatch_t reg_match[nb_groups];
regmatch_t *mend = reg_match + nb_groups;
StrIt it = sbegin;
for (int i = 0; it != send; i++) {
if ((max_nb_match > 0) && (i == max_nb_match))
break;
string aux(it, send);
int flags = (it != sbegin) ? REG_NOTBOL : 0;
int ret = regexec(&m_regex, aux.c_str(), nb_groups, reg_match,
flags);
if (ret == REG_NOMATCH)
break;
CHECK_CALL(ret);
StrIt match_end = send;
FullMatchType full_match;
for (regmatch_t *m = reg_match; m != mend; ++m) {
if (m->rm_so == -1)
break;
SingleMatchType match;
match.start = it + m->rm_so;
match.end = it + m->rm_eo;
full_match.push_back(match);
match_end = match.end;
}
match_list.push_back(full_match);
it = match_end;
}
}
// Returns true if the first match found in str begins at str.begin().
// `match` receives that first match whenever one is found, even when it does
// not start at the beginning and false is returned.
bool SimpleRegEx::match(const string& str, FullMatchType& match,
                        int nb_groups)
{
    const bool found = singleSearch(str, match, nb_groups);
    // Entry 0 of a FullMatchType is the whole match.
    return found && (match[0].start == str.begin());
}
void SimpleRegEx::throwError(int ret, string file, string func, int line)
{
size_t len = regerror(ret, &m_regex, NULL, 0);
string regerr(len, '\0');
char *data = (char *) regerr.data();
regerror(ret, &m_regex, data, regerr.size());
string err_desc = string("regex: ") + regerr;
throw Exception(Common, Error, err_desc, file, func, line);
}
# Build rules for the testregex program.
CXXFLAGS = -Wall -I ../include -pthread
LDFLAGS = -pthread
# The default goal lists clean as a prerequisite alongside testregex.
all: clean testregex
# Link the test object with ../src/Common.o ($+ expands to all prerequisites).
testregex: testregex.o ../src/Common.o
$(CXX) $(LDFLAGS) -o $@ $+
# Remove the object files and the test binary.
clean:
rm -f *.o testregex
#include "RegEx.h"
#include <iostream>
using namespace lima;
using namespace std;
void test_simple_regex(const string& re_str, const string& s)
{
SimpleRegEx re(re_str);
cout << "re=\"" << re.getRegExStr() << "\"" << endl;
cout << "s=\"" << s << "\"" << endl;
typedef SimpleRegEx::SingleMatchType SingleMatchType;
typedef SimpleRegEx::FullMatchType FullMatchType;
typedef SimpleRegEx::MatchListType MatchListType;
MatchListType match_list;
re.multiSearch(s, match_list);
MatchListType::const_iterator iti = match_list.begin();
string::const_iterator b = s.begin();
for (int i = 0; iti != match_list.end(); ++i, ++iti) {
const FullMatchType& fmatch = *iti;
FullMatchType::const_iterator itj = fmatch.begin();
for (int j = 0; itj != fmatch.end(); ++j, ++itj) {
const SingleMatchType& smatch = *itj;
cout << i << "-" << j << ": "
<< smatch.start - b << "-" << smatch.end - b
<< ": " << string(smatch.start, smatch.end)
<< endl;
}
}
cout << endl;
}
int main(int argc, char *argv[])
{
try {
test_simple_regex("b?ab((ab)(\\2c))?", "abababc.bab");
test_simple_regex("^b?ab((ab)(\\2c))?", "abababc.bab");
} catch (Exception e) {
cerr << "LIMA Exception: " << e << endl;
}
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment