Boost中string的regex用法

头文件

boost/algorithm/string/regex.hpp

作用

string的regex有如下API

find_regex:查找字符串str中第一个满足 regex的子串,返回一个 iterator_range,标记该子串的起始和终止位置。

replace_regex:查找字符串str中第一次满足 regex的子串,并用新子串 替换。

replace_all_regex:查找字符串str中所有满足 regex的子串,并用新子串 替换。

erase_regex:查找字符串str中第一次满足 regex的子串,删除

erase_all_regex:查找字符串str中所有满足 regex的子串,删除

find_all_regex:查找字符串str中所有满足 regex的子串,并将它们存入结果容器(例如 vector<string>);结果容器中原有的内容会被覆盖。

split_regex:以字符串str中所有满足 regex的子串作为分隔符,将str分割成若干子串,并存入结果容器。

join_if_regex:将字符串数组中完整匹配 regex的元素用给定的分隔符连接起来,返回新的字符串;不匹配的元素会被跳过。(在未定义 BOOST_NO_FUNCTION_TEMPLATE_ORDERING 的编译器上,该函数名为 join_if。)

举例

#include 
#include 
#include 
// equals predicate is used for result comparison
#include // Include unit test framework
#include #include 
#include 
#include 
#include 
#include using namespace std;
using namespace boost;static void find_test()
{string str1("123a1cxxxa23cXXXa456c321");const char* pch1="123a1cxxxa23cXXXa456c321";regex rx("a[0-9]+c");vector vec1( str1.begin(), str1.end() );vector tokens;// find resultsiterator_range nc_result;iterator_range cv_result;iterator_range::iterator> nc_vresult;iterator_range::const_iterator> cv_vresult;iterator_range ch_result;// basic testsnc_result=find_regex( str1, rx );BOOST_CHECK( ( (nc_result.begin()-str1.begin()) == 3) &&( (nc_result.end()-str1.begin()) == 6) );cv_result=find_regex( str1, rx );BOOST_CHECK( ( (cv_result.begin()-str1.begin()) == 3) &&( (cv_result.end()-str1.begin()) == 6) );ch_result=find_regex( pch1, rx );BOOST_CHECK(( (ch_result.begin() - pch1 ) == 3) && ( (ch_result.end() - pch1 ) == 6 ) );// multi-type comparison testnc_vresult=find_regex( vec1, rx );BOOST_CHECK( ( (nc_result.begin()-str1.begin()) == 3) &&( (nc_result.end()-str1.begin()) == 6) );cv_vresult=find_regex( vec1, rx );BOOST_CHECK( ( (cv_result.begin()-str1.begin()) == 3) &&( (cv_result.end()-str1.begin()) == 6) );// find_all_regex testfind_all_regex( tokens, str1, rx );BOOST_REQUIRE( tokens.size()==3 );BOOST_CHECK( tokens[0]==string("a1c") );BOOST_CHECK( tokens[1]==string("a23c") );BOOST_CHECK( tokens[2]==string("a456c") );// split_regex testsplit_regex(    tokens, str1, rx );BOOST_REQUIRE( tokens.size()==4 );BOOST_CHECK( tokens[0]==string("123") );BOOST_CHECK( tokens[1]==string("xxx") );BOOST_CHECK( tokens[2]==string("XXX") );BOOST_CHECK( tokens[3]==string("321") );}static void join_test()
{// Prepare inputsvector tokens1;tokens1.push_back("xx");tokens1.push_back("abc");tokens1.push_back("xx");#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERINGBOOST_CHECK( equals(join_if(tokens1, "-", regex("x+")), "xx-xx") );BOOST_CHECK( equals(join_if(tokens1, "-", regex("[abc]+")), "abc") );
#else BOOST_CHECK( equals(join_if_regex(tokens1, "-", regex("x+")), "xx-xx") );BOOST_CHECK( equals(join_if_regex(tokens1, "-", regex("[abc]+")), "abc") );
#endif 
}static void replace_test()
{string str1("123a1cxxxa23cXXXa456c321");regex rx1("a([0-9]+)c");regex rx2("([xX]+)");regex rx3("_[^_]*_");string fmt1("_A$1C_");string fmt2("_xXx_");vector vec1( str1.begin(), str1.end() );// immutable tests// basic testsBOOST_CHECK( replace_regex_copy( str1, rx1, fmt1 )==string("123_A1C_xxxa23cXXXa456c321") );BOOST_CHECK( replace_all_regex_copy( str1, rx1, fmt1 )==string("123_A1C_xxx_A23C_XXX_A456C_321") );BOOST_CHECK( erase_regex_copy( str1, rx1 )==string("123xxxa23cXXXa456c321") );BOOST_CHECK( erase_all_regex_copy( str1, rx1 )==string(string("123xxxXXX321")) );// output iterator variants teststring strout;replace_regex_copy( back_inserter(strout), str1, rx1, fmt1 );BOOST_CHECK( strout==string("123_A1C_xxxa23cXXXa456c321") );strout.clear();replace_all_regex_copy( back_inserter(strout), str1, rx1, fmt1 );BOOST_CHECK( strout==string("123_A1C_xxx_A23C_XXX_A456C_321") );strout.clear();erase_regex_copy( back_inserter(strout), str1, rx1 );BOOST_CHECK( strout==string("123xxxa23cXXXa456c321") );strout.clear();erase_all_regex_copy( back_inserter(strout), str1, rx1 );BOOST_CHECK( strout==string("123xxxXXX321") );strout.clear();// in-place testreplace_regex( str1, rx1, fmt2 );BOOST_CHECK( str1==string("123_xXx_xxxa23cXXXa456c321") );replace_all_regex( str1, rx2, fmt1 );BOOST_CHECK( str1==string("123__AxXxC___AxxxC_a23c_AXXXC_a456c321") );erase_regex( str1, rx3 );BOOST_CHECK( str1==string("123AxXxC___AxxxC_a23c_AXXXC_a456c321") );erase_all_regex( str1, rx3 );BOOST_CHECK( str1==string("123AxXxCa23ca456c321") );
}int main( int argc, char* [] )
{find_test();join_test();replace_test();return 0;
}

源代码

namespace boost {namespace algorithm {//  find_regex  -----------------------------------------------////! Find regex algorithm/*!Search for a substring matching the given regex in the input.\param Input A container which will be searched.\param Rx A regular expression\param Flags Regex options\return An \c iterator_range delimiting the match. Returned iterator is either \c RangeT::iterator or \c RangeT::const_iterator, depending on the constness of the input parameter.\note This function provides the strong exception-safety guarantee*/template< typename RangeT, typename CharT, typename RegexTraitsT>inline iterator_range< BOOST_STRING_TYPENAME range_iterator::type >find_regex( RangeT& Input, const basic_regex& Rx,match_flag_type Flags=match_default ){iterator_range::type> lit_input(::boost::as_literal(Input));return ::boost::algorithm::regex_finder(Rx,Flags)(::boost::begin(lit_input), ::boost::end(lit_input) );}//  replace_regex --------------------------------------------------------------------////! Replace regex algorithm/*!Search for a substring matching given regex and format it with the specified format.             The result is a modified copy of the input. 
It is returned as a sequence or copied to the output iterator.\param Output An output iterator to which the result will be copied\param Input An input string\param Rx A regular expression\param Format Regex format definition\param Flags Regex options\return An output iterator pointing just after the last inserted character ora modified copy of the input   \note The second variant of this function provides the strong exception-safety guarantee*/template< typename OutputIteratorT,typename RangeT, typename CharT, typename RegexTraitsT,typename FormatStringTraitsT, typename FormatStringAllocatorT >inline OutputIteratorT replace_regex_copy(OutputIteratorT Output,const RangeT& Input,const basic_regex& Rx,const std::basic_string& Format,match_flag_type Flags=match_default | format_default ){return ::boost::algorithm::find_format_copy( Output,Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::regex_formatter( Format, Flags ) );}//! Replace regex algorithm/*!\overload*/template< typename SequenceT, typename CharT, typename RegexTraitsT,typename FormatStringTraitsT, typename FormatStringAllocatorT >inline SequenceT replace_regex_copy( const SequenceT& Input,const basic_regex& Rx,const std::basic_string& Format,match_flag_type Flags=match_default | format_default ){return ::boost::algorithm::find_format_copy( Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::regex_formatter( Format, Flags ) );}//! Replace regex algorithm/*!Search for a substring matching given regex and format it with the specified format. 
The input string is modified in-place.\param Input An input string\param Rx A regular expression\param Format Regex format definition\param Flags Regex options*/template< typename SequenceT, typename CharT, typename RegexTraitsT,typename FormatStringTraitsT, typename FormatStringAllocatorT >inline void replace_regex( SequenceT& Input,const basic_regex& Rx,const std::basic_string& Format,match_flag_type Flags=match_default | format_default ){::boost::algorithm::find_format( Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::regex_formatter( Format, Flags ) );}//  replace_all_regex --------------------------------------------------------------------////! Replace all regex algorithm/*!Format all substrings, matching given regex, with the specified format. The result is a modified copy of the input. It is returned as a sequence or copied to the output iterator.\param Output An output iterator to which the result will be copied\param Input An input string\param Rx A regular expression\param Format Regex format definition\param Flags Regex options\return An output iterator pointing just after the last inserted character ora modified copy of the input     \note The second variant of this function provides the strong exception-safety guarantee*/template< typename OutputIteratorT,typename RangeT, typename CharT, typename RegexTraitsT,typename FormatStringTraitsT, typename FormatStringAllocatorT >inline OutputIteratorT replace_all_regex_copy(OutputIteratorT Output,const RangeT& Input,const basic_regex& Rx,const std::basic_string& Format,match_flag_type Flags=match_default | format_default ){return ::boost::algorithm::find_format_all_copy( Output,Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::regex_formatter( Format, Flags ) );}//! 
Replace all regex algorithm/*!\overload*/template< typename SequenceT, typename CharT, typename RegexTraitsT,typename FormatStringTraitsT, typename FormatStringAllocatorT >inline SequenceT replace_all_regex_copy( const SequenceT& Input,const basic_regex& Rx,const std::basic_string& Format,match_flag_type Flags=match_default | format_default ){return ::boost::algorithm::find_format_all_copy( Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::regex_formatter( Format, Flags ) );}//! Replace all regex algorithm/*!Format all substrings, matching given regex, with the specified format. The input string is modified in-place.\param Input An input string\param Rx A regular expression\param Format Regex format definition\param Flags Regex options            */template< typename SequenceT, typename CharT, typename RegexTraitsT,typename FormatStringTraitsT, typename FormatStringAllocatorT >inline void replace_all_regex( SequenceT& Input,const basic_regex& Rx,const std::basic_string& Format,match_flag_type Flags=match_default | format_default ){::boost::algorithm::find_format_all( Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::regex_formatter( Format, Flags ) );}//  erase_regex --------------------------------------------------------------------////! Erase regex algorithm/*!Remove a substring matching given regex from the input.The result is a modified copy of the input. It is returned as a sequence or copied to the output iterator.                        
\param Output An output iterator to which the result will be copied\param Input An input string\param Rx A regular expression\param Flags Regex options\return An output iterator pointing just after the last inserted character ora modified copy of the input    \note The second variant of this function provides the strong exception-safety guarantee*/template< typename OutputIteratorT,typename RangeT, typename CharT, typename RegexTraitsT >inline OutputIteratorT erase_regex_copy(OutputIteratorT Output,const RangeT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){return ::boost::algorithm::find_format_copy(Output,Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::empty_formatter( Input ) );}//! Erase regex algorithm/*!\overload*/template< typename SequenceT, typename CharT, typename RegexTraitsT >inline SequenceT erase_regex_copy( const SequenceT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){return ::boost::algorithm::find_format_copy( Input, ::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::empty_formatter( Input ) );}//! Erase regex algorithm/*!Remove a substring matching given regex from the input.The input string is modified in-place.\param Input An input string\param Rx A regular expression\param Flags Regex options*/template< typename SequenceT, typename CharT, typename RegexTraitsT >inline void erase_regex( SequenceT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){::boost::algorithm::find_format( Input, ::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::empty_formatter( Input ) );}//  erase_all_regex --------------------------------------------------------------------////! Erase all regex algorithm/*!Erase all substrings, matching given regex, from the input.The result is a modified copy of the input. 
It is returned as a sequence or copied to the output iterator.\param Output An output iterator to which the result will be copied\param Input An input string\param Rx A regular expression\param Flags Regex options\return An output iterator pointing just after the last inserted character ora modified copy of the input                        \note The second variant of this function provides the strong exception-safety guarantee*/template< typename OutputIteratorT,typename RangeT, typename CharT, typename RegexTraitsT >inline OutputIteratorT erase_all_regex_copy(OutputIteratorT Output,const RangeT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){return ::boost::algorithm::find_format_all_copy(Output,Input,::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::empty_formatter( Input ) );}//! Erase all regex algorithm/*!\overload*/template< typename SequenceT, typename CharT, typename RegexTraitsT >inline SequenceT erase_all_regex_copy( const SequenceT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){return ::boost::algorithm::find_format_all_copy( Input, ::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::empty_formatter( Input ) );}//! Erase all regex algorithm/*!Erase all substrings, matching given regex, from the input.The input string is modified in-place.\param Input An input string\param Rx A regular expression\param Flags Regex options*/template< typename SequenceT, typename CharT, typename RegexTraitsT>inline void erase_all_regex( SequenceT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){::boost::algorithm::find_format_all( Input, ::boost::algorithm::regex_finder( Rx, Flags ),::boost::algorithm::empty_formatter( Input ) );}//  find_all_regex ------------------------------------------------------------------////! Find all regex algorithm/*!This algorithm finds all substrings matching the give regexin the input.             
Each part is copied and added as a new element to the output container.Thus the result container must be able to hold copiesof the matches (in a compatible structure like std::string) ora reference to it (e.g. using the iterator range class).Examples of such a container are \c std::vectoror \c std::list>\param Result A container that can hold copies of references to the substrings.\param Input A container which will be searched.\param Rx A regular expression\param Flags Regex options\return A reference to the result\note Prior content of the result will be overwritten.\note This function provides the strong exception-safety guarantee*/template< typename SequenceSequenceT, typename RangeT,         typename CharT, typename RegexTraitsT >inline SequenceSequenceT& find_all_regex(SequenceSequenceT& Result,const RangeT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){return ::boost::algorithm::iter_find(Result,Input,::boost::algorithm::regex_finder(Rx,Flags) );         }//  split_regex ------------------------------------------------------------------////! Split regex algorithm/*! Tokenize expression. This function is equivalent to C strtok. Inputsequence is split into tokens, separated  by separators. Separatoris an every match of the given regex.Each part is copied and added as a new element to the output container.Thus the result container must be able to hold copiesof the matches (in a compatible structure like std::string) ora reference to it (e.g. using the iterator range class).Examples of such a container are \c std::vectoror \c std::list>\param Result A container that can hold copies of references to the substrings.          
\param Input A container which will be searched.\param Rx A regular expression\param Flags Regex options\return A reference to the result\note Prior content of the result will be overwritten.\note This function provides the strong exception-safety guarantee*/template< typename SequenceSequenceT, typename RangeT,         typename CharT, typename RegexTraitsT >inline SequenceSequenceT& split_regex(SequenceSequenceT& Result,const RangeT& Input,const basic_regex& Rx,match_flag_type Flags=match_default ){return ::boost::algorithm::iter_split(Result,Input,::boost::algorithm::regex_finder(Rx,Flags) );         }//  join_if ------------------------------------------------------------------//#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING//! Conditional join algorithm/*!This algorithm joins all strings in a 'list' into one long string.Segments are concatenated by given separator. Only segments thatmatch the given regular expression will be added to the resultThis is a specialization of join_if algorithm.\param Input A container that holds the input strings. 
It must be a container-of-containers.\param Separator A string that will separate the joined segments.\param Rx A regular expression\param Flags Regex options\return Concatenated string.\note This function provides the strong exception-safety guarantee*/template< typename SequenceSequenceT, typename Range1T,             typename CharT, typename RegexTraitsT >inline typename range_value::type join_if(const SequenceSequenceT& Input,const Range1T& Separator,const basic_regex& Rx,match_flag_type Flags=match_default ){// Define working typestypedef typename range_value::type ResultT;typedef typename range_const_iterator::type InputIteratorT;// Parse inputInputIteratorT itBegin=::boost::begin(Input);InputIteratorT itEnd=::boost::end(Input);// Construct container to hold the resultResultT Result;// Roll to the first element that will be addedwhile(itBegin!=itEnd && !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;// Add this elementif(itBegin!=itEnd){detail::insert(Result, ::boost::end(Result), *itBegin);++itBegin;}for(;itBegin!=itEnd; ++itBegin){if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)){// Add separatordetail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));// Add elementdetail::insert(Result, ::boost::end(Result), *itBegin);}}return Result;}#else  // BOOST_NO_FUNCTION_TEMPLATE_ORDERING//! Conditional join algorithm/*!This algorithm joins all strings in a 'list' into one long string.Segments are concatenated by given separator. Only segments thatmatch the given regular expression will be added to the resultThis is a specialization of join_if algorithm.\param Input A container that holds the input strings. 
It must be a container-of-containers.\param Separator A string that will separate the joined segments.\param Rx A regular expression\param Flags Regex options\return Concatenated string.\note This function provides the strong exception-safety guarantee*/template< typename SequenceSequenceT, typename Range1T,             typename CharT, typename RegexTraitsT >inline typename range_value::type join_if_regex(const SequenceSequenceT& Input,const Range1T& Separator,const basic_regex& Rx,match_flag_type Flags=match_default ){// Define working typestypedef typename range_value::type ResultT;typedef typename range_const_iterator::type InputIteratorT;// Parse inputInputIteratorT itBegin=::boost::begin(Input);InputIteratorT itEnd=::boost::end(Input);// Construct container to hold the resultResultT Result;// Roll to the first element that will be addedwhile(itBegin!=itEnd && !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;// Add this elementif(itBegin!=itEnd){detail::insert(Result, ::boost::end(Result), *itBegin);++itBegin;}for(;itBegin!=itEnd; ++itBegin){if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)){// Add separatordetail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));// Add elementdetail::insert(Result, ::boost::end(Result), *itBegin);}}return Result;}#endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING} // namespace algorithm// pull names into the boost namespaceusing algorithm::find_regex;using algorithm::replace_regex;using algorithm::replace_regex_copy;using algorithm::replace_all_regex;using algorithm::replace_all_regex_copy;using algorithm::erase_regex;using algorithm::erase_regex_copy;using algorithm::erase_all_regex;using algorithm::erase_all_regex_copy;using algorithm::find_all_regex;using algorithm::split_regex;#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERINGusing algorithm::join_if;
#else  // BOOST_NO_FUNCTION_TEMPLATE_ORDERINGusing algorithm::join_if_regex;
#endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING} // namespace boost

 


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部