g++の末尾再帰の最適化 (-foptimize-sibling-calls)

この最適化を当てにできるなら、Boost.Preprocessor をかなり使わずにすむかもしれない、という話。

#include <functional>
#include <iostream>
#include <cstddef>

// Compile-time countdown: iter<N, F>()(op) invokes op(N), op(N - 1), ...,
// op(1) in that order, using one template instantiation per step.
//
// Remaining_ - value passed to the callable at this step
// Callee_    - callable type; it is taken by non-const reference
template<std::size_t Remaining_, typename Callee_>
struct iter
{
    // Performs the current step, then hands the rest of the countdown to the
    // next lower instantiation. That hand-off is the last statement here.
    void operator()(Callee_& op) const
    {
        typedef iter<Remaining_ - 1, Callee_> next_step;

        op(Remaining_);
        next_step()(op);
    }
};


// Terminating case: nothing is left to do, so the callable is not invoked.
template<typename Callee_>
struct iter<0, Callee_>
{
    void operator()(Callee_&) const {}
};

// Function object that prints its argument to std::cout, one value per line.
//
// No std::unary_function base is used: that helper was deprecated in C++11
// and removed in C++17, and the nested typedefs below provide the member
// types (argument first, result second) directly.
struct fun
{
    typedef std::size_t argument_type;
    typedef void result_type;

    // Writes c followed by std::endl (newline plus flush) to std::cout.
    inline result_type operator()(const argument_type& c)
    {
        std::cout << c << std::endl;
    }
};

// Entry point: runs the compile-time countdown iter<5, fun> over a fun
// instance. The return type is spelled out because C++ has no implicit int;
// falling off the end of main is equivalent to returning 0.
int main()
{
    fun f;
    iter<5, fun>()(f);
}

このコード (test.cpp) を

g++ -O -foptimize-sibling-calls -S test.cpp

でコンパイルした結果 (c++filt で後処理 & 抜粋)

main:
.LFB1438:
	leal	4(%esp), %ecx
.LCFI7:
	andl	$-16, %esp
	pushl	-4(%ecx)
.LCFI8:
	pushl	%ebp
.LCFI9:
	movl	%esp, %ebp
.LCFI10:
	pushl	%ecx
.LCFI11:
	subl	$20, %esp
.LCFI12:
	movl	$5, 4(%esp)
	movl	std::cout, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >::operator<<(unsigned int)
	movl	%eax, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)
	movl	$4, 4(%esp)
	movl	std::cout, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >::operator<<(unsigned int)
	movl	%eax, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)
	movl	$3, 4(%esp)
	movl	std::cout, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >::operator<<(unsigned int)
	movl	%eax, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)
	movl	$2, 4(%esp)
	movl	std::cout, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >::operator<<(unsigned int)
	movl	%eax, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)
	movl	$1, 4(%esp)
	movl	std::cout, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >::operator<<(unsigned int)
	movl	%eax, (%esp)
	call	std::basic_ostream<char, std::char_traits<char> >& std::endl<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&)
	movl	$0, %eax
	addl	$20, %esp
	popl	%ecx
	popl	%ebp
	leal	-4(%ecx), %esp
	ret

うーん、すばらしい。
ちなみに、-O で有効になるオプションのうち、どれが -foptimize-sibling-calls と組み合わさって効いているのかは、いろいろ試したものの分かりませんでした。
gcc のソース読むかな...。