/*************************************************************************
 *
 *  $RCSfile: textsearch.cxx,v $
 *
 *  $Revision: 1.9 $
 *
 *  last change: $Author: jp $ $Date: 2001/08/29 15:57:25 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
#include <string.h>

#ifndef _DEBUG_HXX
#include <tools/debug.hxx>
#endif
#ifndef _ISOLANG_HXX
#include <tools/isolang.hxx>
#endif
#ifndef _TABLE_HXX
#include <tools/table.hxx>
#endif
#ifndef _COM_SUN_STAR_LANG_LOCALE_HDL_
#include <com/sun/star/lang/Locale.hdl>
#endif
#ifndef _COM_SUN_STAR_LANG_XMULTISERVICEFACTORY_HPP_
#include <com/sun/star/lang/XMultiServiceFactory.hpp>
#endif
#ifndef _COMPHELPER_PROCESSFACTORY_HXX_
#include <comphelper/processfactory.hxx>
#endif
#ifndef _COM_SUN_STAR_I18N_UNICODETYPE_HPP_
#include <com/sun/star/i18n/UnicodeType.hpp>
#endif
#ifndef _COM_SUN_STAR_UTIL_SEARCHFLAGS_HDL_
#include <com/sun/star/util/SearchFlags.hdl>
#endif
#ifndef _UNOTOOLS_CHARCLASS_HXX
#include <unotools/charclass.hxx>
#endif
#ifndef _COM_SUN_STAR_I18N_WORDTYPE_HPP_
#include <com/sun/star/i18n/WordType.hpp>
#endif
#ifndef _COM_SUN_STAR_I18N_TRANSLITERATIONMODULES_HPP_
#include <com/sun/star/i18n/TransliterationModules.hpp>
#endif

#include <textsearch.hxx>
#include <levdis.hxx>

using namespace ::com::sun::star::util;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::i18n;
using namespace ::com::sun::star::lang;
//using namespace ::com::sun::star::text;

// Table type holding long values. In this file it serves as the skip table
// of the plain text search: entries are keyed by character code and store
// the distance the search window may be moved for that character.
DECLARE_TABLE( JumpTable, long );

// Tells whether the IGNORE_CASE bit is set in the given transliteration
// flags. Returns a non-zero BOOL when it is, 0 otherwise.
inline BOOL IsIgnoreCase( const long nFlag )
{
	const long nCaseBit = nFlag &
				com::sun::star::i18n::TransliterationModules_IGNORE_CASE;
	return nCaseBit != 0;
}

// Default constructor. All helper pointers start out as null; the object is
// then configured through setOptions() with a default option set
// (algorithm ABSOLUTE, search flag ALL_IGNORE_CASE).
TextSearch::TextSearch()
	: pRegExp( 0 ), pWLD( 0 ), pJumpTable( 0 ),	pCharClass( 0 )
{
	SearchOptions aDefaults;
	aDefaults.searchFlag = SearchFlags::ALL_IGNORE_CASE;
	aDefaults.algorithmType = SearchAlgorithms_ABSOLUTE;
	// the Locale member is not assigned here; it keeps the value the
	// SearchOptions constructor gave it
	setOptions( aDefaults );
}

// Destructor: releases the helper objects owned by this searcher.
// NOTE(review): pRegExp is not released here; in the visible code it is only
// ever initialised to 0 - confirm nothing else allocates it.
TextSearch::~TextSearch()
{
	delete pCharClass;		// character classification helper
	delete pJumpTable;		// skip table of the plain text search
	delete pWLD;			// distance calculator of the approximate search
}

// Installs a new set of search options.
//
// The options are copied into aSrchPara, every helper object derived from
// the previous options is discarded, and the forward/backward search
// functions are selected according to rOptions.algorithmType:
//   - SearchAlgorithms_REGEXP      -> RESrchFrwrd / RESrchBkwrd
//   - SearchAlgorithms_APPROXIMATE -> ApproxSrchFrwrd / ApproxSrchBkwrd
//                                     (also creates pWLD and the break iterator)
//   - anything else                -> NSrchFrwrd / NSrchBkwrd
//
// rOptions: the options to use for all following searches.
void TextSearch::setOptions( const SearchOptions& rOptions )
							throw( ::com::sun::star::uno::RuntimeException )
{
	aSrchPara = rOptions;

	// Throw away everything that was built for the old options. Every
	// pointer is cleared right after its delete, so that no stale pointer
	// survives (and gets deleted a second time in the destructor) should a
	// later allocation throw.
	delete pWLD;
	pWLD = 0;
	delete pJumpTable;
	pJumpTable = 0;
	delete pCharClass;
	pCharClass = 0;
	pCharClass = new CharClass( aSrchPara.Locale );

	// the break iterator is only needed by the approximate search
	if( xBreak.is() &&
		SearchAlgorithms_APPROXIMATE != aSrchPara.algorithmType )
		xBreak = 0;

	// keep a private copy of the pattern, lower-cased when case is ignored
	sSrchStr = aSrchPara.searchString;
	if( IsIgnoreCase( aSrchPara.transliterateFlags ))
		pCharClass->toLower( sSrchStr );

	switch( aSrchPara.algorithmType )
	{
		case SearchAlgorithms_REGEXP:
		{
			fnForward = &TextSearch::RESrchFrwrd;
			fnBackward = &TextSearch::RESrchBkwrd;
		}
		break;

		case SearchAlgorithms_APPROXIMATE:
		{
			pWLD = new WLevDistance( sSrchStr.GetBuffer(),
									aSrchPara.changedChars,
									aSrchPara.insertedChars,
									aSrchPara.deletedChars,
									0 != (SearchFlags::LEV_RELAXED &
												aSrchPara.searchFlag ) );

			nLimit = pWLD->GetLimit();
			fnForward = &TextSearch::ApproxSrchFrwrd;
			fnBackward = &TextSearch::ApproxSrchBkwrd;

			// The process service factory may be empty; calling through an
			// empty reference would dereference a null pointer. Without a
			// break iterator the approximate searches report "not found".
			Reference< XMultiServiceFactory > xMSF =
										::comphelper::getProcessServiceFactory();
			if( xMSF.is() )
			{
				Reference < XInterface > xI =
					xMSF->createInstance( ::rtl::OUString::createFromAscii(
										"com.sun.star.text.BreakIterator" ) );
				if( xI.is() )
				{
					Any x = xI->queryInterface( ::getCppuType( (const Reference<
														XBreakIterator >*)0) );
					x >>= xBreak;
				}
			}
		}
		break;

		default:
		{
			fnForward = &TextSearch::NSrchFrwrd;
			fnBackward = &TextSearch::NSrchBkwrd;
		}
		break;
	}
}

// Searches forward in searchStr between startPos and endPos by delegating
// to the search function that setOptions() selected for the current
// algorithm. Returns that function's result unchanged.
SearchResult TextSearch::searchForward( const ::rtl::OUString& searchStr,
									sal_Int32 startPos, sal_Int32 endPos )
							throw( ::com::sun::star::uno::RuntimeException )
{
	SearchResult aFound( (this->*fnForward)( searchStr, startPos, endPos ) );
	return aFound;
}

// Searches backward in searchStr between startPos and endPos by delegating
// to the search function that setOptions() selected for the current
// algorithm. Returns that function's result unchanged.
SearchResult TextSearch::searchBackward( const ::rtl::OUString& searchStr,
									sal_Int32 startPos, sal_Int32 endPos )
							throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aFound( (this->*fnBackward)( searchStr, startPos, endPos ) );
	return aFound;
}



//--------------- the word delimiters --------------------------------

int TextSearch::IsDelimiter( const String& rStr, xub_StrLen nPos ) const
{
	int nRet = 1;
	if( '\x7f' != rStr.GetChar( nPos ))
	{
		unsigned short nCType = pCharClass->getCharacterType( rStr, nPos );
		if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
			  		KCharacterType::LETTER ) & nCType ) )
			nRet = 0;
	}
	return nRet;
}



// --------- methods for the Boyer-Moore style search --------------------

void TextSearch::MakeForwardTab()
{
	// create the jumptable for the search text
	if( pJumpTable )
	{
		if( bIsForwardTab )
			return ;					// the jumpTable is ok
		delete pJumpTable;
	}
	bIsForwardTab = TRUE;

	xub_StrLen n, nLen = sSrchStr.Len();
	pJumpTable = new JumpTable( nLen );

	for( n = 0; n < nLen - 1; ++n )
	{
		sal_Unicode cCh = sSrchStr.GetChar( n );
		xub_StrLen nDiff = nLen - n - 1;
		if( !pJumpTable->Insert( cCh, nDiff ))
			pJumpTable->Replace( cCh, nDiff );
	}
}

void TextSearch::MakeBackwardTab()
{
	// create the jumptable for the search text
	if( pJumpTable )
	{
		if( !bIsForwardTab )
			return ;					// the jumpTable is ok
		delete pJumpTable;
	}
	bIsForwardTab = FALSE;

	xub_StrLen n, nLen = sSrchStr.Len();
	pJumpTable = new JumpTable( nLen );

	for( n = nLen-1; n > 0; --n )
	{
		sal_Unicode cCh = sSrchStr.GetChar( n );
		if( !pJumpTable->Insert( cCh, n ))
			pJumpTable->Replace( cCh, n );
	}
}

// Looks up the skip distance for cChr in the current jump table.
// Characters without a table entry yield the full pattern length.
xub_StrLen TextSearch::GetDiff( const sal_Unicode cChr )
{
	ULONG nTabPos;
	if( TABLE_ENTRY_NOTFOUND != pJumpTable->SearchKey( cChr, &nTabPos ))
		return pJumpTable->GetObject( nTabPos );

	// character does not occur in the table
	return sSrchStr.Len();
}


// Plain text search in forward direction, using the forward jump table.
//
// searchStr: the text to search in
// startPos:  first position at which a match may begin
// endPos:    position behind the search region; a match must end at or
//            before it
//
// Returns a result with subRegExpressions == 0 when nothing was found.
// Otherwise subRegExpressions is 1, startOffset[0] is the index of the first
// matched character and endOffset[0] the index behind the last one.
// With SearchFlags::NORM_WORD_ONLY set, only matches that stand as a whole
// word are accepted.
SearchResult TextSearch::NSrchFrwrd( const ::rtl::OUString& searchStr,
								sal_Int32 startPos, sal_Int32 endPos )
								throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aRet;
	aRet.subRegExpressions = 0;			// "not found" until a match is accepted

	String aStr( searchStr );
	const xub_StrLen nTextLen = aStr.Len();
	const xub_StrLen nPatLen = sSrchStr.Len();
	if( 0 == nTextLen || 0 == nPatLen || nPatLen > nTextLen )
		return aRet;

	// last position at which the pattern may still begin
	xub_StrLen nLastStart = endPos;
	if( nLastStart < nPatLen )			// region too short for the pattern
		return aRet;
	nLastStart -= nPatLen;

	if( IsIgnoreCase( aSrchPara.transliterateFlags ))
		pCharClass->toLower( aStr );

	MakeForwardTab();					// create the jumptable

	const BOOL bWordOnly =
			0 != ( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag );

	xub_StrLen nCmpIdx = startPos;		// start of the current text window
	while( nCmpIdx <= nLastStart )
	{
		// compare pattern and window from right to left; nLeft counts the
		// pattern characters that are still unchecked
		xub_StrLen nLeft = nPatLen;
		while( nLeft > 0 &&
				sSrchStr.GetChar( nLeft - 1 ) ==
								aStr.GetChar( nCmpIdx + nLeft - 1 ))
			--nLeft;

		if( 0 == nLeft )				// whole pattern matched
		{
			BOOL bAccept = TRUE;
			if( bWordOnly )
			{
				const xub_StrLen nFndEnd = nCmpIdx + nPatLen;
				const BOOL bAtStart = 0 == nCmpIdx;
				const BOOL bAtEnd = nFndEnd == endPos;
				const BOOL bDelimBefore = bAtStart ||
											IsDelimiter( aStr, nCmpIdx-1 );
				const BOOL bDelimBehind = IsDelimiter( aStr, nFndEnd );
				// accepted constellations:
				//  1 -> only one word in the paragraph
				//  2 -> at begin of paragraph
				//  3 -> at end of paragraph
				//  4 -> inside the paragraph
				bAccept = ( bAtStart && bAtEnd ) ||				// 1
						  ( bAtStart && bDelimBehind ) ||		// 2
						  ( bAtEnd && bDelimBefore ) ||			// 3
						  ( bDelimBefore && bDelimBehind );		// 4
			}

			if( bAccept )
			{
				aRet.subRegExpressions = 1;
				aRet.startOffset.realloc( 1 );
				aRet.endOffset.realloc( 1 );
				aRet.startOffset[ 0 ] = nCmpIdx;
				aRet.endOffset[ 0 ] = nCmpIdx + nPatLen;
				return aRet;
			}
		}

		// move the window; the distance depends on the text character
		// below the last pattern position
		nCmpIdx += GetDiff( aStr.GetChar( nCmpIdx + nPatLen - 1 ));
	}
	return aRet;
}

// Plain text search in backward direction, using the backward jump table.
//
// searchStr: the text to search in
// startPos:  position at which the search begins (the upper position)
// endPos:    position at which the search ends (the lower position)
//
// Returns a result with subRegExpressions == 0 when nothing was found.
// Otherwise subRegExpressions is 1, startOffset[0] is the index of the last
// matched character and endOffset[0] the index in front of the first one.
// NOTE(review): a match at the very beginning of the text therefore reports
// endOffset[0] == -1 - confirm this is what the callers expect.
// With SearchFlags::NORM_WORD_ONLY set, only matches that stand as a whole
// word are accepted.
SearchResult TextSearch::NSrchBkwrd( const ::rtl::OUString& searchStr,
								sal_Int32 startPos, sal_Int32 endPos )
								throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aRet;
	aRet.subRegExpressions = 0;			// "not found" until a match is accepted

	String aStr( searchStr );
	const xub_StrLen nTextLen = aStr.Len();
	const xub_StrLen nPatLen = sSrchStr.Len();
	if( !nTextLen || !nPatLen || nPatLen > nTextLen )
		return aRet;

	xub_StrLen nCmpIdx = startPos;		// position behind the current window

	MakeBackwardTab();					// create the jumptable

	// lowest value nCmpIdx may take
	xub_StrLen nEnde = endPos;
	if( nEnde == nTextLen )
		nEnde = nPatLen;
	else
		nEnde += nPatLen;

	if( IsIgnoreCase( aSrchPara.transliterateFlags ))
		pCharClass->toLower( aStr );

	const BOOL bWordOnly =
			0 != ( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag );

	while( nCmpIdx >= nEnde )
	{
		// the window covers [ nWinStart, nCmpIdx )
		const xub_StrLen nWinStart = nCmpIdx - nPatLen;

		// compare pattern and window from left to right
		xub_StrLen nMatched = 0;
		while( nMatched < nPatLen &&
				sSrchStr.GetChar( nMatched ) ==
				  aStr.GetChar( nWinStart + nMatched ) )
			++nMatched;

		if( nMatched >= nPatLen )		// whole pattern matched
		{
			BOOL bAccept = TRUE;
			if( bWordOnly )
			{
				const BOOL bAtStart = 0 == nWinStart;
				const BOOL bAtEnd = nCmpIdx == startPos;
				const BOOL bDelimBehind = IsDelimiter( aStr, nCmpIdx );
				const BOOL bDelimBefore = bAtStart || // begin of paragraph
									IsDelimiter( aStr, nWinStart-1 );
				// accepted constellations:
				//  1 -> only one word in the paragraph
				//  2 -> at begin of paragraph
				//  3 -> at end of paragraph
				//  4 -> inside the paragraph
				bAccept = ( bAtStart && bAtEnd ) ||				// 1
						  ( bAtStart && bDelimBehind ) ||		// 2
						  ( bAtEnd && bDelimBefore ) ||			// 3
						  ( bDelimBefore && bDelimBehind );		// 4
			}

			if( bAccept )
			{
				aRet.subRegExpressions = 1;
				aRet.startOffset.realloc( 1 );
				aRet.endOffset.realloc( 1 );
				aRet.startOffset[ 0 ] = nCmpIdx - 1;
				aRet.endOffset[ 0 ] = nCmpIdx - nPatLen - 1;
				return aRet;
			}
		}

		// move the window towards the beginning; the distance depends on
		// the text character below the first pattern position
		const xub_StrLen nJump = GetDiff( aStr.GetChar( nWinStart ) );
		if( nCmpIdx < nJump )			// would move in front of the text
			return aRet;

		nCmpIdx -= nJump;
	}
	return aRet;
}



//---------------------------------------------------------------------------
// ------- methods for searching with regular expressions ----------------

// Forward search with a regular expression.
// Currently a stub: the arguments are ignored and the returned result always
// says "nothing found" (subRegExpressions == 0).
SearchResult TextSearch::RESrchFrwrd( const ::rtl::OUString& searchStr,
								sal_Int32 startPos, sal_Int32 endPos )
								throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aNoMatch;
	aNoMatch.subRegExpressions = 0;		// zero matches
	return aNoMatch;
}

/*
 * Backward search with a regular expression: meant to search the pattern
 * backwards in the given string.
 * Currently a stub: the arguments are ignored and the returned result always
 * says "nothing found" (subRegExpressions == 0).
 */
SearchResult TextSearch::RESrchBkwrd( const ::rtl::OUString& searchStr,
								sal_Int32 startPos, sal_Int32 endPos )
								throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aNoMatch;
	aNoMatch.subRegExpressions = 0;		// zero matches
	return aNoMatch;
}

// Approximate (similarity) search for words, forward direction.
//
// The text is walked word by word with the break iterator, beginning with
// the word at startPos. Each word, clipped to [startPos, endPos), is handed
// to pWLD->WLD(); the first word whose result does not exceed nLimit is
// reported.
//
// searchStr: the text to search in
// startPos:  first position of the search region
// endPos:    position behind the search region
//
// Returns a result with subRegExpressions == 0 when nothing was found or no
// break iterator is available. Otherwise subRegExpressions is 1,
// startOffset[0] is the index of the first character of the word and
// endOffset[0] the index behind its last character - the same convention
// the plain forward search NSrchFrwrd uses.
SearchResult TextSearch::ApproxSrchFrwrd( const ::rtl::OUString& searchStr,
								sal_Int32 startPos, sal_Int32 endPos )
								throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aRet;
	aRet.subRegExpressions = 0;

	if( !xBreak.is() )
		return aRet;

	String aWTemp( searchStr );
	if( IsIgnoreCase( aSrchPara.transliterateFlags ))
		pCharClass->toLower( aWTemp );

	xub_StrLen nStt, nEnd;

	Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
								aSrchPara.Locale,
								WordType::ANYWORD_IGNOREWHITESPACES, TRUE );

	do {
		if( aWBnd.startPos >= endPos )
			break;
		// clip the word to the search region
		nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
		nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;

		if( nStt < nEnd &&
			pWLD->WLD( aWTemp.GetBuffer() + nStt, nEnd - nStt ) <= nLimit )
		{
			aRet.subRegExpressions = 1;
			aRet.startOffset.realloc( 1 );
			aRet.startOffset[ 0 ] = nStt;
			aRet.endOffset.realloc( 1 );
			// nEnd already is the position behind the word
			aRet.endOffset[ 0 ] = nEnd;
			break;
		}

		const sal_Int32 nPrevStart = aWBnd.startPos;
		const sal_Int32 nPrevEnd = aWBnd.endPos;

		nStt = nEnd;
		aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
									WordType::ANYWORD_IGNOREWHITESPACES);

		// The iterator handed back the very same boundary: the next round
		// would repeat exactly this one, so stop instead of looping forever.
		if( aWBnd.startPos == nPrevStart && aWBnd.endPos == nPrevEnd )
			break;
	} while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.Len() );
	return aRet;
}

// Approximate (similarity) search for words, backward direction.
//
// The text is walked word by word with the break iterator, beginning with
// the word at startPos and moving towards endPos. Each word, clipped to
// [endPos, startPos), is handed to pWLD->WLD(); the first word whose result
// does not exceed nLimit is reported.
//
// searchStr: the text to search in
// startPos:  position at which the search begins (the upper position)
// endPos:    position at which the search ends (the lower position)
//
// Returns a result with subRegExpressions == 0 when nothing was found or no
// break iterator is available. Otherwise subRegExpressions is 1,
// startOffset[0] is the upper and endOffset[0] the lower position of the
// word, each reduced by one but never below 0.
// NOTE(review): because of that clamping a lower position of 0 and of 1 are
// both reported as 0 - confirm this is what the callers expect.
SearchResult TextSearch::ApproxSrchBkwrd( const ::rtl::OUString& searchStr,
								sal_Int32 startPos, sal_Int32 endPos )
								throw(::com::sun::star::uno::RuntimeException)
{
	SearchResult aRet;
	aRet.subRegExpressions = 0;

	if( !xBreak.is() )
		return aRet;

	String aWTemp( searchStr );
	if( IsIgnoreCase( aSrchPara.transliterateFlags ))
		pCharClass->toLower( aWTemp );

	xub_StrLen nStt, nEnd;

	Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
								aSrchPara.Locale,
								WordType::ANYWORD_IGNOREWHITESPACES, TRUE );

	do {
		if( aWBnd.endPos <= endPos )
			break;
		// clip the word to the search region
		nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
		nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;

		if( nStt < nEnd &&
			pWLD->WLD( aWTemp.GetBuffer() + nStt, nEnd - nStt ) <= nLimit )
		{
			aRet.subRegExpressions = 1;
			aRet.startOffset.realloc( 1 );
			aRet.startOffset[ 0 ] = nEnd ? nEnd-1 : 0;
			aRet.endOffset.realloc( 1 );
			aRet.endOffset[ 0 ] = nStt ? nStt-1 : 0;
			break;
		}

		const sal_Int32 nPrevStart = aWBnd.startPos;
		const sal_Int32 nPrevEnd = aWBnd.endPos;

		aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
									WordType::ANYWORD_IGNOREWHITESPACES);

		// The iterator handed back the very same boundary (e.g. there is
		// nothing in front of the current word): the next round would
		// repeat exactly this one, so stop instead of looping forever.
		if( aWBnd.startPos == nPrevStart && aWBnd.endPos == nPrevEnd )
			break;
	} while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.Len() );
	return aRet;
}

