// -*- C++ -*-
#include <xapian.h>
#include <ept/core/apt.h>
#include <wibble/regexp.h>
#include <wibble/sys/pipe.h>
#include <wibble/sys/exec.h>

#ifndef EPT_XAPIAN_H
#define EPT_XAPIAN_H

namespace ept {
namespace core {
namespace xapian {

// Allocate value indexes for known values.
//
// Each constant is a Xapian::valueno, i.e. the number under which one
// per-document value is stored.  The numbers are grouped by origin with gaps
// left between the groups (1-2 APT sizes, 10 popcon, 20-27 "iterating"
// ratings).
// NOTE(review): these numbers presumably have to match whatever tool writes
// the index -- confirm before changing any of them.
const Xapian::valueno VAL_APT_INSTALLED_SIZE      =  1;
const Xapian::valueno VAL_APT_PACKAGE_SIZE        =  2;
const Xapian::valueno VAL_POPCON                  = 10;
const Xapian::valueno VAL_ITERATING_RATING        = 20;
const Xapian::valueno VAL_ITERATING_FUNCTIONALITY = 21;
const Xapian::valueno VAL_ITERATING_USABILITY     = 22;
const Xapian::valueno VAL_ITERATING_SECURITY      = 23;
const Xapian::valueno VAL_ITERATING_PERFORMANCE   = 24;
const Xapian::valueno VAL_ITERATING_QUALITY       = 25;
const Xapian::valueno VAL_ITERATING_SUPPORT       = 26;
const Xapian::valueno VAL_ITERATING_ADOPTION      = 27;

/// Expand decider that accepts only terms carrying the "XT" prefix.
struct TagFilter : public Xapian::ExpandDecider
{
    /// Return true when the first two characters of term are 'X' and 'T'.
    virtual bool operator()(const std::string &term) const {
        // compare() clamps the length to the string size, so terms shorter
        // than two characters simply fail to match.
        return term.compare( 0, 2, "XT" ) == 0;
    }
};

struct List {
    char m_enqPlace[sizeof(Xapian::Enquire)];
    mutable Xapian::MSet m_matches;
    mutable Xapian::MSet::const_iterator m_iter;
    mutable int m_pos;
    typedef List Type;

    static const size_t chunkSize = 20;

    List head() const {
        seek();
        return *this;
    }

    Token token() const {
        Token t;
        t._id = m_iter.get_document().get_data();
        return t;
    }

    bool operator<( const List &o ) const {
        return token() < o.token();
    }

    void seek() const {
        if ( m_matches.size() == chunkSize && m_iter == m_matches.end() ) {
            m_matches = enq().get_mset( m_pos, chunkSize );
            m_iter = m_matches.begin();
            m_pos += chunkSize;
        }
    }

    bool empty() const {
        if ( m_pos == -1 )
            return true;
        seek();
        return m_matches.size() < 30 && m_iter == m_matches.end();
    }

    List tail() const {
        List t = *this;
        t.seek();
        t.m_iter ++;
        return t;
    }

    Xapian::Enquire const &enq() const {
        return *reinterpret_cast< Xapian::Enquire const * >( m_enqPlace );
    }

    List( Xapian::Enquire _enq )
    {
        Xapian::Enquire *e = new (m_enqPlace) Xapian::Enquire( _enq );
        assert_eq( e, &enq() );
        m_matches = enq().get_mset( 0, chunkSize );
        m_iter = m_matches.begin();
        m_pos = chunkSize;
    }

    List() {}
};

struct Query {
    Xapian::Database *m_db;
    Xapian::Enquire m_enq;
    Xapian::Stem m_stem;
    typedef std::set< std::string > Terms;
    Terms m_include, m_exclude, m_secondary;
    int m_cutoff;
    bool m_expand;

    void setQualityCutoff( int c ) {
        m_cutoff = c;
    }

    void setExpand( bool e ) { m_expand = e; }

    Query( Xapian::Database &e ) : m_db( &e ), m_enq( e ) {
        m_cutoff = 50;
        m_expand = true;
    }

    wibble::Tokenizer queryTokenizer( std::string q ) const {
        return wibble::Tokenizer( q, "[A-Za-z0-9._+:-]+", REG_EXTENDED );
    }

    template< typename Out >
    void tokenizeQuery( std::string q, Out o ) const
    {
        wibble::Tokenizer tok = queryTokenizer( q );
        for (wibble::Tokenizer::const_iterator i = tok.begin(); i != tok.end(); ++i )
        {
            if ( (*i).find( "::" ) != std::string::npos ) { // assume tag
                *o++ = ("XT" + *i);
            } else {
                std::string t = wibble::str::tolower(*i);
                std::string s = m_stem(t);
                *o++ = t;
                if (s != t)
                    *o++ = ("Z" + s);
            }
        }
    }

    template< typename Out >
    void expand( Out o ) const
    {
        Xapian::RSet rset;
        // Get the top 5 results as 'good ones' to compute the search expansion
        Xapian::MSet mset = m_enq.get_mset(0, 5);
        for ( Xapian::MSet::iterator i = mset.begin(); i != mset.end(); ++i )
            rset.add_document(i);
        // Get the expanded set, only expanding the query with tag names
        TagFilter tagf;
        Xapian::ESet eset = m_enq.get_eset(5, rset, &tagf);
        for ( Xapian::ESetIterator i = eset.begin(); i != eset.end(); ++i )
            *o++ = *i;
    }

    void updateEnquire() {
        // set up query now
        Xapian::Query inc( Xapian::Query::OP_OR,
                           m_include.begin(),
                           m_include.end() ),
            exc( Xapian::Query::OP_OR,
                 m_exclude.begin(),
                 m_exclude.end() ),
            secondary( Xapian::Query::OP_OR,
                       m_secondary.begin(),
                       m_secondary.end() ),
            secondary1( Xapian::Query::OP_SCALE_WEIGHT, secondary, 0.02 ),
            query1( Xapian::Query::OP_AND_NOT, inc, exc ),
            query( Xapian::Query::OP_OR, query1, secondary1 );

        m_enq.set_query( query );

        if ( m_expand ) {
            m_expand = false;
            expand( std::inserter( m_include, m_include.begin() ) );
            updateEnquire();
            m_expand = true;
            return;
        }

        Xapian::MSet first = m_enq.get_mset(0, 1, 0, 0, 0);
        Xapian::MSetIterator ifirst = first.begin();
        if ( ifirst != first.end() ) {
            // Xapian::percent cutoff = ifirst.get_percent() * m_cutoff / 100;
            // m_enq.set_cutoff(cutoff);
        }
    }

    List results() {
        updateEnquire();
        return List( m_enq );
    }

    std::map< std::string, int > relevantTags( int n = 30 ) {
        updateEnquire();
        std::map< std::string, int > relev;
        Xapian::RSet rset;
        Xapian::MSet mset = m_enq.get_mset(0, 100);
        for ( Xapian::MSet::iterator i = mset.begin(); i != mset.end(); ++i )
            rset.add_document(i);
        // Get the expanded set, only expanding the query with tag names
        TagFilter tagf;
        Xapian::ESet eset = m_enq.get_eset(n, rset, &tagf);
        for ( Xapian::ESetIterator i = eset.begin(); i != eset.end(); ++i )
            relev.insert( relev.begin(),
                          std::make_pair(
                              std::string( *i, 2, std::string::npos ),
                              i.get_weight() ) );
        return relev;
    }

    void addTerms( std::string t, bool partial = false, bool exclude = false ) {
        if ( t.empty() )
            return;
        Terms &to = exclude ? m_exclude : m_include;
        std::vector< std::string > tok;
        tokenizeQuery( t, std::back_inserter( tok ) );
        if ( partial ) {
            if ( tok.back().size() == 1 ) {
                tok.pop_back();
            } else {
                std::copy(
                    m_db->allterms_begin( tok.back() ),
                    m_db->allterms_end( tok.back() ),
                    std::back_inserter( tok ) );
            }
        }
        std::copy( tok.begin(), tok.end(), std::inserter( to, to.begin() ) );
    }

    void addTerms( const Terms &t, bool exclude = false ) {
        Terms &to = exclude ? m_exclude : m_include;
        std::copy( t.begin(), t.end(), std::inserter( to, to.begin() ) );
    }

    void addSecondaryTerm( const std::string &term, bool partial = false ) {
        if ( partial ) {
            std::copy(
                m_db->allterms_begin( term ),
                m_db->allterms_end( term ),
                std::inserter( m_secondary, m_secondary.begin() ) );
        } else {
            m_include.insert( m_secondary.begin(), term );
        }
    }

};

struct Source
{
protected:
    mutable Xapian::Database m_db;
    Xapian::Stem m_stem;
    mutable bool m_opened;

    /// Return a lowercased copy of the string
    static std::string toLower(const std::string& str);

    /**
     * Add normalised tokens computed from the string to the document doc.
     *
     * pos is used as a sequence generator for entering the token position in
     * the document.
     */
    void normalize_and_add(Xapian::Document& doc, const std::string& term,
                           int& pos) const;

public:
    Source();

    /// Access the Xapian database
    Xapian::Database& db() {
        open();
        return m_db;
    }

    /// Access the Xapian database
    const Xapian::Database& db() const {
        open();
        return m_db;
    }

    void open() const;
    void invalidate() {
        m_db = Xapian::Database();
        m_opened = false;
    }

    /// Timestamp of when the Xapian database was last updated
    time_t timestamp() const;

    void updateLeniently( AptDatabase &apt, OpProgress *op = 0 ) {
        if (apt.timestamp() - timestamp() > 86400 * 8) // a little over a week
            update( op );
    }

    void update( OpProgress *op = 0 ) {
        if ( !op )
            op = new OpProgress();

        wibble::exception::AddContext _ctx( "Rebuilding Xapian database." );
        int outfd;
        std::string op_str;

        wibble::sys::Exec ex( "update-apt-xapian-index" );
        ex.args.push_back( "--batch-mode" );
        ex.searchInPath = true;
        ex.forkAndRedirect( 0, &outfd, 0 );

        wibble::sys::Pipe monit( outfd );
        while ( !monit.eof() ) {
            std::string line = monit.nextLine();
            if ( line.empty() ) {
                usleep( 100000 );
                continue;
            }
            std::cerr << "got : " << line << std::endl;
            if ( wibble::str::startsWith( line, "begin: " ) ) {
                op_str = std::string( line, 7, std::string::npos );
                op->OverallProgress( 0, 100, 100, op_str );
                    
            } else if ( wibble::str::startsWith( line, "done: " ) ) {
                op->Done();
            } else if ( wibble::str::startsWith( line, "progress: " ) ) {
                wibble::ERegexp re( "progress: ([0-9]+)/([0-9]+)", 3 );
                if ( re.match( line ) ) {
                    assert_eq( re[2], "100" );
                    op->OverallProgress( atoi( re[1].c_str() ), 100, 100, op_str );
                }
            }
        }
        ex.waitForSuccess();
        invalidate();
    }

    /// Returns true if the index has data
    bool hasData() const { return timestamp() > 0; }

    Query query( const std::string &s,
                 bool expand = true,
                 int qualityCutoff = 50 )
    {
        Query q( db() );
        q.setQualityCutoff( qualityCutoff );
        q.setExpand( expand );
        q.addTerms( s );
        if ( s.length() > 2 )
            q.addSecondaryTerm( "XP" + s, true );
        return q;
    }

    Query partialQuery( const std::string &s ) {
        Query q( db() );
        q.addTerms( s, true ); // partial
        return q;
    }

    /// Returns true if the index is older than the Apt database information
    // bool needsRebuild(apt::Apt& apt);

    Xapian::docid docidByName(const std::string& pkgname) const;

    /**
     * Tokenize the string and build an OR query with the resulting keywords
     */
    Xapian::Query makeORQuery(const std::string& keywords) const;

    /**
     * Tokenize the string and build an OR query with the resulting keywords.
     *
     * The last token in keywords is considered to be typed only partially, to
     * implement proper search-as-you-type.
     */
    Xapian::Query makePartialORQuery(const std::string& keywords) const;

    /**
     * Build a query with the given keywords, specified as iterators of strings
     */
    template<typename ITER>
    Xapian::Query makeORQuery(const ITER& begin, const ITER& end) const
    {
        return Xapian::Query(Xapian::Query::OP_OR, begin, end);
    }

    /// Return a list of tag-based terms that can be used to expand an OR query
    std::vector<std::string> expand(Xapian::Enquire& enq) const;

//	std::vector<std::string> similar(const std::string& pkg);

    /**
     * Create a query to look for packages similar to the given one
     */
    Xapian::Query makeRelatedQuery(const std::string& pkgname) const;

    /**
     * Get the integer value for
     */
    double getDoubleValue(const std::string& pkgname,
                          Xapian::valueno val_id) const;

    /**
     * Get the integer value for
     */
    int getIntValue(const std::string& pkgname, Xapian::valueno val_id) const;
};

}
}
}

#endif
