require 'elefas'

module Elefas
        
    # Fuzzy (trigram-based) searcher over the EF_UNIT / EF_SEG / EF_DOC tables.
    # Two statements are prepared once in the constructor and re-executed by
    # fuzzySearch for every query.
    class ElefasSearcher < ElefasClient
        # New searcher - you may want to restrict it to a dedicated collection or document.
        # If srcLang is not nil, the searcher is faster but is not guaranteed to find
        # segments in other languages!
        #
        # dest::       passed unchanged to the ElefasClient constructor
        # config::     optional Hash-like object; the keys read here are
        #              'debug' - list of strings; entries matching /db/i, /text|gi?v/i or
        #                        /lev/i add the db_seg, gv_text or lev column to the result
        #              'score' - SQL scoring expression, or "@name" as shorthand for
        #                        name(<segment contents>,:text); may also be a Hash whose
        #                        'db' entry holds that value
        # srcLang::    when a table named ef_seg_<srclang> is listed in pg_tables it is
        #              searched instead of ef_seg
        # collection:: restrict the search to this collection name (raises if it is unknown)
        # doc::        restrict the search to the document with this name
        def initialize(dest,config = nil, srcLang = nil, collection = nil, doc = nil)
            super(dest)
            @st_tuv = @conn.prepare('find_tuv', 'select * from EF_SEG where unit=:unit')
            seg_table = 'ef_seg'
            if srcLang != nil then
                # if ef_seg_$srcLang exists use it
                lang_table = "ef_seg_#{srcLang.downcase}"
                schema = @conn.exec(
                    "select schemaname from pg_tables 
                      where tablename=#{quoteSqlLiteral(lang_table)}", 
                    nil, Object, 'schemaname')
                seg_table = lang_table unless schema == nil
            end
            spec = ["upper(#{seg_table}.lang)=:lang", "#{seg_table}.contents % :reduced"]
            spec << "ef_doc.name = #{quoteSqlLiteral(doc)}" unless doc == nil
            if collection != nil then
                coll_id = @conn.exec("select id from EF_COLLECTION where name=?", collection, Object, 1)
                raise "Collection #{collection} does not exist" if coll_id == nil 
                spec << "ef_doc.collection = #{coll_id}"
            end
            debug = ''
            if config != nil and config['debug'] != nil then
                wanted = config['debug']
                columns = []
                columns << "#{seg_table}.contents as db_seg" if wanted.any? { |item| item =~ /db/i }
                columns << ':text as gv_text' if wanted.any? { |item| item =~ /text|gi?v/i }
                columns << "lev(#{seg_table}.contents,:text) as lev" if wanted.any? { |item| item =~ /lev/i }
                # Only emit the leading comma when there is something to select;
                # a bare "," would make the statement syntactically invalid.
                debug = "," + columns.join(',') unless columns.empty?
            end
            score_fun = "score(#{seg_table}.contents,:text)"  # default
            # Note: `defined? config['score']` is true for any object responding to [],
            # so the key has to be tested for nil explicitly.
            if config != nil and config['score'] != nil then
                custom = config['score']
                custom = custom['db'] if custom.is_a? Hash
                if custom then   # keep the default when the Hash has no 'db' entry
                    if custom =~ /^\@(\w+)$/ then
                        score_fun = "#{$1}(#{seg_table}.contents,:text)"
                    else
                        score_fun = custom
                    end
                end
            end
            @st_search = @conn.prepare('find_seg', "select ef_unit.*, ef_doc.name as doc, 
                    #{score_fun} as score #{debug}
                 from ef_unit left join #{seg_table} on #{seg_table}.unit = ef_unit.id left join ef_doc on ef_doc.id = ef_unit.doc_id
                where #{spec.join(' and ')} order by score desc")
        end
        
        # After this call, fuzzySearch skips every unit id it has already seen
        # (the list of seen ids is kept for the lifetime of this searcher).
        def track_duplicates!() @found_units = [] end
        
        # Normalised form of a phrase used for the trigram comparison: lower-cased,
        # every run of characters other than letters and digits replaced by one space,
        # later repetitions of a word (separated from the earlier one by other text)
        # removed, whitespace squeezed and trimmed. Returns a new String.
        def self.reduced(phrase)
            phrase = phrase.downcase    #trigrams are case-insensitive
            phrase.gsub!(/[^\p{Letter}\p{Digit}]+/,' ') # trigrams stores only letters and spaces
            # gsub! returns nil once nothing matched. Comparing string lengths instead
            # would stop too early: dropping a one-character word leaves the length unchanged.
            # Every substitution removes one word occurrence, so the loop terminates.
            loop do
                break unless phrase.gsub!(/\b([\p{Letter}\p{Digit}]+)\s(.+?)\s\1\b/, ' \1 \2 ')  # remove duplicate words
            end
            phrase.strip! ; phrase.gsub!(/\s+/, ' ')
            return phrase
        end
        
        # Runs the prepared search for phrase and yields one
        # Spongiae::Unit::Multilingual per matching unit, in the order returned
        # by the query (score descending).
        #
        # phrase::   text to look for
        # lang::     source language, or "src:tra1,tra2" to also restrict which
        #            variants are attached; only the first two characters of the
        #            source language (upper-cased) are used for the query
        # minScore:: units are skipped unless score * 100 is greater than this value
        # maxCount:: the method returns after this many units have been yielded;
        #            a value of zero or less never reaches the stop condition
        def fuzzySearch(phrase,lang,minScore,maxCount,&f)
            srcLang = lang; traLangs = nil
            srcLang, traLangs = $1, $2.split(',') if srcLang =~ /^([\w\-]+):([\w\-\,]+)$/
            params = {'text' => phrase, 'reduced' => ElefasSearcher::reduced(phrase), 'lang' => srcLang[0,2].upcase}
            @st_search.exec(params) do |unit| 
                if @found_units != nil then
                    next if @found_units.include?(unit['id'])
                    @found_units << unit['id']
                end
                next unless unit['score'].to_f * 100 > minScore
                tu = Spongiae::Unit::Multilingual.new(unit['id'],{ 'file' => unit['doc'], 'score' => unit['score'].to_s })
                addAttributeColumns!(tu.attr, unit)
                @st_tuv.exec({'unit' => unit['id']}) do |tuv|
                    next if traLangs != nil && !traLangs.include?(tuv['lang'])
                    tu.variants[tuv['lang']] = Spongiae::Unit::Variant.new tuv, nil, tuv['contents']
                    addAttributeColumns!(tu.variants[tuv['lang']].attr, tuv)
                end
                yield tu
                maxCount = maxCount - 1; return if maxCount == 0
            end
        end
    
    private
        # Copies the creator / creation date / changer / change date columns of a
        # result row into attr, skipping columns whose value is nil.
        def addAttributeColumns!(attr,sql)
            { 'creator' => 'creator', 'creationdate' => 'creation_date',
              'changer' => 'changer', 'changedate' => 'change_date' }.each do |key, column|
                attr[key] = sql[column] if sql[column] != nil
            end
        end

        # Renders value as a single-quoted SQL string literal, doubling embedded
        # quotes so the value cannot terminate the literal early.
        # NOTE(review): relies on standard_conforming_strings being on (backslashes
        # are then literal) - confirm for the target server.
        def quoteSqlLiteral(value)
            "'" + value.to_s.gsub("'", "''") + "'"
        end
    end # class ElefasSearcher

    # Read-only figures about the database behind an Elefas connection:
    # languages present, row counts of the EF_* tables and the database size.
    class ElefasStatistics < ElefasClient
        # dest is handed unchanged to the ElefasClient constructor.
        def initialize(dest)
            super(dest)
        end
        
        # Distinct values of EF_SEG.lang.
        # With count_only the number of distinct languages is returned instead of the list.
        def languages(count_only = false)
            unless count_only
                return @conn.exec('select distinct lang from EF_SEG', nil, Array, 'lang')
            end
            @conn.exec('select count(distinct lang) from EF_SEG', nil, Object, 'count')
        end
        
        # Row count of EF_DOC.
        def documentsCount()
            select_count('DOC')
        end

        # Row count of EF_COLLECTION.
        def collectionsCount()
            select_count('COLLECTION')
        end

        # Row count of EF_UNIT.
        def unitsCount()
            select_count('UNIT')
        end

        # Row count of EF_SEG.
        def segCount()
            select_count('SEG')
        end

        # Size of the connected database as formatted by pg_size_pretty.
        def dbSize()
            query = "SELECT pg_size_pretty(pg_database_size('#{@conn.dbName}')) size"
            @conn.exec(query, nil, Object, 'size')
        end
        
        private
        # count(*) of the table named EF_<table>.
        def select_count(table)
            @conn.exec("select count(*) from EF_#{table}", nil, Object, 'count')
        end        
    end
    
end # module Elefas
