# The Lucene.Net assemblies are expected in the 'dll' directory next to this file.
# That directory is put on the load path and the assemblies are required by bare name;
# requiring them with a 'dll/' prefix must be avoided because some internal processes
# in .NET would not work.
$LOAD_PATH << File.dirname(__FILE__) + '/dll'
require "Lucene.Net.dll"
require "Lucene.Net.Contrib.Analyzers.dll"

# Short top-level aliases for the Lucene.Net classes referenced repeatedly below.
IndexWriter = Lucene::Net::Index::IndexWriter
Field = Lucene::Net::Documents::Field

module Exilis

	# Lucene analyzer that tokenizes text into lower-cased character 4-grams and,
	# when stop lists are configured for a field, filters those tokens out.
	class NgramParser < Lucene::Net::Analysis::Analyzer
		# Stop lists keyed by full field name (the key that TokenStream receives,
		# e.g. "EN.t"). Each value must respond to +empty?+ and +each+.
		# Declared here because the writer and searcher assign it through +stop=+.
		attr_accessor :stop

		# Builds the token stream for one field.
		#
		# fieldName - name of the field being analyzed; also the stop-list key.
		# reader    - text reader handed to the n-gram tokenizer.
		#
		# Returns the lower-cased 4-gram stream, wrapped in a StopFilter only when
		# a non-empty stop list exists for +fieldName+.
		def TokenStream(fieldName, reader)
			ngrams = Lucene::Net::Analysis::LowerCaseFilter.new(Lucene::Net::Analysis::NGram::NGramTokenizer.new(reader, 4, 4))
			# @stop may never have been assigned (no stop lists available); treat
			# that like "no stop words" instead of failing on nil.
			stopItems = @stop && @stop[fieldName]
			return ngrams if stopItems.nil? || stopItems.empty?

			set = System::Collections::Generic::SortedSet.of(System::String).new
			stopItems.each { |item| set.Add(item) }
			Lucene::Net::Analysis::StopFilter.new(true, ngrams, set)
		end

		# Tokenizes +phrase+ the way field "<fieldName>.t" is analyzed.
		#
		# fieldName - field prefix; '.t' is appended before analysis.
		# phrase    - text to split into terms.
		#
		# Returns a Hash mapping each produced term to the number of times it
		# occurred in +phrase+.
		def terms(fieldName, phrase)
			stream = self.TokenStream(fieldName + '.t', System::IO::StringReader.new(phrase))
			begin
				termAtt = stream.method(:GetAttribute).of(Lucene::Net::Analysis::Tokenattributes::ITermAttribute).call()
				stream.reset
				counts = {}
				while stream.IncrementToken
					term = termAtt.Term
					counts[term] = (counts[term] || 0) + 1
				end
				stream.End
				counts
			ensure
				# Release the stream even when tokenizing raised.
				stream.Dispose
			end
		end
	end
	
	# Adds multilingual units to a Lucene index analyzed with NgramParser.
	#
	# Field layout of every written document:
	#   ".p.<name>"       - unit-level property, stored only
	#   "<LANG>.p.<name>" - property of the variant in language LANG, stored only
	#   "<LANG>.t"        - text of the variant, stored, analyzed, with term vectors
	class ExilisWriter

		# destDir - directory of the index.
		# stop    - stop lists handed to the analyzer (see NgramParser#TokenStream).
		# isNew   - passed to IndexWriter as its third argument; forced to true
		#           when +destDir+ does not exist yet.
		def initialize(destDir,stop={},isNew=false)
			isNew = true unless File.exist?(destDir)
			parser = NgramParser.new
			parser.stop = stop
			@writer = IndexWriter.new(Lucene::Net::Store::FSDirectory.open(destDir), parser, isNew, IndexWriter::MaxFieldLength.UNLIMITED)
			@count = 0
		end

		# Writes one unit as a single document.
		#
		# tu - object exposing +props+ (name => value) and +variants+
		#      (language => variant); every variant exposes +props+ and +text+.
		def write(tu)
			doc = Lucene::Net::Documents::Document.new
			tu.props.each_pair do |name, val|
				doc.add storedField(".p." + name, val)
			end
			tu.variants.each_pair do |lang, tuv|
				prefix = lang.upcase
				# Variant-level properties come from the variant itself; the unit's
				# own properties are already stored above under ".p.<name>".
				tuv.props.each_pair do |name, val|
					doc.add storedField("#{prefix}.p." + name, val)
				end
				doc.add Field.new("#{prefix}.t", tuv.text, Field::Store.YES, Field::Index.ANALYZED_NO_NORMS, Field::TermVector.YES)
			end
			@writer.AddDocument(doc)
			@count = @count + 1
		end

		# Closes the underlying index writer.
		def close
			@writer.close
		end

		private

		# Builds a field that is stored but neither indexed nor term-vectored.
		def storedField(name, val)
			Field.new(name, val, Field::Store.YES, Field::Index.NO, Field::TermVector.NO)
		end
	end
    
	# Read access to the units stored in an index written by ExilisWriter.
	class ExilisExplorer
		# destDir - directory of the index; the reader is opened with +true+ as
		#           the second argument of IndexReader.open.
		def initialize(destDir)
			@reader = Lucene::Net::Index::IndexReader.open(Lucene::Net::Store::FSDirectory.open(destDir), true)
		end

		# Converts a Lucene document back into a multilingual unit.
		#
		# doc     - document whose fields follow the ".p.<name>",
		#           "<LANG>.p.<name>" and "<LANG>.t" naming scheme.
		# id      - identifier given to the resulting unit.
		# targets - collection of language codes to keep, or nil to keep all.
		#
		# Returns a Spongiae::Unit::Multilingual.
		def toEntry(doc,id,targets=nil)
			res = Spongiae::Unit::Multilingual.new(id,{})
			doc.GetFields().each do |field|
				case field.name
				when /^\.p\.(.+)$/
					res.props[$1] = field.StringValue
				when /^([\w\-]+)\.t$/
					lang = $1
					next unless targets.nil? || targets.include?(lang)
					# A property field may have created a placeholder variant
					# already; keep its properties and attach the real text.
					known = res.variants[lang]
					res.variants[lang] = Spongiae::Unit::Variant.new(lang, known ? known.props : {}, field.StringValue)
				when /^([\w\-]+)\.p\.(.+)$/
					lang, name = $1, $2
					next unless targets.nil? || targets.include?(lang)
					# Property seen before the text field: create a placeholder
					# variant that the ".t" branch replaces later.
					res.variants[lang] ||= Spongiae::Unit::Variant.new(lang, {}, '--temp--')
					res.variants[lang].props[name] = field.StringValue
				end
			end
			res
		end

		# Loads the document with the given reader-internal number and converts
		# it with toEntry.
		def entryById(id,targets=nil)
			toEntry(@reader.document(id), id, targets)
		end

		# Yields every non-deleted document as a unit (see toEntry).
		# Without a block, returns an enumerator over the same units.
		def findAll(targets=nil)
			return to_enum(:findAll, targets) unless block_given?

			(0 .. @reader.MaxDoc - 1).each do |i|
				next if @reader.IsDeleted(i)
				yield toEntry(@reader.document(i), i, targets)
			end
		end

		# Number of documents reported by the reader's NumDocs.
		def count
			@reader.NumDocs
		end

		# Distinct language prefixes of the fields that are indexed with term
		# vectors (the part of the field name before the first dot).
		def languages
			names = @reader.getFieldNames(Lucene::Net::Index::IndexReader::FieldOption.INDEXED_WITH_TERMVECTOR)
			names.map { |item| $1 if item =~ /^([\w\-]+?)\./ }.compact.uniq
		end
	end
        
    
    # Explorer that can additionally look up units whose text shares n-grams
    # with a query phrase.
    class ExilisSearcher < ExilisExplorer
        # destDir - directory of the index. When it contains a "stop.lists"
        #           file, the stop lists are loaded with Exilis::readStopWords.
        def initialize(destDir)
            super(destDir)
            @searcher = Lucene::Net::Search::IndexSearcher.new(@reader)
            @analyzer = NgramParser.new
            stopFile = destDir + "/stop.lists"
            # Always give the analyzer a stop-list hash: NgramParser#TokenStream
            # indexes it unconditionally, so leaving it unset made every search
            # on an index without "stop.lists" fail.
            @analyzer.stop = File.exist?(stopFile) ? Exilis::readStopWords(stopFile) : {}
        end

        # Searches the "<LANG>.t" field for units sharing n-grams with +phrase+.
        #
        # phrase   - text to look for.
        # lang     - "SRC" to search language SRC and return all variants, or
        #            "SRC:T1,T2" to return only variants T1, T2 and SRC.
        # minScore - percentage; with N query terms, at least N * minScore / 200
        #            of them must match (only enforced when that exceeds 1).
        # maxCount - maximum number of hits collected.
        # f        - block called with each hit converted by entryById.
        def fuzzySearch(phrase,lang,minScore,maxCount,&f)
            targets = nil
            if lang =~ /^(.+):(.+)/
                lang = $1.upcase
                # The source language is always part of the returned variants.
                targets = $2.upcase.split(/,/) << lang
            end
            source = lang.upcase

            query = Lucene::Net::Search::BooleanQuery.new
            @analyzer.terms(source,phrase).each_key do |word|
                query.add(Lucene::Net::Search::TermQuery.new(Lucene::Net::Index::Term.new(source + '.t',word)), Lucene::Net::Search::Occur.SHOULD)
            end
            minShould = query.Clauses.count * minScore / 200
            query.setMinimumNumberShouldMatch(minShould) if minShould > 1  # minScore as %, divided by 2.

            topCollector = Lucene::Net::Search::TopScoreDocCollector.Create(maxCount, true)
            @searcher.Search(query, topCollector)

            topCollector.TopDocs.ScoreDocs.each do |sdoc|
                f.call entryById(sdoc.doc,targets)
            end
        end
    end

end
