require 'spongiae/unit'

require 'nokogiri'
require 'set'

module Spongiae
   module Formats
       
       # SAX handler that collects the text of an HTML document and reports one
       # unit per text-bearing element (p, title, li, h<digit>) to a callback.
       #
       # Each unit is identified by a path made of the open elements, e.g.
       # "/div[0]/p[id=intro]": an element with a non-empty id attribute
       # contributes "[id=<value>]", any other element gets the first index not
       # yet used for that path. html, head and body never appear in the path.
       class HtmlCallbacks < Nokogiri::XML::SAX::Document

           # Elements that are left out of the unit path.
           STRUCTURAL_TAGS = /\A(?:html|head|body)\z/
           # Elements whose end tag closes a text unit.
           TEXT_TAGS = /\A(?:p|title|li|h\d)\z/

           # sub : callable invoked with each Spongiae::Unit::Unilingual found
           def initialize(sub)
               @sub = sub
               @path = ''; @text = ''
               # Paths of the enclosing elements, innermost last; restored when
               # an element ends, so an id containing '/' cannot confuse the pop.
               @ancestors = []
               @keys = Set.new
           end

           # Converts a list of [name, value] rows into a Hash (name => value).
           def to_hash(attrs)
               attrs.each_with_object({}) { |row, res| res[row[0]] = row[1] }
           end

           # Extends the current path with the element that has just been opened.
           # Only name and attrs are used; the other parameters are ignored.
           def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
               return if name =~ STRUCTURAL_TAGS
               @ancestors.push(@path)
               base = "#{@path}/#{name}"
               id = to_hash(attrs)['id']
               if id && !id.empty?
                   @path = "#{base}[id=#{id}]"
               else
                   # First index not yet handed out for this path.
                   i = 0
                   i += 1 while @keys.include?("#{base}[#{i}]")
                   @keys.add("#{base}[#{i}]")
                   @path = "#{base}[#{i}]"
               end
           end

           def start_element(name, attrs = []) start_element_namespace(name,attrs) end

           # Closes an element: for text-bearing elements the accumulated text is
           # whitespace-normalised and, unless empty, handed to the callback;
           # then the path of the parent element is restored.
           def end_element_namespace(name, prefix = nil, uri = nil)
               if name =~ TEXT_TAGS
                   text = @text.strip.gsub(/\s+/, ' ')
                   # NOTE(review): @file is never assigned in this class, so the
                   # first argument is always nil here - confirm this is intended.
                   @sub.call Spongiae::Unit::Unilingual.new(@file, @path, nil, text) unless text.empty?
                   @text = ''
               end
               # Structural elements pushed nothing, so they must not pop either.
               return if name =~ STRUCTURAL_TAGS
               @path = @ancestors.pop unless @ancestors.empty?
           end

           def end_element(name) end_element_namespace(name) end

           # Accumulates character data until the next text-bearing element ends.
           def characters(text)
               @text += text
           end
       end
       
       # Variant of HtmlCallbacks that also re-emits the markup it reads:
       # every start and end tag is written to dest, while text is still
       # accumulated and delivered through the sub callback.
       class HtmlCallbacksOutput <  HtmlCallbacks

           # Elements that have no content and therefore no closing tag in HTML.
           VOID_ELEMENTS = Set.new(%w[area base basefont br col embed frame hr img input
                                      isindex keygen link meta param source track wbr]).freeze

           # Characters that must be escaped inside a double-quoted attribute value.
           ATTR_ESCAPES = { '&' => '&amp;', '"' => '&quot;', '<' => '&lt;', '>' => '&gt;' }.freeze

           # dest : object receiving the markup through #print
           # sub  : callable invoked with each unit (see HtmlCallbacks)
           def initialize(dest,sub)
               super(sub); @dest = dest
           end

           # Updates the path, then writes the opening tag with its attributes.
           # Attribute values are escaped so that quotes or ampersands in a value
           # cannot break the generated markup; a nil value is written as "".
           def start_element(name, attrs = [])
               super(name, attrs)
               @dest.print "<#{name}"
               attrs.each do |item|
                   value = item[1].to_s.gsub(/[&"<>]/, ATTR_ESCAPES)
                   @dest.print " #{item[0]} = \"#{value}\""
               end
               @dest.print ">"
           end

           # Lets the parent flush any pending unit, then writes the closing tag.
           # No closing tag is written for void elements such as br or img.
           def end_element(name)
               super(name)
               @dest.print "</#{name}>" unless VOID_ELEMENTS.include?(name)
           end

           # Accumulates character data for the next unit.
           # NOTE(review): the parent class initialises @path to '' and nothing
           # visible in this file sets it to nil, so the else branch looks
           # unreachable - confirm whether an empty-path test was intended.
           def characters(text)
               if @path != nil then
                   @text = @text + text
               else
                   @dest.print text
               end
           end
       end
       
       # Entry point for HTML files: extracts text units from a file and
       # writes a copy of the file with translations substituted.
       class HtmlDocument

           # file  : path of the HTML file to work on
           # props : accepted but not used by this class
           def initialize(file, props = {})
               @file = file
           end

           # read_unit : parses the file and passes each unit found to the block.
           # Units are produced by HtmlCallbacks.
           def read_unit(&sub)
               handler = HtmlCallbacks.new(sub)
               Nokogiri::HTML::SAX::Parser.new(handler).parse(File.read(@file, mode: 'rb'))
           end

           # translate : re-parses the file and writes it to dest_file_name (UTF-8).
           # For each unit, the entry of translations_map keyed by the unit id is
           # written when present, otherwise the unit's own text.
           # props is accepted but not used.
           def translate(dest_file_name,translations_map,props={})
               File.open(dest_file_name, 'w:UTF-8') do |dest|
                   writer = Proc.new do |unit|
                       original = unit.text
                       translated = translations_map[unit.id]
                       dest.puts(translated.nil? ? original : translated)
                   end
                   handler = HtmlCallbacksOutput.new(dest, writer)
                   Nokogiri::HTML::SAX::Parser.new(handler).parse(File.read(@file, mode: 'rb'))
               end
           end

       end
       
   end
end
