require 'anguilla'

module Spongiae
   module XLIFF
       
       ##
       # Reads only the meta-information of an XLIFF file by scanning its first lines.
       # Regular expressions are used because the anguilla parser would read the full file.
       #
       # Both the XLIFF 1.x attribute names (source-language / target-language) and the
       # XLIFF 2.x ones (srcLang / trgLang) are recognized. For each attribute the first
       # value met in the file is kept.
       #
       # file::     path of the XLIFF file
       # encoding:: external encoding used to open the file (default 'UTF-8')
       #
       # Returns a Hash which may contain the keys 'original', 'srcLang' and 'traLang'.
       # A key is absent when the attribute was not found before the first unit
       # (or before the end of the file); the Hash itself is always returned.
       def self.read_meta(file, encoding = 'UTF-8')
           res = {}
           File.open(file, "r:#{encoding}") do |f|
               while (line = f.gets)
                   res['original'] ||= $1 if line =~ /\soriginal\s*=\s*["'](.+?)["']/
                   res['srcLang'] ||= $1 if line =~ /\s(?:source-language|srcLang)\s*=\s*["'](.+?)["']/
                   res['traLang'] ||= $1 if line =~ /\s(?:target-language|trgLang)\s*=\s*["'](.+?)["']/
                   # Everything is known: no need to read further
                   break if res.size == 3
                   # Meta-information lives before the units: stop instead of scanning the whole file
                   break if line =~ /<(?:trans-unit|unit)[\s>]/
               end
           end
           res
       end
       
       ##
       # Parses the whole file with a Xliff_Callback created in "translation only"
       # mode (constructor argument false).
       #
       # file:: XLIFF file handed over to Anguilla::parse
       #
       # Returns the callback's result Hash, indexed by unit identifier.
       def self.read_translations_map(file)
           listener = Xliff_Callback.new(false)
           Anguilla.parse(file, listener)
           listener.result
       end
       
       ##
       # Parses the whole file with a Xliff_Callback created in "units" mode
       # (constructor argument true), so that each unit is kept as a bilingual object.
       #
       # file:: XLIFF file handed over to Anguilla::parse
       #
       # Returns the callback's result Hash, indexed by unit identifier.
       def self.read_units(file)
           listener = Xliff_Callback.new(true)
           Anguilla.parse(file, listener)
           listener.result
       end
       
       
       ##
       # Stream listener collecting the content of XLIFF units
       # (<trans-unit> in XLIFF 1, <unit> in XLIFF 2).
       #
       # The +units+ argument of the constructor selects what is done with each unit:
       # - a Proc: it is called with a Spongiae::Unit::Bilingual, nothing is stored;
       # - any other true value: +result+ maps the unit id to a Spongiae::Unit::Bilingual;
       # - false or nil: +result+ maps the unit id to the text of <target>
       #   (nil when the unit has no <target>).
       #
       # As soon as a second <file> element starts after one whose 'original' was set,
       # keys stored in +result+ are prefixed with "<original>!!".
       class Xliff_Callback
           include REXML::StreamListener

           attr_reader :result

           def initialize(units)
               @result = {}
               @units = units
               @multi_file = false
               # Parser state, reset at the beginning of each unit
               @cur_subfile = nil
               @cur_unit = nil
               @cur_text = nil
               @cur_source = ''
               @cur_tra = nil
           end

           # Opening tag: prepares the buffers according to the element met.
           def tag_start(element, attributes)
               case element
               when 'trans-unit', 'unit'
                   @cur_unit = attributes['id']
                   @cur_text = nil
                   @cur_source = ''
                   @cur_tra = nil   # remains nil until there is at least one <target>
               when 'file'
                   @multi_file = true unless @cur_subfile.nil?
                   @cur_subfile = attributes['original']
               when 'source'
                   @cur_text = ''
               when 'target'
                   @cur_text = ''
                   # nil makes the distinction between "no target" and "empty target"
                   @cur_tra = '' if @cur_tra.nil?
               end
           end

           # Registers (or forwards) the unit which has just been read.
           # unit_id:: identifier given to the unit, before the optional file prefix
           def yield_next_unit(unit_id)
               id = @multi_file ? "#{@cur_subfile}!!#{unit_id}" : unit_id
               if @units.is_a?(Proc)
                   @units.call(Spongiae::Unit::Bilingual.new(@cur_subfile, unit_id, {}, @cur_source, @cur_tra))
               elsif @units
                   @result[id] = Spongiae::Unit::Bilingual.new(@cur_subfile, unit_id, {}, @cur_source, @cur_tra)
               else
                   # Use the text captured when </target> was met: @cur_text goes on
                   # accumulating whatever follows (indentation, notes...) and must not be used here.
                   @result[id] = @cur_tra
               end
           end
           protected :yield_next_unit

           # Closing tag: stores the text accumulated for <source> / <target>,
           # and emits the unit when it ends.
           def tag_end(element)
               case element
               when 'source'
                   @cur_source = @cur_source + @cur_text
               when 'target'
                   @cur_tra = @cur_tra + @cur_text
               when 'trans-unit', 'unit'
                   yield_next_unit(@cur_unit)
               end
           end

           # Character data: accumulated only once a <source> or <target> has been opened in the unit.
           def text(text)
               @cur_text = @cur_text + text unless @cur_text.nil?
           end
       end
       
       ##
       # Callback which launches the action for each segment, not for each unit.
       # Every segment is emitted with the identifier "<unit id>/<segment id>".
       #
       # XLIFF 1: segments are the <mrk mtype="seg"> elements; those met while
       # <seg-source> is open are sources, the others are targets, both indexed by
       # their 'mid' attribute (or a running counter when absent). They are emitted
       # when </trans-unit> is met; a unit without segmented source is emitted as a whole.
       #
       # XLIFF 2: each <segment> is emitted when it is closed, identified by its 'id'
       # attribute (or a running counter when absent).
       class Xliff_Seg_Callback < Xliff_Callback
           def initialize(units)
               super(units)
           end

           def tag_start(element, attributes)
               case element
               # ------- XLIFF 1
               when 'trans-unit'
                   @cur_seg = 0
                   @sub_src = {}
                   @sub_tra = {}
                   super(element, attributes)
               when 'seg-source'
                   @in_seg_source = true
               when 'mrk'
                   if attributes['mtype'] == 'seg'
                       # The unit id itself is left untouched here: the segment id is
                       # appended once, when the segments are emitted at </trans-unit>.
                       @cur_seg = attributes.key?('mid') ? attributes['mid'] : @cur_seg + 1
                       @mrk_level = 0
                       super(@in_seg_source ? 'source' : 'target', attributes) # init new segment
                       @cur_source = ''
                   elsif !@mrk_level.nil?
                       # Non-segment <mrk> nested inside a segment: remember the depth
                       @mrk_level += 1
                   end
               # ------- XLIFF 2
               when 'unit'
                   @cur_seg = 0
                   super(element, attributes)
               when 'segment'
                   @cur_unit = @cur_unit.to_s unless @cur_unit.is_a? String
                   @cur_seg = attributes.key?('id') ? attributes['id'] : @cur_seg + 1
                   @cur_unit = @cur_unit + '/' + @cur_seg.to_s
                   @cur_text = nil
                   @cur_source = ''
                   @cur_tra = nil   # remains nil until there is at least one <target>
               else
                   super(element, attributes)
               end
           end

           def tag_end(element)
               if element == 'seg-source'    # XLIFF 1
                   @in_seg_source = false
               elsif element == 'mrk' && !@mrk_level.nil?
                   if @mrk_level > 0
                       @mrk_level -= 1
                   else    # 0 => end of the <mrk mtype="seg">
                       if @in_seg_source
                           super('source')
                           @sub_src[@cur_seg] = @cur_source
                           @cur_source = ''
                       else
                           super('target')
                           @sub_tra[@cur_seg] = @cur_tra
                           @cur_tra = nil
                       end
                       @mrk_level = nil
                   end
               elsif element == 'target' && @cur_tra.nil?
                   # Segmented target (XLIFF 1): every segment has already been stored
                   # when its <mrk> was closed and the buffer is nil, nothing to append.
               elsif element == 'trans-unit'
                   if @sub_src.empty?
                       yield_next_unit(@cur_unit)
                   else
                       @sub_src.each_key do |seg_id|
                           @cur_source = @sub_src[seg_id]
                           @cur_tra = @sub_tra[seg_id]
                           # Interpolation: segment ids are Integers when 'mid' was absent
                           yield_next_unit("#{@cur_unit}/#{seg_id}")
                       end
                   end
               elsif element == 'segment'     # XLIFF 2
                   # Use the same algorithm normally reserved to units
                   super('unit')
                   # Back to the identifier of the enclosing unit for the next segment
                   @cur_unit = @cur_unit.sub(/\/#{Regexp.escape(@cur_seg.to_s)}\z/, '')
               elsif element == 'unit'
                   # Do not register the unit, its segments have been treated by the previous branch
               else
                   super(element)
               end
           end
       end
       
       ##
       # Common part of the XLIFF writers: keeps the collaborators given to the
       # constructor and writes the XML declaration to the target.
       class XliffWriter
           # Factory to choose between version 1 and 2: an Xliff2Writer is built when
           # options['--version'] converts to a number >= 2.0, an Xliff1Writer
           # otherwise (including when the option is not set).
           def self.create(target, file, options, culter, translations_map)
               writer_class = options['--version'].to_f < 2.0 ? Xliff1Writer : Xliff2Writer
               writer_class.new(target, file, options, culter, translations_map)
           end

           # target::           object receiving the output through puts / print
           # file::             name written as 'original' of the first <file>
           # options::          Hash of options ('--version', language keys...)
           # culter::           segmenter answering to cut(text), or nil
           # translations_map:: Hash consulted by the sub-classes for translations
           def initialize(target, file, options, culter, translations_map)
               @target = target
               @file = file
               @options = options
               @culter = culter
               @translations_map = translations_map
               @target.puts '<?xml version="1.0" encoding="UTF-8"?>'
               @prev_file = file
           end
       end
       
       ##
       # Writer producing an XLIFF 1.x document on the target.
       #
       # Usage: build it (the header and the first <file> are written), call
       # write_unit for each unit, then close.
       class Xliff1Writer < XliffWriter
           # Same parameters as XliffWriter#initialize.
           # options['--version'] is forced to '1.2' when it is not a 1.x version:
           # the Hash received is updated, it is the one consulted later by write_unit.
           def initialize(target, file, options, culter, translations_map)
               super(target, file, options, culter, translations_map)
               options['--version'] = '1.2' unless options['--version'].to_s =~ /1\.\d/
               version = options['--version']
               # The 'version' attribute is required on <xliff> in XLIFF 1.x
               @target.puts "<xliff version='#{version}' xmlns='urn:oasis:names:tc:xliff:document:#{version}'>"
               @target.puts "    <file original=#{xml_attr(file)} #{xliff_lang_spec(options)}><body>"
           end

           # Builds the language attributes of <file> from the options: the first key
           # looking like a source language option (resp. target language option) gives
           # the source-language (resp. target-language) attribute.
           # Returns a String, empty when no such option exists.
           def xliff_lang_spec(options)
               src_key = options.keys.find { |item| item =~ /lang/i && item =~ /s(ou)?rc/ }
               tra_key = options.keys.find { |item| item =~ /lang/i && item =~ /t(ra|arget)/ }
               res = ''
               res += " source-language='#{options[src_key]}'" unless src_key.nil?
               res += " target-language='#{options[tra_key]}'" unless tra_key.nil?
               res
           end

           # Writes one <trans-unit>. A new <file> is opened first when unit.file is set
           # and differs from the file of the previous unit.
           #
           # unit:: object answering to file, id and srcText, and optionally to traText.
           #        Without traText, the translation is searched in the translations map
           #        using unit.id as key.
           #
           # With a culter and version 1.2, a <seg-source> is added when the source is cut
           # in more than one segment.
           def write_unit(unit)
               if !unit.file.nil? && unit.file != @prev_file
                   @target.puts '    </body></file>'
                   @target.puts "    <file original=#{xml_attr(unit.file)} #{xliff_lang_spec(@options)}><body>"
                   @prev_file = unit.file
               end
               @target.puts "        <trans-unit id=#{xml_attr(unit.id)}>"
               @target.puts "            <source>#{unit.srcText.encode(:xml => :text)}</source>"
               write_seg_source(unit.srcText) if !@culter.nil? && @options['--version'].to_s =~ /1\.2/
               if unit.respond_to? 'traText'
                   tra = unit.traText
                   @target.puts "            <target>#{tra.encode(:xml => :text)}</target>" unless tra.nil? || tra == ''
               elsif !@translations_map[unit.id].nil?
                   @target.puts "            <target>#{@translations_map[unit.id].encode(:xml => :text)}</target>"
               end
               @target.puts '        </trans-unit>'
           end

           # Closes the current <file> and the document.
           def close
               @target.puts '    </body></file>'
               @target.puts '</xliff>'
           end

           # Writes <seg-source> with one <mrk mtype="seg"> per segment (mid starting at 1),
           # only when the culter finds more than one segment. With the option
           # '--detach-initial-blank', the blank characters starting a segment are
           # written before its <mrk> instead of inside.
           def write_seg_source(text)
               segments = @culter.cut(text)
               return unless segments.count > 1
               detach = @options.key?('--detach-initial-blank')
               @target.print '            <seg-source>'
               segments.each_with_index do |txt, idx|
                   # \A: only the very beginning of the segment, not the beginning of an inner line
                   blank = detach ? txt[/\A\s+/] : nil
                   unless blank.nil?
                       @target.print blank
                       txt = txt[blank.length..-1]
                   end
                   @target.print "<mrk mtype=\"seg\" mid=\"#{idx + 1}\">#{txt.encode(:xml => :text)}</mrk>"
               end
               @target.puts '</seg-source>'
           end
           private :write_seg_source

           # Value converted to a quoted and escaped XML attribute value.
           def xml_attr(value)
               value.to_s.encode(:xml => :attr)
           end
           private :xml_attr
       end
       
       ##
       # Writer producing an XLIFF 2.x document on the target.
       #
       # Usage: build it (the header and the first <file> are written), call
       # write_unit for each unit, then close.
       class Xliff2Writer < XliffWriter
           # Same parameters as XliffWriter#initialize.
           # options['--version'] is forced to '2.0' when it is not a 2.x version
           # (the Hash received is updated).
           def initialize(target, file, options, culter, translations_map)
               super(target, file, options, culter, translations_map)
               options['--version'] = '2.0' unless options['--version'].to_s =~ /2\.\d/
               @target.puts '<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="' + options['--version'].to_s + '"  xmlns:fs="urn:oasis:names:tc:xliff:fs:2.0" ' + xliff_lang_spec(options) + '>'
               # XLIFF 2 has no <body>: units are direct children of <file>,
               # and the 'id' attribute of <file> is required
               @file_count = 1
               @target.puts "    <file id=\"f#{@file_count}\" original=#{xml_attr(file)}>"
               @prev_file = file
           end

           # Builds the language attributes of <xliff> from the options: the first key
           # looking like a source language option (resp. target language option) gives
           # the srcLang (resp. trgLang) attribute.
           # Returns a String, empty when no such option exists.
           def xliff_lang_spec(options)
               src_key = options.keys.find { |item| item =~ /lang/i && item =~ /s(ou)?rc/ }
               tra_key = options.keys.find { |item| item =~ /lang/i && item =~ /t(ra|arget)/ }
               res = ''
               res += " srcLang='#{options[src_key]}'" unless src_key.nil?
               res += " trgLang='#{options[tra_key]}'" unless tra_key.nil?
               res
           end

           # Writes one <unit>. A new <file> is opened first when unit.file is set
           # and differs from the file of the previous unit.
           #
           # unit:: object answering to file, id and srcText, and optionally to traText.
           #
           # With a culter, source and translation are cut and written as parallel
           # <segment> elements (the n-th target goes with the n-th source, when it exists).
           # Without culter the unit is written as a single segment.
           def write_unit(unit)
               if !unit.file.nil? && unit.file != @prev_file
                   @file_count += 1
                   @target.puts '    </file>'
                   @target.puts "    <file id=\"f#{@file_count}\" original=#{xml_attr(unit.file)}>"
                   @prev_file = unit.file
               end
               @target.puts "        <unit id=#{xml_attr(unit.id)}>"
               tra_text = unit.respond_to?('traText') ? unit.traText : nil
               if @culter.nil?
                   # in XLIFF 2 there must be at least one segment
                   write_segment(unit.srcText, tra_text)
               else
                   tra = tra_text.nil? ? nil : @culter.cut(tra_text)
                   detach = @options.key?('--detach-initial-blank')
                   @culter.cut(unit.srcText).each_with_index do |src_seg, i|
                       # nil when there are fewer target segments than source segments
                       tra_seg = tra.nil? ? nil : tra[i]
                       src_seg, tra_seg = write_ignorable(src_seg, tra_seg) if detach
                       write_segment(src_seg, tra_seg)
                   end
               end
               @target.puts '        </unit>'
           end

           # Closes the current <file> and the document.
           def close
               @target.puts '    </file>'
               @target.puts '</xliff>'
           end

           # Writes one <segment>; <target> is omitted when tra is nil.
           def write_segment(src, tra)
               @target.puts '            <segment>'
               @target.puts "                <source>#{src.encode(:xml => :text)}</source>"
               @target.puts "                <target>#{tra.encode(:xml => :text)}</target>" unless tra.nil?
               @target.puts '            </segment>'
           end
           private :write_segment

           # When src starts with blank characters, writes them in an <ignorable>
           # (with the initial blank of tra as <target>, if any).
           # Returns [src, tra] without the blanks which have been written.
           def write_ignorable(src, tra)
               # \A: only the very beginning of the segment, not the beginning of each line
               blank = src[/\A\s+/]
               return [src, tra] if blank.nil?
               @target.puts '            <ignorable>'
               @target.puts "               <source>#{blank}</source>"
               tra_blank = tra.nil? ? nil : tra[/\A\s+/]
               unless tra_blank.nil?
                   @target.puts "               <target>#{tra_blank}</target>"
                   tra = tra[tra_blank.length..-1]
               end
               @target.puts '            </ignorable>'
               [src[blank.length..-1], tra]
           end
           private :write_ignorable

           # Value converted to a quoted and escaped XML attribute value.
           def xml_attr(value)
               value.to_s.encode(:xml => :attr)
           end
           private :xml_attr
       end
       
   end
end
