#! /usr/bin/env ruby 

# Pull every "--fmt:<name>=<value>" switch out of ARGV in a single pass.
# Each name/value pair is recorded in formatOptions (when a name is repeated,
# the last occurrence wins) and the switch itself is removed, so ARGV keeps
# only the arguments that are not format switches.
fmt_switch = /^--fmt:([\w\-]+)=(.+)$/
formatOptions = {}
ARGV.reject! do |arg|
    found = fmt_switch.match(arg)
    formatOptions[found[1]] = found[2] if found
    found
end

# Print a usage summary and stop when nothing is left on the command line.
# The summary lists every switch the script accepts, not only --fmt.
# NOTE: this test runs before GetoptLong has consumed --seg / --src-lang /
# --extract-tra, so those switches still count as arguments here; a command
# line made only of switches is not caught by this check.
if ARGV.empty?
    puts "Syntax: #{__FILE__} [--fmt:option=value]{0,*} [--seg simple|<rules.srx>] " \
         "[--src-lang <lang>] [--extract-tra] <native original>"
    exit
end

$LOAD_PATH << "#{File.dirname(__FILE__)}/../lib"                # For non-standard installation

require 'getoptlong'


# Parse the remaining switches with GetoptLong and store each one in options.
# The script looks switches up by the first name listed for them
# ('--seg', '--src-lang', '--extract-tra'); the other names are aliases.
#   --seg / -s                  requires a value
#   --src-lang / --srcLang / -o value is optional
#   --extract-tra / --tra / -t  takes no value
options = {}
parser = GetoptLong.new(
  [ '--seg', '-s', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--src-lang', '--srcLang', '-o', GetoptLong::OPTIONAL_ARGUMENT ],
  [ '--extract-tra', '--tra', '-t', GetoptLong::NO_ARGUMENT ]
)
parser.each do |name, value|
    options[name] = value
end

# Build the optional segmenter from --seg; culter stays nil when no
# segmentation is requested.
# The Culter libraries are required lazily, so Culter is only needed when
# segmentation is actually asked for.
culter = nil
seg_mode = options['--seg']
if seg_mode =~ /^simple$/
    require 'culter/simple'
    culter = Culter::Simple.new
elsif seg_mode =~ /\.srx$/
    require 'culter/srx'
    srx_rules = Culter::SRX::SrxDocument.new(seg_mode)
    src_lang = options['--src-lang']
    # --src-lang takes an optional argument: GetoptLong reports an omitted
    # value as an empty string, which must be treated like a missing language.
    if src_lang.nil? || src_lang.empty?
        culter = srx_rules.segmenter('x-unknown')
        puts "SRX found, but missing source language, segmentation will be based on common rules"
    else
        culter = srx_rules.segmenter(src_lang)
    end
elsif seg_mode
    # Any other value used to be dropped silently, producing unsegmented output.
    warn "Ignoring --seg #{seg_mode}: expected 'simple' or a file ending in .srx; output will not be segmented"
end

# Take the input document from the command line and open it with the
# Spongiae format registered for its extension.
file = ARGV.shift
# A command line made only of switches leaves nothing here; stop with a clear
# message instead of failing later on a nil path.
abort "Missing <native original>: no input file was given on the command line" if file.nil?

require 'spongiae/formats/all'

# Extension = trailing ".<word characters>" of the path, or nil when absent.
ext = file[/\.(\w+)$/, 1]
format_family = Spongiae::Formats::ALL[ext]
if format_family.nil?
    abort "Unsupported file #{file}: no Spongiae format is registered for extension #{ext.inspect}"
end
# NOTE(review): sniff presumably picks the concrete format by inspecting the
# file; its return value is used as-is -- confirm it cannot be nil.
fmt = format_family.sniff(file)
fmt.load!
reader = fmt.create(file,formatOptions)

# Choose which side of each unit is extracted and derive the output file name.
# With --extract-tra, text(unit) returns the unit's traText and the output is
# "<input without extension>-target.txt"; otherwise it returns srcText and the
# output is "<input without extension>-source.txt".
if options['--extract-tra']
    def text(unit)
        unit.traText
    end
    suffix = '-target.txt'
else
    def text(unit)
        unit.srcText
    end
    suffix = '-source.txt'
end
# Strip only the trailing extension. An unanchored global replacement would
# also rewrite dots in directory names or multi-part names (a.b/c.xlf,
# x.tar.gz). Appending the suffix also ensures dest never equals the input
# path when the input has no extension.
dest = file.sub(/\.\w+\z/, '') + suffix
puts "Producing #{dest}"

# Writes one extracted text to the output, one line per segment.
#
# The behaviour is selected by the culter argument itself rather than by which
# method body happened to be defined at load time.
#
# @param f      [#puts] output stream receiving the lines
# @param culter [#cut, nil] segmenter whose cut(str) yields each segment to
#               its block, or nil to write str unsegmented
# @param str    text to write
def display(f, culter, str)
    if culter.nil?
        f.puts str
    else
        culter.cut(str) { |seg| f.puts seg }
    end
end

# Write the selected text of every unit supplied by the reader to dest.
# The file is opened for writing with UTF-8 as its encoding, and the block
# form closes it once all units have been processed.
File.open(dest, "w:UTF-8") do |out|
    reader.read_unit { |unit| display(out, culter, text(unit)) }
end
