#! /usr/bin/env perl

=head1 DESCRIPTION

This perl script creates database or tables for Cyclotis database.

=head1 INSTALLATION REQUIREMENTS

This is not a cgi/module : it must be run in command line (or integrated in another tool, such as the Web creation HTML interface and web services).
You can use it in any machine with Perl, DBI and DBD::Pg installed, even if the PostgreSQL server is installed elsewhere
(but in this case you must check that the PostgreSQL server gives creation rights to users connected via network sockets).

=head1 USAGE

The general format for the command line is C<perl dbcreator.pl [command] [database-name] [other parameters]>

=head2 MODES

First, you must decide in which mode the database will work.

The mode decides which field Postgres may add to every line in order to make the distinction between tables.

To do this, the best solution is to define CYCLOTIS_MODE environment variable. You can also give the mode as first parameter of dbcreator.pl, before command name. However, do not forget that this must be decided before you create the first table, and using dbcreator.pl on an already created database with a different mode may cause errors.

Existing modes:

=over 1

:inc by_void/TableCreator: "=head1 MODE by_void" "=item by_void" 
:inc by_id/noRules/TableCreator: "=head1 MODE by_id::noRules" "=item by_id::noRules" 
:inc by_id/withRules/TableCreator: "=head1 MODE by_id::withRules" "=item by_id::withRules" 
:inc by_path/noLangs/TableCreator: "=head1 MODE by_path::noLangs" "=item by_path:noLangs" 
:inc by_path/withLangs/TableCreator: "=head1 MODE by_path::withLangs" "=item by_path:withLangs" 
:inc by_code/TableCreator: "=head1 MODE by_code" "=item by_code" 
:inc by_code/theme/TableCreator: "=head1 MODE by_code::theme" "=item by_code::theme" 
:inc by_code/lang/TableCreator: "=head1 MODE by_code::lang" "=item by_code::lang" 

=back

:inc DbCreator: "=head2 Database name"

=head2 COMMANDS

=cut

my $command = shift || 'help';

=head3 perl dbcreator.pl help [command] 

Display help for given command using pod2txt

=cut
if ($command =~ /help/) {
   my $cmd = shift;
   
   if (! $cmd) {
		print STDERR << "EOF";
perl $0 <command> <parameters>

List of commands (type 'perl help <command-name> for details):	
EOF
		open (THIS, $0); my @modes;
		while (<THIS>) { push(@modes,$1) and print "\t$1\n" if /=item/ and m!:inc ([\w\/]+)/TableCreator!;  }
		close (THIS);
		print STDERR << "EOF";

Also available help pages:
	dbname			Detail of the syntax to specify database and parameters
	
Modes (to be selected via CYCLOTIS_MODE environment variable) (type help with given mode for details)
EOF
		while (my $m = shift @modes) { print STDERR "\t$m\n"; }
   } elsif ($cmd =~ /^dbname/) {
		require File::Basename; open (THIS, File::Basename::dirname($0) . "/DbCreator.pm"); open (CONV, "| pod2text"); my $st = 0;
		while (<THIS>) { 
			if (/^=head2 Database name/) { s/head2/head1/; $st = 1; }
			if ($st and /=cut/) { close(THIS); close(CONV); exit; }
			print CONV $_ if $st;
		}
   } elsif ($cmd =~ /^by/) {		
		require File::Basename; $cmd =~ s!::!/!g; open (THIS, File::Basename::dirname($0) . "/$cmd/TableCreator.pm"); open (CONV, "| pod2text"); 
		while (<THIS>) { 
			if (/=cut/) { close(THIS); close(CONV); exit; }
			print CONV $_;
		}
   } else {
		open (THIS, $0); open (CONV, "| pod2text"); my $st = 0;
		while (<THIS>) { 
			if (/=head3 perl dbcreator.pl ($cmd)\b/) { s/head3/head1/; $st = 1; }
			print CONV $_ if $st;
			if ($st and /=cut/) { close(THIS); close(CONV); exit; }
		}
   }
   exit;
}

require DbCreator;

=head3 perl dbcreator.pl create-db [database name]

Creates an empty database with only procedures loaded

=cut
if ($command =~ /create\W?db/) {
   DbCreator::createDatabase(@ARGV);
   exit;
}

require DBI;	# now, all commands (except create-db) require DBI.

my $dbParam = shift || die "$0 $command : missing database name"; 
my $mode = $ENV{CYCLOTIS_MODE} || 'by_void'; if ($dbParam =~ /^by_/) { $mode = $dbParam; $dbParam = shift; } $mode = "by_$mode" unless $mode =~ /^by_/;
my $dbiSpec = &DbCreator::analyse_dbSpec($dbParam); # internal format -> dbi string
my ($user, $pass, $owner);
if ($dbiSpec =~ s/username=(.+?)(;|$)//) { $user = $1; }
if ($dbiSpec =~ s/password=(.+?)(;|$)//) { $pass = $1; }
if ($dbiSpec =~ s/owner=(.+?)(;|$)//) { $owner = $1; }
my $dbh = DBI->connect($dbiSpec,$user,$pass);

=head3 perl dbcreator.pl init-db [database name]

Creates minimal contents for a database. Identical to create-db except that the database catalog must already exist and be empty

=cut
if ($command =~ /init\W?db/) {
   DbCreator::initDatabase($dbParam);
   exit;
}


=head3 perl dbcreator.pl create-schema [database] [schema-name]

Creates a Postgresql schema (i.e. namespace for tables)

Since any DBI pool (dancer's, Apache's, etc.) use one connection per database, even in the same server,
it is more efficient to have only one database with all tables accessed by the same server via HTTP
(the situation is different if you use direct Postgresql access).

On the other hand, all are accessed by the same Postgresql user, so it makes impossible to define privileges
so they must be defined at HTTP level.

=cut
if ($command =~ /create\W?sc/) { print STDERR "Create-schema\n"; &DbCreator::doCreate_schema($dbh, shift, "owner=$owner", @ARGV); $dbh->disconnect; exit; }

=head3 perl dbcreator.pl create-table [database] [table] [parents] 

Creates the table, and its parents if not yet created.

[database] uses the same format as previous command.

Values for parent tables (you can inherit from two tables, separated by ','):

=over 1

=item MEM 

(standard, without context or modification data) : by default.

=item MEMUP 

(memory with modification author/date) : may be associated to a rule to keep one version of a segment

=item MEMX 

(memory with properties and note): stores same content as a TMX

=item MEMID

(memory with context): stores an information to distinguish segments 
Context is not an identifier: the identifier is (src,context)

=item GLOS 

(with context and note, but no author/date): contains a glossary, for terminology 

=item or 

any already created table

=back

In case you use MEMX or MEMID, there is also the possibility to configure type for properties or context column.
The default behaviour is to inherit from standard tables, so to keep their type if they already exist.
If the tables do not exist, the script will use the value of environment variables CYCLOTIS_PROPS_TYPE or CYCLOTIS_CONTEXT_TYPE, 
or I<text> if the variables are not defined. Note that once the tables exist, environment variables are never used again.

You have also the possibility to add one of the two above parameters :

=over 1

=item props=(json|hstore|text)     

Use another type for properties. Possible values are:

=over 2

=item text (default)

Properties are stored in a TEXT column. Format is similar to URL. This format preserves order and supports multiple properties.

=item hstore

Uses Postgresql's type HSTORE. Loads the extension in the database, it not yet existing. 
This binary format has better performances than pure text, but does not preserve order, and does support only one value per property.

=item json

Uses Postgresql's tpe JSON. Requires Postgresql 9.2 or more.
This format preserves order and supports multiple properties (this is non-strict JSON value). 
It is a text format, but more readable than the first type.

=back

=item tool_info=(json|hstore|text|null)

Adds a column which will contain data specific to CAT clients.
Format is the same as for properties, except that default value is null, meaning that we do not want to create this column at all.

=item context=(text|char\d|varchar\d|int)  

Type for the context field.

For context, you can use "char" or "varchar", with a number of characters. Be sure your client uses the correct size.

You can also use an int, in which case the context will be reduced to its Java hash code.

Structured types (json or hstore) are not usable because Postgresql does not implement "=" operator for them.

=back

In this case, if the standard tables already exist, and if almost one column type does not match parameters,
new tables with names suffixed by the first letter of type names will be created.

You can also specify source and target languages, which will be stored in meta_info table.

=cut
if ($command =~ /create\W?table/) { 
	$mode .= '::TableCreator'; eval "require $mode";
	my $TableCreator = bless { dbh => $dbh, dbiSpec => $dbiSpec }, $mode;
	$TableCreator->loadTables($0 =~ m!/! ? substr($0, 0, rindex($0,'/')) : '.', $dbh);
	$TableCreator->doCreate_table(@ARGV, "owner=$owner"); $dbh->disconnect; exit; 
}

=head3 perl dbcreator.pl create-rule [database] [table] [keyfield] [action] [props|note=...]+

Creates a rule associated to the given table, saying in which situation we consider a segment as duplicate
which should be treated as an update (instead of an insertion).

[database] uses the same format as previous command. Table name can contain a schema prefix, else we use 'public'.

Possible keyfields:

=over 1

=item src 

keeps only one version for each source text
(i.e. if anybody inserts a segment with same source, it will replace the previous one)

=item author

keeps only one version for each source+author pair
(i.e. if the same author inserts a segment with same source, it will replace the previous one)

=item context

keeps only one version for each source+context pair
(i.e. if anybody inserts a segment with same source and same context, it will replace the previous one)

=back

Possible actions:

=over 1

=item replace-only

A new line with same value for key fields will replace the old one. Deletions are physically applied. Default value.

=item replace-null

A new line with same value for key fields will replace the old one. Deletions are equivalent to setting translation to null.

This option is useful for applications which use 'all' query to build a cache (like OmegaT in Project mode)
as it enables to retreive deleted segments as well.

=item version

Keep both versions of the segment (old and new one), with changedate as a discriminant.
Deletions are equivalent to affect a changedate without creating a new version.

=back

Next parameters indicate what to do with properties or notes in case the rule implies to update a line instead of insertion.

Default is to replace the properties/notes as well, so what the user will read is the very last inserted properties and notes.
Alternatives include:

=over 1

=item concat

Only if the column is of type TEXT or VARCHAR, the new value is the concatenation of old and new value.

=item merge

Only if the value is of type HSTORE, the new value is a key-per-key replacement. Values which are not in the new entry are kept as is.

=item keep

Do not replace the value, keep the old one.

=back

=cut
if ($command =~ /create\W?rule/) { DbCreator::doCreate_rule($dbh, @ARGV); $dbh->disconnect; exit; }

=head3 perl dbcreator.pl create-function [database] [function-name]

Creates one of the standard Cyclotis SQL functions. As they are not necessary all the time they are not created by default.

Values for parent table:

=over 1

=item tokenize 

cut string by words, and transform them to stems (without grammatical suffixes)

=item score 

alternative score based on levenshtein distance

=back

=cut
if ($command =~ /create\W?fun/) { DbCreator::doCreate_function($dbh, @ARGV); $dbh->disconnect; exit; }


=head3 perl dbcreator.pl create-index [database] [table] [idxname] [lang] 

Creates an index for the given table

Loads GIST extension if necessary

Values for idxname:

=over 1

=item fuzzy

trigram GIST index in the source

=item concSrc

stemming index of the source

=item concTra

stemming index of the translation

=back

operations indicate if you want to index not the field itself but a calculated value. Possibilities include (concatenable using ',')

=over 1

=item untag

Removes tags in text; next parameter can indicate which regular expression you consider as a tag; defaults to <.+?> 

=item tokenize

Postgresql will replace string by succession of stems (terms without grammatical suffixes)

=back

lang parameter used only for concordance indexes: 
it contains language name in english; by default the value is 'simple'.

=cut
if ($command =~ /create\W?in?de?x/) { DbCreator::doCreate_index($dbh, @ARGV); $dbh->disconnect; exit; }


=head3 perl dbcreator.pl create-dict [database] [langName] [dict-file]? [affix-file]? [stop-words]?

Loads an ispell dictionnary

The files must already be in tsearch_data directory from Postgres.

=cut
if ($command =~ /(create|load)\W?(ispell-)?dict/) { DbCreator::doCreate_dictionary($dbh, @ARGV); $dbh->disconnect; exit; }

=head3 perl dbcreator.pl create-text-search-config [database] [langName] [origin] [token=dictionnaries]+

Create a text search configuration, usable in concordance queries

=cut
if ($command =~ /create-text(-search)?-config/) { DbCreator::doCreate_textSearchConfig($dbh, @ARGV); $dbh->disconnect; exit; }

=head3 perl dbcreator.pl exec-script [database] [script file]

The file is a succession of commands, similar to the previous ones, except that we do not repeat database name. create-db is not scriptable.

Sample script:

    create-schema mySchema
    create-table mySchema.table1 memx
    create-rule mySchema.table1 author replace-only
    create-index mySchema.table1 date
    create-index mySchema.table1 concTra fr
    create-table mySchema.table2 memup


=cut
if ($command =~ /exec/) { 
	$mode .= '::TableCreator'; eval "require $mode";
	my $TableCreator = bless { dbh => $dbh, dbiSpec => $dbiSpec }, $mode;
	$TableCreator->loadTables($0 =~ m!/! ? substr($0, 0, rindex($0,'/')) : '.', $dbh);

	open (SCRIPT, shift) || die "Syntax: $0 exec-script [database] [script file name]";
	while (<SCRIPT>) {
		my ($cmd, @args) = split(/\s+/);
		if ($cmd =~ /(create\W?)?sc/) { DbCreator::doCreate_schema($dbh, @args); next; }
		if ($cmd =~ /(create\W?)?table/) { $TableCreator->doCreate_table(@args); next; }
		if ($cmd =~ /(create\W?)?rule/) { DbCreator::doCreate_rule($dbh, @args); next; }
		if ($cmd =~ /(create\W?)?in?de?x/) { DbCreator::doCreate_index($dbh, @args); next; }
		if ($cmd =~ /(create|load\W?)(ispell-)?dict/) { DbCreator::doCreate_dictionary($dbh, @args); next; }
		if ($cmd =~ /(create\W?)text(-search)?-config/) { DbCreator::doCreate_textSearchConfig($dbh, @args); next; }
	}
	close (SCRIPT);
	$dbh->disconnect; exit;
}

die "Unknown command : $command";


=head1 License

Copyright 2013-2015 Silvestris Project (http://www.silvestris-lab.org/)

Licensed under the EUPL, Version 1.1 or – as soon they will be approved by the European Commission - subsequent versions of the EUPL (the "Licence");
You may not use this work except in compliance with the Licence.
You may obtain a copy of the Licence at: http://ec.europa.eu/idabc/eupl

Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the Licence for the specific language governing permissions and limitations under the Licence. 

=cut
