#
# Filename:	GutenMark.cfg
# Purpose:	Provides various foreign-language customizations for
#		GutenMark software.
# Mods:		11/20/01 RSB.	Created.
#		12/18/01 RSB.	Added US.places.gz to the wordlist.
#				Added german.names.gz to the default.
#		12/23/01 RSB.	Added language names.
#		06/30/02 RSB.	Corrected some comments.
#
# This file provides a set of foreign-language profiles, only one
# of which is actually used by GutenMark.  Each profile begins
# with a line giving the name of the configuration, like so:
#	[name]
# The profile is selected on the GutenMark command-line, like so:
#	GutenMark --profile=name ...
# For example,
#	GutenMark --profile=french Input.txt Output.html
# would correspond to 
#	[french]
# If the "--profile" option is not used, the "english" profile is
# used by default.  If the "--profile" option is used but the
# selected language does not have an explicit profile in this file,
# then GutenMark falls back to a default order in which the
# searching is done:
#	native namelist
#	other namelists, in no particular order
#	placenames, in no particular order
#	native wordlist
#	other wordlists, in no particular order
#
# Following the line with "[name]" on it is a sequence of lines
# defining the order in which the various wordlists and namelists
# are searched.  The reason this is important is that GutenMark will
# accept the FIRST listing of the word that it finds, and so you
# want to search the wordlists native to the language of the text
# prior to searching foreign wordlists.  These files usually have 
# names like
#	something.names.gz
#	something.words.gz
# where "something" represents the name of a language.  Also, it 
# usually makes sense to search all of the namelists
# before searching all of the wordlists.  Wildcards can also
# be used, as in 
#	*.names.gz
#	*.words.gz
# GutenMark will gracefully handle missing namelists/wordlists,
# and won't bother to examine the same namelist/wordlist twice,
# so you needn't worry about being too careful with them.
#
# Each wordlist/namelist is categorized as "native" or as "foreign".
# The difference between "native" and "foreign" words is simply
# that GutenMark will automatically italicize the foreign words.
# In English text, foreign words other than proper names are conventionally
# italicized, though this practice seems to be less common than it once was.
# If this is not appropriate for the particular language of the
# text, a profile should be used in which all of the wordlists
# are marked as "native".  Usually, all namelists are marked 
# as "native", even when they are foreign names, because names
# aren't automatically italicized.
#
# In addition, specific language names can be associated with each
# wordlist.  This is useful when multiple wordlists appear for a given
# language (such as english.names.gz, US.places.gz, and english.words.gz,
# which are all in the English language) and to distinguish usage when
# the same word appears in many wordlists (such as "die" in English,
# German, and Latin).  
#
# Finally, blank lines and comments are ignored.  A comment is a line
# that begins with '#' in column 1.

# A profile in which NO wordlists are processed: the section is
# deliberately left empty, so it names no namelists or wordlists.
# Select it with
#	GutenMark --profile=none ...
[none]

# A short English-language profile.  This particular language selection
# and ordering is based on my own (RSB) very subjective experience with
# PG texts, and isn't intended to be a judgement on the importance
# of any particular language.  The general syntax is
#	native=filename [language]
#		or
#	foreign=filename [language]
# The entries are listed in the order in which they are to be searched.
# Because GutenMark accepts the FIRST listing of a word that it finds,
# the namelists and the English wordlist appear ahead of the foreign
# wordlists.
#
# You may wonder why french.names.gz is classified as "native" in the
# English-language profile below.  The reason is that French names, though
# a part of the French language, would not typically be italicized in
# English.
#
# The actual language names used are not really important, but it is
# important that wordlists belonging to the same language carry the
# same label (for example, english.names.gz, US.places.gz, and
# english.words.gz are all labeled "English" below).  If the language
# name is omitted, it will default to either "native" or "foreign"
# (presumably matching the keyword used on that line).
[english]
native=special.words.gz English
native=english.names.gz English
native=US.places.gz English
native=french.names.gz French
native=english.words.gz English
foreign=german2.words.gz German
foreign=german.words.gz German
foreign=french.words.gz French
foreign=latin.words.gz Latin
foreign=italian.words.gz Italian
foreign=spanish.words.gz Spanish

# A BIG profile, for use if you don't mind spending a little more
# processing power to get a better result.  Compared with the [english]
# profile, it adds NonUS.places.gz and the Norwegian, Gaelic, Danish,
# Swedish, and Finnish wordlists; the remaining entries are the same
# and appear in the same relative order.
#
# The NonUS.places.gz wordlist is an interesting case.  It consists
# entirely of placenames outside of the U.S. (though not necessarily
# outside of the English-speaking world), but is characterized as
# being "English".  This is because the wordlist was compiled by
# an English-speaking agency, and therefore reflects how these
# places might be referred to in English.  Conversely,
# french.names.gz is compiled from French sources (even though
# categorized as "native" for our purposes), and therefore
# presumably represents names as they would be used by
# French-speaking people.
#
# A good general rule would be to define the language field
# not in terms of the supposed contents of the wordlist, but
# rather in terms of the native language of the person or
# agency who has compiled the wordlist.

[english_all]
native=special.words.gz English
native=english.names.gz English
native=US.places.gz English
native=french.names.gz French
native=NonUS.places.gz English
native=english.words.gz English
foreign=german2.words.gz German
foreign=german.words.gz German
foreign=french.words.gz French
foreign=latin.words.gz Latin
foreign=italian.words.gz Italian
foreign=spanish.words.gz Spanish
foreign=norwegian.words.gz Norwegian
foreign=gaelic.words.gz Gaelic
foreign=danish.words.gz Danish
foreign=swedish.words.gz Swedish
foreign=finnish.words.gz Finnish

# A profile for testing the performance of uncompressed
# wordlists vs. compressed wordlists.  Every entry below names a
# file without the ".gz" suffix.  No language names are given, so
# each entry falls back to the default label ("native" or "foreign").
# The wildcard entries come after the explicit ones; since GutenMark
# won't examine the same namelist/wordlist twice, a file that is matched
# both explicitly and by a wildcard causes no harm.
# NOTE(review): nonenglish.words is not referenced by any other profile
# visible in this file -- confirm that this wordlist actually exists.
[test]
native=special.words
native=english.names
native=US.places
native=*.names
native=*.places
native=english.words
foreign=nonenglish.words
foreign=german2.words
foreign=german.words
foreign=french.words
foreign=latin.words
foreign=italian.words
foreign=*.words

# Here's a template, to act as a starting point for another profile.
# Copy it, and replace "mylanguage" (in the profile name and in the
# two filenames) with the name of your language.  The entries follow
# the same sequence as GutenMark's default search order:
#	native namelist
#	other namelists
#	placenames
#	native wordlist
#	other wordlists
# No language names are given here, so each entry gets the default
# label ("native" or "foreign"); append a language name to an entry
# if you need to distinguish the languages.
[mylanguage]
native=mylanguage.names.gz
native=*.names.gz
native=*.places.gz
native=mylanguage.words.gz
foreign=*.words.gz