Index: dps/word2x-zero/Makefile.in
diff -c dps/word2x-zero/Makefile.in:1.1.1.1 dps/word2x-zero/Makefile.in:1.1.1.2
*** dps/word2x-zero/Makefile.in:1.1.1.1 Sun May 9 16:13:51 1999
--- dps/word2x-zero/Makefile.in Sun May 9 16:15:34 1999
***************
*** 1,4 ****
! # $Id: Makefile.in,v 1.1.1.1 1999/05/09 15:13:51 dps Exp $
#
# My test system is Linux 2.1.30, gcc 2.7.2, libc 5.4.27
--- 1,4 ----
! # $Id: Makefile.in,v 1.1.1.2 1999/05/09 15:15:34 dps Exp $
#
# My test system is Linux 2.1.30, gcc 2.7.2, libc 5.4.27
***************
*** 11,26 ****
CCFLAGS=-O3 @DEFS@
LIBOBJS=wordwrap.o nullproc.o tblock.o scan_num.o map_chars.o col-align.o \
! compat.o ukdate.o usdate.o num_unit_probe.o part_num_probe.o
AR=ar
RANLIB=@RANLIB@
MYLIBS=liboutfmt.a
READER=reader.o strip.o
FMTS=fmt-text.o fmt-latex.o fmt-html.o
#Set to gopt.o if you lack getopt_long
GETOPT=@getopt@
ALLOCA=@ALLOCA@
! PROGOBJS=$(READER) word2x.o $(FMTS) $(GETOPT) $(ALLOCA) $(MYLIBS)
TARGETS=word2x rtest2
MANP=word2x.1
prefix=@prefix@
--- 11,27 ----
CCFLAGS=-O3 @DEFS@
LIBOBJS=wordwrap.o nullproc.o tblock.o scan_num.o map_chars.o col-align.o \
! compat.o num_unit_probe.o part_num_probe.o
AR=ar
RANLIB=@RANLIB@
MYLIBS=liboutfmt.a
READER=reader.o strip.o
FMTS=fmt-text.o fmt-latex.o fmt-html.o
+ DATEFMTS=dedate.o deL1date.o deHTMLdate.o ukdate.o usdate.o
#Set to gopt.o if you lack getopt_long
GETOPT=@getopt@
ALLOCA=@ALLOCA@
! PROGOBJS=$(READER) word2x.o $(FMTS) $(DATEFMTS) $(GETOPT) $(ALLOCA) $(MYLIBS)
TARGETS=word2x rtest2
MANP=word2x.1
prefix=@prefix@
***************
*** 50,64 ****
rm -f $(TARGETS)
rm -f config.h Makefile
rm -f config.cache config.status config.log *~
! word2x.tar.gz: clobber configure config.h.in configure.in Makefile.in
chmod a-w configure
tar -C .. -czf word2x.tar.gz word2x \
--exclude word2x/RCS --exclude word2x/word2x.tar.gz \
--exclude word2x/samples --exclude word2x/config.cache \
--exclude word2x/config.status --exclude word2x/config.log \
! --exclude word2x/catdoc.c --exclude word2x/catdoc.msg
chmod 755 configure
liboutfmt.a: $(LIBOBJS)
$(AR) rc $@ $(LIBOBJS)
--- 51,85 ----
rm -f $(TARGETS)
rm -f config.h Makefile
rm -f config.cache config.status config.log *~
+ rm -f cfg-script make-script
! word2x.tar.gz: clobber configure config.h.in configure.in Makefile.in transcript
chmod a-w configure
tar -C .. -czf word2x.tar.gz word2x \
--exclude word2x/RCS --exclude word2x/word2x.tar.gz \
--exclude word2x/samples --exclude word2x/config.cache \
--exclude word2x/config.status --exclude word2x/config.log \
! --exclude word2x/catdoc.c --exclude word2x/catdoc.msg \
! --exclude word2x/config.h --exclude word2x/Makefile \
! --exclude word2x/config.cache --exclude word2x/config.status \
! --exclude word2x/config.log
chmod 755 configure
+ # Build the transcript from captured configure/make output (captures are deleted after use).
+ transcript: cfg-script make-script
+ 	@echo "word2x build transcript" >$@
+ 	@echo "Note: the prompts are simulated. All the rest is real." >>$@
+ 	@echo "Simulation> ./configure" >>$@
+ 	@cat cfg-script >>$@
+ 	@echo "Simulation> make all" >>$@
+ 	@cat make-script >>$@
+ 	rm -f cfg-script make-script
+ # Capture a from-scratch ./configure run (stale cache and config.h are removed first).
+ cfg-script:
+ 	@rm -f config.cache config.h
+ 	./configure 2>&1 | tee $@
+ # Capture a full build; $(MAKE), not a literal make, so flags and the jobserver carry over.
+ make-script: config.h
+ 	$(MAKE) all 2>&1 | tee $@
liboutfmt.a: $(LIBOBJS)
$(AR) rc $@ $(LIBOBJS)
Index: dps/word2x-zero/README
diff -c dps/word2x-zero/README:1.1.1.1 dps/word2x-zero/README:1.1.1.2
*** dps/word2x-zero/README:1.1.1.1 Sun May 9 16:13:51 1999
--- dps/word2x-zero/README Sun May 9 16:15:34 1999
***************
*** 1,16 ****
! $Id: README,v 1.1.1.1 1999/05/09 15:13:51 dps Exp $
! What it is new in version 0.003 of word2x
word2x-0.003 is version word2x-0.002 with a major bug in strip.cc
eliminated. word2x-0.002 was 0.001 retro-fitted with some quite new
junk filtering code with lots of tunable parameters (i.e. all of
tune.h). This code is extracted from the envolving, and currently
! incomplete source tree of the next major release. (When this happens I
will stop supporting or maintaing any 0.00x versions).
The major change is much better junk filtering, losing less text and
! throwing out more junk and unicode documents should now
work. Increasing numbers of problem document which have OLE junk in
places that break the code are appearing. Splitting the document with
lls (from the LAOLA package) and attacking the WordDocument stream
--- 1,24 ----
! $Id: README,v 1.1.1.2 1999/05/09 15:15:34 dps Exp $
! What is new in version 0.004 of word2x
+ Squashed a stupid bug in word2x_junk_filter::filter_junk which ignored the
+ last character read.
+
+ Added German support from word2x port EX2.
+
+
+ What was new in version 0.003 of word2x
+
word2x-0.003 is version word2x-0.002 with a major bug in strip.cc
eliminated. word2x-0.002 was 0.001 retro-fitted with some quite new
junk filtering code with lots of tunable parameters (i.e. all of
tune.h). This code is extracted from the envolving, and currently
! incomplete, source tree of the next major release. (When this happens I
will stop supporting or maintaing any 0.00x versions).
The major change is much better junk filtering, losing less text and
! throwing out more junk; unicode documents should now
work. Increasing numbers of problem document which have OLE junk in
places that break the code are appearing. Splitting the document with
lls (from the LAOLA package) and attacking the WordDocument stream
***************
*** 21,27 ****
Documents that do cause problems after the suggested work-around to
word2x@duncan.telstar.net please. The immediate fix is to try one of
the other two programs. (Free software people are prepared to
! co-operate with the "competition").
Installing word2x
--- 29,38 ----
Documents that do cause problems after the suggested work-around to
word2x@duncan.telstar.net please. The immediate fix is to try one of
the other two programs. (Free software people are prepared to
! co-operate with the "competition"). There are links to all the
! "competition" I know of on the word2x home page at
! http://word2x.alcom.co.uk (hosted by alcom.co.uk free of charge,
! despite the fact that charges normally apply).
Installing word2x
Index: dps/word2x-zero/confdefs.h
diff -c /dev/null dps/word2x-zero/confdefs.h:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/confdefs.h Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1 ----
+
Index: dps/word2x-zero/config.log
diff -c /dev/null dps/word2x-zero/config.log:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/config.log Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1,6 ----
+ This file contains any messages produced by compilers while
+ running configure, to aid debugging if configure makes a mistake.
+
+ configure:526: checking for gcc
+ configure:603: checking whether the C compiler (gcc ) works
+ configure:617: gcc -o conftest conftest.c 1>&5
Index: dps/word2x-zero/conftest
Index: dps/word2x-zero/conftest.c
diff -c /dev/null dps/word2x-zero/conftest.c:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/conftest.c Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1,3 ----
+ #line 613 "configure"
+ #include "confdefs.h"
+ main(){return(0);}
Index: dps/word2x-zero/deHTMLdate.cc
diff -c /dev/null dps/word2x-zero/deHTMLdate.cc:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/deHTMLdate.cc Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1,40 ----
+ // Date formatter for German dates (in German)
+ // With correct a-umlaut in HTML encoding
+ // Note: Using escape sequences in the <title> tag is not *really*
+ // defined. You may want to use deL1 for your HTML stuff
+ // 8/1998 jk
+ #include "config.h"
+
+ #ifdef TM_IN_SYS_TIME
+ #include <sys/time.h>
+ #else
+ #include <time.h>
+ #endif
+
+ #ifdef HAVE_STRING_H
+ #include <string.h>
+ #else /* do not have string.h */
+ #include <strings.h>
+ #endif /* HAVE_STRING_H */
+ #define __EXCLUDE_READER_CLASSES
+ #include "lib.h"
+ // Format when (local time) as "D. Monat YYYY"; returns a strdup()ed string.
+ char *deHTML_date(time_t when)
+ {
+   static const char *months[]=
+   {
+     "Januar", "Februar", "M&auml;rz", "April", // a-umlaut as an HTML entity
+     "Mai", "Juni", "Juli", "August",
+     "September", "Oktober", "November", "Dezember",
+   };
+   // months[] is indexed by tm_mon, which counts from 0 (January).
+   struct tm *tim;
+   char date_buf[200];
+
+   tim=localtime(&when);
+   // NOTE(review): localtime() can return NULL; that case is not handled here.
+   sprintf(date_buf, "%d. %s %d", tim->tm_mday,
+ 	  months[tim->tm_mon], 1900+tim->tm_year);
+   // tm_year counts from 1900, hence the offset above.
+   return strdup(date_buf);
+ }
Index: dps/word2x-zero/deL1date.cc
diff -c /dev/null dps/word2x-zero/deL1date.cc:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/deL1date.cc Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1,38 ----
+ // Date formatter for German dates (in German)
+ // With correct a-umlaut in ISO-8859-1 (Latin-1) charset
+ // 8/1998 jk
+ #include "config.h"
+
+ #ifdef TM_IN_SYS_TIME
+ #include <sys/time.h>
+ #else
+ #include <time.h>
+ #endif
+
+ #ifdef HAVE_STRING_H
+ #include <string.h>
+ #else /* do not have string.h */
+ #include <strings.h>
+ #endif /* HAVE_STRING_H */
+ #define __EXCLUDE_READER_CLASSES
+ #include "lib.h"
+ // Format when (local time) as "D. Monat YYYY"; returns a strdup()ed string.
+ char *deL1_date(time_t when)
+ {
+   static const char *months[]=
+   {
+     "Januar", "Februar", "M\344rz", "April", // \344 = a-umlaut in ISO-8859-1
+     "Mai", "Juni", "Juli", "August",
+     "September", "Oktober", "November", "Dezember",
+   };
+   // months[] is indexed by tm_mon, which counts from 0 (January).
+   struct tm *tim;
+   char date_buf[200];
+
+   tim=localtime(&when);
+   // NOTE(review): localtime() can return NULL; that case is not handled here.
+   sprintf(date_buf, "%d. %s %d", tim->tm_mday,
+ 	  months[tim->tm_mon], 1900+tim->tm_year);
+   // tm_year counts from 1900, hence the offset above.
+   return strdup(date_buf);
+ }
Index: dps/word2x-zero/dedate.cc
diff -c /dev/null dps/word2x-zero/dedate.cc:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/dedate.cc Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1,39 ----
+ // Date formatter for German dates (in German)
+ // a-umlaut in Maerz is written as "ae" for compatibility reasons
+ // use date format deL1 or deHTML for correct umlaut
+ // 8/1998 jk
+ #include "config.h"
+
+ #ifdef TM_IN_SYS_TIME
+ #include <sys/time.h>
+ #else
+ #include <time.h>
+ #endif
+
+ #ifdef HAVE_STRING_H
+ #include <string.h>
+ #else /* do not have string.h */
+ #include <strings.h>
+ #endif /* HAVE_STRING_H */
+ #define __EXCLUDE_READER_CLASSES
+ #include "lib.h"
+ // Format when (local time) as "D. Monat YYYY"; returns a strdup()ed string.
+ char *de_date(time_t when)
+ {
+   static const char *months[]=
+   {
+     "Januar", "Februar", "Maerz", "April", // plain-ASCII spelling, no umlaut
+     "Mai", "Juni", "Juli", "August",
+     "September", "Oktober", "November", "Dezember",
+   };
+   // months[] is indexed by tm_mon, which counts from 0 (January).
+   struct tm *tim;
+   char date_buf[200];
+
+   tim=localtime(&when);
+   // NOTE(review): localtime() can return NULL; that case is not handled here.
+   sprintf(date_buf, "%d. %s %d", tim->tm_mday,
+ 	  months[tim->tm_mon], 1900+tim->tm_year);
+   // tm_year counts from 1900, hence the offset above.
+   return strdup(date_buf);
+ }
Index: dps/word2x-zero/lib.h
diff -c dps/word2x-zero/lib.h:1.1.1.1 dps/word2x-zero/lib.h:1.1.1.2
*** dps/word2x-zero/lib.h:1.1.1.1 Sun May 9 16:13:51 1999
--- dps/word2x-zero/lib.h Sun May 9 16:15:34 1999
***************
*** 1,4 ****
! /* $Id: lib.h,v 1.1.1.1 1999/05/09 15:13:51 dps Exp $ */
#ifndef __word2x_lib_h__
#define __word2x_lib_h__
--- 1,4 ----
! /* $Id: lib.h,v 1.1.1.2 1999/05/09 15:15:34 dps Exp $ */
#ifndef __word2x_lib_h__
#define __word2x_lib_h__
***************
*** 81,86 ****
--- 81,89 ----
#endif /* HAVE_TIME_H */
extern char *uk_date(time_t);
extern char *us_date(time_t);
+ extern char *de_date(time_t);
+ extern char *deHTML_date(time_t);
+ extern char *deL1_date(time_t);
/* Basic stuff for reading numbers from files */
extern unsigned long read_ulong(FILE *);
Index: dps/word2x-zero/strip.cc
diff -c dps/word2x-zero/strip.cc:1.1.1.1 dps/word2x-zero/strip.cc:1.1.1.2
*** dps/word2x-zero/strip.cc:1.1.1.1 Sun May 9 16:13:51 1999
--- dps/word2x-zero/strip.cc Sun May 9 16:15:34 1999
***************
*** 1,4 ****
! /* $Id: strip.cc,v 1.1.1.1 1999/05/09 15:13:51 dps Exp $ */
/* This code filters out almost all the "junk" in word documents and
* is useful for extracting the text from word documents. I think this
--- 1,4 ----
! /* $Id: strip.cc,v 1.1.1.2 1999/05/09 15:15:34 dps Exp $ */
/* This code filters out almost all the "junk" in word documents and
* is useful for extracting the text from word documents. I think this
***************
*** 79,85 ****
top_run=0;
/*** CS: __adj__=1 ****/
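! for (i=ns+1, d=ns; i<n; i++) // Ignore already processed data pushed back
{
/*** CS: i==d+junk_end_usage+__adj__ and __adj__>=0 here by induction ***/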
/*** CS: if mode==SKIP_JUNK or SKIP_JUNK_WASPRN then __adj__>=1 ***/
--- 79,85 ----
top_run=0;
/*** CS: __adj__=1 ****/
! for (i=ns+1, d=ns; i<=n; i++) // Ignore already processed data pushed back
{
/*** CS: i==d+junk_end_usage+__adj__ and __adj__>=0 here by induction ***/
/*** CS: if mode==SKIP_JUNK or SKIP_JUNK_WASPRN then __adj__>=1 ***/
Index: dps/word2x-zero/transcript
diff -c /dev/null dps/word2x-zero/transcript:1.1.1.1
*** /dev/null Sun May 9 16:31:36 1999
--- dps/word2x-zero/transcript Sun May 9 16:15:34 1999
***************
*** 0 ****
--- 1,84 ----
+ word2x build transcript
+ Note: the prompts are simulated. All the rest is real.
+ Simulation> ./configure
+ creating cache ./config.cache
+ checking for gcc... gcc
+ checking whether the C compiler (gcc ) works... yes
+ checking whether the C compiler (gcc ) is a cross-compiler... no
+ checking whether we are using GNU C... yes
+ checking whether gcc accepts -g... yes
+ checking for c++... c++
+ checking whether the C++ compiler (c++ ) works... yes
+ checking whether the C++ compiler (c++ ) is a cross-compiler... no
+ checking whether we are using GNU C++... yes
+ checking whether c++ accepts -g... yes
+ checking for ranlib... ranlib
+ checking for a BSD compatible install... /usr/bin/install -c
+ checking for getopt_long... yes
+ checking how to run the C preprocessor... gcc -E
+ checking for working alloca.h... yes
+ checking for alloca... yes
+ checking whether time.h and sys/time.h may both be included... yes
+ checking whether struct tm is in sys/time.h or time.h... time.h
+ checking header file for SEEK_SET...
+ checking for ANSI C header files... yes
+ checking for ctype.h... yes
+ checking for string.h... yes
+ checking for strings.h... yes
+ checking for time.h... yes
+ checking for sys/time.h... yes
+ checking for sys/stat.h... yes
+ checking for unistd.h... yes
+ checking for strncasecmp... yes
+ checking for strcasecmp... yes
+ checking for strdup... yes
+ checking for localtime... yes
+ checking for fstat... yes
+ checking for in -lsun... no
+ checking if I know ld and c++ might not work together
+ checking host system type... i486-unknown-linux
+ checking target system type... i486-unknown-linux
+ checking build system type... i486-unknown-linux
+ Using ld, as on i486-unknown-linux I know ld and c++ mix
+ updating cache ./config.cache
+ creating ./config.status
+ creating Makefile
+ creating config.h
+ Simulation> make all
+ make[1]: Entering directory `/home/dps/word2x-0.002/word2x-pre-0.004'
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o reader.o reader.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o strip.o strip.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o word2x.o word2x.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o text-fmt.o text-fmt.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o text-table.o text-table.cc
+ ld -r -o fmt-text.o text-fmt.o text-table.o
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o latex-fmt.o latex-fmt.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o latex-table.o latex-table.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o latex-embed.o latex-embed.cc
+ ld -r -o fmt-latex.o latex-fmt.o latex-table.o latex-embed.o
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o html-fmt.o html-fmt.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o html-table.o html-table.cc
+ html-table.cc:19: warning: declaration of `char * __builtin_alloca(int)'
+ html-table.cc:19: warning: conflicts with built-in declaration `void * __builtin_alloca(unsigned int)'
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o html-embed.o html-embed.cc
+ ld -r -o fmt-html.o html-fmt.o html-table.o html-embed.o
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o dedate.o dedate.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o deL1date.o deL1date.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o deHTMLdate.o deHTMLdate.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o ukdate.o ukdate.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o usdate.o usdate.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o wordwrap.o wordwrap.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o nullproc.o nullproc.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o tblock.o tblock.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o scan_num.o scan_num.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o map_chars.o map_chars.cc
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o col-align.o col-align.cc
+ gcc -g -Wall -DHAVE_CONFIG_H -c compat.c -o compat.o
+ gcc -g -Wall -DHAVE_CONFIG_H -c num_unit_probe.c -o num_unit_probe.o
+ gcc -g -Wall -DHAVE_CONFIG_H -c part_num_probe.c -o part_num_probe.o
+ ar rc liboutfmt.a wordwrap.o nullproc.o tblock.o scan_num.o map_chars.o col-align.o compat.o num_unit_probe.o part_num_probe.o
+ ranlib liboutfmt.a
+ c++ -o word2x reader.o strip.o word2x.o fmt-text.o fmt-latex.o fmt-html.o dedate.o deL1date.o deHTMLdate.o ukdate.o usdate.o liboutfmt.a
+ c++ -g -Wall -DHAVE_CONFIG_H -c -o rtest2.o rtest2.cc
+ g++ -o rtest2 rtest2.o reader.o strip.o tblock.o
+ make[1]: Leaving directory `/home/dps/word2x-0.002/word2x-pre-0.004'
Index: dps/word2x-zero/word2x.cc
diff -c dps/word2x-zero/word2x.cc:1.1.1.1 dps/word2x-zero/word2x.cc:1.1.1.2
*** dps/word2x-zero/word2x.cc:1.1.1.1 Sun May 9 16:13:51 1999
--- dps/word2x-zero/word2x.cc Sun May 9 16:15:34 1999
***************
*** 1,4 ****
! /* $Id: word2x.cc,v 1.1.1.1 1999/05/09 15:13:51 dps Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
--- 1,4 ----
! /* $Id: word2x.cc,v 1.1.1.2 1999/05/09 15:15:34 dps Exp $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
***************
*** 149,154 ****
--- 149,157 ----
{ "uk", uk_date },
{ "british", uk_date },
{ "us", us_date },
+ { "de", de_date },
+ { "deHTML", deHTML_date },
+ { "deL1", deL1_date },
};
static const struct
{
Index: dps/word2x-zero/word2x.tar.gz