This has been used to produce a Texinfo file from the entire page database. See ExtractTexinfo. -- MeatBall:AlexSchroeder
#!/usr/bin/perl -w
#
# For every "*.db" page file in a directory, dig out the page text and write
# it to a file in the same directory named after the page file with the
# ".db" suffix removed.
#
# A page file is read as three nested separator-delimited key/value lists:
#   page    (fields split on $FS1) -> key "text_default" holds the section
#   section (fields split on $FS2) -> key "data" holds the text record
#   text    (fields split on $FS3) -> key "text" holds the page text itself

use strict;
use warnings;

# Field separators. They remain package globals, as in the original script,
# and are (re)initialised on every call to rewrite().
our ($FS, $FS1, $FS2, $FS3);

rewrite("test");

# rewrite($directory [, $separator])
#
# Converts every "*.db" file found directly inside $directory.
#
#   $directory - directory that holds the page files; output is written here
#   $separator - optional field separator; when omitted the built-in default
#                below is used (pass "\x1e\xff\xfe\x1e" for the $NewFS format)
#
# Dies when the directory cannot be opened or a file cannot be read/written.
sub rewrite {
    my ($directory, $separator) = @_;

    $FS = "\xb3";
    # If you have set $NewFS to 1, then remove the # from the start of the next line.
    # $FS = "\x1e\xff\xfe\x1e";
    $FS = $separator if defined $separator && length $separator;
    $FS1 = $FS . "1";
    $FS2 = $FS . "2";
    $FS3 = $FS . "3";

    foreach my $file (read_directory($directory)) {
        print "Reading $file...\n";
        my %page    = split_page(read_file("$directory/$file"));
        my %section = split_section(%page);
        my %text    = split_text(%section);

        # Work on a copy so the list being iterated is not modified, and
        # escape the dot so only a literal ".db" suffix is stripped.
        (my $name = $file) =~ s/\.db$//;
        print "Writing $name...\n";

        my $content = $text{text};
        if (!defined $content) {
            # Usually a sign that the wrong separator is in use.
            warn "no page text found in $file; writing an empty file\n";
            $content = '';
        }
        write_file("$directory/$name", $content);
    }
    print "Done.\n";
}

# read_directory($dirname)
#
# Returns the names (without the directory part) of all plain files in
# $dirname whose name ends in ".db". Dies if $dirname cannot be opened.
sub read_directory {
    my ($dirname) = @_;

    opendir(my $dir, $dirname) or die "can't opendir $dirname: $!";
    my @dots = grep { /\.db$/ && -f "$dirname/$_" } readdir($dir);
    closedir($dir);

    return @dots;
}

# read_file($filename)
#
# Returns the complete contents of $filename as one string.
# Dies if the file cannot be opened for reading.
sub read_file {
    my ($filename) = @_;

    # Three-argument open: the file name can never be taken as an open mode.
    open(my $in, '<', $filename) or die "can't read $filename: $!";
    local $/ = undef;    # Read complete files
    my $data = <$in>;
    close($in);

    return $data;
}

# _split_fields($separator, $data)
#
# Splits $data on the literal string $separator and returns the pieces as a
# flat key/value list. Returns an empty list when $data is undefined.
sub _split_fields {
    my ($separator, $data) = @_;

    return () unless defined $data;

    # \Q...\E: the separator is data, not a pattern.
    # -1 keeps trailing null fields so keys and values stay paired up.
    my %fields = split(/\Q$separator\E/, $data, -1);
    return %fields;
}

# split_page($data)
#
# Splits the raw contents of a page file on $FS1 into a key/value list.
sub split_page {
    my ($data) = @_;
    return _split_fields($FS1, $data);
}

# split_section(%page)
#
# Splits the "text_default" entry of a page on $FS2 into a key/value list.
sub split_section {
    my (%page) = @_;
    return _split_fields($FS2, $page{text_default});
}

# split_text(%section)
#
# Splits the "data" entry of a section on $FS3 into a key/value list.
sub split_text {
    my (%section) = @_;
    return _split_fields($FS3, $section{data});
}

# write_file($file, $data)
#
# Creates or truncates $file and writes $data to it (an undefined $data is
# written as an empty file). Dies if the file cannot be opened, written or
# closed; write errors on a buffered handle may only surface at close().
sub write_file {
    my ($file, $data) = @_;

    $data = '' unless defined $data;

    open(my $out, '>', $file) or die "can't open $file: $!";
    print {$out} $data or die "can't write $file: $!";
    close($out) or die "can't close $file: $!";
}
The script did not work for me, but when I changed line 8 from
$FS = "\xb3"; to
$FS = "\x1e\xff\xfe\x1e"; it was OK. Maybe the separator has changed? Lars