#!/usr/bin/perl -w
use strict;
use warnings;

=head1 NAME

freedb2txt - flatten Freedb downloadable database into greppable text file

=head1 SYNOPSIS

    curl uri-of-freedb | \
        bzip2 -d | \
        freedb2txt | \
        sort -u >$HOME/.freedb

    look cddbid .freedb

=head1 DESCRIPTION

The downloadable freedb is a tar.bz2, of an archive with one file per
entry, named data/cddbid, containing the full CDDB database entry in
text format, with a comment block at the top.

The part of this database that Bennett needs is the cddbid (as a key),
the artist/album, the genre, and the list of track titles.

Unpacking the CDDB database tarchive would be ungodly.  And parsing the
tar in a stream would be a pain.  Happily, the contents are easy to
recognize out of the stream; for each album, these values appear, in
this order:

    DISCID=cddbid
    DTITLE=artist / album
    DGENRE=genre
    TTITLE0=first track title
    TTITLE1=second track title
    TTITLE2=...

A compact representation of the bits Bennett cares about is

    cddbid:artist:album:genre:title0:title1:...

where each field has every instance of ":", "%", and newline
URI-escaped.  A field that is absent from the input (no genre, or a gap
in the track numbering) is written as an empty field.

The artist and album are separated at the first " / " in the DTITLE
value; if there is no whitespace-delimited slash, the first bare "/" is
used instead.  If there is no slash at all, the whole title is used as
both artist and album.

The file that results from this operation is quite compact --- only
slightly larger than the bzip2-compressed full CDDB database --- and,
when sorted, can be rapidly searched using look(1).

Some cddbids are lists of ids separated by commas; for such, this
generates a separate line for each cddbid.

=cut

use URI::Escape;

# Characters that must be escaped so that ":" can serve as the field
# separator and each record stays on exactly one line.
my $UNSAFE = ":%\n";

# split_dtitle($dtitle)
#
# Split a raw DTITLE value into (artist, album).  Both values are
# trimmed and have runs of whitespace collapsed to a single space.
# An undefined or empty title yields two empty strings.
sub split_dtitle {
    my ($dtitle) = @_;
    $dtitle = '' unless defined $dtitle;

    # Prefer the " / " separator so that a slash inside the artist name
    # (e.g. "AC/DC / Back in Black") is not mistaken for the divider.
    my ($artist, $album) = split m{\s+/\s+}, $dtitle, 2;

    # Fall back to a bare slash for entries written as "artist/album".
    ($artist, $album) = split m{/}, $dtitle, 2
        unless defined $album;

    # No slash at all: the title doubles as artist and album.
    $album = $artist unless defined $album;

    for my $field ($artist, $album) {
        $field = '' unless defined $field;
        $field =~ s/^\s+//;
        $field =~ s/\s+$//;
        $field =~ s/\s+/ /g;
    }

    return ($artist, $album);
}

# format_records($cddbid, $dtitle, $genre, @tracks)
#
# Build the output lines for one album: one newline-terminated line per
# comma-separated id in $cddbid.  Returns an empty list when $cddbid is
# undefined (no DISCID has been seen yet).  Undefined fields, including
# holes in @tracks, become empty fields.
sub format_records {
    my ($cddbid, $dtitle, $genre, @tracks) = @_;
    return unless defined $cddbid;

    my ($artist, $album) = split_dtitle($dtitle);

    # Everything after the id is identical for every id of this album,
    # so escape it once.
    my @fields = map { uri_escape(defined $_ ? $_ : '', $UNSAFE) }
        ($artist, $album, $genre, @tracks);

    return map { join(':', uri_escape($_, $UNSAFE), @fields) . "\n" }
        split /,/, $cddbid;
}

# main()
#
# Read the unpacked stream from the files named in @ARGV (or STDIN),
# pick out the DISCID/DTITLE/DGENRE/TTITLEn lines, and print one record
# per cddbid.  A DISCID line starts a new album and flushes the
# previous one; the last album is flushed at end of input.
sub main {
    my ($cddbid, $dtitle, $genre, @tracks);

    LINE:
    while (my $line = <>) {
        chomp $line;

        if ($line =~ s/^DISCID=//) {
            print join('', format_records($cddbid, $dtitle, $genre, @tracks));
            # Start a fresh album: title and genre undefined, no tracks.
            ($cddbid, $dtitle, $genre, @tracks) = ($line);
            next LINE;
        }

        if ($line =~ s/^DTITLE=//) {
            # A title may be continued over several DTITLE lines.
            $dtitle = defined $dtitle ? "$dtitle $line" : $line;
            next LINE;
        }

        if ($line =~ s/^DGENRE=//) {
            $genre = $line;
            next LINE;
        }

        if ($line =~ s/^TTITLE(\d+)=//) {
            # NOTE(review): a TTITLE repeated for the same index
            # overwrites the earlier value instead of being appended the
            # way DTITLE is -- confirm whether continuation lines for
            # track titles need to be joined.
            $tracks[$1] = $line;
            next LINE;
        }
    }

    # Flush the final album; yields nothing if no DISCID was ever seen.
    print join('', format_records($cddbid, $dtitle, $genre, @tracks));

    return;
}

# Run only when executed as a script, so the helpers can be loaded
# without consuming input.
main() unless caller;