diff options
author | Christoph Berg | 2020-02-21 14:24:41 +0000 |
---|---|---|
committer | Christoph Berg | 2020-02-21 14:24:41 +0000 |
commit | d9f446a9d768d0a3626467448f16401a6d07ff26 (patch) | |
tree | 27cd2c8b5d54acb746a1fb5457b6016555ea2654 | |
parent | a073f5cd5805950accb2a407c88d278f3770affd (diff) |
Import package data into database
-rwxr-xr-x | pgapt-db/import-packagelists | 211 | ||||
-rw-r--r-- | pgapt-db/sql/pgdg_apt.sql | 64 | ||||
-rw-r--r-- | pgapt-db/sql/pgdg_apt_data.sql | 22 | ||||
-rw-r--r-- | pgapt-db/sql/qa_apt.sql | 200 | ||||
-rw-r--r-- | pgapt-db/sql/qa_apt_distribution.sql | 17 | ||||
-rw-r--r-- | pgapt-db/sql/qa_apt_suite.sql | 50 | ||||
-rw-r--r-- | pgapt-db/sql/qa_public.sql | 9 | ||||
-rwxr-xr-x | repo/bin/import-dists | 25 | ||||
-rwxr-xr-x | repo/bin/import-packagesfile | 143 | ||||
-rwxr-xr-x | repo/bin/import-sourcesfile | 118 |
10 files changed, 345 insertions, 514 deletions
diff --git a/pgapt-db/import-packagelists b/pgapt-db/import-packagelists deleted file mode 100755 index 4edbbbe..0000000 --- a/pgapt-db/import-packagelists +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2009-2013 Christoph Berg <[email protected]> -# -# getdpkginfo by -# Copyright (C) 2005 Jeroen van Wolffelaar <[email protected]> -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. The name of the author may not be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import apt_pkg, psycopg2, os, re, sys, time - -ftp_prefix = "/home/cbe/projects/postgresql/apt.postgresql.org" - -pg = psycopg2.connect('dbname=pgweb') -cur = pg.cursor() -cur.execute("SET search_path TO apt") - -def parseSourceFile(distribution, component, packagesfile): - if not os.path.isfile(packagesfile): - raise Exception("%s not found" % packagesfile) - stdout = os.popen("bzcat '%s'" % packagesfile) - #stdout = file(packagesfile) - parse = apt_pkg.TagFile(stdout) - - # clear the suite - cur.execute("BEGIN") - cur.execute("""DELETE FROM sourcelist WHERE - (distribution, component) = (%s, %s)""", - [distribution, component]) - - # for every package ... - while parse.step(): - package = parse.section.get('Package') - version = parse.section.get('Version') - control = "\n".join([ "%s: %s" % (k, parse.section.get(k)) \ - for k in parse.section.keys() ]) - - # store package control file - cur.execute("""SELECT 1 FROM source - WHERE (source, srcversion) = (%s, %s)""", - [package, version]) - found = cur.fetchone() - if not found: # this is the first time we see this package, extract more information - cur.execute("""INSERT INTO source (source, srcversion, control, c) - VALUES (%s, %s, %s, control2hstore(%s))""", - [package, version, control, control]) - - # finally, add the package to the suite's package list - cur.execute("""INSERT INTO sourcelist - (distribution, component, source, srcversion) - VALUES (%s, %s, %s, %s)""", - [distribution, component, package, version]) - - cur.execute("COMMIT") - -def parseFile(distribution, component, architecture, packagesfile): - if not os.path.isfile(packagesfile): - raise Exception("%s not found" % packagesfile) - stdout = os.popen("bzcat '%s'" % packagesfile) - #stdout = file(packagesfile) - parse = apt_pkg.TagFile(stdout) - - source_re = re.compile('(.*) \((.*)\)') - binnmu_re = re.compile('\+b\d+$') - - # clear the suite - cur.execute("BEGIN") - cur.execute("""DELETE FROM packagelist WHERE - (distribution, component, 
architecture) = (%s, %s, %s)""", - [distribution, component, architecture]) - - # for every package ... - while parse.step(): - package = parse.section.get('Package') - version = parse.section.get('Version') - arch = parse.section.get('Architecture') - control = "\n".join([ "%s: %s" % (k, parse.section.get(k)) \ - for k in parse.section.keys() ]) - - # store package control file - cur.execute("""SELECT 1 FROM package - WHERE (package, version, arch) = (%s, %s, %s)""", - [package, version, arch]) - found = cur.fetchone() - if not found: # this is the first time we see this package, extract more information - - sourcefield = parse.section.get('Source') - if sourcefield: - match = source_re.match(sourcefield) - if match: - source, source_version = match.group(1), match.group(2) - else: - source, source_version = sourcefield, version - else: - source, source_version = package, version - if binnmu_re.search(source_version): - print "WARNING: %s %s %s: source %s %s for %s %s (%s) has binnmu version number, fixing" % \ - (distribution, component, architecture, source, source_version, - package, version, arch) - source_version = binnmu_re.sub('', source_version) - -# # find link to source package -# cur.execute("""SELECT 1 FROM source -# WHERE (source, srcversion) = (%s, %s)""", -# [source, source_version]) -# found = cur.fetchone() -# if not found: -# print "WARNING: %s %s %s: source %s %s for %s %s (%s) not found" % \ -# (distribution, component, architecture, source, source_version, -# package, version, arch) - - cur.execute("""INSERT INTO package (package, version, arch, control, c, source, srcversion) - VALUES (%s, %s, %s, %s, control2hstore(%s), %s, %s)""", - [package, version, arch, control, control, source, source_version]) - - # finally, add the package to the suite's package list - cur.execute("""INSERT INTO packagelist - (distribution, component, architecture, package, version, arch) - VALUES (%s, %s, %s, %s, %s, %s)""", - [distribution, component, architecture, 
package, version, arch]) - - cur.execute("COMMIT") - -verbose = sys.argv.__len__() > 1 - -cur.execute("""SELECT distribution, component, - extract (epoch from last_update) AS last_update - FROM srcdistribution - WHERE active - FOR UPDATE NOWAIT""") -suites = cur.fetchall() - -for distribution, component, last_update in suites: - packagesfile = ftp_prefix + "/dists/%s/%s/source/Sources.bz2" % \ - (distribution, component) - - if not os.path.exists(packagesfile): - print "WARNING:", packagesfile, "not found" - continue - - mtime = os.path.getmtime(packagesfile) - if not last_update or mtime > last_update: - if verbose: - print "Reading %s" % packagesfile - parseSourceFile(distribution, component, packagesfile) - - update = """UPDATE srcdistribution SET last_update = %s WHERE - (distribution, component) = (%s, %s)""" - cur.execute(update, [time.ctime(mtime), distribution, component]) - cur.execute("COMMIT") - else: - if verbose: - print "Skipping up to date %s" % packagesfile - -cur.execute("""SELECT distribution, component, architecture, - extract (epoch from last_update) AS last_update - FROM distribution - WHERE active - ORDER BY CASE WHEN architecture = 'source' THEN 1 ELSE 2 END - FOR UPDATE NOWAIT""") -suites = cur.fetchall() - -for distribution, component, architecture, last_update in suites: - packagesfile = ftp_prefix + "/dists/%s/%s/binary-%s/Packages.bz2" % \ - (distribution, component, architecture) - - if not os.path.exists(packagesfile): - print "WARNING:", packagesfile, "not found" - continue - - mtime = os.path.getmtime(packagesfile) - if not last_update or mtime > last_update: - if verbose: - print "Reading %s" % packagesfile - parseFile(distribution, component, architecture, packagesfile) - - update = """UPDATE distribution SET last_update = %s WHERE - (distribution, component, architecture) = (%s, %s, %s)""" - cur.execute(update, [time.ctime(mtime), distribution, component, architecture]) - cur.execute("COMMIT") - else: - if verbose: - print 
"Skipping up to date %s" % packagesfile - -# Insert missing source packages: -# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND p.version = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source'; -# Insert missing source packages, packages with broken/missing binnmu Source: fieds: -# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND regexp_replace (p.version, E'\\+b\\d+$', '') = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source'; - diff --git a/pgapt-db/sql/pgdg_apt.sql b/pgapt-db/sql/pgdg_apt.sql index 1d29bc8..c6123ca 100644 --- a/pgapt-db/sql/pgdg_apt.sql +++ b/pgapt-db/sql/pgdg_apt.sql @@ -1,17 +1,24 @@ BEGIN; CREATE SCHEMA apt; -SET search_path TO apt; +SET search_path TO apt, public; -CREATE EXTENSION debversion; -CREATE EXTENSION hstore; +--CREATE EXTENSION debversion; -CREATE OR REPLACE FUNCTION control2hstore (control text) -RETURNS hstore LANGUAGE sql IMMUTABLE AS -$$SELECT regexp_replace (regexp_replace ($1, '([\\"])', '\\\1', 'g'), - E'^([^:]*): (.*(?:\n .*)*)', '"\1"=>"\2",', 'gn')::hstore$$; --- intentionally no E'' in the first line +CREATE OR REPLACE FUNCTION list2jsonb (list text) +RETURNS jsonb LANGUAGE sql IMMUTABLE AS +$$SELECT jsonb_agg(m) from regexp_split_to_table($1, E'\n ') m$$; + +CREATE OR REPLACE FUNCTION control2jsonb (control text) +RETURNS jsonb LANGUAGE sql IMMUTABLE AS +$$SELECT jsonb_object_agg(lower(m[1]), + CASE WHEN m[1] IN ('Files', 'Checksums-Sha1', 'Checksums-Sha256') THEN + list2jsonb(m[2]) + ELSE + to_jsonb(m[2]) + END) + FROM regexp_matches($1||E'\n', E'^([^ :]*): ((.|\n )*)\n?', 'gm') m$$; -- ARCHIVE-WIDE DATA @@ -51,7 +58,8 @@ CREATE TABLE source ( source text NOT NULL, 
srcversion debversion NOT NULL, control text NOT NULL, - c hstore, + c jsonb, + time timestamptz(0) NOT NULL, PRIMARY KEY (source, srcversion) ); @@ -62,9 +70,10 @@ CREATE TABLE package ( arch text NOT NULL REFERENCES architecture (architecture), control text NOT NULL, - c hstore, + c jsonb, source text NOT NULL, srcversion debversion NOT NULL, + time timestamptz(0) NOT NULL, PRIMARY KEY (package, version, arch) ); @@ -102,6 +111,41 @@ CREATE INDEX ON packagelist (distribution, component, architecture); CREATE INDEX ON packagelist (package); +-- HISTORY + +CREATE TABLE sourcehist ( + distribution text NOT NULL, + component text NOT NULL, + source text NOT NULL, + srcversion debversion NOT NULL, + time timestamptz(0) NOT NULL, + + FOREIGN KEY (distribution, component) REFERENCES srcdistribution (distribution, component), + FOREIGN KEY (source, srcversion) REFERENCES source (source, srcversion) +); +CREATE INDEX ON sourcehist (distribution, component); +CREATE INDEX ON sourcehist (source); + +CREATE TABLE packagehist ( + distribution text NOT NULL, + component text NOT NULL, + architecture text NOT NULL, + package text NOT NULL, + version debversion NOT NULL, + arch text NOT NULL, + time timestamptz(0) NOT NULL, + CHECK ((architecture = arch) OR (arch = 'all')), + + FOREIGN KEY (distribution, component, architecture) + REFERENCES distribution (distribution, component, architecture), + FOREIGN KEY (package, version, arch) REFERENCES package (package, version, arch) +); +CREATE INDEX ON packagehist (distribution, component, architecture); +CREATE INDEX ON packagehist (package); + + +-- ACLs + GRANT USAGE ON SCHEMA apt TO PUBLIC; GRANT SELECT ON ALL TABLES IN SCHEMA apt TO PUBLIC; diff --git a/pgapt-db/sql/pgdg_apt_data.sql b/pgapt-db/sql/pgdg_apt_data.sql index 4ee4272..565ff4f 100644 --- a/pgapt-db/sql/pgdg_apt_data.sql +++ b/pgapt-db/sql/pgdg_apt_data.sql @@ -1,21 +1,9 @@ BEGIN; -INSERT INTO architecture - SELECT * FROM (VALUES ('all'), ('amd64'), ('i386')) 
arch(architecture) - WHERE NOT EXISTS (SELECT * FROM architecture - WHERE architecture = arch.architecture); - -INSERT INTO srcdistribution - SELECT * FROM (VALUES ('sid-pgdg'), ('wheezy-pgdg'), ('squeeze-pgdg')) dist(distribution), - (VALUES ('main'), ('9.2'), ('9.1'), ('9.0'), ('8.4'), ('8.3')) comp(component) - WHERE NOT EXISTS (SELECT * FROM srcdistribution - WHERE (distribution, component) = (dist.distribution, comp.component)); - -INSERT INTO distribution - SELECT * FROM (VALUES ('sid-pgdg'), ('wheezy-pgdg'), ('squeeze-pgdg')) dist(distribution), - (VALUES ('main'), ('9.2'), ('9.1'), ('9.0'), ('8.4'), ('8.3')) comp(component), - (VALUES ('amd64'), ('i386')) arch(architecture) - WHERE NOT EXISTS (SELECT * FROM distribution - WHERE (distribution, component, architecture) = (dist.distribution, comp.component, arch.architecture)); +INSERT INTO architecture VALUES + ('all'), + ('amd64'), + ('i386'), + ('ppc64el'); COMMIT; diff --git a/pgapt-db/sql/qa_apt.sql b/pgapt-db/sql/qa_apt.sql deleted file mode 100644 index 46e7f4a..0000000 --- a/pgapt-db/sql/qa_apt.sql +++ /dev/null @@ -1,200 +0,0 @@ -BEGIN; - -CREATE SCHEMA apt; -SET search_path TO apt; - - --- ARCHIVE-WIDE DATA - -CREATE TABLE architecture ( - architecture text PRIMARY KEY -); -COMMENT ON TABLE architecture IS 'All known architectures, including source and all'; - - -CREATE TABLE suite ( - suite_id serial PRIMARY KEY, - archive text NOT NULL, - suite text NOT NULL, - component text NOT NULL, - architecture text NOT NULL - REFERENCES architecture (architecture), - last_update timestamp with time zone, - active boolean NOT NULL DEFAULT TRUE -); -CREATE UNIQUE INDEX suite__archive_suite_component_architecture -ON suite (archive, suite, component, architecture); -COMMENT ON TABLE suite IS 'All known archives and suites'; - - --- Hopefully this is the only table that needs to be updated at release time -CREATE TABLE distribution ( - distribution text PRIMARY KEY, - archive text NOT NULL, - suite text NOT NULL 
-); -CREATE UNIQUE INDEX distribution__archive_suite ON distribution (archive, suite); -COMMENT ON TABLE distribution IS 'Symbolic names for archive/suite/* combinations'; - - --- PACKAGE DATA - -CREATE TABLE package ( - package_id serial PRIMARY KEY, - package text NOT NULL, - version public.debversion NOT NULL, - pkg_architecture text NOT NULL - REFERENCES architecture (architecture), - source_id integer - CONSTRAINT source_architecture CHECK (NOT (source_id IS NOT NULL AND pkg_architecture = 'source')) -); -COMMENT ON TABLE package IS 'All known packages and sources'; -ALTER TABLE package ADD FOREIGN KEY (source_id) REFERENCES package (package_id); - -CREATE INDEX package__package_version_pkg_architecture ON package (package, version, pkg_architecture); -CREATE INDEX package__package_pkg_architecture ON package (package, pkg_architecture); - - -CREATE TABLE package_control ( - package_id integer PRIMARY KEY REFERENCES package, - control text NOT NULL -); -COMMENT ON TABLE package_control IS -'Control files of all known packages and sources'; - - -/* -CREATE TABLE package_source ( - package_id integer PRIMARY KEY REFERENCES package, - source_id integer NOT NULL REFERENCES package (package_id) - CHECK (package_id <> source_id) -); -CREATE INDEX package_source__source_id ON package_source (source_id); -COMMENT ON TABLE package_source IS -'Table relating binary packages to their source package'; -*/ - - --- SUITE DATA - -CREATE TABLE packagelist ( - suite_id integer NOT NULL REFERENCES suite, - package_id integer NOT NULL REFERENCES package - -- no PK -); -CREATE INDEX packagelist__suite_id ON packagelist (suite_id); -CREATE INDEX packagelist__package_id ON packagelist (package_id); -COMMENT ON TABLE packagelist IS 'Association of packages with suites'; - - --- SOURCE-SPECIFIC DATA - -CREATE TABLE maintainer ( - maintainer serial PRIMARY KEY, - name text NOT NULL -); - -CREATE OR REPLACE FUNCTION maint_id_or_new (pname text) -RETURNS integer -LANGUAGE plpgsql 
VOLATILE STRICT -AS $$ -DECLARE - id integer; -BEGIN - SELECT maintainer INTO id - FROM maintainer - WHERE name = pname; - IF NOT FOUND THEN - INSERT INTO maintainer (name) VALUES (pname) - RETURNING maintainer INTO id; - END IF; - RETURN id; -END; -$$; - -CREATE OR REPLACE FUNCTION maint_id (pname text) -RETURNS integer -LANGUAGE SQL STABLE STRICT -AS $$ - SELECT maintainer FROM apt.maintainer WHERE name = $1; -$$; - -CREATE OR REPLACE FUNCTION maint_name (id integer) -RETURNS text -LANGUAGE SQL STABLE STRICT -AS $$ - SELECT name FROM apt.maintainer WHERE maintainer = $1; -$$; - -CREATE OR REPLACE FUNCTION apt.email_address (name text) -RETURNS text -LANGUAGE SQL IMMUTABLE STRICT -AS $$ - SELECT regexp_replace ($1, E'.*<(.*)>.*', E'\\1'); -$$; - -CREATE INDEX maintainer__email_address ON maintainer (email_address (name)); - - -CREATE TABLE source ( - package_id integer PRIMARY KEY REFERENCES package, - -- from sources: - maintainer integer NOT NULL REFERENCES maintainer, - section text NULL, - priority text NULL, - dm_upload_allowed boolean NOT NULL DEFAULT FALSE, - -- from projectb: (added by a separate script, hence all NULL) - changed_by integer REFERENCES maintainer (maintainer), - signed_by integer REFERENCES maintainer (maintainer), - date timestamp with time zone -); -CREATE INDEX source__maintainer ON source (maintainer); -CREATE INDEX source__changed_by ON source (changed_by); -CREATE INDEX source__signed_by ON source (signed_by); - - -CREATE TABLE uploader ( - package_id integer REFERENCES source, - maintainer integer REFERENCES maintainer, - PRIMARY KEY (package_id, maintainer) -); -CREATE INDEX uploader__maintainer ON uploader (maintainer); - -CREATE OR REPLACE FUNCTION uploaders (package_id integer) -RETURNS text[] -LANGUAGE SQL STABLE STRICT -AS $$ - SELECT array_agg (apt.maint_name (maintainer)) - FROM apt.uploader - WHERE package_id = $1; -$$; - - --- EXTRA PACKAGE INFORMATION - -CREATE TABLE package_info ( - package_id integer REFERENCES package, 
- field text, - value text NOT NULL, - PRIMARY KEY (package_id, field) -); - -CREATE OR REPLACE FUNCTION apt.package_info (package_id integer, field text) -RETURNS text -LANGUAGE SQL STABLE STRICT -AS $$ - SELECT value FROM apt.package_info - WHERE package_id = $1 AND field = $2; -$$; - - --- GRANTS - -GRANT USAGE ON SCHEMA apt TO PUBLIC; -GRANT SELECT ON - architecture, suite, distribution, release, package, package_control, - package_source, packagelist, maintainer, source, uploader, package_info - TO PUBLIC; - - -COMMIT; diff --git a/pgapt-db/sql/qa_apt_distribution.sql b/pgapt-db/sql/qa_apt_distribution.sql deleted file mode 100644 index 734bdbb..0000000 --- a/pgapt-db/sql/qa_apt_distribution.sql +++ /dev/null @@ -1,17 +0,0 @@ -BEGIN; - -SET search_path TO apt; - --- distribution table - -DELETE FROM apt.distribution; - -COPY apt.distribution (archive, suite, distribution) FROM STDIN WITH DELIMITER ' '; -pgapt etch-pgapt etch-pgapt -pgapt lenny-pgapt lenny-pgapt -pgapt squeeze-pgapt squeeze-pgapt -pgapt wheezy-pgapt wheezy-pgapt -pgapt sid-pgapt sid-pgapt -\. 
- -COMMIT; diff --git a/pgapt-db/sql/qa_apt_suite.sql b/pgapt-db/sql/qa_apt_suite.sql deleted file mode 100644 index 003d11e..0000000 --- a/pgapt-db/sql/qa_apt_suite.sql +++ /dev/null @@ -1,50 +0,0 @@ -BEGIN; - -SET search_path TO apt; - --- architecture table - -CREATE TEMP TABLE a ( architecture text ) ON COMMIT DROP; -INSERT INTO a VALUES ('source'); -INSERT INTO a VALUES ('amd64'); -INSERT INTO a VALUES ('i386'); --- no 'binary' here - -INSERT INTO architecture - SELECT architecture FROM a - WHERE architecture NOT IN (SELECT architecture FROM architecture); -INSERT INTO architecture - SELECT 'all' WHERE 'all' NOT IN (SELECT architecture FROM architecture); -INSERT INTO architecture - SELECT 'binary' WHERE 'binary' NOT IN (SELECT architecture FROM architecture); - --- suite table - -CREATE TEMP TABLE s ( suite text ) ON COMMIT DROP; -INSERT INTO s VALUES ('etch'); -INSERT INTO s VALUES ('lenny'); -INSERT INTO s VALUES ('squeeze'); -INSERT INTO s VALUES ('wheezy'); -INSERT INTO s VALUES ('sid'); - -CREATE TEMP TABLE c ( component text ) ON COMMIT DROP; -INSERT INTO c VALUES ('main'); - -CREATE TEMP TABLE tmp_suite ( - archive text NOT NULL, - suite text NOT NULL, - component text NOT NULL, - architecture text NOT NULL -) ON COMMIT DROP; - -INSERT INTO tmp_suite (archive, suite, component, architecture) - SELECT 'pgapt', suite||'-pgapt', component, architecture FROM s, c, a; - --- copy missing data from temp table over -INSERT INTO suite (archive, suite, component, architecture) - SELECT archive, suite, component, architecture - FROM tmp_suite - WHERE (archive, suite, component, architecture) NOT IN - (SELECT archive, suite, component, architecture FROM suite); - -COMMIT; diff --git a/pgapt-db/sql/qa_public.sql b/pgapt-db/sql/qa_public.sql deleted file mode 100644 index a37b5ec..0000000 --- a/pgapt-db/sql/qa_public.sql +++ /dev/null @@ -1,9 +0,0 @@ -BEGIN; - -SET search_path TO public; - -CREATE EXTENSION plpgsql; - -CREATE EXTENSION debversion; - -COMMIT; diff 
--git a/repo/bin/import-dists b/repo/bin/import-dists
new file mode 100755
index 0000000..169e98c
--- /dev/null
+++ b/repo/bin/import-dists
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+# Import every package index below dists/*-pgdg into the database:
+# one import-sourcesfile call per component directory and one
+# import-packagesfile call per binary-* directory inside it.
+# Any failing import aborts the whole run (set -e).
+
+set -eu
+
+# work relative to the parent of the directory this script lives in
+cd "$(dirname "$0")/.."
+
+for distdir in dists/*-pgdg; do
+    # an unmatched glob is left as a literal pattern; skip it
+    test -d "$distdir" || continue
+    distribution=${distdir##*/}
+
+    for componentdir in "$distdir"/*; do
+        test -d "$componentdir" || continue
+        component=${componentdir##*/}
+
+        # import Sources.bz2
+        bin/import-sourcesfile "$componentdir/source/Sources.bz2" "$distribution" "$component"
+
+        for archdir in "$componentdir"/binary-*; do
+            # same guard as above: no binary-* directory means nothing to import
+            test -d "$archdir" || continue
+            architecture=${archdir##*/binary-}
+
+            # import Packages.bz2
+            bin/import-packagesfile "$archdir/Packages.bz2" "$distribution" "$component" "$architecture"
+        done
+
+    done
+done
diff --git a/repo/bin/import-packagesfile b/repo/bin/import-packagesfile
new file mode 100755
index 0000000..15fc40c
--- /dev/null
+++ b/repo/bin/import-packagesfile
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2009-2013, 2020 Christoph Berg <[email protected]>
+#
+# getdpkginfo by
+# Copyright (C) 2005 Jeroen van Wolffelaar <[email protected]>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import apt_pkg, psycopg2, datetime, os, re, subprocess, sys, time
+
+pg = psycopg2.connect('service=pgapt')
+cur = pg.cursor()
+
+def parseFile(distribution, component, architecture, packagesfile, ctime):
+    """Load one bzip2-compressed Packages index into the database.
+
+    Replaces the packagelist rows of (distribution, component,
+    architecture) with the stanzas found in packagesfile, inserts
+    packages that are not yet in the package table (stamped with ctime),
+    and records list entries that are new to the suite in packagehist.
+
+    All statements run in the transaction psycopg2 opened for the
+    caller's first statement; the caller commits.
+
+    Raises Exception if packagesfile does not exist or bzcat fails.
+    """
+    if not os.path.isfile(packagesfile):
+        raise Exception("%s not found" % packagesfile)
+
+    # raw strings: "\(", "\+" and "\d" are regex escapes, not string escapes
+    source_re = re.compile(r'(.*) \((.*)\)')
+    binnmu_re = re.compile(r'\+b\d+$')
+
+    # clear the suite (no explicit BEGIN: psycopg2 already opened the
+    # transaction when the caller ran its first statement)
+    cur.execute("""DELETE FROM packagelist WHERE
+        (distribution, component, architecture) = (%s, %s, %s)""",
+        [distribution, component, architecture])
+
+    # Decompress through an argument list instead of a shell command line,
+    # so quotes or other metacharacters in the path cannot change the command.
+    with subprocess.Popen(["bzcat", packagesfile], stdout=subprocess.PIPE) as bzcat:
+        parse = apt_pkg.TagFile(bzcat.stdout)
+
+        # for every package ...
+        while parse.step():
+            package = parse.section.get('Package')
+            version = parse.section.get('Version')
+            arch = parse.section.get('Architecture')
+            control = "\n".join([ "%s: %s" % (k, parse.section.get(k)) \
+                for k in parse.section.keys() ])
+
+            # store package control file
+            cur.execute("""SELECT 1 FROM package
+                WHERE (package, version, arch) = (%s, %s, %s)""",
+                [package, version, arch])
+            found = cur.fetchone()
+            if not found: # this is the first time we see this package
+
+                # Source is either absent (same name and version as the
+                # binary), "name", or "name (version)"
+                sourcefield = parse.section.get('Source')
+                if sourcefield:
+                    match = source_re.match(sourcefield)
+                    if match:
+                        source, source_version = match.group(1), match.group(2)
+                    else:
+                        source, source_version = sourcefield, version
+                else:
+                    source, source_version = package, version
+                if binnmu_re.search(source_version):
+                    print("WARNING: %s %s %s: source %s %s for %s %s (%s) has binnmu version number, fixing" % \
+                        (distribution, component, architecture, source, source_version,
+                         package, version, arch))
+                    source_version = binnmu_re.sub('', source_version)
+
+#               # find link to source package
+#               cur.execute("""SELECT 1 FROM source
+#                   WHERE (source, srcversion) = (%s, %s)""",
+#                   [source, source_version])
+#               found = cur.fetchone()
+#               if not found:
+#                   print("WARNING: %s %s %s: source %s %s for %s %s (%s) not found" % \
+#                       (distribution, component, architecture, source, source_version,
+#                        package, version, arch))
+
+                cur.execute("""INSERT INTO package (package, version, arch, control, c, source, srcversion, time)
+                    VALUES (%s, %s, %s, %s, control2jsonb(%s), %s, %s, %s)""",
+                    [package, version, arch, control, control, source, source_version, ctime])
+
+            # finally, add the package to the suite's package list
+            cur.execute("""INSERT INTO packagelist
+                (distribution, component, architecture, package, version, arch)
+                VALUES (%s, %s, %s, %s, %s, %s)""",
+                [distribution, component, architecture, package, version, arch])
+
+    # a truncated or corrupt index must not be committed as if it were complete
+    if bzcat.returncode != 0:
+        raise Exception("bzcat %s failed with exit status %d" % (packagesfile, bzcat.returncode))
+
+    # record list entries that are new to this suite in the history table
+    cur.execute("""INSERT INTO packagehist
+        (distribution, component, architecture, package, version, arch, time)
+        SELECT distribution, component, architecture, package, version, arch, %s
+        FROM packagelist l
+        WHERE (distribution, component, architecture) = (%s, %s, %s) AND NOT EXISTS
+            (SELECT * FROM packagehist h WHERE
+                (l.distribution, l.component, l.architecture, l.package, l.version, l.arch) =
+                (h.distribution, h.component, h.architecture, h.package, h.version, h.arch))""",
+        [ctime, distribution, component, architecture])
+
+(packagesfile, distribution, component, architecture) = (sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+if distribution not in packagesfile:
+    raise NameError("distribution is not part of filename")
+if component not in packagesfile:
+    raise NameError("component is not part of filename")
+if architecture not in packagesfile:
+    raise NameError("architecture is not part of filename")
+# sys.argv holds the script name plus four mandatory arguments, so only a
+# further (fifth) argument switches on verbose output; the former "> 4"
+# test was always true
+verbose = len(sys.argv) > 5
+
+cur.execute("""SELECT extract (epoch from last_update) AS last_update
+    FROM distribution
+    WHERE (distribution, component, architecture) = (%s, %s, %s)
+    FOR UPDATE NOWAIT""", [distribution, component, architecture])
+row = cur.fetchone()
+# no row yet, or a row whose last_update is NULL: treat both as "never imported"
+last_update = row[0] if row else None
+
+# Compare and store whole seconds as an explicit UTC timestamp.  The
+# previous time.ctime() string carried neither fractional seconds nor a
+# time zone, so a file with a sub-second mtime always looked newer than
+# the stored stamp and was re-imported on every run.
+mtime = int(os.path.getmtime(packagesfile))
+ctime = datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc)
+if last_update is None or mtime > last_update:
+    if verbose:
+        print("Reading %s" % packagesfile)
+
+    update = """INSERT INTO distribution (distribution, component, architecture, last_update) VALUES (%s, %s, %s, %s)
+        ON CONFLICT (distribution, component, architecture) DO UPDATE SET last_update = %s"""
+    cur.execute(update, [distribution, component, architecture, ctime, ctime])
+
+    parseFile(distribution, component, architecture, packagesfile, ctime)
+
+    pg.commit()
+else:
+    if verbose:
+        print("Skipping up to date %s" % packagesfile)
diff --git a/repo/bin/import-sourcesfile b/repo/bin/import-sourcesfile
new file mode 100755
index 0000000..dd9cec4
--- /dev/null
+++ b/repo/bin/import-sourcesfile
@@ -0,0 +1,118 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2009-2013, 2020 Christoph Berg <[email protected]>
+#
+# getdpkginfo by
+# Copyright (C) 2005 Jeroen van Wolffelaar <[email protected]>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import apt_pkg, psycopg2, datetime, os, re, subprocess, sys, time
+
+pg = psycopg2.connect('service=pgapt')
+cur = pg.cursor()
+
+def parseSourceFile(distribution, component, packagesfile, ctime):
+    """Load one bzip2-compressed Sources index into the database.
+
+    Replaces the sourcelist rows of (distribution, component) with the
+    stanzas found in packagesfile, inserts source packages that are not
+    yet in the source table (stamped with ctime), and records list
+    entries that are new to the suite in sourcehist.
+
+    All statements run in the transaction psycopg2 opened for the
+    caller's first statement; the caller commits.
+
+    Raises Exception if packagesfile does not exist or bzcat fails.
+    """
+    if not os.path.isfile(packagesfile):
+        raise Exception("%s not found" % packagesfile)
+
+    # clear the suite (no explicit BEGIN: psycopg2 already opened the
+    # transaction when the caller ran its first statement)
+    cur.execute("""DELETE FROM sourcelist WHERE
+        (distribution, component) = (%s, %s)""",
+        [distribution, component])
+
+    # Decompress through an argument list instead of a shell command line,
+    # so quotes or other metacharacters in the path cannot change the command.
+    with subprocess.Popen(["bzcat", packagesfile], stdout=subprocess.PIPE) as bzcat:
+        parse = apt_pkg.TagFile(bzcat.stdout)
+
+        # for every package ...
+        while parse.step():
+            package = parse.section.get('Package')
+            version = parse.section.get('Version')
+            control = "\n".join([ "%s: %s" % (k, parse.section.get(k)) \
+                for k in parse.section.keys() ])
+
+            # store package control file
+            cur.execute("""SELECT 1 FROM source
+                WHERE (source, srcversion) = (%s, %s)""",
+                [package, version])
+            found = cur.fetchone()
+            if not found: # this is the first time we see this package
+                cur.execute("""INSERT INTO source (source, srcversion, control, c, time)
+                    VALUES (%s, %s, %s, control2jsonb(%s), %s)""",
+                    [package, version, control, control, ctime])
+
+            # finally, add the package to the suite's package list
+            cur.execute("""INSERT INTO sourcelist
+                (distribution, component, source, srcversion)
+                VALUES (%s, %s, %s, %s)""",
+                [distribution, component, package, version])
+
+    # a truncated or corrupt index must not be committed as if it were complete
+    if bzcat.returncode != 0:
+        raise Exception("bzcat %s failed with exit status %d" % (packagesfile, bzcat.returncode))
+
+    # record info in suite history table; source packages belong in
+    # sourcehist/sourcelist (columns source, srcversion), not in the
+    # binary packagehist/packagelist tables
+    cur.execute("""INSERT INTO sourcehist
+        (distribution, component, source, srcversion, time)
+        SELECT distribution, component, source, srcversion, %s
+        FROM sourcelist l
+        WHERE (distribution, component) = (%s, %s) AND NOT EXISTS
+            (SELECT * FROM sourcehist h WHERE
+                (l.distribution, l.component, l.source, l.srcversion) =
+                (h.distribution, h.component, h.source, h.srcversion))""",
+        [ctime, distribution, component])
+
+(packagesfile, distribution, component) = (sys.argv[1], sys.argv[2], sys.argv[3])
+if distribution not in packagesfile:
+    raise NameError("distribution is not part of filename")
+if component not in packagesfile:
+    raise NameError("component is not part of filename")
+# sys.argv holds the script name plus three mandatory arguments, so only a
+# further (fourth) argument switches on verbose output; the former "> 3"
+# test was always true
+verbose = len(sys.argv) > 4
+
+cur.execute("""SELECT extract (epoch from last_update) AS last_update
+    FROM srcdistribution
+    WHERE (distribution, component) = (%s, %s)
+    FOR UPDATE NOWAIT""", [distribution, component])
+row = cur.fetchone()
+# no row yet, or a row whose last_update is NULL: treat both as "never imported"
+last_update = row[0] if row else None
+
+# Compare and store whole seconds as an explicit UTC timestamp.  The
+# previous time.ctime() string carried neither fractional seconds nor a
+# time zone, so a file with a sub-second mtime always looked newer than
+# the stored stamp and was re-imported on every run.
+mtime = int(os.path.getmtime(packagesfile))
+ctime = datetime.datetime.fromtimestamp(mtime, datetime.timezone.utc)
+if last_update is None or mtime > last_update:
+    if verbose:
+        print("Reading %s" % packagesfile)
+
+    update = """INSERT INTO srcdistribution (distribution, component, last_update) VALUES (%s, %s, %s)
+        ON CONFLICT (distribution, component) DO UPDATE SET last_update = %s"""
+    cur.execute(update, [distribution, component, ctime, ctime])
+
+    parseSourceFile(distribution, component, packagesfile, ctime)
+
+    pg.commit()
+else:
+    if verbose:
+        print("Skipping up to date %s" % packagesfile)
+
+# Insert missing source packages:
+# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND p.version = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source';
+# Insert missing source packages, packages with broken/missing binnmu Source: fields:
+# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND regexp_replace (p.version, E'\\+b\\d+$', '') = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source';
+
 |