summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Berg2020-02-21 14:24:41 +0000
committerChristoph Berg2020-02-21 14:24:41 +0000
commitd9f446a9d768d0a3626467448f16401a6d07ff26 (patch)
tree27cd2c8b5d54acb746a1fb5457b6016555ea2654
parenta073f5cd5805950accb2a407c88d278f3770affd (diff)
Import package data into database
-rwxr-xr-xpgapt-db/import-packagelists211
-rw-r--r--pgapt-db/sql/pgdg_apt.sql64
-rw-r--r--pgapt-db/sql/pgdg_apt_data.sql22
-rw-r--r--pgapt-db/sql/qa_apt.sql200
-rw-r--r--pgapt-db/sql/qa_apt_distribution.sql17
-rw-r--r--pgapt-db/sql/qa_apt_suite.sql50
-rw-r--r--pgapt-db/sql/qa_public.sql9
-rwxr-xr-xrepo/bin/import-dists25
-rwxr-xr-xrepo/bin/import-packagesfile143
-rwxr-xr-xrepo/bin/import-sourcesfile118
10 files changed, 345 insertions, 514 deletions
diff --git a/pgapt-db/import-packagelists b/pgapt-db/import-packagelists
deleted file mode 100755
index 4edbbbe..0000000
--- a/pgapt-db/import-packagelists
+++ /dev/null
@@ -1,211 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2009-2013 Christoph Berg <[email protected]>
-#
-# getdpkginfo by
-# Copyright (C) 2005 Jeroen van Wolffelaar <[email protected]>
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. The name of the author may not be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import apt_pkg, psycopg2, os, re, sys, time
-
-ftp_prefix = "/home/cbe/projects/postgresql/apt.postgresql.org"
-
-pg = psycopg2.connect('dbname=pgweb')
-cur = pg.cursor()
-cur.execute("SET search_path TO apt")
-
-def parseSourceFile(distribution, component, packagesfile):
- if not os.path.isfile(packagesfile):
- raise Exception("%s not found" % packagesfile)
- stdout = os.popen("bzcat '%s'" % packagesfile)
- #stdout = file(packagesfile)
- parse = apt_pkg.TagFile(stdout)
-
- # clear the suite
- cur.execute("BEGIN")
- cur.execute("""DELETE FROM sourcelist WHERE
- (distribution, component) = (%s, %s)""",
- [distribution, component])
-
- # for every package ...
- while parse.step():
- package = parse.section.get('Package')
- version = parse.section.get('Version')
- control = "\n".join([ "%s: %s" % (k, parse.section.get(k)) \
- for k in parse.section.keys() ])
-
- # store package control file
- cur.execute("""SELECT 1 FROM source
- WHERE (source, srcversion) = (%s, %s)""",
- [package, version])
- found = cur.fetchone()
- if not found: # this is the first time we see this package, extract more information
- cur.execute("""INSERT INTO source (source, srcversion, control, c)
- VALUES (%s, %s, %s, control2hstore(%s))""",
- [package, version, control, control])
-
- # finally, add the package to the suite's package list
- cur.execute("""INSERT INTO sourcelist
- (distribution, component, source, srcversion)
- VALUES (%s, %s, %s, %s)""",
- [distribution, component, package, version])
-
- cur.execute("COMMIT")
-
-def parseFile(distribution, component, architecture, packagesfile):
- if not os.path.isfile(packagesfile):
- raise Exception("%s not found" % packagesfile)
- stdout = os.popen("bzcat '%s'" % packagesfile)
- #stdout = file(packagesfile)
- parse = apt_pkg.TagFile(stdout)
-
- source_re = re.compile('(.*) \((.*)\)')
- binnmu_re = re.compile('\+b\d+$')
-
- # clear the suite
- cur.execute("BEGIN")
- cur.execute("""DELETE FROM packagelist WHERE
- (distribution, component, architecture) = (%s, %s, %s)""",
- [distribution, component, architecture])
-
- # for every package ...
- while parse.step():
- package = parse.section.get('Package')
- version = parse.section.get('Version')
- arch = parse.section.get('Architecture')
- control = "\n".join([ "%s: %s" % (k, parse.section.get(k)) \
- for k in parse.section.keys() ])
-
- # store package control file
- cur.execute("""SELECT 1 FROM package
- WHERE (package, version, arch) = (%s, %s, %s)""",
- [package, version, arch])
- found = cur.fetchone()
- if not found: # this is the first time we see this package, extract more information
-
- sourcefield = parse.section.get('Source')
- if sourcefield:
- match = source_re.match(sourcefield)
- if match:
- source, source_version = match.group(1), match.group(2)
- else:
- source, source_version = sourcefield, version
- else:
- source, source_version = package, version
- if binnmu_re.search(source_version):
- print "WARNING: %s %s %s: source %s %s for %s %s (%s) has binnmu version number, fixing" % \
- (distribution, component, architecture, source, source_version,
- package, version, arch)
- source_version = binnmu_re.sub('', source_version)
-
-# # find link to source package
-# cur.execute("""SELECT 1 FROM source
-# WHERE (source, srcversion) = (%s, %s)""",
-# [source, source_version])
-# found = cur.fetchone()
-# if not found:
-# print "WARNING: %s %s %s: source %s %s for %s %s (%s) not found" % \
-# (distribution, component, architecture, source, source_version,
-# package, version, arch)
-
- cur.execute("""INSERT INTO package (package, version, arch, control, c, source, srcversion)
- VALUES (%s, %s, %s, %s, control2hstore(%s), %s, %s)""",
- [package, version, arch, control, control, source, source_version])
-
- # finally, add the package to the suite's package list
- cur.execute("""INSERT INTO packagelist
- (distribution, component, architecture, package, version, arch)
- VALUES (%s, %s, %s, %s, %s, %s)""",
- [distribution, component, architecture, package, version, arch])
-
- cur.execute("COMMIT")
-
-verbose = sys.argv.__len__() > 1
-
-cur.execute("""SELECT distribution, component,
- extract (epoch from last_update) AS last_update
- FROM srcdistribution
- WHERE active
- FOR UPDATE NOWAIT""")
-suites = cur.fetchall()
-
-for distribution, component, last_update in suites:
- packagesfile = ftp_prefix + "/dists/%s/%s/source/Sources.bz2" % \
- (distribution, component)
-
- if not os.path.exists(packagesfile):
- print "WARNING:", packagesfile, "not found"
- continue
-
- mtime = os.path.getmtime(packagesfile)
- if not last_update or mtime > last_update:
- if verbose:
- print "Reading %s" % packagesfile
- parseSourceFile(distribution, component, packagesfile)
-
- update = """UPDATE srcdistribution SET last_update = %s WHERE
- (distribution, component) = (%s, %s)"""
- cur.execute(update, [time.ctime(mtime), distribution, component])
- cur.execute("COMMIT")
- else:
- if verbose:
- print "Skipping up to date %s" % packagesfile
-
-cur.execute("""SELECT distribution, component, architecture,
- extract (epoch from last_update) AS last_update
- FROM distribution
- WHERE active
- ORDER BY CASE WHEN architecture = 'source' THEN 1 ELSE 2 END
- FOR UPDATE NOWAIT""")
-suites = cur.fetchall()
-
-for distribution, component, architecture, last_update in suites:
- packagesfile = ftp_prefix + "/dists/%s/%s/binary-%s/Packages.bz2" % \
- (distribution, component, architecture)
-
- if not os.path.exists(packagesfile):
- print "WARNING:", packagesfile, "not found"
- continue
-
- mtime = os.path.getmtime(packagesfile)
- if not last_update or mtime > last_update:
- if verbose:
- print "Reading %s" % packagesfile
- parseFile(distribution, component, architecture, packagesfile)
-
- update = """UPDATE distribution SET last_update = %s WHERE
- (distribution, component, architecture) = (%s, %s, %s)"""
- cur.execute(update, [time.ctime(mtime), distribution, component, architecture])
- cur.execute("COMMIT")
- else:
- if verbose:
- print "Skipping up to date %s" % packagesfile
-
-# Insert missing source packages:
-# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND p.version = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source';
-# Insert missing source packages, packages with broken/missing binnmu Source: fieds:
-# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND regexp_replace (p.version, E'\\+b\\d+$', '') = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source';
-
diff --git a/pgapt-db/sql/pgdg_apt.sql b/pgapt-db/sql/pgdg_apt.sql
index 1d29bc8..c6123ca 100644
--- a/pgapt-db/sql/pgdg_apt.sql
+++ b/pgapt-db/sql/pgdg_apt.sql
@@ -1,17 +1,24 @@
BEGIN;
CREATE SCHEMA apt;
-SET search_path TO apt;
+SET search_path TO apt, public;
-CREATE EXTENSION debversion;
-CREATE EXTENSION hstore;
+--CREATE EXTENSION debversion;
-CREATE OR REPLACE FUNCTION control2hstore (control text)
-RETURNS hstore LANGUAGE sql IMMUTABLE AS
-$$SELECT regexp_replace (regexp_replace ($1, '([\\"])', '\\\1', 'g'),
- E'^([^:]*): (.*(?:\n .*)*)', '"\1"=>"\2",', 'gn')::hstore$$;
--- intentionally no E'' in the first line
+-- list2jsonb: split a text value at every newline-plus-space sequence and
+-- return the pieces as a jsonb array.
+CREATE OR REPLACE FUNCTION list2jsonb (list text)
+RETURNS jsonb LANGUAGE sql IMMUTABLE AS
+$$SELECT jsonb_agg(m) from regexp_split_to_table($1, E'\n ') m$$;
+
+-- control2jsonb: turn the text of a control stanza into one jsonb object.
+-- Each "Field: value" match becomes a key (the field name, lower-cased).
+-- Files, Checksums-Sha1 and Checksums-Sha256 values are converted to arrays
+-- with list2jsonb; every other value is stored as a jsonb string.
+-- A newline is appended to the input before matching.
+CREATE OR REPLACE FUNCTION control2jsonb (control text)
+RETURNS jsonb LANGUAGE sql IMMUTABLE AS
+$$SELECT jsonb_object_agg(lower(m[1]),
+ CASE WHEN m[1] IN ('Files', 'Checksums-Sha1', 'Checksums-Sha256') THEN
+ list2jsonb(m[2])
+ ELSE
+ to_jsonb(m[2])
+ END)
+ FROM regexp_matches($1||E'\n', E'^([^ :]*): ((.|\n )*)\n?', 'gm') m$$;
-- ARCHIVE-WIDE DATA
@@ -51,7 +58,8 @@ CREATE TABLE source (
source text NOT NULL,
srcversion debversion NOT NULL,
control text NOT NULL,
- c hstore,
+ c jsonb,
+ time timestamptz(0) NOT NULL,
PRIMARY KEY (source, srcversion)
);
@@ -62,9 +70,10 @@ CREATE TABLE package (
arch text NOT NULL
REFERENCES architecture (architecture),
control text NOT NULL,
- c hstore,
+ c jsonb,
source text NOT NULL,
srcversion debversion NOT NULL,
+ time timestamptz(0) NOT NULL,
PRIMARY KEY (package, version, arch)
);
@@ -102,6 +111,41 @@ CREATE INDEX ON packagelist (distribution, component, architecture);
CREATE INDEX ON packagelist (package);
+-- HISTORY
+
+-- sourcehist: (distribution, component, source, srcversion) combinations
+-- together with a timestamp. Rows must reference an existing srcdistribution
+-- entry and an existing source entry. No primary key is declared.
+CREATE TABLE sourcehist (
+ distribution text NOT NULL,
+ component text NOT NULL,
+ source text NOT NULL,
+ srcversion debversion NOT NULL,
+ time timestamptz(0) NOT NULL,
+
+ FOREIGN KEY (distribution, component) REFERENCES srcdistribution (distribution, component),
+ FOREIGN KEY (source, srcversion) REFERENCES source (source, srcversion)
+);
+CREATE INDEX ON sourcehist (distribution, component);
+CREATE INDEX ON sourcehist (source);
+
+-- packagehist: (distribution, component, architecture, package, version,
+-- arch) combinations together with a timestamp. Rows must reference an
+-- existing distribution entry and an existing package entry. No primary key
+-- is declared.
+CREATE TABLE packagehist (
+ distribution text NOT NULL,
+ component text NOT NULL,
+ architecture text NOT NULL,
+ package text NOT NULL,
+ version debversion NOT NULL,
+ arch text NOT NULL,
+ time timestamptz(0) NOT NULL,
+ -- the package's own arch must equal the suite architecture unless it is 'all'
+ CHECK ((architecture = arch) OR (arch = 'all')),
+
+ FOREIGN KEY (distribution, component, architecture)
+ REFERENCES distribution (distribution, component, architecture),
+ FOREIGN KEY (package, version, arch) REFERENCES package (package, version, arch)
+);
+CREATE INDEX ON packagehist (distribution, component, architecture);
+CREATE INDEX ON packagehist (package);
+
+
+-- ACLs
+
GRANT USAGE ON SCHEMA apt TO PUBLIC;
GRANT SELECT ON ALL TABLES IN SCHEMA apt TO PUBLIC;
diff --git a/pgapt-db/sql/pgdg_apt_data.sql b/pgapt-db/sql/pgdg_apt_data.sql
index 4ee4272..565ff4f 100644
--- a/pgapt-db/sql/pgdg_apt_data.sql
+++ b/pgapt-db/sql/pgdg_apt_data.sql
@@ -1,21 +1,9 @@
BEGIN;
-INSERT INTO architecture
- SELECT * FROM (VALUES ('all'), ('amd64'), ('i386')) arch(architecture)
- WHERE NOT EXISTS (SELECT * FROM architecture
- WHERE architecture = arch.architecture);
-
-INSERT INTO srcdistribution
- SELECT * FROM (VALUES ('sid-pgdg'), ('wheezy-pgdg'), ('squeeze-pgdg')) dist(distribution),
- (VALUES ('main'), ('9.2'), ('9.1'), ('9.0'), ('8.4'), ('8.3')) comp(component)
- WHERE NOT EXISTS (SELECT * FROM srcdistribution
- WHERE (distribution, component) = (dist.distribution, comp.component));
-
-INSERT INTO distribution
- SELECT * FROM (VALUES ('sid-pgdg'), ('wheezy-pgdg'), ('squeeze-pgdg')) dist(distribution),
- (VALUES ('main'), ('9.2'), ('9.1'), ('9.0'), ('8.4'), ('8.3')) comp(component),
- (VALUES ('amd64'), ('i386')) arch(architecture)
- WHERE NOT EXISTS (SELECT * FROM distribution
- WHERE (distribution, component, architecture) = (dist.distribution, comp.component, arch.architecture));
+INSERT INTO architecture VALUES
+ ('all'),
+ ('amd64'),
+ ('i386'),
+ ('ppc64el');
COMMIT;
diff --git a/pgapt-db/sql/qa_apt.sql b/pgapt-db/sql/qa_apt.sql
deleted file mode 100644
index 46e7f4a..0000000
--- a/pgapt-db/sql/qa_apt.sql
+++ /dev/null
@@ -1,200 +0,0 @@
-BEGIN;
-
-CREATE SCHEMA apt;
-SET search_path TO apt;
-
-
--- ARCHIVE-WIDE DATA
-
-CREATE TABLE architecture (
- architecture text PRIMARY KEY
-);
-COMMENT ON TABLE architecture IS 'All known architectures, including source and all';
-
-
-CREATE TABLE suite (
- suite_id serial PRIMARY KEY,
- archive text NOT NULL,
- suite text NOT NULL,
- component text NOT NULL,
- architecture text NOT NULL
- REFERENCES architecture (architecture),
- last_update timestamp with time zone,
- active boolean NOT NULL DEFAULT TRUE
-);
-CREATE UNIQUE INDEX suite__archive_suite_component_architecture
-ON suite (archive, suite, component, architecture);
-COMMENT ON TABLE suite IS 'All known archives and suites';
-
-
--- Hopefully this is the only table that needs to be updated at release time
-CREATE TABLE distribution (
- distribution text PRIMARY KEY,
- archive text NOT NULL,
- suite text NOT NULL
-);
-CREATE UNIQUE INDEX distribution__archive_suite ON distribution (archive, suite);
-COMMENT ON TABLE distribution IS 'Symbolic names for archive/suite/* combinations';
-
-
--- PACKAGE DATA
-
-CREATE TABLE package (
- package_id serial PRIMARY KEY,
- package text NOT NULL,
- version public.debversion NOT NULL,
- pkg_architecture text NOT NULL
- REFERENCES architecture (architecture),
- source_id integer
- CONSTRAINT source_architecture CHECK (NOT (source_id IS NOT NULL AND pkg_architecture = 'source'))
-);
-COMMENT ON TABLE package IS 'All known packages and sources';
-ALTER TABLE package ADD FOREIGN KEY (source_id) REFERENCES package (package_id);
-
-CREATE INDEX package__package_version_pkg_architecture ON package (package, version, pkg_architecture);
-CREATE INDEX package__package_pkg_architecture ON package (package, pkg_architecture);
-
-
-CREATE TABLE package_control (
- package_id integer PRIMARY KEY REFERENCES package,
- control text NOT NULL
-);
-COMMENT ON TABLE package_control IS
-'Control files of all known packages and sources';
-
-
-/*
-CREATE TABLE package_source (
- package_id integer PRIMARY KEY REFERENCES package,
- source_id integer NOT NULL REFERENCES package (package_id)
- CHECK (package_id <> source_id)
-);
-CREATE INDEX package_source__source_id ON package_source (source_id);
-COMMENT ON TABLE package_source IS
-'Table relating binary packages to their source package';
-*/
-
-
--- SUITE DATA
-
-CREATE TABLE packagelist (
- suite_id integer NOT NULL REFERENCES suite,
- package_id integer NOT NULL REFERENCES package
- -- no PK
-);
-CREATE INDEX packagelist__suite_id ON packagelist (suite_id);
-CREATE INDEX packagelist__package_id ON packagelist (package_id);
-COMMENT ON TABLE packagelist IS 'Association of packages with suites';
-
-
--- SOURCE-SPECIFIC DATA
-
-CREATE TABLE maintainer (
- maintainer serial PRIMARY KEY,
- name text NOT NULL
-);
-
-CREATE OR REPLACE FUNCTION maint_id_or_new (pname text)
-RETURNS integer
-LANGUAGE plpgsql VOLATILE STRICT
-AS $$
-DECLARE
- id integer;
-BEGIN
- SELECT maintainer INTO id
- FROM maintainer
- WHERE name = pname;
- IF NOT FOUND THEN
- INSERT INTO maintainer (name) VALUES (pname)
- RETURNING maintainer INTO id;
- END IF;
- RETURN id;
-END;
-$$;
-
-CREATE OR REPLACE FUNCTION maint_id (pname text)
-RETURNS integer
-LANGUAGE SQL STABLE STRICT
-AS $$
- SELECT maintainer FROM apt.maintainer WHERE name = $1;
-$$;
-
-CREATE OR REPLACE FUNCTION maint_name (id integer)
-RETURNS text
-LANGUAGE SQL STABLE STRICT
-AS $$
- SELECT name FROM apt.maintainer WHERE maintainer = $1;
-$$;
-
-CREATE OR REPLACE FUNCTION apt.email_address (name text)
-RETURNS text
-LANGUAGE SQL IMMUTABLE STRICT
-AS $$
- SELECT regexp_replace ($1, E'.*<(.*)>.*', E'\\1');
-$$;
-
-CREATE INDEX maintainer__email_address ON maintainer (email_address (name));
-
-
-CREATE TABLE source (
- package_id integer PRIMARY KEY REFERENCES package,
- -- from sources:
- maintainer integer NOT NULL REFERENCES maintainer,
- section text NULL,
- priority text NULL,
- dm_upload_allowed boolean NOT NULL DEFAULT FALSE,
- -- from projectb: (added by a separate script, hence all NULL)
- changed_by integer REFERENCES maintainer (maintainer),
- signed_by integer REFERENCES maintainer (maintainer),
- date timestamp with time zone
-);
-CREATE INDEX source__maintainer ON source (maintainer);
-CREATE INDEX source__changed_by ON source (changed_by);
-CREATE INDEX source__signed_by ON source (signed_by);
-
-
-CREATE TABLE uploader (
- package_id integer REFERENCES source,
- maintainer integer REFERENCES maintainer,
- PRIMARY KEY (package_id, maintainer)
-);
-CREATE INDEX uploader__maintainer ON uploader (maintainer);
-
-CREATE OR REPLACE FUNCTION uploaders (package_id integer)
-RETURNS text[]
-LANGUAGE SQL STABLE STRICT
-AS $$
- SELECT array_agg (apt.maint_name (maintainer))
- FROM apt.uploader
- WHERE package_id = $1;
-$$;
-
-
--- EXTRA PACKAGE INFORMATION
-
-CREATE TABLE package_info (
- package_id integer REFERENCES package,
- field text,
- value text NOT NULL,
- PRIMARY KEY (package_id, field)
-);
-
-CREATE OR REPLACE FUNCTION apt.package_info (package_id integer, field text)
-RETURNS text
-LANGUAGE SQL STABLE STRICT
-AS $$
- SELECT value FROM apt.package_info
- WHERE package_id = $1 AND field = $2;
-$$;
-
-
--- GRANTS
-
-GRANT USAGE ON SCHEMA apt TO PUBLIC;
-GRANT SELECT ON
- architecture, suite, distribution, release, package, package_control,
- package_source, packagelist, maintainer, source, uploader, package_info
- TO PUBLIC;
-
-
-COMMIT;
diff --git a/pgapt-db/sql/qa_apt_distribution.sql b/pgapt-db/sql/qa_apt_distribution.sql
deleted file mode 100644
index 734bdbb..0000000
--- a/pgapt-db/sql/qa_apt_distribution.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-BEGIN;
-
-SET search_path TO apt;
-
--- distribution table
-
-DELETE FROM apt.distribution;
-
-COPY apt.distribution (archive, suite, distribution) FROM STDIN WITH DELIMITER ' ';
-pgapt etch-pgapt etch-pgapt
-pgapt lenny-pgapt lenny-pgapt
-pgapt squeeze-pgapt squeeze-pgapt
-pgapt wheezy-pgapt wheezy-pgapt
-pgapt sid-pgapt sid-pgapt
-\.
-
-COMMIT;
diff --git a/pgapt-db/sql/qa_apt_suite.sql b/pgapt-db/sql/qa_apt_suite.sql
deleted file mode 100644
index 003d11e..0000000
--- a/pgapt-db/sql/qa_apt_suite.sql
+++ /dev/null
@@ -1,50 +0,0 @@
-BEGIN;
-
-SET search_path TO apt;
-
--- architecture table
-
-CREATE TEMP TABLE a ( architecture text ) ON COMMIT DROP;
-INSERT INTO a VALUES ('source');
-INSERT INTO a VALUES ('amd64');
-INSERT INTO a VALUES ('i386');
--- no 'binary' here
-
-INSERT INTO architecture
- SELECT architecture FROM a
- WHERE architecture NOT IN (SELECT architecture FROM architecture);
-INSERT INTO architecture
- SELECT 'all' WHERE 'all' NOT IN (SELECT architecture FROM architecture);
-INSERT INTO architecture
- SELECT 'binary' WHERE 'binary' NOT IN (SELECT architecture FROM architecture);
-
--- suite table
-
-CREATE TEMP TABLE s ( suite text ) ON COMMIT DROP;
-INSERT INTO s VALUES ('etch');
-INSERT INTO s VALUES ('lenny');
-INSERT INTO s VALUES ('squeeze');
-INSERT INTO s VALUES ('wheezy');
-INSERT INTO s VALUES ('sid');
-
-CREATE TEMP TABLE c ( component text ) ON COMMIT DROP;
-INSERT INTO c VALUES ('main');
-
-CREATE TEMP TABLE tmp_suite (
- archive text NOT NULL,
- suite text NOT NULL,
- component text NOT NULL,
- architecture text NOT NULL
-) ON COMMIT DROP;
-
-INSERT INTO tmp_suite (archive, suite, component, architecture)
- SELECT 'pgapt', suite||'-pgapt', component, architecture FROM s, c, a;
-
--- copy missing data from temp table over
-INSERT INTO suite (archive, suite, component, architecture)
- SELECT archive, suite, component, architecture
- FROM tmp_suite
- WHERE (archive, suite, component, architecture) NOT IN
- (SELECT archive, suite, component, architecture FROM suite);
-
-COMMIT;
diff --git a/pgapt-db/sql/qa_public.sql b/pgapt-db/sql/qa_public.sql
deleted file mode 100644
index a37b5ec..0000000
--- a/pgapt-db/sql/qa_public.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-BEGIN;
-
-SET search_path TO public;
-
-CREATE EXTENSION plpgsql;
-
-CREATE EXTENSION debversion;
-
-COMMIT;
diff --git a/repo/bin/import-dists b/repo/bin/import-dists
new file mode 100755
index 0000000..169e98c
--- /dev/null
+++ b/repo/bin/import-dists
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+# Walk dists/*-pgdg/<component>/ and hand every Sources.bz2 and
+# binary-<arch>/Packages.bz2 to bin/import-sourcesfile and
+# bin/import-packagesfile respectively.
+
+set -eu
+
+cd "$(dirname "$0")/.."
+
+for distdir in dists/*-pgdg; do
+    # an unmatched glob is passed through literally; skip it
+    test -d "$distdir" || continue
+    distribution=${distdir##*/}
+
+    for componentdir in "$distdir"/*; do
+        test -d "$componentdir" || continue
+        component=${componentdir##*/}
+
+        # import Sources.bz2
+        bin/import-sourcesfile "$componentdir/source/Sources.bz2" "$distribution" "$component"
+
+        for archdir in "$componentdir"/binary-*; do
+            # skip the literal pattern when no binary-* directory exists
+            test -d "$archdir" || continue
+            architecture=${archdir##*/binary-}
+
+            # import Packages.bz2
+            bin/import-packagesfile "$archdir/Packages.bz2" "$distribution" "$component" "$architecture"
+        done
+
+    done
+done
diff --git a/repo/bin/import-packagesfile b/repo/bin/import-packagesfile
new file mode 100755
index 0000000..15fc40c
--- /dev/null
+++ b/repo/bin/import-packagesfile
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2009-2013, 2020 Christoph Berg <[email protected]>
+#
+# getdpkginfo by
+# Copyright (C) 2005 Jeroen van Wolffelaar <[email protected]>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import apt_pkg, psycopg2, os, re, sys, time
+
+# Connection parameters come from the libpq connection service named "pgapt".
+pg = psycopg2.connect('service=pgapt')
+# Module-level cursor used by parseFile() and by the script body below.
+cur = pg.cursor()
+
+def parseFile(distribution, component, architecture, packagesfile, ctime):
+    """Import one bzip2-compressed Packages index into the database.
+
+    Deletes the packagelist rows of the given suite and re-inserts one row
+    per stanza in packagesfile.  Packages not yet present in the package
+    table are inserted there as well, and packagelist rows of the suite
+    that are not yet in packagehist are appended to it.
+
+    distribution, component, architecture -- suite the file belongs to
+    packagesfile -- path of the Packages.bz2 file
+    ctime -- value stored in the "time" column of newly inserted rows
+
+    Raises Exception if packagesfile is not a regular file or if bzcat
+    exits with an error.  Issues BEGIN but never COMMIT; committing is
+    left to the caller.
+    """
+    if not os.path.isfile(packagesfile):
+        raise Exception("%s not found" % packagesfile)
+    stdout = os.popen("bzcat '%s'" % packagesfile)
+    parse = apt_pkg.TagFile(stdout)
+
+    # raw strings: "\(", "\+" and "\d" are not valid string escapes
+    source_re = re.compile(r'(.*) \((.*)\)')
+    binnmu_re = re.compile(r'\+b\d+$')
+
+    # clear the suite
+    cur.execute("BEGIN")
+    cur.execute("""DELETE FROM packagelist WHERE
+        (distribution, component, architecture) = (%s, %s, %s)""",
+        [distribution, component, architecture])
+
+    # for every package ...
+    while parse.step():
+        package = parse.section.get('Package')
+        version = parse.section.get('Version')
+        arch = parse.section.get('Architecture')
+        control = "\n".join(["%s: %s" % (k, parse.section.get(k))
+                             for k in parse.section.keys()])
+
+        # store package control file
+        cur.execute("""SELECT 1 FROM package
+            WHERE (package, version, arch) = (%s, %s, %s)""",
+            [package, version, arch])
+        found = cur.fetchone()
+        if not found:  # this is the first time we see this package
+
+            # "Source: name (version)" carries an explicit source version,
+            # "Source: name" reuses the binary version, and a missing
+            # Source field means the source is named like the binary.
+            sourcefield = parse.section.get('Source')
+            if sourcefield:
+                match = source_re.match(sourcefield)
+                if match:
+                    source, source_version = match.group(1), match.group(2)
+                else:
+                    source, source_version = sourcefield, version
+            else:
+                source, source_version = package, version
+            if binnmu_re.search(source_version):
+                print("WARNING: %s %s %s: source %s %s for %s %s (%s) has binnmu version number, fixing" %
+                      (distribution, component, architecture, source, source_version,
+                       package, version, arch))
+                source_version = binnmu_re.sub('', source_version)
+
+            # disabled check: find link to source package
+            # cur.execute("""SELECT 1 FROM source
+            #     WHERE (source, srcversion) = (%s, %s)""",
+            #     [source, source_version])
+            # found = cur.fetchone()
+            # if not found:
+            #     print("WARNING: %s %s %s: source %s %s for %s %s (%s) not found" %
+            #           (distribution, component, architecture, source, source_version,
+            #            package, version, arch))
+
+            cur.execute("""INSERT INTO package (package, version, arch, control, c, source, srcversion, time)
+                VALUES (%s, %s, %s, %s, control2jsonb(%s), %s, %s, %s)""",
+                [package, version, arch, control, control, source, source_version, ctime])
+
+        # finally, add the package to the suite's package list
+        cur.execute("""INSERT INTO packagelist
+            (distribution, component, architecture, package, version, arch)
+            VALUES (%s, %s, %s, %s, %s, %s)""",
+            [distribution, component, architecture, package, version, arch])
+
+    # close() on an os.popen() object returns None on success and the exit
+    # status otherwise; stop here so that a truncated list is not committed.
+    if stdout.close() is not None:
+        raise Exception("bzcat %s failed" % packagesfile)
+
+    # record list entries that are not yet in the suite history table
+    cur.execute("""INSERT INTO packagehist
+        (distribution, component, architecture, package, version, arch, time)
+        SELECT distribution, component, architecture, package, version, arch, %s
+        FROM packagelist l
+        WHERE (distribution, component, architecture) = (%s, %s, %s) AND NOT EXISTS
+            (SELECT * FROM packagehist h WHERE
+                (l.distribution, l.component, l.architecture, l.package, l.version, l.arch) =
+                (h.distribution, h.component, h.architecture, h.package, h.version, h.arch))""",
+        [ctime, distribution, component, architecture])
+
+# usage: import-packagesfile PACKAGESFILE DISTRIBUTION COMPONENT ARCHITECTURE [VERBOSE]
+(packagesfile, distribution, component, architecture) = (sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+# guard against mixed-up arguments: each suite coordinate must occur in the path
+if distribution not in packagesfile:
+    raise NameError("distribution is not part of filename")
+if component not in packagesfile:
+    raise NameError("component is not part of filename")
+if architecture not in packagesfile:
+    raise NameError("architecture is not part of filename")
+# sys.argv holds the script name plus the four mandatory arguments, i.e.
+# five entries; only an additional argument turns on progress messages
+verbose = len(sys.argv) > 5
+
+# lock the suite row; with NOWAIT the statement raises an error instead of
+# waiting when the row is already locked
+cur.execute("""SELECT extract (epoch from last_update) AS last_update
+    FROM distribution
+    WHERE (distribution, component, architecture) = (%s, %s, %s)
+    FOR UPDATE NOWAIT""", [distribution, component, architecture])
+last_update = cur.fetchone()
+
+mtime = os.path.getmtime(packagesfile)
+ctime = time.ctime(mtime)
+# import when the suite has no row yet, when its last_update is NULL, or
+# when the file is newer than the recorded import
+if not last_update or last_update[0] is None or mtime > last_update[0]:
+    if verbose:
+        print("Reading %s" % packagesfile)
+
+    update = """INSERT INTO distribution (distribution, component, architecture, last_update) VALUES (%s, %s, %s, %s)
+        ON CONFLICT (distribution, component, architecture) DO UPDATE SET last_update = %s"""
+    cur.execute(update, [distribution, component, architecture, ctime, ctime])
+
+    parseFile(distribution, component, architecture, packagesfile, ctime)
+
+    cur.execute("COMMIT")
+else:
+    if verbose:
+        print("Skipping up to date %s" % packagesfile)
diff --git a/repo/bin/import-sourcesfile b/repo/bin/import-sourcesfile
new file mode 100755
index 0000000..dd9cec4
--- /dev/null
+++ b/repo/bin/import-sourcesfile
@@ -0,0 +1,118 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2009-2013, 2020 Christoph Berg <[email protected]>
+#
+# getdpkginfo by
+# Copyright (C) 2005 Jeroen van Wolffelaar <[email protected]>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import apt_pkg, psycopg2, os, re, sys, time
+
+# Connection parameters come from the libpq connection service named "pgapt".
+pg = psycopg2.connect('service=pgapt')
+# Module-level cursor used by parseSourceFile() and by the script body below.
+cur = pg.cursor()
+
+def parseSourceFile(distribution, component, packagesfile, ctime):
+    """Import one bzip2-compressed Sources index into the database.
+
+    Deletes the sourcelist rows of the given distribution/component and
+    re-inserts one row per stanza in packagesfile.  Source packages not yet
+    present in the source table are inserted there as well, and sourcelist
+    rows of the suite that are not yet in sourcehist are appended to it.
+
+    distribution, component -- suite the file belongs to
+    packagesfile -- path of the Sources.bz2 file
+    ctime -- value stored in the "time" column of newly inserted rows
+
+    Raises Exception if packagesfile is not a regular file or if bzcat
+    exits with an error.  Issues BEGIN but never COMMIT; committing is
+    left to the caller.
+    """
+    if not os.path.isfile(packagesfile):
+        raise Exception("%s not found" % packagesfile)
+    stdout = os.popen("bzcat '%s'" % packagesfile)
+    parse = apt_pkg.TagFile(stdout)
+
+    # clear the suite
+    cur.execute("BEGIN")
+    cur.execute("""DELETE FROM sourcelist WHERE
+        (distribution, component) = (%s, %s)""",
+        [distribution, component])
+
+    # for every package ...
+    while parse.step():
+        package = parse.section.get('Package')
+        version = parse.section.get('Version')
+        control = "\n".join(["%s: %s" % (k, parse.section.get(k))
+                             for k in parse.section.keys()])
+
+        # store package control file
+        cur.execute("""SELECT 1 FROM source
+            WHERE (source, srcversion) = (%s, %s)""",
+            [package, version])
+        found = cur.fetchone()
+        if not found:  # this is the first time we see this package
+            cur.execute("""INSERT INTO source (source, srcversion, control, c, time)
+                VALUES (%s, %s, %s, control2jsonb(%s), %s)""",
+                [package, version, control, control, ctime])
+
+        # finally, add the package to the suite's package list
+        cur.execute("""INSERT INTO sourcelist
+            (distribution, component, source, srcversion)
+            VALUES (%s, %s, %s, %s)""",
+            [distribution, component, package, version])
+
+    # close() on an os.popen() object returns None on success and the exit
+    # status otherwise; stop here so that a truncated list is not committed.
+    if stdout.close() is not None:
+        raise Exception("bzcat %s failed" % packagesfile)
+
+    # record info in suite history table: source packages are tracked in
+    # sourcelist/sourcehist with the columns source and srcversion
+    cur.execute("""INSERT INTO sourcehist
+        (distribution, component, source, srcversion, time)
+        SELECT distribution, component, source, srcversion, %s
+        FROM sourcelist l
+        WHERE (distribution, component) = (%s, %s) AND NOT EXISTS
+            (SELECT * FROM sourcehist h WHERE
+                (l.distribution, l.component, l.source, l.srcversion) =
+                (h.distribution, h.component, h.source, h.srcversion))""",
+        [ctime, distribution, component])
+
+# usage: import-sourcesfile SOURCESFILE DISTRIBUTION COMPONENT [VERBOSE]
+(packagesfile, distribution, component) = (sys.argv[1], sys.argv[2], sys.argv[3])
+# guard against mixed-up arguments: each suite coordinate must occur in the path
+if distribution not in packagesfile:
+    raise NameError("distribution is not part of filename")
+if component not in packagesfile:
+    raise NameError("component is not part of filename")
+# sys.argv holds the script name plus the three mandatory arguments, i.e.
+# four entries; only an additional argument turns on progress messages
+verbose = len(sys.argv) > 4
+
+# lock the suite row; with NOWAIT the statement raises an error instead of
+# waiting when the row is already locked
+cur.execute("""SELECT extract (epoch from last_update) AS last_update
+    FROM srcdistribution
+    WHERE (distribution, component) = (%s, %s)
+    FOR UPDATE NOWAIT""", [distribution, component])
+last_update = cur.fetchone()
+
+mtime = os.path.getmtime(packagesfile)
+ctime = time.ctime(mtime)
+# import when the suite has no row yet, when its last_update is NULL, or
+# when the file is newer than the recorded import
+if not last_update or last_update[0] is None or mtime > last_update[0]:
+    if verbose:
+        print("Reading %s" % packagesfile)
+
+    update = """INSERT INTO srcdistribution (distribution, component, last_update) VALUES (%s, %s, %s)
+        ON CONFLICT (distribution, component) DO UPDATE SET last_update = %s"""
+    cur.execute(update, [distribution, component, ctime, ctime])
+
+    parseSourceFile(distribution, component, packagesfile, ctime)
+
+    cur.execute("COMMIT")
+else:
+    if verbose:
+        print("Skipping up to date %s" % packagesfile)
+
+# Insert missing source packages:
+# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND p.version = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source';
+# Insert missing source packages, packages with broken/missing binnmu Source: fields:
+# INSERT INTO package_source SELECT p.package_id, s.package_id as source_id from package p JOIN package s ON (p.package = s.package AND regexp_replace (p.version, E'\\+b\\d+$', '') = s.version AND s.pkg_architecture='source') where p.package_id not in (select package_id from package_source ) and p.pkg_architecture <> 'source';
+